Annotation of embedaddon/curl/docs/examples/htmltitle.cpp, revision 1.1.1.1
1.1 misho 1: /***************************************************************************
2: * _ _ ____ _
3: * Project ___| | | | _ \| |
4: * / __| | | | |_) | |
5: * | (__| |_| | _ <| |___
6: * \___|\___/|_| \_\_____|
7: *
8: * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al.
9: *
10: * This software is licensed as described in the file COPYING, which
11: * you should have received as part of this distribution. The terms
12: * are also available at https://curl.haxx.se/docs/copyright.html.
13: *
14: * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15: * copies of the Software, and permit persons to whom the Software is
16: * furnished to do so, under the terms of the COPYING file.
17: *
18: * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19: * KIND, either express or implied.
20: *
21: ***************************************************************************/
22: /* <DESC>
23: * Get a web page, extract the title with libxml.
24: * </DESC>
25:
26: Written by Lars Nilsson
27:
28: GNU C++ compile command line suggestion (edit paths accordingly):
29:
30: g++ -Wall -I/opt/curl/include -I/opt/libxml/include/libxml2 htmltitle.cpp \
31: -o htmltitle -L/opt/curl/lib -L/opt/libxml/lib -lcurl -lxml2
32: */
33: #include <stdio.h>
34: #include <string.h>
35: #include <stdlib.h>
36: #include <string>
37: #include <curl/curl.h>
38: #include <libxml/HTMLparser.h>
39:
40: //
41: // Case-insensitive string comparison
42: //
43:
// COMPARE(a, b) is true when the two C strings are equal ignoring case.
// MSVC names the comparison routine _stricmp; every other toolchain is
// expected to provide strcasecmp.
#if defined(_MSC_VER)
#  define COMPARE(a, b) (_stricmp((a), (b)) == 0)
#else
#  define COMPARE(a, b) (strcasecmp((a), (b)) == 0)
#endif
49:
50: //
51: // libxml callback context structure
52: //
53:
// State handed to every SAX callback through its void* context argument.
struct Context
{
  // A fresh context is not collecting text and holds an empty title.
  Context()
    : addTitle(false),
      title()
  {
  }

  bool addTitle;      // true while character data is appended to title
  std::string title;  // title text gathered so far
};
61:
62: //
63: // libcurl variables for error strings and returned data
64:
65: static char errorBuffer[CURL_ERROR_SIZE];
66: static std::string buffer;
67:
68: //
69: // libcurl write callback function
70: //
71:
// libcurl write callback: appends one chunk of received data to
// *writerData.
//
//   data        pointer to the received bytes (not NUL-terminated)
//   size, nmemb the chunk holds size * nmemb bytes
//   writerData  destination string supplied through CURLOPT_WRITEDATA
//
// Returns the number of bytes consumed, or 0 when no destination string
// was supplied. The return type is size_t, as libcurl's write-callback
// prototype requires; returning int would truncate large byte counts and
// hand libcurl a function of the wrong type.
static size_t writer(char *data, size_t size, size_t nmemb,
                     std::string *writerData)
{
  if(writerData == NULL)
    return 0;

  const size_t total = size * nmemb;
  writerData->append(data, total);

  return total;
}
82:
83: //
84: // libcurl connection initialization
85: //
86:
87: static bool init(CURL *&conn, char *url)
88: {
89: CURLcode code;
90:
91: conn = curl_easy_init();
92:
93: if(conn == NULL) {
94: fprintf(stderr, "Failed to create CURL connection\n");
95: exit(EXIT_FAILURE);
96: }
97:
98: code = curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer);
99: if(code != CURLE_OK) {
100: fprintf(stderr, "Failed to set error buffer [%d]\n", code);
101: return false;
102: }
103:
104: code = curl_easy_setopt(conn, CURLOPT_URL, url);
105: if(code != CURLE_OK) {
106: fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer);
107: return false;
108: }
109:
110: code = curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L);
111: if(code != CURLE_OK) {
112: fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer);
113: return false;
114: }
115:
116: code = curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer);
117: if(code != CURLE_OK) {
118: fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer);
119: return false;
120: }
121:
122: code = curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer);
123: if(code != CURLE_OK) {
124: fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer);
125: return false;
126: }
127:
128: return true;
129: }
130:
131: //
132: // libxml start element callback function
133: //
134:
135: static void StartElement(void *voidContext,
136: const xmlChar *name,
137: const xmlChar **attributes)
138: {
139: Context *context = static_cast<Context *>(voidContext);
140:
141: if(COMPARE(reinterpret_cast<char *>(name), "TITLE")) {
142: context->title = "";
143: context->addTitle = true;
144: }
145: (void) attributes;
146: }
147:
148: //
149: // libxml end element callback function
150: //
151:
152: static void EndElement(void *voidContext,
153: const xmlChar *name)
154: {
155: Context *context = static_cast<Context *>(voidContext);
156:
157: if(COMPARE(reinterpret_cast<char *>(name), "TITLE"))
158: context->addTitle = false;
159: }
160:
161: //
162: // Text handling helper function
163: //
164:
165: static void handleCharacters(Context *context,
166: const xmlChar *chars,
167: int length)
168: {
169: if(context->addTitle)
170: context->title.append(reinterpret_cast<char *>(chars), length);
171: }
172:
173: //
174: // libxml PCDATA callback function
175: //
176:
177: static void Characters(void *voidContext,
178: const xmlChar *chars,
179: int length)
180: {
181: Context *context = static_cast<Context *>(voidContext);
182:
183: handleCharacters(context, chars, length);
184: }
185:
186: //
187: // libxml CDATA callback function
188: //
189:
190: static void cdata(void *voidContext,
191: const xmlChar *chars,
192: int length)
193: {
194: Context *context = static_cast<Context *>(voidContext);
195:
196: handleCharacters(context, chars, length);
197: }
198:
199: //
200: // libxml SAX callback structure
201: //
202:
203: static htmlSAXHandler saxHandler =
204: {
205: NULL,
206: NULL,
207: NULL,
208: NULL,
209: NULL,
210: NULL,
211: NULL,
212: NULL,
213: NULL,
214: NULL,
215: NULL,
216: NULL,
217: NULL,
218: NULL,
219: StartElement,
220: EndElement,
221: NULL,
222: Characters,
223: NULL,
224: NULL,
225: NULL,
226: NULL,
227: NULL,
228: NULL,
229: NULL,
230: cdata,
231: NULL
232: };
233:
234: //
235: // Parse given (assumed to be) HTML text and return the title
236: //
237:
238: static void parseHtml(const std::string &html,
239: std::string &title)
240: {
241: htmlParserCtxtPtr ctxt;
242: Context context;
243:
244: ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",
245: XML_CHAR_ENCODING_NONE);
246:
247: htmlParseChunk(ctxt, html.c_str(), html.size(), 0);
248: htmlParseChunk(ctxt, "", 0, 1);
249:
250: htmlFreeParserCtxt(ctxt);
251:
252: title = context.title;
253: }
254:
255: int main(int argc, char *argv[])
256: {
257: CURL *conn = NULL;
258: CURLcode code;
259: std::string title;
260:
261: // Ensure one argument is given
262:
263: if(argc != 2) {
264: fprintf(stderr, "Usage: %s <url>\n", argv[0]);
265: exit(EXIT_FAILURE);
266: }
267:
268: curl_global_init(CURL_GLOBAL_DEFAULT);
269:
270: // Initialize CURL connection
271:
272: if(!init(conn, argv[1])) {
273: fprintf(stderr, "Connection initializion failed\n");
274: exit(EXIT_FAILURE);
275: }
276:
277: // Retrieve content for the URL
278:
279: code = curl_easy_perform(conn);
280: curl_easy_cleanup(conn);
281:
282: if(code != CURLE_OK) {
283: fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer);
284: exit(EXIT_FAILURE);
285: }
286:
287: // Parse the (assumed) HTML code
288: parseHtml(buffer, title);
289:
290: // Display the extracted title
291: printf("Title: %s\n", title.c_str());
292:
293: return EXIT_SUCCESS;
294: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>