Annotation of embedaddon/curl/docs/examples/htmltitle.cpp, revision 1.1
1.1 ! misho 1: /***************************************************************************
! 2: * _ _ ____ _
! 3: * Project ___| | | | _ \| |
! 4: * / __| | | | |_) | |
! 5: * | (__| |_| | _ <| |___
! 6: * \___|\___/|_| \_\_____|
! 7: *
! 8: * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al.
! 9: *
! 10: * This software is licensed as described in the file COPYING, which
! 11: * you should have received as part of this distribution. The terms
! 12: * are also available at https://curl.haxx.se/docs/copyright.html.
! 13: *
! 14: * You may opt to use, copy, modify, merge, publish, distribute and/or sell
! 15: * copies of the Software, and permit persons to whom the Software is
! 16: * furnished to do so, under the terms of the COPYING file.
! 17: *
! 18: * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
! 19: * KIND, either express or implied.
! 20: *
! 21: ***************************************************************************/
! 22: /* <DESC>
! 23: * Get a web page, extract the title with libxml.
! 24: * </DESC>
! 25:
! 26: Written by Lars Nilsson
! 27:
! 28: GNU C++ compile command line suggestion (edit paths accordingly):
! 29:
! 30: g++ -Wall -I/opt/curl/include -I/opt/libxml/include/libxml2 htmltitle.cpp \
! 31: -o htmltitle -L/opt/curl/lib -L/opt/libxml/lib -lcurl -lxml2
! 32: */
! 33: #include <stdio.h>
! 34: #include <string.h>
! 35: #include <stdlib.h>
! 36: #include <string>
! 37: #include <curl/curl.h>
! 38: #include <libxml/HTMLparser.h>
! 39:
//
// Case-insensitive string comparison
//
// COMPARE(a, b) is true when the two NUL-terminated strings are equal
// ignoring case and false otherwise. MSVC builds go through _stricmp();
// every other compiler goes through strcasecmp().
//

#if defined(_MSC_VER)
#  define COMPARE(a, b) (_stricmp((a), (b)) == 0)
#else
#  define COMPARE(a, b) (strcasecmp((a), (b)) == 0)
#endif
! 49:
//
// libxml callback context structure
//
// One instance is handed to the SAX callbacks as their user-data pointer:
//   addTitle - true while character data belongs to the title
//   title    - title text collected so far
//

struct Context
{
  bool addTitle;
  std::string title;

  // Start outside any title element, with an empty title.
  Context()
    : addTitle(false),
      title()
  {
  }
};
! 61:
! 62: //
! 63: // libcurl variables for error strings and returned data
! 64:
! 65: static char errorBuffer[CURL_ERROR_SIZE];
! 66: static std::string buffer;
! 67:
//
// libcurl write callback function
//
// Appends the size * nmemb bytes found at data to *writerData and returns
// the number of bytes taken. Returns 0 when writerData is NULL; libcurl
// treats a count that differs from size * nmemb as a write error.
//
// The return type is size_t because that is what the CURLOPT_WRITEFUNCTION
// prototype prescribes: libcurl calls the callback through a pointer to a
// size_t-returning function, so returning int is a mismatched call and
// would also truncate the byte count.
//

static size_t writer(char *data, size_t size, size_t nmemb,
                     std::string *writerData)
{
  if(writerData == NULL)
    return 0;

  const size_t total = size * nmemb;

  writerData->append(data, total);

  return total;
}
! 82:
! 83: //
! 84: // libcurl connection initialization
! 85: //
! 86:
! 87: static bool init(CURL *&conn, char *url)
! 88: {
! 89: CURLcode code;
! 90:
! 91: conn = curl_easy_init();
! 92:
! 93: if(conn == NULL) {
! 94: fprintf(stderr, "Failed to create CURL connection\n");
! 95: exit(EXIT_FAILURE);
! 96: }
! 97:
! 98: code = curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer);
! 99: if(code != CURLE_OK) {
! 100: fprintf(stderr, "Failed to set error buffer [%d]\n", code);
! 101: return false;
! 102: }
! 103:
! 104: code = curl_easy_setopt(conn, CURLOPT_URL, url);
! 105: if(code != CURLE_OK) {
! 106: fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer);
! 107: return false;
! 108: }
! 109:
! 110: code = curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L);
! 111: if(code != CURLE_OK) {
! 112: fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer);
! 113: return false;
! 114: }
! 115:
! 116: code = curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer);
! 117: if(code != CURLE_OK) {
! 118: fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer);
! 119: return false;
! 120: }
! 121:
! 122: code = curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer);
! 123: if(code != CURLE_OK) {
! 124: fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer);
! 125: return false;
! 126: }
! 127:
! 128: return true;
! 129: }
! 130:
! 131: //
! 132: // libxml start element callback function
! 133: //
! 134:
! 135: static void StartElement(void *voidContext,
! 136: const xmlChar *name,
! 137: const xmlChar **attributes)
! 138: {
! 139: Context *context = static_cast<Context *>(voidContext);
! 140:
! 141: if(COMPARE(reinterpret_cast<char *>(name), "TITLE")) {
! 142: context->title = "";
! 143: context->addTitle = true;
! 144: }
! 145: (void) attributes;
! 146: }
! 147:
! 148: //
! 149: // libxml end element callback function
! 150: //
! 151:
! 152: static void EndElement(void *voidContext,
! 153: const xmlChar *name)
! 154: {
! 155: Context *context = static_cast<Context *>(voidContext);
! 156:
! 157: if(COMPARE(reinterpret_cast<char *>(name), "TITLE"))
! 158: context->addTitle = false;
! 159: }
! 160:
! 161: //
! 162: // Text handling helper function
! 163: //
! 164:
! 165: static void handleCharacters(Context *context,
! 166: const xmlChar *chars,
! 167: int length)
! 168: {
! 169: if(context->addTitle)
! 170: context->title.append(reinterpret_cast<char *>(chars), length);
! 171: }
! 172:
! 173: //
! 174: // libxml PCDATA callback function
! 175: //
! 176:
! 177: static void Characters(void *voidContext,
! 178: const xmlChar *chars,
! 179: int length)
! 180: {
! 181: Context *context = static_cast<Context *>(voidContext);
! 182:
! 183: handleCharacters(context, chars, length);
! 184: }
! 185:
! 186: //
! 187: // libxml CDATA callback function
! 188: //
! 189:
! 190: static void cdata(void *voidContext,
! 191: const xmlChar *chars,
! 192: int length)
! 193: {
! 194: Context *context = static_cast<Context *>(voidContext);
! 195:
! 196: handleCharacters(context, chars, length);
! 197: }
! 198:
! 199: //
! 200: // libxml SAX callback structure
! 201: //
! 202:
! 203: static htmlSAXHandler saxHandler =
! 204: {
! 205: NULL,
! 206: NULL,
! 207: NULL,
! 208: NULL,
! 209: NULL,
! 210: NULL,
! 211: NULL,
! 212: NULL,
! 213: NULL,
! 214: NULL,
! 215: NULL,
! 216: NULL,
! 217: NULL,
! 218: NULL,
! 219: StartElement,
! 220: EndElement,
! 221: NULL,
! 222: Characters,
! 223: NULL,
! 224: NULL,
! 225: NULL,
! 226: NULL,
! 227: NULL,
! 228: NULL,
! 229: NULL,
! 230: cdata,
! 231: NULL
! 232: };
! 233:
! 234: //
! 235: // Parse given (assumed to be) HTML text and return the title
! 236: //
! 237:
! 238: static void parseHtml(const std::string &html,
! 239: std::string &title)
! 240: {
! 241: htmlParserCtxtPtr ctxt;
! 242: Context context;
! 243:
! 244: ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",
! 245: XML_CHAR_ENCODING_NONE);
! 246:
! 247: htmlParseChunk(ctxt, html.c_str(), html.size(), 0);
! 248: htmlParseChunk(ctxt, "", 0, 1);
! 249:
! 250: htmlFreeParserCtxt(ctxt);
! 251:
! 252: title = context.title;
! 253: }
! 254:
! 255: int main(int argc, char *argv[])
! 256: {
! 257: CURL *conn = NULL;
! 258: CURLcode code;
! 259: std::string title;
! 260:
! 261: // Ensure one argument is given
! 262:
! 263: if(argc != 2) {
! 264: fprintf(stderr, "Usage: %s <url>\n", argv[0]);
! 265: exit(EXIT_FAILURE);
! 266: }
! 267:
! 268: curl_global_init(CURL_GLOBAL_DEFAULT);
! 269:
! 270: // Initialize CURL connection
! 271:
! 272: if(!init(conn, argv[1])) {
! 273: fprintf(stderr, "Connection initializion failed\n");
! 274: exit(EXIT_FAILURE);
! 275: }
! 276:
! 277: // Retrieve content for the URL
! 278:
! 279: code = curl_easy_perform(conn);
! 280: curl_easy_cleanup(conn);
! 281:
! 282: if(code != CURLE_OK) {
! 283: fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer);
! 284: exit(EXIT_FAILURE);
! 285: }
! 286:
! 287: // Parse the (assumed) HTML code
! 288: parseHtml(buffer, title);
! 289:
! 290: // Display the extracted title
! 291: printf("Title: %s\n", title.c_str());
! 292:
! 293: return EXIT_SUCCESS;
! 294: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>