Return to htmltitle.cpp CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / curl / docs / examples |
1.1 ! misho 1: /*************************************************************************** ! 2: * _ _ ____ _ ! 3: * Project ___| | | | _ \| | ! 4: * / __| | | | |_) | | ! 5: * | (__| |_| | _ <| |___ ! 6: * \___|\___/|_| \_\_____| ! 7: * ! 8: * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al. ! 9: * ! 10: * This software is licensed as described in the file COPYING, which ! 11: * you should have received as part of this distribution. The terms ! 12: * are also available at https://curl.haxx.se/docs/copyright.html. ! 13: * ! 14: * You may opt to use, copy, modify, merge, publish, distribute and/or sell ! 15: * copies of the Software, and permit persons to whom the Software is ! 16: * furnished to do so, under the terms of the COPYING file. ! 17: * ! 18: * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY ! 19: * KIND, either express or implied. ! 20: * ! 21: ***************************************************************************/ ! 22: /* <DESC> ! 23: * Get a web page, extract the title with libxml. ! 24: * </DESC> ! 25: ! 26: Written by Lars Nilsson ! 27: ! 28: GNU C++ compile command line suggestion (edit paths accordingly): ! 29: ! 30: g++ -Wall -I/opt/curl/include -I/opt/libxml/include/libxml2 htmltitle.cpp \ ! 31: -o htmltitle -L/opt/curl/lib -L/opt/libxml/lib -lcurl -lxml2 ! 32: */ ! 33: #include <stdio.h> ! 34: #include <string.h> ! 35: #include <stdlib.h> ! 36: #include <string> ! 37: #include <curl/curl.h> ! 38: #include <libxml/HTMLparser.h> ! 39: ! 40: // ! 41: // Case-insensitive string comparison ! 42: // ! 43: ! 44: #ifdef _MSC_VER ! 45: #define COMPARE(a, b) (!_stricmp((a), (b))) ! 46: #else ! 47: #define COMPARE(a, b) (!strcasecmp((a), (b))) ! 48: #endif ! 49: ! 50: // ! 51: // libxml callback context structure ! 52: // ! 53: ! 54: struct Context ! 55: { ! 56: Context(): addTitle(false) { } ! 57: ! 58: bool addTitle; ! 59: std::string title; ! 60: }; ! 61: ! 62: // ! 63: // libcurl variables for error strings and returned data ! 64: ! 65: static char errorBuffer[CURL_ERROR_SIZE]; ! 66: static std::string buffer; ! 67: ! 68: // ! 69: // libcurl write callback function ! 70: // ! 71: ! 72: static int writer(char *data, size_t size, size_t nmemb, ! 73: std::string *writerData) ! 74: { ! 75: if(writerData == NULL) ! 76: return 0; ! 77: ! 78: writerData->append(data, size*nmemb); ! 79: ! 80: return size * nmemb; ! 81: } ! 82: ! 83: // ! 84: // libcurl connection initialization ! 85: // ! 86: ! 87: static bool init(CURL *&conn, char *url) ! 88: { ! 89: CURLcode code; ! 90: ! 91: conn = curl_easy_init(); ! 92: ! 93: if(conn == NULL) { ! 94: fprintf(stderr, "Failed to create CURL connection\n"); ! 95: exit(EXIT_FAILURE); ! 96: } ! 97: ! 98: code = curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer); ! 99: if(code != CURLE_OK) { ! 100: fprintf(stderr, "Failed to set error buffer [%d]\n", code); ! 101: return false; ! 102: } ! 103: ! 104: code = curl_easy_setopt(conn, CURLOPT_URL, url); ! 105: if(code != CURLE_OK) { ! 106: fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer); ! 107: return false; ! 108: } ! 109: ! 110: code = curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L); ! 111: if(code != CURLE_OK) { ! 112: fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer); ! 113: return false; ! 114: } ! 115: ! 116: code = curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer); ! 117: if(code != CURLE_OK) { ! 118: fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer); ! 119: return false; ! 120: } ! 121: ! 122: code = curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer); ! 123: if(code != CURLE_OK) { ! 124: fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer); ! 125: return false; ! 126: } ! 127: ! 128: return true; ! 129: } ! 130: ! 131: // ! 132: // libxml start element callback function ! 133: // ! 134: ! 135: static void StartElement(void *voidContext, ! 136: const xmlChar *name, ! 137: const xmlChar **attributes) ! 138: { ! 139: Context *context = static_cast<Context *>(voidContext); ! 140: ! 141: if(COMPARE(reinterpret_cast<char *>(name), "TITLE")) { ! 142: context->title = ""; ! 143: context->addTitle = true; ! 144: } ! 145: (void) attributes; ! 146: } ! 147: ! 148: // ! 149: // libxml end element callback function ! 150: // ! 151: ! 152: static void EndElement(void *voidContext, ! 153: const xmlChar *name) ! 154: { ! 155: Context *context = static_cast<Context *>(voidContext); ! 156: ! 157: if(COMPARE(reinterpret_cast<char *>(name), "TITLE")) ! 158: context->addTitle = false; ! 159: } ! 160: ! 161: // ! 162: // Text handling helper function ! 163: // ! 164: ! 165: static void handleCharacters(Context *context, ! 166: const xmlChar *chars, ! 167: int length) ! 168: { ! 169: if(context->addTitle) ! 170: context->title.append(reinterpret_cast<char *>(chars), length); ! 171: } ! 172: ! 173: // ! 174: // libxml PCDATA callback function ! 175: // ! 176: ! 177: static void Characters(void *voidContext, ! 178: const xmlChar *chars, ! 179: int length) ! 180: { ! 181: Context *context = static_cast<Context *>(voidContext); ! 182: ! 183: handleCharacters(context, chars, length); ! 184: } ! 185: ! 186: // ! 187: // libxml CDATA callback function ! 188: // ! 189: ! 190: static void cdata(void *voidContext, ! 191: const xmlChar *chars, ! 192: int length) ! 193: { ! 194: Context *context = static_cast<Context *>(voidContext); ! 195: ! 196: handleCharacters(context, chars, length); ! 197: } ! 198: ! 199: // ! 200: // libxml SAX callback structure ! 201: // ! 202: ! 203: static htmlSAXHandler saxHandler = ! 204: { ! 205: NULL, ! 206: NULL, ! 207: NULL, ! 208: NULL, ! 209: NULL, ! 210: NULL, ! 211: NULL, ! 212: NULL, ! 213: NULL, ! 214: NULL, ! 215: NULL, ! 216: NULL, ! 217: NULL, ! 218: NULL, ! 219: StartElement, ! 220: EndElement, ! 221: NULL, ! 222: Characters, ! 223: NULL, ! 224: NULL, ! 225: NULL, ! 226: NULL, ! 227: NULL, ! 228: NULL, ! 229: NULL, ! 230: cdata, ! 231: NULL ! 232: }; ! 233: ! 234: // ! 235: // Parse given (assumed to be) HTML text and return the title ! 236: // ! 237: ! 238: static void parseHtml(const std::string &html, ! 239: std::string &title) ! 240: { ! 241: htmlParserCtxtPtr ctxt; ! 242: Context context; ! 243: ! 244: ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "", ! 245: XML_CHAR_ENCODING_NONE); ! 246: ! 247: htmlParseChunk(ctxt, html.c_str(), html.size(), 0); ! 248: htmlParseChunk(ctxt, "", 0, 1); ! 249: ! 250: htmlFreeParserCtxt(ctxt); ! 251: ! 252: title = context.title; ! 253: } ! 254: ! 255: int main(int argc, char *argv[]) ! 256: { ! 257: CURL *conn = NULL; ! 258: CURLcode code; ! 259: std::string title; ! 260: ! 261: // Ensure one argument is given ! 262: ! 263: if(argc != 2) { ! 264: fprintf(stderr, "Usage: %s <url>\n", argv[0]); ! 265: exit(EXIT_FAILURE); ! 266: } ! 267: ! 268: curl_global_init(CURL_GLOBAL_DEFAULT); ! 269: ! 270: // Initialize CURL connection ! 271: ! 272: if(!init(conn, argv[1])) { ! 273: fprintf(stderr, "Connection initializion failed\n"); ! 274: exit(EXIT_FAILURE); ! 275: } ! 276: ! 277: // Retrieve content for the URL ! 278: ! 279: code = curl_easy_perform(conn); ! 280: curl_easy_cleanup(conn); ! 281: ! 282: if(code != CURLE_OK) { ! 283: fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer); ! 284: exit(EXIT_FAILURE); ! 285: } ! 286: ! 287: // Parse the (assumed) HTML code ! 288: parseHtml(buffer, title); ! 289: ! 290: // Display the extracted title ! 291: printf("Title: %s\n", title.c_str()); ! 292: ! 293: return EXIT_SUCCESS; ! 294: }