Annotation of embedaddon/curl/docs/examples/htmltitle.cpp, revision 1.1

1.1     ! misho       1: /***************************************************************************
        !             2:  *                                  _   _ ____  _
        !             3:  *  Project                     ___| | | |  _ \| |
        !             4:  *                             / __| | | | |_) | |
        !             5:  *                            | (__| |_| |  _ <| |___
        !             6:  *                             \___|\___/|_| \_\_____|
        !             7:  *
        !             8:  * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al.
        !             9:  *
        !            10:  * This software is licensed as described in the file COPYING, which
        !            11:  * you should have received as part of this distribution. The terms
        !            12:  * are also available at https://curl.haxx.se/docs/copyright.html.
        !            13:  *
        !            14:  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
        !            15:  * copies of the Software, and permit persons to whom the Software is
        !            16:  * furnished to do so, under the terms of the COPYING file.
        !            17:  *
        !            18:  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
        !            19:  * KIND, either express or implied.
        !            20:  *
        !            21:  ***************************************************************************/
        !            22: /* <DESC>
        !            23:  * Get a web page, extract the title with libxml.
        !            24:  * </DESC>
        !            25: 
        !            26:  Written by Lars Nilsson
        !            27: 
        !            28:  GNU C++ compile command line suggestion (edit paths accordingly):
        !            29: 
        !            30:  g++ -Wall -I/opt/curl/include -I/opt/libxml/include/libxml2 htmltitle.cpp \
        !            31:  -o htmltitle -L/opt/curl/lib -L/opt/libxml/lib -lcurl -lxml2
        !            32: */
        !            33: #include <stdio.h>
        !            34: #include <string.h>
        !            35: #include <stdlib.h>
        !            36: #include <string>
        !            37: #include <curl/curl.h>
        !            38: #include <libxml/HTMLparser.h>
        !            39: 
        !            40: //
        !            41: //  Case-insensitive string comparison
        !            42: //
        !            43: 
        // COMPARE(a, b) evaluates to true when the two C strings are equal
        // ignoring case. MSVC spells the comparison _stricmp; every other
        // compiler is given strcasecmp.
        #if defined(_MSC_VER)
        #  define COMPARE(a, b) (_stricmp((a), (b)) == 0)
        #else
        #  define COMPARE(a, b) (strcasecmp((a), (b)) == 0)
        #endif
        !            49: 
        !            50: //
        !            51: //  libxml callback context structure
        !            52: //
        !            53: 
        // State shared by the SAX callbacks while a document is parsed.
        struct Context
        {
          // True while the parser is inside a TITLE element: the start-element
          // callback sets it and the end-element callback clears it.
          bool addTitle;

          // Text collected so far for the title; reset each time a TITLE
          // element opens.
          std::string title;

          // Starts outside any TITLE element with an empty title.
          Context()
            : addTitle(false),
              title()
          {
          }
        };
        !            61: 
        !            62: //
        !            63: //  libcurl variables for error strings and returned data
        !            64: 
        !            65: static char errorBuffer[CURL_ERROR_SIZE];  // registered via CURLOPT_ERRORBUFFER in init(); printed in failure messages
        !            66: static std::string buffer;  // registered via CURLOPT_WRITEDATA in init(); filled by writer(), parsed in main()
        !            67: 
        !            68: //
        !            69: //  libcurl write callback function
        !            70: //
        !            71: 
        // Appends one chunk of received data to the std::string supplied as the
        // write target.
        //
        //   data        start of the received bytes (not NUL-terminated)
        //   size, nmemb element size and element count; the chunk is size * nmemb bytes
        //   writerData  destination string; NULL is treated as a failure
        //
        // Returns the number of bytes consumed, or 0 when there is no
        // destination. The return type is size_t to match the callback
        // prototype libcurl documents for CURLOPT_WRITEFUNCTION; returning int
        // would narrow the byte count and call the function through a
        // mismatched signature.
        static size_t writer(char *data, size_t size, size_t nmemb,
                             std::string *writerData)
        {
          if(writerData == NULL)
            return 0;

          const size_t total = size * nmemb;
          writerData->append(data, total);

          return total;
        }
        !            82: 
        !            83: //
        !            84: //  libcurl connection initialization
        !            85: //
        !            86: 
        !            87: static bool init(CURL *&conn, char *url)
        !            88: {
        !            89:   CURLcode code;
        !            90: 
        !            91:   conn = curl_easy_init();
        !            92: 
        !            93:   if(conn == NULL) {
        !            94:     fprintf(stderr, "Failed to create CURL connection\n");
        !            95:     exit(EXIT_FAILURE);
        !            96:   }
        !            97: 
        !            98:   code = curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer);
        !            99:   if(code != CURLE_OK) {
        !           100:     fprintf(stderr, "Failed to set error buffer [%d]\n", code);
        !           101:     return false;
        !           102:   }
        !           103: 
        !           104:   code = curl_easy_setopt(conn, CURLOPT_URL, url);
        !           105:   if(code != CURLE_OK) {
        !           106:     fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer);
        !           107:     return false;
        !           108:   }
        !           109: 
        !           110:   code = curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L);
        !           111:   if(code != CURLE_OK) {
        !           112:     fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer);
        !           113:     return false;
        !           114:   }
        !           115: 
        !           116:   code = curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer);
        !           117:   if(code != CURLE_OK) {
        !           118:     fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer);
        !           119:     return false;
        !           120:   }
        !           121: 
        !           122:   code = curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer);
        !           123:   if(code != CURLE_OK) {
        !           124:     fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer);
        !           125:     return false;
        !           126:   }
        !           127: 
        !           128:   return true;
        !           129: }
        !           130: 
        !           131: //
        !           132: //  libxml start element callback function
        !           133: //
        !           134: 
        !           135: static void StartElement(void *voidContext,
        !           136:                          const xmlChar *name,
        !           137:                          const xmlChar **attributes)
        !           138: {
        !           139:   Context *context = static_cast<Context *>(voidContext);
        !           140: 
        !           141:   if(COMPARE(reinterpret_cast<char *>(name), "TITLE")) {
        !           142:     context->title = "";
        !           143:     context->addTitle = true;
        !           144:   }
        !           145:   (void) attributes;
        !           146: }
        !           147: 
        !           148: //
        !           149: //  libxml end element callback function
        !           150: //
        !           151: 
        !           152: static void EndElement(void *voidContext,
        !           153:                        const xmlChar *name)
        !           154: {
        !           155:   Context *context = static_cast<Context *>(voidContext);
        !           156: 
        !           157:   if(COMPARE(reinterpret_cast<char *>(name), "TITLE"))
        !           158:     context->addTitle = false;
        !           159: }
        !           160: 
        !           161: //
        !           162: //  Text handling helper function
        !           163: //
        !           164: 
        !           165: static void handleCharacters(Context *context,
        !           166:                              const xmlChar *chars,
        !           167:                              int length)
        !           168: {
        !           169:   if(context->addTitle)
        !           170:     context->title.append(reinterpret_cast<char *>(chars), length);
        !           171: }
        !           172: 
        !           173: //
        !           174: //  libxml PCDATA callback function
        !           175: //
        !           176: 
        !           177: static void Characters(void *voidContext,
        !           178:                        const xmlChar *chars,
        !           179:                        int length)
        !           180: {
        !           181:   Context *context = static_cast<Context *>(voidContext);
        !           182: 
        !           183:   handleCharacters(context, chars, length);
        !           184: }
        !           185: 
        !           186: //
        !           187: //  libxml CDATA callback function
        !           188: //
        !           189: 
        !           190: static void cdata(void *voidContext,
        !           191:                   const xmlChar *chars,
        !           192:                   int length)
        !           193: {
        !           194:   Context *context = static_cast<Context *>(voidContext);
        !           195: 
        !           196:   handleCharacters(context, chars, length);
        !           197: }
        !           198: 
        !           199: //
        !           200: //  libxml SAX callback structure
        !           201: //
        !           202: 
        !           203: static htmlSAXHandler saxHandler =  // callback table passed to htmlCreatePushParserCtxt() in parseHtml()
        !           204: {  // positional initializer: each entry fills the next member in declaration order
        !           205:   NULL,
        !           206:   NULL,
        !           207:   NULL,
        !           208:   NULL,
        !           209:   NULL,
        !           210:   NULL,
        !           211:   NULL,
        !           212:   NULL,
        !           213:   NULL,
        !           214:   NULL,
        !           215:   NULL,
        !           216:   NULL,
        !           217:   NULL,
        !           218:   NULL,
        !           219:   StartElement,  // 15th entry: start-element callback
        !           220:   EndElement,  // 16th entry: end-element callback
        !           221:   NULL,
        !           222:   Characters,  // 18th entry: character-data (PCDATA) callback
        !           223:   NULL,
        !           224:   NULL,
        !           225:   NULL,
        !           226:   NULL,
        !           227:   NULL,
        !           228:   NULL,
        !           229:   NULL,
        !           230:   cdata,  // 26th entry: CDATA callback
        !           231:   NULL
        !           232: };  // NOTE(review): slot positions must match the installed libxml2 SAX handler struct - confirm against its header
        !           233: 
        !           234: //
        !           235: //  Parse given (assumed to be) HTML text and return the title
        !           236: //
        !           237: 
        !           238: static void parseHtml(const std::string &html,
        !           239:                       std::string &title)
        !           240: {
        !           241:   htmlParserCtxtPtr ctxt;
        !           242:   Context context;
        !           243: 
        !           244:   ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",
        !           245:                                   XML_CHAR_ENCODING_NONE);
        !           246: 
        !           247:   htmlParseChunk(ctxt, html.c_str(), html.size(), 0);
        !           248:   htmlParseChunk(ctxt, "", 0, 1);
        !           249: 
        !           250:   htmlFreeParserCtxt(ctxt);
        !           251: 
        !           252:   title = context.title;
        !           253: }
        !           254: 
        !           255: int main(int argc, char *argv[])
        !           256: {
        !           257:   CURL *conn = NULL;
        !           258:   CURLcode code;
        !           259:   std::string title;
        !           260: 
        !           261:   // Ensure one argument is given
        !           262: 
        !           263:   if(argc != 2) {
        !           264:     fprintf(stderr, "Usage: %s <url>\n", argv[0]);
        !           265:     exit(EXIT_FAILURE);
        !           266:   }
        !           267: 
        !           268:   curl_global_init(CURL_GLOBAL_DEFAULT);
        !           269: 
        !           270:   // Initialize CURL connection
        !           271: 
        !           272:   if(!init(conn, argv[1])) {
        !           273:     fprintf(stderr, "Connection initializion failed\n");
        !           274:     exit(EXIT_FAILURE);
        !           275:   }
        !           276: 
        !           277:   // Retrieve content for the URL
        !           278: 
        !           279:   code = curl_easy_perform(conn);
        !           280:   curl_easy_cleanup(conn);
        !           281: 
        !           282:   if(code != CURLE_OK) {
        !           283:     fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer);
        !           284:     exit(EXIT_FAILURE);
        !           285:   }
        !           286: 
        !           287:   // Parse the (assumed) HTML code
        !           288:   parseHtml(buffer, title);
        !           289: 
        !           290:   // Display the extracted title
        !           291:   printf("Title: %s\n", title.c_str());
        !           292: 
        !           293:   return EXIT_SUCCESS;
        !           294: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>