Annotation of embedaddon/curl/docs/examples/htmltidy.c, revision 1.1

1.1     ! misho       1: /***************************************************************************
        !             2:  *                                  _   _ ____  _
        !             3:  *  Project                     ___| | | |  _ \| |
        !             4:  *                             / __| | | | |_) | |
        !             5:  *                            | (__| |_| |  _ <| |___
        !             6:  *                             \___|\___/|_| \_\_____|
        !             7:  *
        !             8:  * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al.
        !             9:  *
        !            10:  * This software is licensed as described in the file COPYING, which
        !            11:  * you should have received as part of this distribution. The terms
        !            12:  * are also available at https://curl.haxx.se/docs/copyright.html.
        !            13:  *
        !            14:  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
        !            15:  * copies of the Software, and permit persons to whom the Software is
        !            16:  * furnished to do so, under the terms of the COPYING file.
        !            17:  *
        !            18:  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
        !            19:  * KIND, either express or implied.
        !            20:  *
        !            21:  ***************************************************************************/
        !            22: /* <DESC>
        !            23:  * Download a document and use libtidy to parse the HTML.
        !            24:  * </DESC>
        !            25:  */
        !            26: /*
        !            27:  * LibTidy => https://www.html-tidy.org/
        !            28:  */
        !            29: 
        !            30: #include <stdio.h>
        !            31: #include <tidy.h>
        !            32: #include <tidybuffio.h>
        !            33: #include <curl/curl.h>
        !            34: 
        !            35: /* curl write callback, to fill tidy's input buffer...  */
        !            36: uint write_cb(char *in, uint size, uint nmemb, TidyBuffer *out)
        !            37: {
        !            38:   uint r;
        !            39:   r = size * nmemb;
        !            40:   tidyBufAppend(out, in, r);
        !            41:   return r;
        !            42: }
        !            43: 
        !            44: /* Traverse the document tree */
        !            45: void dumpNode(TidyDoc doc, TidyNode tnod, int indent)
        !            46: {
        !            47:   TidyNode child;
        !            48:   for(child = tidyGetChild(tnod); child; child = tidyGetNext(child) ) {
        !            49:     ctmbstr name = tidyNodeGetName(child);
        !            50:     if(name) {
        !            51:       /* if it has a name, then it's an HTML tag ... */
        !            52:       TidyAttr attr;
        !            53:       printf("%*.*s%s ", indent, indent, "<", name);
        !            54:       /* walk the attribute list */
        !            55:       for(attr = tidyAttrFirst(child); attr; attr = tidyAttrNext(attr) ) {
        !            56:         printf(tidyAttrName(attr));
        !            57:         tidyAttrValue(attr)?printf("=\"%s\" ",
        !            58:                                    tidyAttrValue(attr)):printf(" ");
        !            59:       }
        !            60:       printf(">\n");
        !            61:     }
        !            62:     else {
        !            63:       /* if it doesn't have a name, then it's probably text, cdata, etc... */
        !            64:       TidyBuffer buf;
        !            65:       tidyBufInit(&buf);
        !            66:       tidyNodeGetText(doc, child, &buf);
        !            67:       printf("%*.*s\n", indent, indent, buf.bp?(char *)buf.bp:"");
        !            68:       tidyBufFree(&buf);
        !            69:     }
        !            70:     dumpNode(doc, child, indent + 4); /* recursive */
        !            71:   }
        !            72: }
        !            73: 
        !            74: 
        !            75: int main(int argc, char **argv)
        !            76: {
        !            77:   if(argc == 2) {
        !            78:     CURL *curl;
        !            79:     char curl_errbuf[CURL_ERROR_SIZE];
        !            80:     TidyDoc tdoc;
        !            81:     TidyBuffer docbuf = {0};
        !            82:     TidyBuffer tidy_errbuf = {0};
        !            83:     int err;
        !            84: 
        !            85:     curl = curl_easy_init();
        !            86:     curl_easy_setopt(curl, CURLOPT_URL, argv[1]);
        !            87:     curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf);
        !            88:     curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
        !            89:     curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);
        !            90:     curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb);
        !            91: 
        !            92:     tdoc = tidyCreate();
        !            93:     tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */
        !            94:     tidyOptSetInt(tdoc, TidyWrapLen, 4096);
        !            95:     tidySetErrorBuffer(tdoc, &tidy_errbuf);
        !            96:     tidyBufInit(&docbuf);
        !            97: 
        !            98:     curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf);
        !            99:     err = curl_easy_perform(curl);
        !           100:     if(!err) {
        !           101:       err = tidyParseBuffer(tdoc, &docbuf); /* parse the input */
        !           102:       if(err >= 0) {
        !           103:         err = tidyCleanAndRepair(tdoc); /* fix any problems */
        !           104:         if(err >= 0) {
        !           105:           err = tidyRunDiagnostics(tdoc); /* load tidy error buffer */
        !           106:           if(err >= 0) {
        !           107:             dumpNode(tdoc, tidyGetRoot(tdoc), 0); /* walk the tree */
        !           108:             fprintf(stderr, "%s\n", tidy_errbuf.bp); /* show errors */
        !           109:           }
        !           110:         }
        !           111:       }
        !           112:     }
        !           113:     else
        !           114:       fprintf(stderr, "%s\n", curl_errbuf);
        !           115: 
        !           116:     /* clean-up */
        !           117:     curl_easy_cleanup(curl);
        !           118:     tidyBufFree(&docbuf);
        !           119:     tidyBufFree(&tidy_errbuf);
        !           120:     tidyRelease(tdoc);
        !           121:     return err;
        !           122: 
        !           123:   }
        !           124:   else
        !           125:     printf("usage: %s <url>\n", argv[0]);
        !           126: 
        !           127:   return 0;
        !           128: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>