embedaddon/php/ext/mbstring/ucgendat/ucgendat.c - annotate

Return to ucgendat.c CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / php / ext / mbstring / ucgendat
Annotation of embedaddon/php/ext/mbstring/ucgendat/ucgendat.c, revision 1.1.1.1

1.1       misho       1: /* Further modified for PHP */
                      2: /* $Id: ucgendat.c 304057 2010-10-05 02:34:35Z cataphract $ */
                      3: 
                      4: /* $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucgendat.c,v 1.36.2.4 2007/01/02 21:43:51 kurt Exp $ */
                      5: /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
                      6:  *
                      7:  * Copyright 1998-2007 The OpenLDAP Foundation.
                      8:  * All rights reserved.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted only as authorized by the OpenLDAP
                     12:  * Public License.
                     13:  *
                     14:  * A copy of this license is available at
                     15:  * <http://www.OpenLDAP.org/license.html>.
                     16:  */
                     17: 
                     18: /* Copyright 2001 Computing Research Labs, New Mexico State University
                     19:  *
                     20:  * Permission is hereby granted, free of charge, to any person obtaining a
                     21:  * copy of this software and associated documentation files (the "Software"),
                     22:  * to deal in the Software without restriction, including without limitation
                     23:  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
                     24:  * and/or sell copies of the Software, and to permit persons to whom the
                     25:  * Software is furnished to do so, subject to the following conditions:
                     26:  *
                     27:  * The above copyright notice and this permission notice shall be included in
                     28:  * all copies or substantial portions of the Software.
                     29:  *
                     30:  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
                     31:  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
                     32:  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
                     33:  * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
                     34:  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
                     35:  * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
                     36:  * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
                     37:  */
                     38: /* orig Id: ucgendat.c,v 1.4 2001/01/02 18:46:20 mleisher Exp $" */
                     39: 
                     40: #include <stdio.h>
                     41: #include <ctype.h>
                     42: #include <stdlib.h>
                     43: #include <string.h>
                     44: #include <unistd.h>
                     45: 
                     46: #define ac_uint2 unsigned short
                     47: #define ac_uint4 unsigned int
                     48: #define LDAP_DIRSEP "/"
                     49: #define AC_MEMCPY memcpy
                     50: 
                     51: #ifndef HARDCODE_DATA
                     52: #define        HARDCODE_DATA   1
                     53: #endif
                     54: 
                     55: #undef ishdigit
                     56: #define ishdigit(cc) (((cc) >= '0' && (cc) <= '9') ||\
                     57:                       ((cc) >= 'A' && (cc) <= 'F') ||\
                     58:                       ((cc) >= 'a' && (cc) <= 'f'))
                     59: 
                     60: /*
                     61:  * A header written to the output file with the byte-order-mark and the number
                     62:  * of property nodes.
                     63:  */
                     64: static ac_uint2 hdr[2] = {0xfeff, 0};
                     65: 
                     66: #define NUMPROPS 50
                     67: #define NEEDPROPS (NUMPROPS + (4 - (NUMPROPS & 3)))
                     68: 
                     69: typedef struct {
                     70:     char *name;
                     71:     int len;
                     72: } _prop_t;
                     73: 
                     74: /*
                     75:  * List of properties expected to be found in the Unicode Character Database
                     76:  * including some implementation specific properties.
                     77:  *
                     78:  * The implementation specific properties are:
                     79:  * Cm = Composed (can be decomposed)
                     80:  * Nb = Non-breaking
                     81:  * Sy = Symmetric (has left and right forms)
                     82:  * Hd = Hex digit
                     83:  * Qm = Quote marks
                     84:  * Mr = Mirroring
                     85:  * Ss = Space, other
                     86:  * Cp = Defined character
                     87:  */
                     88: static _prop_t props[NUMPROPS] = {
                     89:     {"Mn", 2}, {"Mc", 2}, {"Me", 2}, {"Nd", 2}, {"Nl", 2}, {"No", 2},
                     90:     {"Zs", 2}, {"Zl", 2}, {"Zp", 2}, {"Cc", 2}, {"Cf", 2}, {"Cs", 2},
                     91:     {"Co", 2}, {"Cn", 2}, {"Lu", 2}, {"Ll", 2}, {"Lt", 2}, {"Lm", 2},
                     92:     {"Lo", 2}, {"Pc", 2}, {"Pd", 2}, {"Ps", 2}, {"Pe", 2}, {"Po", 2},
                     93:     {"Sm", 2}, {"Sc", 2}, {"Sk", 2}, {"So", 2}, {"L",  1}, {"R",  1},
                     94:     {"EN", 2}, {"ES", 2}, {"ET", 2}, {"AN", 2}, {"CS", 2}, {"B",  1},
                     95:     {"S",  1}, {"WS", 2}, {"ON", 2},
                     96:     {"Cm", 2}, {"Nb", 2}, {"Sy", 2}, {"Hd", 2}, {"Qm", 2}, {"Mr", 2},
                     97:     {"Ss", 2}, {"Cp", 2}, {"Pi", 2}, {"Pf", 2}, {"AL", 2}
                     98: };
                     99: 
                    100: typedef struct {
                    101:     ac_uint4 *ranges;
                    102:     ac_uint2 used;
                    103:     ac_uint2 size;
                    104: } _ranges_t;
                    105: 
                    106: static _ranges_t proptbl[NUMPROPS];
                    107: 
                    108: /*
                    109:  * Make sure this array is sized to be on a 4-byte boundary at compile time.
                    110:  */
                    111: static ac_uint2 propcnt[NEEDPROPS];
                    112: 
                    113: /*
                    114:  * Array used to collect a decomposition before adding it to the decomposition
                    115:  * table.
                    116:  */
                    117: static ac_uint4 dectmp[64];
                    118: static ac_uint4 dectmp_size;
                    119: 
                    120: typedef struct {
                    121:     ac_uint4 code;
                    122:     ac_uint2 size;
                    123:     ac_uint2 used;
                    124:     ac_uint4 *decomp;
                    125: } _decomp_t;
                    126: 
                    127: /*
                    128:  * List of decomposition.  Created and expanded in order as the characters are
                    129:  * encountered. First list contains canonical mappings, second also includes
                    130:  * compatibility mappings.
                    131:  */
                    132: static _decomp_t *decomps;
                    133: static ac_uint4 decomps_used;
                    134: static ac_uint4 decomps_size;
                    135: 
                    136: static _decomp_t *kdecomps;
                    137: static ac_uint4 kdecomps_used;
                    138: static ac_uint4 kdecomps_size;
                    139: 
                    140: /*
                    141:  * Composition exclusion table stuff.
                    142:  */
                    143: #define COMPEX_SET(c) (compexs[(c) >> 5] |= (1 << ((c) & 31)))
                    144: #define COMPEX_TEST(c) (compexs[(c) >> 5] & (1 << ((c) & 31)))
                    145: static ac_uint4 compexs[8192];
                    146: 
                    147: /*
                    148:  * Struct for holding a composition pair, and array of composition pairs
                    149:  */
                    150: typedef struct {
                    151:     ac_uint4 comp;
                    152:     ac_uint4 count;
                    153:     ac_uint4 code1;
                    154:     ac_uint4 code2;
                    155: } _comp_t;
                    156: 
                    157: #if 0
                    158: static _comp_t *comps;
                    159: #endif
                    160: static ac_uint4 comps_used;
                    161: 
                    162: /*
                    163:  * Types and lists for handling lists of case mappings.
                    164:  */
                    165: typedef struct {
                    166:     ac_uint4 key;
                    167:     ac_uint4 other1;
                    168:     ac_uint4 other2;
                    169: } _case_t;
                    170: 
                    171: static _case_t *upper;
                    172: static _case_t *lower;
                    173: static _case_t *title;
                    174: static ac_uint4 upper_used;
                    175: static ac_uint4 upper_size;
                    176: static ac_uint4 lower_used;
                    177: static ac_uint4 lower_size;
                    178: static ac_uint4 title_used;
                    179: static ac_uint4 title_size;
                    180: 
                    181: /*
                    182:  * Array used to collect case mappings before adding them to a list.
                    183:  */
                    184: static ac_uint4 cases[3];
                    185: 
                    186: /*
                    187:  * An array to hold ranges for combining classes.
                    188:  */
                    189: static ac_uint4 *ccl;
                    190: static ac_uint4 ccl_used;
                    191: static ac_uint4 ccl_size;
                    192: 
                    193: /*
                    194:  * Structures for handling numbers.
                    195:  */
                    196: typedef struct {
                    197:     ac_uint4 code;
                    198:     ac_uint4 idx;
                    199: } _codeidx_t;
                    200: 
                    201: typedef struct {
                    202:     short numerator;
                    203:     short denominator;
                    204: } _num_t;
                    205: 
                    206: /*
                    207:  * Arrays to hold the mapping of codes to numbers.
                    208:  */
                    209: static _codeidx_t *ncodes;
                    210: static ac_uint4 ncodes_used;
                    211: static ac_uint4 ncodes_size;
                    212: 
                    213: static _num_t *nums;
                    214: static ac_uint4 nums_used;
                    215: static ac_uint4 nums_size;
                    216: 
                    217: /*
                    218:  * Array for holding numbers.
                    219:  */
                    220: static _num_t *nums;
                    221: static ac_uint4 nums_used;
                    222: static ac_uint4 nums_size;
                    223: 
                    224: static void
                    225: add_range(ac_uint4 start, ac_uint4 end, char *p1, char *p2)
                    226: {
                    227:     int i, j, k, len;
                    228:     _ranges_t *rlp;
                    229:     char *name;
                    230: 
                    231:     for (k = 0; k < 2; k++) {
                    232:         if (k == 0) {
                    233:             name = p1;
                    234:             len = 2;
                    235:         } else {
                    236:             if (p2 == 0)
                    237:               break;
                    238: 
                    239:             name = p2;
                    240:             len = 1;
                    241:         }
                    242: 
                    243:         for (i = 0; i < NUMPROPS; i++) {
                    244:             if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
                    245:               break;
                    246:         }
                    247: 
                    248:         if (i == NUMPROPS)
                    249:           continue;
                    250: 
                    251:         rlp = &proptbl[i];
                    252: 
                    253:         /*
                    254:          * Resize the range list if necessary.
                    255:          */
                    256:         if (rlp->used == rlp->size) {
                    257:             if (rlp->size == 0)
                    258:               rlp->ranges = (ac_uint4 *)
                    259:                   malloc(sizeof(ac_uint4) << 3);
                    260:             else
                    261:               rlp->ranges = (ac_uint4 *)
                    262:                   realloc((char *) rlp->ranges,
                    263:                           sizeof(ac_uint4) * (rlp->size + 8));
                    264:             rlp->size += 8;
                    265:         }
                    266: 
                    267:         /*
                    268:          * If this is the first code for this property list, just add it
                    269:          * and return.
                    270:          */
                    271:         if (rlp->used == 0) {
                    272:             rlp->ranges[0] = start;
                    273:             rlp->ranges[1] = end;
                    274:             rlp->used += 2;
                    275:             continue;
                    276:         }
                    277: 
                    278:         /*
                    279:          * Optimize the case of adding the range to the end.
                    280:          */
                    281:         j = rlp->used - 1;
                    282:         if (start > rlp->ranges[j]) {
                    283:             j = rlp->used;
                    284:             rlp->ranges[j++] = start;
                    285:             rlp->ranges[j++] = end;
                    286:             rlp->used = j;
                    287:             continue;
                    288:         }
                    289: 
                    290:         /*
                    291:          * Need to locate the insertion point.
                    292:          */
                    293:         for (i = 0;
                    294:              i < rlp->used && start > rlp->ranges[i + 1] + 1; i += 2) ;
                    295: 
                    296:         /*
                    297:          * If the start value lies in the current range, then simply set the
                    298:          * new end point of the range to the end value passed as a parameter.
                    299:          */
                    300:         if (rlp->ranges[i] <= start && start <= rlp->ranges[i + 1] + 1) {
                    301:             rlp->ranges[i + 1] = end;
                    302:             return;
                    303:         }
                    304: 
                    305:         /*
                    306:          * Shift following values up by two.
                    307:          */
                    308:         for (j = rlp->used; j > i; j -= 2) {
                    309:             rlp->ranges[j] = rlp->ranges[j - 2];
                    310:             rlp->ranges[j + 1] = rlp->ranges[j - 1];
                    311:         }
                    312: 
                    313:         /*
                    314:          * Add the new range at the insertion point.
                    315:          */
                    316:         rlp->ranges[i] = start;
                    317:         rlp->ranges[i + 1] = end;
                    318:         rlp->used += 2;
                    319:     }
                    320: }
                    321: 
                    322: static void
                    323: ordered_range_insert(ac_uint4 c, char *name, int len)
                    324: {
                    325:     int i, j;
                    326:     ac_uint4 s, e;
                    327:     _ranges_t *rlp;
                    328: 
                    329:     if (len == 0)
                    330:       return;
                    331: 
                    332:     /*
                    333:      * Deal with directionality codes introduced in Unicode 3.0.
                    334:      */
                    335:     if ((len == 2 && memcmp(name, "BN", 2) == 0) ||
                    336:         (len == 3 &&
                    337:          (memcmp(name, "NSM", 3) == 0 || memcmp(name, "PDF", 3) == 0 ||
                    338:           memcmp(name, "LRE", 3) == 0 || memcmp(name, "LRO", 3) == 0 ||
                    339:           memcmp(name, "RLE", 3) == 0 || memcmp(name, "RLO", 3) == 0))) {
                    340:         /*
                    341:          * Mark all of these as Other Neutral to preserve compatibility with
                    342:          * older versions.
                    343:          */
                    344:         len = 2;
                    345:         name = "ON";
                    346:     }
                    347: 
                    348:     for (i = 0; i < NUMPROPS; i++) {
                    349:         if (props[i].len == len && memcmp(props[i].name, name, len) == 0)
                    350:           break;
                    351:     }
                    352: 
                    353:     if (i == NUMPROPS)
                    354:       return;
                    355: 
                    356:     /*
                    357:      * Have a match, so insert the code in order.
                    358:      */
                    359:     rlp = &proptbl[i];
                    360: 
                    361:     /*
                    362:      * Resize the range list if necessary.
                    363:      */
                    364:     if (rlp->used == rlp->size) {
                    365:         if (rlp->size == 0)
                    366:           rlp->ranges = (ac_uint4 *)
                    367:               malloc(sizeof(ac_uint4) << 3);
                    368:         else
                    369:           rlp->ranges = (ac_uint4 *)
                    370:               realloc((char *) rlp->ranges,
                    371:                       sizeof(ac_uint4) * (rlp->size + 8));
                    372:         rlp->size += 8;
                    373:     }
                    374: 
                    375:     /*
                    376:      * If this is the first code for this property list, just add it
                    377:      * and return.
                    378:      */
                    379:     if (rlp->used == 0) {
                    380:         rlp->ranges[0] = rlp->ranges[1] = c;
                    381:         rlp->used += 2;
                    382:         return;
                    383:     }
                    384: 
                    385:     /*
                    386:      * Optimize the cases of extending the last range and adding new ranges to
                    387:      * the end.
                    388:      */
                    389:     j = rlp->used - 1;
                    390:     e = rlp->ranges[j];
                    391:     s = rlp->ranges[j - 1];
                    392: 
                    393:     if (c == e + 1) {
                    394:         /*
                    395:          * Extend the last range.
                    396:          */
                    397:         rlp->ranges[j] = c;
                    398:         return;
                    399:     }
                    400: 
                    401:     if (c > e + 1) {
                    402:         /*
                    403:          * Start another range on the end.
                    404:          */
                    405:         j = rlp->used;
                    406:         rlp->ranges[j] = rlp->ranges[j + 1] = c;
                    407:         rlp->used += 2;
                    408:         return;
                    409:     }
                    410: 
                    411:     if (c >= s)
                    412:       /*
                    413:        * The code is a duplicate of a code in the last range, so just return.
                    414:        */
                    415:       return;
                    416: 
                    417:     /*
                    418:      * The code should be inserted somewhere before the last range in the
                    419:      * list.  Locate the insertion point.
                    420:      */
                    421:     for (i = 0;
                    422:          i < rlp->used && c > rlp->ranges[i + 1] + 1; i += 2) ;
                    423: 
                    424:     s = rlp->ranges[i];
                    425:     e = rlp->ranges[i + 1];
                    426: 
                    427:     if (c == e + 1)
                    428:       /*
                    429:        * Simply extend the current range.
                    430:        */
                    431:       rlp->ranges[i + 1] = c;
                    432:     else if (c < s) {
                    433:         /*
                    434:          * Add a new entry before the current location.  Shift all entries
                    435:          * before the current one up by one to make room.
                    436:          */
                    437:         for (j = rlp->used; j > i; j -= 2) {
                    438:             rlp->ranges[j] = rlp->ranges[j - 2];
                    439:             rlp->ranges[j + 1] = rlp->ranges[j - 1];
                    440:         }
                    441:         rlp->ranges[i] = rlp->ranges[i + 1] = c;
                    442: 
                    443:         rlp->used += 2;
                    444:     }
                    445: }
                    446: 
                    447: static void
                    448: add_decomp(ac_uint4 code, short compat)
                    449: {
                    450:     ac_uint4 i, j, size;
                    451:     _decomp_t **pdecomps;
                    452:     ac_uint4 *pdecomps_used;
                    453:     ac_uint4 *pdecomps_size;
                    454: 
                    455:     if (compat) {
                    456:        pdecomps = &kdecomps;
                    457:        pdecomps_used = &kdecomps_used;
                    458:        pdecomps_size = &kdecomps_size;
                    459:     } else {
                    460:        pdecomps = &decomps;
                    461:        pdecomps_used = &decomps_used;
                    462:        pdecomps_size = &decomps_size;
                    463:     }
                    464:     
                    465:     /*
                    466:      * Add the code to the composite property.
                    467:      */
                    468:     if (!compat) {
                    469:        ordered_range_insert(code, "Cm", 2);
                    470:     }
                    471: 
                    472:     /*
                    473:      * Locate the insertion point for the code.
                    474:      */
                    475:     for (i = 0; i < *pdecomps_used && code > (*pdecomps)[i].code; i++) ;
                    476: 
                    477:     /*
                    478:      * Allocate space for a new decomposition.
                    479:      */
                    480:     if (*pdecomps_used == *pdecomps_size) {
                    481:         if (*pdecomps_size == 0)
                    482:           *pdecomps = (_decomp_t *) malloc(sizeof(_decomp_t) << 3);
                    483:         else
                    484:           *pdecomps = (_decomp_t *)
                    485:               realloc((char *) *pdecomps,
                    486:                       sizeof(_decomp_t) * (*pdecomps_size + 8));
                    487:         (void) memset((char *) (*pdecomps + *pdecomps_size), '\0',
                    488:                       sizeof(_decomp_t) << 3);
                    489:         *pdecomps_size += 8;
                    490:     }
                    491: 
                    492:     if (i < *pdecomps_used && code != (*pdecomps)[i].code) {
                    493:         /*
                    494:          * Shift the decomps up by one if the codes don't match.
                    495:          */
                    496:         for (j = *pdecomps_used; j > i; j--)
                    497:           (void) AC_MEMCPY((char *) &(*pdecomps)[j], (char *) &(*pdecomps)[j - 1],
                    498:                         sizeof(_decomp_t));
                    499:     }
                    500: 
                    501:     /*
                    502:      * Insert or replace a decomposition.
                    503:      */
                    504:     size = dectmp_size + (4 - (dectmp_size & 3));
                    505:     if ((*pdecomps)[i].size < size) {
                    506:         if ((*pdecomps)[i].size == 0)
                    507:           (*pdecomps)[i].decomp = (ac_uint4 *)
                    508:               malloc(sizeof(ac_uint4) * size);
                    509:         else
                    510:           (*pdecomps)[i].decomp = (ac_uint4 *)
                    511:               realloc((char *) (*pdecomps)[i].decomp,
                    512:                       sizeof(ac_uint4) * size);
                    513:         (*pdecomps)[i].size = size;
                    514:     }
                    515: 
                    516:     if ((*pdecomps)[i].code != code)
                    517:       (*pdecomps_used)++;
                    518: 
                    519:     (*pdecomps)[i].code = code;
                    520:     (*pdecomps)[i].used = dectmp_size;
                    521:     (void) AC_MEMCPY((char *) (*pdecomps)[i].decomp, (char *) dectmp,
                    522:                   sizeof(ac_uint4) * dectmp_size);
                    523: 
                    524:     /*
                    525:      * NOTICE: This needs changing later so it is more general than simply
                    526:      * pairs.  This calculation is done here to simplify allocation elsewhere.
                    527:      */
                    528:     if (!compat && dectmp_size == 2)
                    529:       comps_used++;
                    530: }
                    531: 
                    532: static void
                    533: add_title(ac_uint4 code)
                    534: {
                    535:     ac_uint4 i, j;
                    536: 
                    537:     /*
                    538:      * Always map the code to itself.
                    539:      */
                    540:     cases[2] = code;
                    541: 
                    542:     if (title_used == title_size) {
                    543:         if (title_size == 0)
                    544:           title = (_case_t *) malloc(sizeof(_case_t) << 3);
                    545:         else
                    546:           title = (_case_t *) realloc((char *) title,
                    547:                                       sizeof(_case_t) * (title_size + 8));
                    548:         title_size += 8;
                    549:     }
                    550: 
                    551:     /*
                    552:      * Locate the insertion point.
                    553:      */
                    554:     for (i = 0; i < title_used && code > title[i].key; i++) ;
                    555: 
                    556:     if (i < title_used) {
                    557:         /*
                    558:          * Shift the array up by one.
                    559:          */
                    560:         for (j = title_used; j > i; j--)
                    561:           (void) AC_MEMCPY((char *) &title[j], (char *) &title[j - 1],
                    562:                         sizeof(_case_t));
                    563:     }
                    564: 
                    565:     title[i].key = cases[2];    /* Title */
                    566:     title[i].other1 = cases[0]; /* Upper */
                    567:     title[i].other2 = cases[1]; /* Lower */
                    568: 
                    569:     title_used++;
                    570: }
                    571: 
                    572: static void
                    573: add_upper(ac_uint4 code)
                    574: {
                    575:     ac_uint4 i, j;
                    576: 
                    577:     /*
                    578:      * Always map the code to itself.
                    579:      */
                    580:     cases[0] = code;
                    581: 
                    582:     /*
                    583:      * If the title case character is not present, then make it the same as
                    584:      * the upper case.
                    585:      */
                    586:     if (cases[2] == 0)
                    587:       cases[2] = code;
                    588: 
                    589:     if (upper_used == upper_size) {
                    590:         if (upper_size == 0)
                    591:           upper = (_case_t *) malloc(sizeof(_case_t) << 3);
                    592:         else
                    593:           upper = (_case_t *) realloc((char *) upper,
                    594:                                       sizeof(_case_t) * (upper_size + 8));
                    595:         upper_size += 8;
                    596:     }
                    597: 
                    598:     /*
                    599:      * Locate the insertion point.
                    600:      */
                    601:     for (i = 0; i < upper_used && code > upper[i].key; i++) ;
                    602: 
                    603:     if (i < upper_used) {
                    604:         /*
                    605:          * Shift the array up by one.
                    606:          */
                    607:         for (j = upper_used; j > i; j--)
                    608:           (void) AC_MEMCPY((char *) &upper[j], (char *) &upper[j - 1],
                    609:                         sizeof(_case_t));
                    610:     }
                    611: 
                    612:     upper[i].key = cases[0];    /* Upper */
                    613:     upper[i].other1 = cases[1]; /* Lower */
                    614:     upper[i].other2 = cases[2]; /* Title */
                    615: 
                    616:     upper_used++;
                    617: }
                    618: 
                    619: static void
                    620: add_lower(ac_uint4 code)
                    621: {
                    622:     ac_uint4 i, j;
                    623: 
                    624:     /*
                    625:      * Always map the code to itself.
                    626:      */
                    627:     cases[1] = code;
                    628: 
                    629:     /*
                    630:      * If the title case character is empty, then make it the same as the
                    631:      * upper case.
                    632:      */
                    633:     if (cases[2] == 0)
                    634:       cases[2] = cases[0];
                    635: 
                    636:     if (lower_used == lower_size) {
                    637:         if (lower_size == 0)
                    638:           lower = (_case_t *) malloc(sizeof(_case_t) << 3);
                    639:         else
                    640:           lower = (_case_t *) realloc((char *) lower,
                    641:                                       sizeof(_case_t) * (lower_size + 8));
                    642:         lower_size += 8;
                    643:     }
                    644: 
                    645:     /*
                    646:      * Locate the insertion point.
                    647:      */
                    648:     for (i = 0; i < lower_used && code > lower[i].key; i++) ;
                    649: 
                    650:     if (i < lower_used) {
                    651:         /*
                    652:          * Shift the array up by one.
                    653:          */
                    654:         for (j = lower_used; j > i; j--)
                    655:           (void) AC_MEMCPY((char *) &lower[j], (char *) &lower[j - 1],
                    656:                         sizeof(_case_t));
                    657:     }
                    658: 
                    659:     lower[i].key = cases[1];    /* Lower */
                    660:     lower[i].other1 = cases[0]; /* Upper */
                    661:     lower[i].other2 = cases[2]; /* Title */
                    662: 
                    663:     lower_used++;
                    664: }
                    665: 
                    666: static void
                    667: ordered_ccl_insert(ac_uint4 c, ac_uint4 ccl_code)
                    668: {
                    669:     ac_uint4 i, j;
                    670: 
                    671:     if (ccl_used == ccl_size) {
                    672:         if (ccl_size == 0)
                    673:           ccl = (ac_uint4 *) malloc(sizeof(ac_uint4) * 24);
                    674:         else
                    675:           ccl = (ac_uint4 *)
                    676:               realloc((char *) ccl, sizeof(ac_uint4) * (ccl_size + 24));
                    677:         ccl_size += 24;
                    678:     }
                    679: 
                    680:     /*
                    681:      * Optimize adding the first item.
                    682:      */
                    683:     if (ccl_used == 0) {
                    684:         ccl[0] = ccl[1] = c;
                    685:         ccl[2] = ccl_code;
                    686:         ccl_used += 3;
                    687:         return;
                    688:     }
                    689: 
                    690:     /*
                    691:      * Handle the special case of extending the range on the end.  This
                    692:      * requires that the combining class codes are the same.
                    693:      */
                    694:     if (ccl_code == ccl[ccl_used - 1] && c == ccl[ccl_used - 2] + 1) {
                    695:         ccl[ccl_used - 2] = c;
                    696:         return;
                    697:     }
                    698: 
                    699:     /*
                    700:      * Handle the special case of adding another range on the end.
                    701:      */
                    702:     if (c > ccl[ccl_used - 2] + 1 ||
                    703:         (c == ccl[ccl_used - 2] + 1 && ccl_code != ccl[ccl_used - 1])) {
                    704:         ccl[ccl_used++] = c;
                    705:         ccl[ccl_used++] = c;
                    706:         ccl[ccl_used++] = ccl_code;
                    707:         return;
                    708:     }
                    709: 
                    710:     /*
                    711:      * Locate either the insertion point or range for the code.
                    712:      */
                    713:     for (i = 0; i < ccl_used && c > ccl[i + 1] + 1; i += 3) ;
                    714: 
                    715:     if (ccl_code == ccl[i + 2] && c == ccl[i + 1] + 1) {
                    716:         /*
                    717:          * Extend an existing range.
                    718:          */
                    719:         ccl[i + 1] = c;
                    720:         return;
                    721:     } else if (c < ccl[i]) {
                    722:         /*
                    723:          * Start a new range before the current location.
                    724:          */
                    725:         for (j = ccl_used; j > i; j -= 3) {
                    726:             ccl[j] = ccl[j - 3];
                    727:             ccl[j - 1] = ccl[j - 4];
                    728:             ccl[j - 2] = ccl[j - 5];
                    729:         }
                    730:         ccl[i] = ccl[i + 1] = c;
                    731:         ccl[i + 2] = ccl_code;
                    732:     }
                    733: }
                    734: 
                    735: /*
                    736:  * Adds a number if it does not already exist and returns an index value
                    737:  * multiplied by 2.
                    738:  */
                    739: static ac_uint4
                    740: make_number(short num, short denom)
                    741: {
                    742:     ac_uint4 n;
                    743: 
                    744:     /*
                    745:      * Determine if the number already exists.
                    746:      */
                    747:     for (n = 0; n < nums_used; n++) {
                    748:         if (nums[n].numerator == num && nums[n].denominator == denom)
                    749:           return n << 1;
                    750:     }
                    751: 
                    752:     if (nums_used == nums_size) {
                    753:         if (nums_size == 0)
                    754:           nums = (_num_t *) malloc(sizeof(_num_t) << 3);
                    755:         else
                    756:           nums = (_num_t *) realloc((char *) nums,
                    757:                                     sizeof(_num_t) * (nums_size + 8));
                    758:         nums_size += 8;
                    759:     }
                    760: 
                    761:     n = nums_used++;
                    762:     nums[n].numerator = num;
                    763:     nums[n].denominator = denom;
                    764: 
                    765:     return n << 1;
                    766: }
                    767: 
                    768: static void
                    769: add_number(ac_uint4 code, short num, short denom)
                    770: {
                    771:     ac_uint4 i, j;
                    772: 
                    773:     /*
                    774:      * Insert the code in order.
                    775:      */
                    776:     for (i = 0; i < ncodes_used && code > ncodes[i].code; i++) ;
                    777: 
                    778:     /*
                    779:      * Handle the case of the codes matching and simply replace the number
                    780:      * that was there before.
                    781:      */
                    782:     if (i < ncodes_used && code == ncodes[i].code) {
                    783:         ncodes[i].idx = make_number(num, denom);
                    784:         return;
                    785:     }
                    786: 
                    787:     /*
                    788:      * Resize the array if necessary.
                    789:      */
                    790:     if (ncodes_used == ncodes_size) {
                    791:         if (ncodes_size == 0)
                    792:           ncodes = (_codeidx_t *) malloc(sizeof(_codeidx_t) << 3);
                    793:         else
                    794:           ncodes = (_codeidx_t *)
                    795:               realloc((char *) ncodes, sizeof(_codeidx_t) * (ncodes_size + 8));
                    796: 
                    797:         ncodes_size += 8;
                    798:     }
                    799: 
                    800:     /*
                    801:      * Shift things around to insert the code if necessary.
                    802:      */
                    803:     if (i < ncodes_used) {
                    804:         for (j = ncodes_used; j > i; j--) {
                    805:             ncodes[j].code = ncodes[j - 1].code;
                    806:             ncodes[j].idx = ncodes[j - 1].idx;
                    807:         }
                    808:     }
                    809:     ncodes[i].code = code;
                    810:     ncodes[i].idx = make_number(num, denom);
                    811: 
                    812:     ncodes_used++;
                    813: }
                    814: 
                    815: /*
                    816:  * This routine assumes that the line is a valid Unicode Character Database
                    817:  * entry.
                    818:  */
                    819: static void
                    820: read_cdata(FILE *in)
                    821: {
                    822:     ac_uint4 i, lineno, skip, code, ccl_code;
                    823:     short wnum, neg, number[2], compat;
                    824:     char line[512], *s, *e;
                    825: 
                    826:     lineno = skip = 0;
                    827:     while (fgets(line, sizeof(line), in)) {
                    828:        if( (s=strchr(line, '\n')) ) *s = '\0';
                    829:         lineno++;
                    830: 
                    831:         /*
                    832:          * Skip blank lines and lines that start with a '#'.
                    833:          */
                    834:         if (line[0] == 0 || line[0] == '#')
                    835:           continue;
                    836: 
                    837:         /*
                    838:          * If lines need to be skipped, do it here.
                    839:          */
                    840:         if (skip) {
                    841:             skip--;
                    842:             continue;
                    843:         }
                    844: 
                    845:         /*
                    846:          * Collect the code.  The code can be up to 6 hex digits in length to
                    847:          * allow surrogates to be specified.
                    848:          */
                    849:         for (s = line, i = code = 0; *s != ';' && i < 6; i++, s++) {
                    850:             code <<= 4;
                    851:             if (*s >= '0' && *s <= '9')
                    852:               code += *s - '0';
                    853:             else if (*s >= 'A' && *s <= 'F')
                    854:               code += (*s - 'A') + 10;
                    855:             else if (*s >= 'a' && *s <= 'f')
                    856:               code += (*s - 'a') + 10;
                    857:         }
                    858: 
                    859:         /*
                    860:          * Handle the following special cases:
                    861:          * 1. 4E00-9FA5 CJK Ideographs.
                    862:          * 2. AC00-D7A3 Hangul Syllables.
                    863:          * 3. D800-DFFF Surrogates.
                    864:          * 4. E000-F8FF Private Use Area.
                    865:          * 5. F900-FA2D Han compatibility.
                    866:         * ...Plus additional ranges in newer Unicode versions...
                    867:          */
                    868:         switch (code) {
                    869:          case 0x3400:
                    870:            /* CJK Ideograph Extension A */
                    871:             add_range(0x3400, 0x4db5, "Lo", "L");
                    872: 
                    873:             add_range(0x3400, 0x4db5, "Cp", 0);
                    874: 
                    875:            skip = 1;
                    876:            break;
                    877:           case 0x4e00:
                    878:             /*
                    879:              * The Han ideographs.
                    880:              */
                    881:             add_range(0x4e00, 0x9fff, "Lo", "L");
                    882: 
                    883:             /*
                    884:              * Add the characters to the defined category.
                    885:              */
                    886:             add_range(0x4e00, 0x9fa5, "Cp", 0);
                    887: 
                    888:             skip = 1;
                    889:             break;
                    890:           case 0xac00:
                    891:             /*
                    892:              * The Hangul syllables.
                    893:              */
                    894:             add_range(0xac00, 0xd7a3, "Lo", "L");
                    895: 
                    896:             /*
                    897:              * Add the characters to the defined category.
                    898:              */
                    899:             add_range(0xac00, 0xd7a3, "Cp", 0);
                    900: 
                    901:             skip = 1;
                    902:             break;
                    903:           case 0xd800:
                    904:             /*
                    905:              * Make a range of all surrogates and assume some default
                    906:              * properties.
                    907:              */
                    908:             add_range(0x010000, 0x10ffff, "Cs", "L");
                    909:             skip = 5;
                    910:             break;
                    911:           case 0xe000:
                    912:             /*
                    913:              * The Private Use area.  Add with a default set of properties.
                    914:              */
                    915:             add_range(0xe000, 0xf8ff, "Co", "L");
                    916:             skip = 1;
                    917:             break;
                    918:           case 0xf900:
                    919:             /*
                    920:              * The CJK compatibility area.
                    921:              */
                    922:             add_range(0xf900, 0xfaff, "Lo", "L");
                    923: 
                    924:             /*
                    925:              * Add the characters to the defined category.
                    926:              */
                    927:             add_range(0xf900, 0xfaff, "Cp", 0);
                    928: 
                    929:             skip = 1;
                    930:            break;
                    931:          case 0x20000:
                    932:            /* CJK Ideograph Extension B */
                    933:             add_range(0x20000, 0x2a6d6, "Lo", "L");
                    934: 
                    935:             add_range(0x20000, 0x2a6d6, "Cp", 0);
                    936: 
                    937:            skip = 1;
                    938:            break;
                    939:          case 0xf0000:
                    940:            /* Plane 15 private use */
                    941:            add_range(0xf0000, 0xffffd, "Co", "L");
                    942:            skip = 1;
                    943:            break;
                    944: 
                    945:          case 0x100000:
                    946:            /* Plane 16 private use */
                    947:            add_range(0x100000, 0x10fffd, "Co", "L");
                    948:            skip = 1;
                    949:            break;
                    950:         }
                    951: 
                    952:         if (skip)
                    953:           continue;
                    954: 
                    955:         /*
                    956:          * Add the code to the defined category.
                    957:          */
                    958:         ordered_range_insert(code, "Cp", 2);
                    959: 
                    960:         /*
                    961:          * Locate the first character property field.
                    962:          */
                    963:         for (i = 0; *s != 0 && i < 2; s++) {
                    964:             if (*s == ';')
                    965:               i++;
                    966:         }
                    967:         for (e = s; *e && *e != ';'; e++) ;
                    968:     
                    969:         ordered_range_insert(code, s, e - s);
                    970: 
                    971:         /*
                    972:          * Locate the combining class code.
                    973:          */
                    974:         for (s = e; *s != 0 && i < 3; s++) {
                    975:             if (*s == ';')
                    976:               i++;
                    977:         }
                    978: 
                    979:         /*
                    980:          * Convert the combining class code from decimal.
                    981:          */
                    982:         for (ccl_code = 0, e = s; *e && *e != ';'; e++)
                    983:           ccl_code = (ccl_code * 10) + (*e - '0');
                    984: 
                    985:         /*
                    986:          * Add the code if it not 0.
                    987:          */
                    988:         if (ccl_code != 0)
                    989:           ordered_ccl_insert(code, ccl_code);
                    990: 
                    991:         /*
                    992:          * Locate the second character property field.
                    993:          */
                    994:         for (s = e; *s != 0 && i < 4; s++) {
                    995:             if (*s == ';')
                    996:               i++;
                    997:         }
                    998:         for (e = s; *e && *e != ';'; e++) ;
                    999: 
                   1000:         ordered_range_insert(code, s, e - s);
                   1001: 
                   1002:         /*
                   1003:          * Check for a decomposition.
                   1004:          */
                   1005:         s = ++e;
                   1006:         if (*s != ';') {
                   1007:            compat = *s == '<';
                   1008:            if (compat) {
                   1009:                /*
                   1010:                 * Skip compatibility formatting tag.
                   1011:                 */
                   1012:                while (*s++ != '>');
                   1013:            }
                   1014:             /*
                   1015:              * Collect the codes of the decomposition.
                   1016:              */
                   1017:             for (dectmp_size = 0; *s != ';'; ) {
                   1018:                 /*
                   1019:                  * Skip all leading non-hex digits.
                   1020:                  */
                   1021:                 while (!ishdigit(*s))
                   1022:                  s++;
                   1023: 
                   1024:                 for (dectmp[dectmp_size] = 0; ishdigit(*s); s++) {
                   1025:                     dectmp[dectmp_size] <<= 4;
                   1026:                     if (*s >= '0' && *s <= '9')
                   1027:                       dectmp[dectmp_size] += *s - '0';
                   1028:                     else if (*s >= 'A' && *s <= 'F')
                   1029:                       dectmp[dectmp_size] += (*s - 'A') + 10;
                   1030:                     else if (*s >= 'a' && *s <= 'f')
                   1031:                       dectmp[dectmp_size] += (*s - 'a') + 10;
                   1032:                 }
                   1033:                 dectmp_size++;
                   1034:             }
                   1035: 
                   1036:             /*
                   1037:              * If there are any codes in the temporary decomposition array,
                   1038:              * then add the character with its decomposition.
                   1039:              */
                   1040:             if (dectmp_size > 0) {
                   1041:                if (!compat) {
                   1042:                    add_decomp(code, 0);
                   1043:                }
                   1044:                add_decomp(code, 1);
                   1045:            }
                   1046:         }
                   1047: 
                   1048:         /*
                   1049:          * Skip to the number field.
                   1050:          */
                   1051:         for (i = 0; i < 3 && *s; s++) {
                   1052:             if (*s == ';')
                   1053:               i++;
                   1054:         }
                   1055: 
                   1056:         /*
                   1057:          * Scan the number in.
                   1058:          */
                   1059:         number[0] = number[1] = 0;
                   1060:         for (e = s, neg = wnum = 0; *e && *e != ';'; e++) {
                   1061:             if (*e == '-') {
                   1062:                 neg = 1;
                   1063:                 continue;
                   1064:             }
                   1065: 
                   1066:             if (*e == '/') {
                   1067:                 /*
                   1068:                  * Move the the denominator of the fraction.
                   1069:                  */
                   1070:                 if (neg)
                   1071:                   number[wnum] *= -1;
                   1072:                 neg = 0;
                   1073:                 e++;
                   1074:                 wnum++;
                   1075:             }
                   1076:             number[wnum] = (number[wnum] * 10) + (*e - '0');
                   1077:         }
                   1078: 
                   1079:         if (e > s) {
                   1080:             /*
                   1081:              * Adjust the denominator in case of integers and add the number.
                   1082:              */
                   1083:             if (wnum == 0)
                   1084:               number[1] = 1;
                   1085: 
                   1086:             add_number(code, number[0], number[1]);
                   1087:         }
                   1088: 
                   1089:         /*
                   1090:          * Skip to the start of the possible case mappings.
                   1091:          */
                   1092:         for (s = e, i = 0; i < 4 && *s; s++) {
                   1093:             if (*s == ';')
                   1094:               i++;
                   1095:         }
                   1096: 
                   1097:         /*
                   1098:          * Collect the case mappings.
                   1099:          */
                   1100:         cases[0] = cases[1] = cases[2] = 0;
                   1101:         for (i = 0; i < 3; i++) {
                   1102:             while (ishdigit(*s)) {
                   1103:                 cases[i] <<= 4;
                   1104:                 if (*s >= '0' && *s <= '9')
                   1105:                   cases[i] += *s - '0';
                   1106:                 else if (*s >= 'A' && *s <= 'F')
                   1107:                   cases[i] += (*s - 'A') + 10;
                   1108:                 else if (*s >= 'a' && *s <= 'f')
                   1109:                   cases[i] += (*s - 'a') + 10;
                   1110:                 s++;
                   1111:             }
                   1112:             if (*s == ';')
                   1113:               s++;
                   1114:         }
                   1115:         if (cases[0] && cases[1])
                   1116:           /*
                   1117:            * Add the upper and lower mappings for a title case character.
                   1118:            */
                   1119:           add_title(code);
                   1120:         else if (cases[1])
                   1121:           /*
                   1122:            * Add the lower and title case mappings for the upper case
                   1123:            * character.
                   1124:            */
                   1125:           add_upper(code);
                   1126:         else if (cases[0])
                   1127:           /*
                   1128:            * Add the upper and title case mappings for the lower case
                   1129:            * character.
                   1130:            */
                   1131:           add_lower(code);
                   1132:     }
                   1133: }
                   1134: 
                   1135: #if 0
                   1136: 
                   1137: static _decomp_t *
                   1138: find_decomp(ac_uint4 code, short compat)
                   1139: {
                   1140:     long l, r, m;
                   1141:     _decomp_t *decs;
                   1142:     
                   1143:     l = 0;
                   1144:     r = (compat ? kdecomps_used : decomps_used) - 1;
                   1145:     decs = compat ? kdecomps : decomps;
                   1146:     while (l <= r) {
                   1147:         m = (l + r) >> 1;
                   1148:         if (code > decs[m].code)
                   1149:           l = m + 1;
                   1150:         else if (code < decs[m].code)
                   1151:           r = m - 1;
                   1152:         else
                   1153:           return &decs[m];
                   1154:     }
                   1155:     return 0;
                   1156: }
                   1157: 
                   1158: static void
                   1159: decomp_it(_decomp_t *d, short compat)
                   1160: {
                   1161:     ac_uint4 i;
                   1162:     _decomp_t *dp;
                   1163: 
                   1164:     for (i = 0; i < d->used; i++) {
                   1165:         if ((dp = find_decomp(d->decomp[i], compat)) != 0)
                   1166:           decomp_it(dp, compat);
                   1167:         else
                   1168:           dectmp[dectmp_size++] = d->decomp[i];
                   1169:     }
                   1170: }
                   1171: 
                   1172: 
                   1173: /*
                   1174:  * Expand all decompositions by recursively decomposing each character
                   1175:  * in the decomposition.
                   1176:  */
                   1177: static void
                   1178: expand_decomp(void)
                   1179: {
                   1180:     ac_uint4 i;
                   1181: 
                   1182:     for (i = 0; i < decomps_used; i++) {
                   1183:         dectmp_size = 0;
                   1184:         decomp_it(&decomps[i], 0);
                   1185:         if (dectmp_size > 0)
                   1186:           add_decomp(decomps[i].code, 0);
                   1187:     }
                   1188: 
                   1189:     for (i = 0; i < kdecomps_used; i++) {
                   1190:         dectmp_size = 0;
                   1191:         decomp_it(&kdecomps[i], 1);
                   1192:         if (dectmp_size > 0)
                   1193:           add_decomp(kdecomps[i].code, 1);
                   1194:     }
                   1195: }
                   1196: 
                   1197: static int
                   1198: cmpcomps(const void *v_comp1, const void *v_comp2)
                   1199: {
                   1200:        const _comp_t *comp1 = v_comp1, *comp2 = v_comp2;
                   1201:     long diff = comp1->code1 - comp2->code1;
                   1202: 
                   1203:     if (!diff)
                   1204:        diff = comp1->code2 - comp2->code2;
                   1205:     return (int) diff;
                   1206: }
                   1207: 
                   1208: #endif
                   1209: 
                   1210: /*
                   1211:  * Load composition exclusion data
                   1212:  */
                   1213: static void
                   1214: read_compexdata(FILE *in)
                   1215: {
                   1216:     ac_uint2 i;
                   1217:     ac_uint4 code;
                   1218:     char line[512], *s;
                   1219: 
                   1220:     (void) memset((char *) compexs, 0, sizeof(compexs));
                   1221: 
                   1222:     while (fgets(line, sizeof(line), in)) {
                   1223:        if( (s=strchr(line, '\n')) ) *s = '\0';
                   1224:         /*
                   1225:          * Skip blank lines and lines that start with a '#'.
                   1226:          */
                   1227:         if (line[0] == 0 || line[0] == '#')
                   1228:            continue;
                   1229: 
                   1230:        /*
                   1231:          * Collect the code.  Assume max 6 digits
                   1232:          */
                   1233: 
                   1234:        for (s = line, i = code = 0; *s != '#' && i < 6; i++, s++) {
                   1235:            if (isspace((unsigned char)*s)) break;
                   1236:             code <<= 4;
                   1237:             if (*s >= '0' && *s <= '9')
                   1238:                code += *s - '0';
                   1239:             else if (*s >= 'A' && *s <= 'F')
                   1240:                code += (*s - 'A') + 10;
                   1241:             else if (*s >= 'a' && *s <= 'f')
                   1242:                code += (*s - 'a') + 10;
                   1243:         }
                   1244:         COMPEX_SET(code);
                   1245:     }
                   1246: }
                   1247: 
                   1248: #if 0
                   1249: 
                   1250: /*
                   1251:  * Creates array of compositions from decomposition array
                   1252:  */
                   1253: static void
                   1254: create_comps(void)
                   1255: {
                   1256:     ac_uint4 i, cu;
                   1257: 
                   1258:     comps = (_comp_t *) malloc(comps_used * sizeof(_comp_t));
                   1259: 
                   1260:     for (i = cu = 0; i < decomps_used; i++) {
                   1261:        if (decomps[i].used != 2 || COMPEX_TEST(decomps[i].code))
                   1262:            continue;
                   1263:        comps[cu].comp = decomps[i].code;
                   1264:        comps[cu].count = 2;
                   1265:        comps[cu].code1 = decomps[i].decomp[0];
                   1266:        comps[cu].code2 = decomps[i].decomp[1];
                   1267:        cu++;
                   1268:     }
                   1269:     comps_used = cu;
                   1270:     qsort(comps, comps_used, sizeof(_comp_t), cmpcomps);
                   1271: }
                   1272: 
                   1273: #endif
                   1274: 
                   1275: #if HARDCODE_DATA
                   1276: static void
                   1277: write_case(FILE *out, _case_t *tab, int num, int first)
                   1278: {
                   1279:     int i;
                   1280: 
                   1281:     for (i=0; i<num; i++) {
                   1282:        if (first) first = 0;
                   1283:        else fprintf(out, ",");
                   1284:        fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx",
                   1285:                (unsigned long) tab[i].key, (unsigned long) tab[i].other1,
                   1286:                (unsigned long) tab[i].other2);
                   1287:     }
                   1288: }
                   1289: 
                   1290: #define PREF "static const "
                   1291: 
                   1292: #endif
                   1293: 
                   1294: static void
                   1295: write_cdata(char *opath)
                   1296: {
                   1297:     FILE *out;
                   1298:        ac_uint4 bytes;
                   1299:     ac_uint4 i, idx, nprops;
                   1300: #if !(HARDCODE_DATA)
                   1301:     ac_uint2 casecnt[2];
                   1302: #endif
                   1303:     char path[BUFSIZ];
                   1304: #if HARDCODE_DATA
                   1305:     int j, k;
                   1306: 
                   1307:     /*****************************************************************
                   1308:      *
                   1309:      * Generate the ctype data.
                   1310:      *
                   1311:      *****************************************************************/
                   1312: 
                   1313:     /*
                   1314:      * Open the output file.
                   1315:      */
                   1316:     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "uctable.h", opath);
                   1317:     if ((out = fopen(path, "w")) == 0)
                   1318:       return;
                   1319: #else
                   1320:     /*
                   1321:      * Open the ctype.dat file.
                   1322:      */
                   1323:     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "ctype.dat", opath);
                   1324:     if ((out = fopen(path, "wb")) == 0)
                   1325:       return;
                   1326: #endif
                   1327: 
                   1328:     /*
                   1329:      * Collect the offsets for the properties.  The offsets array is
                   1330:      * on a 4-byte boundary to keep things efficient for architectures
                   1331:      * that need such a thing.
                   1332:      */
                   1333:     for (i = idx = 0; i < NUMPROPS; i++) {
                   1334:         propcnt[i] = (proptbl[i].used != 0) ? idx : 0xffff;
                   1335:         idx += proptbl[i].used;
                   1336:     }
                   1337: 
                   1338:     /*
                   1339:      * Add the sentinel index which is used by the binary search as the upper
                   1340:      * bound for a search.
                   1341:      */
                   1342:     propcnt[i] = idx;
                   1343: 
                   1344:     /*
                   1345:      * Record the actual number of property lists.  This may be different than
                   1346:      * the number of offsets actually written because of aligning on a 4-byte
                   1347:      * boundary.
                   1348:      */
                   1349:     hdr[1] = NUMPROPS;
                   1350: 
                   1351:     /*
                   1352:      * Calculate the byte count needed and pad the property counts array to a
                   1353:      * 4-byte boundary.
                   1354:      */
                   1355:     if ((bytes = sizeof(ac_uint2) * (NUMPROPS + 1)) & 3)
                   1356:       bytes += 4 - (bytes & 3);
                   1357:     nprops = bytes / sizeof(ac_uint2);
                   1358:     bytes += sizeof(ac_uint4) * idx;
                   1359: 
                   1360: #if HARDCODE_DATA
                   1361:     fprintf(out,
                   1362:         "/* This file was generated from a modified version UCData's ucgendat.\n"
                   1363:         " *\n"
                   1364:         " *                     DO NOT EDIT THIS FILE!\n"
                   1365:         " * \n"
                   1366:         " * Instead, compile ucgendat.c (bundled with PHP in ext/mbstring), download\n"
                   1367:         " * the appropriate UnicodeData-x.x.x.txt and CompositionExclusions-x.x.x.txt\n"
                   1368:         " * files from  http://www.unicode.org/Public/ and run this program.\n"
                   1369:         " *\n"
                   1370:         " * More information can be found in the UCData package. Unfortunately,\n"
                   1371:         " * the project's page doesn't seem to be live anymore, so you can use\n"
                   1372:         " * OpenLDAPs modified copy (look in libraries/liblunicode/ucdata) */\n\n");
                   1373: 
                   1374:     fprintf(out, PREF "unsigned short _ucprop_size = %d;\n\n", NUMPROPS);
                   1375: 
                   1376:     fprintf(out, PREF "unsigned short  _ucprop_offsets[] = {");
                   1377: 
                   1378:     for (i = 0; i<nprops; i++) {
                   1379:        if (i) fprintf(out, ",");
                   1380:        if (!(i&7)) fprintf(out, "\n\t");
                   1381:        else fprintf(out, " ");
                   1382:        fprintf(out, "0x%04x", propcnt[i]);
                   1383:     }
                   1384:     fprintf(out, "\n};\n\n");
                   1385: 
                   1386:     fprintf(out, PREF "unsigned int _ucprop_ranges[] = {");
                   1387: 
                   1388:     k = 0;
                   1389:     for (i = 0; i < NUMPROPS; i++) {
                   1390:        if (proptbl[i].used > 0) {
                   1391:          for (j=0; j<proptbl[i].used; j++) {
                   1392:            if (k) fprintf(out, ",");
                   1393:            if (!(k&3)) fprintf(out,"\n\t");
                   1394:            else fprintf(out, " ");
                   1395:            k++;
                   1396:            fprintf(out, "0x%08lx", (unsigned long) proptbl[i].ranges[j]);
                   1397:          }
                   1398:        }
                   1399:     }
                   1400:     fprintf(out, "\n};\n\n");
                   1401: #else
                   1402:     /*
                   1403:      * Write the header.
                   1404:      */
                   1405:     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
                   1406: 
                   1407:     /*
                   1408:      * Write the byte count.
                   1409:      */
                   1410:     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
                   1411: 
                   1412:     /*
                   1413:      * Write the property list counts.
                   1414:      */
                   1415:     fwrite((char *) propcnt, sizeof(ac_uint2), nprops, out);
                   1416: 
                   1417:     /*
                   1418:      * Write the property lists.
                   1419:      */
                   1420:     for (i = 0; i < NUMPROPS; i++) {
                   1421:         if (proptbl[i].used > 0)
                   1422:           fwrite((char *) proptbl[i].ranges, sizeof(ac_uint4),
                   1423:                  proptbl[i].used, out);
                   1424:     }
                   1425: 
                   1426:     fclose(out);
                   1427: #endif
                   1428: 
                   1429:     /*****************************************************************
                   1430:      *
                   1431:      * Generate the case mapping data.
                   1432:      *
                   1433:      *****************************************************************/
                   1434: 
                   1435: #if HARDCODE_DATA
                   1436:     fprintf(out, PREF "unsigned int _uccase_size = %ld;\n\n",
                   1437:         (long) (upper_used + lower_used + title_used));
                   1438: 
                   1439:     fprintf(out,
                   1440:         "/* Starting indexes of the case tables\n"
                   1441:         " * UpperIndex = 0\n"
                   1442:         " * LowerIndex = _uccase_len[0]\n"
                   1443:         " * TitleIndex = LowerIndex + _uccase_len[1] */\n\n");
                   1444:     fprintf(out, PREF "unsigned short _uccase_len[2] = {%ld, %ld};\n\n",
                   1445:         (long) upper_used * 3, (long) lower_used * 3);
                   1446:     fprintf(out, PREF "unsigned int _uccase_map[] = {");
                   1447: 
                   1448:     if (upper_used > 0)
                   1449:       /*
                   1450:        * Write the upper case table.
                   1451:        */
                   1452:       write_case(out, upper, upper_used, 1);
                   1453: 
                   1454:     if (lower_used > 0)
                   1455:       /*
                   1456:        * Write the lower case table.
                   1457:        */
                   1458:       write_case(out, lower, lower_used, !upper_used);
                   1459: 
                   1460:     if (title_used > 0)
                   1461:       /*
                   1462:        * Write the title case table.
                   1463:        */
                   1464:       write_case(out, title, title_used, !(upper_used||lower_used));
                   1465: 
                   1466:     if (!(upper_used || lower_used || title_used))
                   1467:        fprintf(out, "\t0");
                   1468: 
                   1469:     fprintf(out, "\n};\n\n");
                   1470: #else
                   1471:     /*
                   1472:      * Open the case.dat file.
                   1473:      */
                   1474:     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "case.dat", opath);
                   1475:     if ((out = fopen(path, "wb")) == 0)
                   1476:       return;
                   1477: 
                   1478:     /*
                   1479:      * Write the case mapping tables.
                   1480:      */
                   1481:     hdr[1] = upper_used + lower_used + title_used;
                   1482:     casecnt[0] = upper_used;
                   1483:     casecnt[1] = lower_used;
                   1484: 
                   1485:     /*
                   1486:      * Write the header.
                   1487:      */
                   1488:     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
                   1489: 
                   1490:     /*
                   1491:      * Write the upper and lower case table sizes.
                   1492:      */
                   1493:     fwrite((char *) casecnt, sizeof(ac_uint2), 2, out);
                   1494: 
                   1495:     if (upper_used > 0)
                   1496:       /*
                   1497:        * Write the upper case table.
                   1498:        */
                   1499:       fwrite((char *) upper, sizeof(_case_t), upper_used, out);
                   1500: 
                   1501:     if (lower_used > 0)
                   1502:       /*
                   1503:        * Write the lower case table.
                   1504:        */
                   1505:       fwrite((char *) lower, sizeof(_case_t), lower_used, out);
                   1506: 
                   1507:     if (title_used > 0)
                   1508:       /*
                   1509:        * Write the title case table.
                   1510:        */
                   1511:       fwrite((char *) title, sizeof(_case_t), title_used, out);
                   1512: 
                   1513:     fclose(out);
                   1514: #endif
                   1515: 
                   1516: #if 0
                   1517: 
                   1518:     /*****************************************************************
                   1519:      *
                   1520:      * Generate the composition data.
                   1521:      *
                   1522:      *****************************************************************/
                   1523:     
                   1524:     /*
                   1525:      * Create compositions from decomposition data
                   1526:      */
                   1527:     create_comps();
                   1528:     
                   1529: #if HARDCODE_DATA
                   1530:     fprintf(out, PREF "ac_uint4 _uccomp_size = %ld;\n\n",
                   1531:         comps_used * 4L);
                   1532: 
                   1533:     fprintf(out, PREF "ac_uint4 _uccomp_data[] = {");
                   1534: 
                   1535:      /*
                   1536:       * Now, if comps exist, write them out.
                   1537:       */
                   1538:     if (comps_used > 0) {
                   1539:        for (i=0; i<comps_used; i++) {
                   1540:            if (i) fprintf(out, ",");
                   1541:            fprintf(out, "\n\t0x%08lx, 0x%08lx, 0x%08lx, 0x%08lx",
                   1542:                (unsigned long) comps[i].comp, (unsigned long) comps[i].count,
                   1543:                (unsigned long) comps[i].code1, (unsigned long) comps[i].code2);
                   1544:        }
                   1545:     } else {
                   1546:        fprintf(out, "\t0");
                   1547:     }
                   1548:     fprintf(out, "\n};\n\n");
                   1549: #else
                   1550:     /*
                   1551:      * Open the comp.dat file.
                   1552:      */
                   1553:     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "comp.dat", opath);
                   1554:     if ((out = fopen(path, "wb")) == 0)
                   1555:        return;
                   1556:     
                   1557:     /*
                   1558:      * Write the header.
                   1559:      */
                   1560:     hdr[1] = (ac_uint2) comps_used * 4;
                   1561:     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
                   1562:     
                   1563:     /*
                   1564:      * Write out the byte count to maintain header size.
                   1565:      */
                   1566:     bytes = comps_used * sizeof(_comp_t);
                   1567:     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
                   1568:     
                   1569:     /*
                   1570:      * Now, if comps exist, write them out.
                   1571:      */
                   1572:     if (comps_used > 0)
                   1573:         fwrite((char *) comps, sizeof(_comp_t), comps_used, out);
                   1574:     
                   1575:     fclose(out);
                   1576: #endif
                   1577:     
                   1578:     /*****************************************************************
                   1579:      *
                   1580:      * Generate the decomposition data.
                   1581:      *
                   1582:      *****************************************************************/
                   1583: 
                   1584:     /*
                   1585:      * Fully expand all decompositions before generating the output file.
                   1586:      */
                   1587:     expand_decomp();
                   1588: 
                   1589: #if HARDCODE_DATA
                   1590:     fprintf(out, PREF "ac_uint4 _ucdcmp_size = %ld;\n\n",
                   1591:         decomps_used * 2L);
                   1592: 
                   1593:     fprintf(out, PREF "ac_uint4 _ucdcmp_nodes[] = {");
                   1594: 
                   1595:     if (decomps_used) {
                   1596:        /*
                   1597:         * Write the list of decomp nodes.
                   1598:         */
                   1599:        for (i = idx = 0; i < decomps_used; i++) {
                   1600:            fprintf(out, "\n\t0x%08lx, 0x%08lx,",
                   1601:                (unsigned long) decomps[i].code, (unsigned long) idx);
                   1602:            idx += decomps[i].used;
                   1603:        }
                   1604: 
                   1605:        /*
                   1606:         * Write the sentinel index as the last decomp node.
                   1607:         */
                   1608:        fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx);
                   1609: 
                   1610:        fprintf(out, PREF "ac_uint4 _ucdcmp_decomp[] = {");
                   1611:        /*
                   1612:         * Write the decompositions themselves.
                   1613:         */
                   1614:        k = 0;
                   1615:        for (i = 0; i < decomps_used; i++)
                   1616:          for (j=0; j<decomps[i].used; j++) {
                   1617:            if (k) fprintf(out, ",");
                   1618:            if (!(k&3)) fprintf(out,"\n\t");
                   1619:            else fprintf(out, " ");
                   1620:            k++;
                   1621:            fprintf(out, "0x%08lx", (unsigned long) decomps[i].decomp[j]);
                   1622:          }
                   1623:        fprintf(out, "\n};\n\n");
                   1624:     }
                   1625: #else
                   1626:     /*
                   1627:      * Open the decomp.dat file.
                   1628:      */
                   1629:     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "decomp.dat", opath);
                   1630:     if ((out = fopen(path, "wb")) == 0)
                   1631:       return;
                   1632: 
                   1633:     hdr[1] = decomps_used;
                   1634: 
                   1635:     /*
                   1636:      * Write the header.
                   1637:      */
                   1638:     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
                   1639: 
                   1640:     /*
                   1641:      * Write a temporary byte count which will be calculated as the
                   1642:      * decompositions are written out.
                   1643:      */
                   1644:     bytes = 0;
                   1645:     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
                   1646: 
                   1647:     if (decomps_used) {
                   1648:         /*
                   1649:          * Write the list of decomp nodes.
                   1650:          */
                   1651:         for (i = idx = 0; i < decomps_used; i++) {
                   1652:             fwrite((char *) &decomps[i].code, sizeof(ac_uint4), 1, out);
                   1653:             fwrite((char *) &idx, sizeof(ac_uint4), 1, out);
                   1654:             idx += decomps[i].used;
                   1655:         }
                   1656: 
                   1657:         /*
                   1658:          * Write the sentinel index as the last decomp node.
                   1659:          */
                   1660:         fwrite((char *) &idx, sizeof(ac_uint4), 1, out);
                   1661: 
                   1662:         /*
                   1663:          * Write the decompositions themselves.
                   1664:          */
                   1665:         for (i = 0; i < decomps_used; i++)
                   1666:           fwrite((char *) decomps[i].decomp, sizeof(ac_uint4),
                   1667:                  decomps[i].used, out);
                   1668: 
                   1669:         /*
                   1670:          * Seek back to the beginning and write the byte count.
                   1671:          */
                   1672:         bytes = (sizeof(ac_uint4) * idx) +
                   1673:             (sizeof(ac_uint4) * ((hdr[1] << 1) + 1));
                   1674:         fseek(out, sizeof(ac_uint2) << 1, 0L);
                   1675:         fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
                   1676: 
                   1677:         fclose(out);
                   1678:     }
                   1679: #endif
                   1680: 
                   1681: #ifdef HARDCODE_DATA
                   1682:     fprintf(out, PREF "ac_uint4 _uckdcmp_size = %ld;\n\n",
                   1683:         kdecomps_used * 2L);
                   1684: 
                   1685:     fprintf(out, PREF "ac_uint4 _uckdcmp_nodes[] = {");
                   1686: 
                   1687:     if (kdecomps_used) {
                   1688:        /*
                   1689:         * Write the list of kdecomp nodes.
                   1690:         */
                   1691:        for (i = idx = 0; i < kdecomps_used; i++) {
                   1692:            fprintf(out, "\n\t0x%08lx, 0x%08lx,",
                   1693:                (unsigned long) kdecomps[i].code, (unsigned long) idx);
                   1694:            idx += kdecomps[i].used;
                   1695:        }
                   1696: 
                   1697:        /*
                   1698:         * Write the sentinel index as the last decomp node.
                   1699:         */
                   1700:        fprintf(out, "\n\t0x%08lx\n};\n\n", (unsigned long) idx);
                   1701: 
                   1702:        fprintf(out, PREF "ac_uint4 _uckdcmp_decomp[] = {");
                   1703: 
                   1704:        /*
                   1705:         * Write the decompositions themselves.
                   1706:         */
                   1707:        k = 0;
                   1708:        for (i = 0; i < kdecomps_used; i++)
                   1709:          for (j=0; j<kdecomps[i].used; j++) {
                   1710:            if (k) fprintf(out, ",");
                   1711:            if (!(k&3)) fprintf(out,"\n\t");
                   1712:            else fprintf(out, " ");
                   1713:            k++;
                   1714:            fprintf(out, "0x%08lx", (unsigned long) kdecomps[i].decomp[j]);
                   1715:          }
                   1716:        fprintf(out, "\n};\n\n");
                   1717:     }
                   1718: #else
                   1719:     /*
                   1720:      * Open the kdecomp.dat file.
                   1721:      */
                   1722:     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "kdecomp.dat", opath);
                   1723:     if ((out = fopen(path, "wb")) == 0)
                   1724:       return;
                   1725: 
                   1726:     hdr[1] = kdecomps_used;
                   1727: 
                   1728:     /*
                   1729:      * Write the header.
                   1730:      */
                   1731:     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
                   1732: 
                   1733:     /*
                   1734:      * Write a temporary byte count which will be calculated as the
                   1735:      * decompositions are written out.
                   1736:      */
                   1737:     bytes = 0;
                   1738:     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
                   1739: 
                   1740:     if (kdecomps_used) {
                   1741:         /*
                   1742:          * Write the list of kdecomp nodes.
                   1743:          */
                   1744:         for (i = idx = 0; i < kdecomps_used; i++) {
                   1745:             fwrite((char *) &kdecomps[i].code, sizeof(ac_uint4), 1, out);
                   1746:             fwrite((char *) &idx, sizeof(ac_uint4), 1, out);
                   1747:             idx += kdecomps[i].used;
                   1748:         }
                   1749: 
                   1750:         /*
                   1751:          * Write the sentinel index as the last decomp node.
                   1752:          */
                   1753:         fwrite((char *) &idx, sizeof(ac_uint4), 1, out);
                   1754: 
                   1755:         /*
                   1756:          * Write the decompositions themselves.
                   1757:          */
                   1758:         for (i = 0; i < kdecomps_used; i++)
                   1759:           fwrite((char *) kdecomps[i].decomp, sizeof(ac_uint4),
                   1760:                  kdecomps[i].used, out);
                   1761: 
                   1762:         /*
                   1763:          * Seek back to the beginning and write the byte count.
                   1764:          */
                   1765:         bytes = (sizeof(ac_uint4) * idx) +
                   1766:             (sizeof(ac_uint4) * ((hdr[1] << 1) + 1));
                   1767:         fseek(out, sizeof(ac_uint2) << 1, 0L);
                   1768:         fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
                   1769: 
                   1770:         fclose(out);
                   1771:     }
                   1772: #endif
                   1773: 
                   1774:     /*****************************************************************
                   1775:      *
                   1776:      * Generate the combining class data.
                   1777:      *
                   1778:      *****************************************************************/
                   1779: #ifdef HARDCODE_DATA
                   1780:     fprintf(out, PREF "ac_uint4 _uccmcl_size = %ld;\n\n", (long) ccl_used);
                   1781: 
                   1782:     fprintf(out, PREF "ac_uint4 _uccmcl_nodes[] = {");
                   1783: 
                   1784:     if (ccl_used > 0) {
                   1785:        /*
                   1786:         * Write the combining class ranges out.
                   1787:         */
                   1788:        for (i = 0; i<ccl_used; i++) {
                   1789:            if (i) fprintf(out, ",");
                   1790:            if (!(i&3)) fprintf(out, "\n\t");
                   1791:            else fprintf(out, " ");
                   1792:            fprintf(out, "0x%08lx", (unsigned long) ccl[i]);
                   1793:        }
                   1794:     } else {
                   1795:        fprintf(out, "\t0");
                   1796:     }
                   1797:     fprintf(out, "\n};\n\n");
                   1798: #else
                   1799:     /*
                   1800:      * Open the cmbcl.dat file.
                   1801:      */
                   1802:     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "cmbcl.dat", opath);
                   1803:     if ((out = fopen(path, "wb")) == 0)
                   1804:       return;
                   1805: 
                   1806:     /*
                   1807:      * Set the number of ranges used.  Each range has a combining class which
                   1808:      * means each entry is a 3-tuple.
                   1809:      */
                   1810:     hdr[1] = ccl_used / 3;
                   1811: 
                   1812:     /*
                   1813:      * Write the header.
                   1814:      */
                   1815:     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
                   1816: 
                   1817:     /*
                   1818:      * Write out the byte count to maintain header size.
                   1819:      */
                   1820:     bytes = ccl_used * sizeof(ac_uint4);
                   1821:     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
                   1822: 
                   1823:     if (ccl_used > 0)
                   1824:       /*
                   1825:        * Write the combining class ranges out.
                   1826:        */
                   1827:       fwrite((char *) ccl, sizeof(ac_uint4), ccl_used, out);
                   1828: 
                   1829:     fclose(out);
                   1830: #endif
                   1831: 
                   1832:     /*****************************************************************
                   1833:      *
                   1834:      * Generate the number data.
                   1835:      *
                   1836:      *****************************************************************/
                   1837: 
                   1838: #if HARDCODE_DATA
                   1839:     fprintf(out, PREF "ac_uint4 _ucnum_size = %lu;\n\n",
                   1840:         (unsigned long)ncodes_used<<1);
                   1841: 
                   1842:     fprintf(out, PREF "ac_uint4 _ucnum_nodes[] = {");
                   1843: 
                   1844:     /*
                   1845:      * Now, if number mappings exist, write them out.
                   1846:      */
                   1847:     if (ncodes_used > 0) {
                   1848:        for (i = 0; i<ncodes_used; i++) {
                   1849:            if (i) fprintf(out, ",");
                   1850:            if (!(i&1)) fprintf(out, "\n\t");
                   1851:            else fprintf(out, " ");
                   1852:            fprintf(out, "0x%08lx, 0x%08lx",
                   1853:                (unsigned long) ncodes[i].code, (unsigned long) ncodes[i].idx);
                   1854:        }
                   1855:        fprintf(out, "\n};\n\n");
                   1856: 
                   1857:        fprintf(out, PREF "short _ucnum_vals[] = {");
                   1858:        for (i = 0; i<nums_used; i++) {
                   1859:            if (i) fprintf(out, ",");
                   1860:            if (!(i&3)) fprintf(out, "\n\t");
                   1861:            else fprintf(out, " ");
                   1862:            if (nums[i].numerator < 0) {
                   1863:                fprintf(out, "%6d, 0x%04x",
                   1864:                  nums[i].numerator, nums[i].denominator);
                   1865:            } else {
                   1866:                fprintf(out, "0x%04x, 0x%04x",
                   1867:                  nums[i].numerator, nums[i].denominator);
                   1868:            }
                   1869:        }
                   1870:        fprintf(out, "\n};\n\n");
                   1871:     }
                   1872: #else
                   1873:     /*
                   1874:      * Open the num.dat file.
                   1875:      */
                   1876:     snprintf(path, sizeof path, "%s" LDAP_DIRSEP "num.dat", opath);
                   1877:     if ((out = fopen(path, "wb")) == 0)
                   1878:       return;
                   1879: 
                   1880:     /*
                   1881:      * The count part of the header will be the total number of codes that
                   1882:      * have numbers.
                   1883:      */
                   1884:     hdr[1] = (ac_uint2) (ncodes_used << 1);
                   1885:     bytes = (ncodes_used * sizeof(_codeidx_t)) + (nums_used * sizeof(_num_t));
                   1886: 
                   1887:     /*
                   1888:      * Write the header.
                   1889:      */
                   1890:     fwrite((char *) hdr, sizeof(ac_uint2), 2, out);
                   1891: 
                   1892:     /*
                   1893:      * Write out the byte count to maintain header size.
                   1894:      */
                   1895:     fwrite((char *) &bytes, sizeof(ac_uint4), 1, out);
                   1896: 
                   1897:     /*
                   1898:      * Now, if number mappings exist, write them out.
                   1899:      */
                   1900:     if (ncodes_used > 0) {
                   1901:         fwrite((char *) ncodes, sizeof(_codeidx_t), ncodes_used, out);
                   1902:         fwrite((char *) nums, sizeof(_num_t), nums_used, out);
                   1903:     }
                   1904: #endif
                   1905: 
                   1906: #endif
                   1907: 
                   1908:     fclose(out);
                   1909: }
                   1910: 
                   /*
                    * Print the command-line synopsis and the description of each option
                    * to stderr, then terminate the process with exit status 1.
                    *
                    * prog: program name substituted into the "Usage:" line.
                    *
                    * This function does not return.
                    */
                   static void
                   usage(char *prog)
                   {
                       /* Synopsis line; only this part needs formatting. */
                       fprintf(stderr,
                               "Usage: %s [-o output-directory|-x composition-exclusions]"
                               " datafile1 datafile2 ...\n\n",
                               prog);

                       /* Fixed option descriptions. */
                       fputs("-o output-directory\n\t\tWrite the output files to a different"
                             " directory (default: .).\n",
                             stderr);
                       fputs("-x composition-exclusion\n\t\tFile of composition codes"
                             " that should be excluded.\n",
                             stderr);

                       exit(1);
                   }
                   1925: 
                   /*
                    * Program entry point.
                    *
                    * Walks the command line from left to right:
                    *   -o DIR   remember DIR as the output directory (last one wins);
                    *   -x FILE  open FILE and pass it to read_compexdata();
                    *   other    any argument not starting with '-' is opened and passed
                    *            to read_cdata().
                    * An unrecognised option, or -o/-x given without a following argument,
                    * prints the usage text and exits with status 1 via usage().
                    *
                    * A file that cannot be opened is reported on stderr and skipped;
                    * processing continues with the next argument.  After all arguments
                    * have been handled, write_cdata() is called with the output directory
                    * ("." when -o was not given).
                    *
                    * Returns 0.
                    */
                   int
                   main(int argc, char *argv[])
                   {
                       FILE *in;
                       char *prog, *opath;

                       /*
                        * argv[0] is the program name.  (argv[1] is the first argument, or
                        * NULL when there are none, so it must not be used for diagnostics.)
                        * Fall back to a fixed name if the host provides no argv[0].
                        */
                       prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "ucgendat";

                       opath = NULL;

                       /* Skip over the program name. */
                       argc--;
                       argv++;

                       while (argc > 0) {
                           if (argv[0][0] == '-') {
                               switch (argv[0][1]) {
                                 case 'o':
                                   /* The option needs a value; do not step onto argv's NULL. */
                                   if (argc < 2)
                                     usage(prog);
                                   argc--;
                                   argv++;
                                   opath = argv[0];
                                   break;
                                 case 'x':
                                   /* Same guard: fopen(NULL, ...) would be undefined. */
                                   if (argc < 2)
                                     usage(prog);
                                   argc--;
                                   argv++;
                                   if ((in = fopen(argv[0], "r")) == NULL)
                                     fprintf(stderr,
                                             "%s: unable to open composition exclusion file %s\n",
                                             prog, argv[0]);
                                   else {
                                       read_compexdata(in);
                                       fclose(in);
                                   }
                                   break;
                                 default:
                                   /* usage() exits, so control never falls out of the switch. */
                                   usage(prog);
                               }
                           } else {
                               if ((in = fopen(argv[0], "r")) == NULL)
                                 fprintf(stderr, "%s: unable to open ctype file %s\n",
                                         prog, argv[0]);
                               else {
                                   read_cdata(in);
                                   fclose(in);
                               }
                           }
                           argc--;
                           argv++;
                       }

                       if (opath == NULL)
                         opath = ".";
                       write_cdata(opath);

                       return 0;
                   }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>