early-roguelike/urogue/dict.c - annotate

Return to dict.c CVS log
Up to [contributed] / early-roguelike / urogue
Annotation of early-roguelike/urogue/dict.c, Revision 1.1.1.1

1.1       rubenllo    1: /*
                      2:     dict.c
                      3:
                      4:     UltraRogue: The Ultimate Adventure in the Dungeons of Doom
                      5:     Copyright (C) 1995 Herb Chong
                      6:     All rights reserved.
                      7:
                      8:     See the file LICENSE.TXT for full copyright and licensing information.
                      9: */
                     10:
                     11: /******************
                     12: **  Change history:
                     13: **
                     14: **  (AK:04/03/95) - In dict_create, initialize hook for extensions to dictionary structure.
                     15: **  (AK:04/04/95) - In dict_insert, only set any_ptr when a new string entry is created.
                     16: **  (AK:04/17/95) - Added dict_union and dict_merge.
                     17: **  (AK:04/18/95) - In dict_create, added code to create signature,
                     18: **                  table of contents, and parameter array.
                     19: **  (AK:04/18/95) - Revised dict_load, dict_save
                     20: **  (AK:04/18/95) - Added dict_import
                     21: **
                     22: ******************/
                     23:
                     24: static char sccsid[] = "%W% %G%";
                     25:
                     26: #include <stdlib.h>
                     27: #include <string.h>
                     28: #if !defined(OS2) && !defined(_WIN32)
                     29:    #include <unistd.h>
                     30: #endif
                     31:
                     32: #include "dict.h"
                     33: #include "dictutil.h"
                     34:
                     35: #define BUFLEN      300
                     36:
                     37: /*************************************************************************
                     38: *       Generic dictionary and hash table functions for word and
                     39: *       string storage.
                     40: *
                     41: *       Implements generic dictionary management by using a
                     42: *       hash table with direct chaining.  All terms are stored in a
                     43: *       separate, unsorted, string table. When inserting into the
                     44: *       hash table exceeds the current size, allocate a new hash table
                     45: *       and rehash the old contents into the new table. When the string
                     46: *       table fills, append more empty entries onto the old table and
                     47: *       continue adding. Inserting a string that already exists increments
                     48: *       the count for the string. Deleting a string only decrements the
                     49: *       count to no less than 0. This will allow a 0 occurence string to be
                     50: *       retrieved. To remove the 0 occurence string table entries, you must
                     51: *       rebuild the dictionary. Merging from one dictionary to a new one will
                     52: *       do this because it only copies nonzero occurences.
                     53: *
                     54: *       Assumptions:
                     55: *               unsigned long >= 32 bits
                     56: *               int >= 16 bits
                     57: *               char == 8 bits
                     58: *               number of entries < 2^28
                     59: *************************************************************************/
                     60:
                     61: /*************************************************************************
                     62: *       hash_string: calculate hash value for string, modified
                     63: *       version of hashpjw() from the new Dragon book.
                     64: *
                     65: *       s - string to calculate hash for
                     66: *
                     67: *       Returns: hash value
                     68: *************************************************************************/
                     69: static unsigned long hash_string(const char *s)
                     70: {
                     71:         unsigned long h = 0;
                     72:         unsigned long g = 0;
                     73:
                     74:         while (*s) {
                     75:                 h <<= 4;
                     76:                 h += *s++;
                     77:                 if ((g = h & 0xf0000000) != 0)
                     78:                         h ^= g >> 24;
                     79:         }
                     80:         return h ^ g;
                     81: }
                     82:
                     83: /*************************************************************************
                     84: *       dict_create: create a dictionary and initialize its structures
                     85: *
                     86: *       toc_size - number of entries in table of contents ( >= 4 )
                     87: *       initial_string_count - number of strings to hold as initial allocation
                     88: *       initial_hash_chains - size of hash table, must be power of 4
                     89: *       max_chain_length - max number of elements in hash chain before signalling growth
                     90: *
                     91: *       Returns: pointer to dictionary
                     92: *               NULL - error creating dictionary
                     93: *               non-NULL - sucessful creation
                     94: *************************************************************************/
                     95:
                     96: DICTIONARY *dict_create(
                     97:         const long toc_size,
                     98:         const long initial_string_count,
                     99:         const long initial_hash_chains,
                    100:         const long max_chain_length )
                    101: {
                    102:         DICTIONARY      *dtemp = NULL;
                    103:         STRING_ENTRY    *stemp = NULL;
                    104:         long    *ltemp = NULL;
                    105:         char    *sltemp;
                    106:         long    i, j, tocsize;
                    107:
                    108:         /* check for a power of 4 in number of initial hash entries */
                    109:         switch(initial_hash_chains) {
                    110:         case 0x00000004:
                    111:         case 0x00000010:
                    112:         case 0x00000040:
                    113:         case 0x00000100:
                    114:         case 0x00000400:
                    115:         case 0x00001000:
                    116:         case 0x00004000:
                    117:         case 0x00010000:
                    118:         case 0x00040000:
                    119:         case 0x00100000:
                    120:         case 0x00400000:
                    121:         case 0x01000000:
                    122:         case 0x04000000:
                    123:                 break;
                    124:         default:
                    125:                 return NULL;
                    126:         }
                    127:
                    128:         /* Allocate the dictionary structure */
                    129:         if ((dtemp = (DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL)
                    130:                 goto err_exit;
                    131:
                    132:         /* string count has to be within range */
                    133:         j = initial_string_count;
                    134:         if (j > 0x04000000)
                    135:                 return NULL;
                    136:
                    137:         /* force a reasonable value */
                    138:         if (j < 100)
                    139:                 j = 100;
                    140:
                    141:         /* Allocate the string table and string array */
                    142:         if ((stemp = (STRING_ENTRY *)malloc(sizeof(STRING_ENTRY) * j)) == NULL)
                    143:                 goto err_exit;
                    144:         if ((sltemp = (char *)malloc(8 * j)) == NULL)
                    145:                 goto err_exit;
                    146:
                    147:         /* Allocate the hash table */
                    148:         if ((ltemp = (long *)malloc(sizeof(long) * initial_hash_chains)) == NULL)
                    149:                 goto err_exit;
                    150:
                    151:         /* Allocate the parameter array */
                    152:         if ( (dtemp->parm = (DICT_PARM_ENTRY *)malloc(14*sizeof(DICT_PARM_ENTRY))) == NULL)
                    153:                 goto err_exit;
                    154:
                    155:         /* Allocate the signature structure */
                    156:         if ( (dtemp->sig=(DICT_SIG*)malloc(sizeof(DICT_SIG))) == NULL )
                    157:                 goto err_exit;
                    158:
                    159:         /* Allocate the Table of Contents */
                    160:         tocsize = toc_size;
                    161:         if ( tocsize < 4 )
                    162:            tocsize = 4;
                    163:         dtemp->sig->toc_size = tocsize;
                    164:         if ( (dtemp->toc=(DICT_TOC_ENTRY*)malloc(dtemp->sig->toc_size*sizeof(DICT_TOC_ENTRY))) == NULL )
                    165:                 goto err_exit;
                    166:
                    167:         dtemp->check_value = DICT_VALIDATE;                     /* validation value */
                    168:         dtemp->flags = DICT_NONE;                               /* no flags set */
                    169:         dtemp->entry_count = 0;                                 /* nothing in either table */
                    170:
                    171:         dtemp->string_table = stemp;                            /* connect string table */
                    172:         dtemp->string_max = j;                                  /* size of string table */
                    173:         dtemp->scan_string_index = DICT_ENTRY_NONE;             /* not pointing to anything in scan */
                    174:         dtemp->string_growth_count = 0;                         /* haven't grown any */
                    175:
                    176:         dtemp->string_array = sltemp;                           /* character array for strings */
                    177:         dtemp->array_size = j * 8;                              /* how many bytes available */
                    178:         dtemp->array_used = 0;                                  /* nothing used yet */
                    179:         dtemp->array_growth_count = 0;                          /* haven't grown any */
                    180:
                    181:         /* force maximum hash chain length to a reasonable value */
                    182:         i = max_chain_length;
                    183:         if (i < 1 || i > 200)
                    184:                 i = 200;
                    185:         dtemp->chains = ltemp;                                  /* connect hash table */
                    186:         dtemp->longest_chain_length = 0;                        /* nothing in table yet */
                    187:         dtemp->allowable_chain_length = i;                      /* chain length limit */
                    188:         dtemp->table_size = initial_hash_chains;                /* size of hash table */
                    189:         dtemp->hash_mask = initial_hash_chains - 1;             /* mask for mod() function */
                    190:         dtemp->hash_growth_count = 0;                           /* haven't grown any */
                    191:
                    192:         /* string table is empty */
                    193:         for (i = 0 ; i < j ; i++) {
                    194:                 dtemp->string_table[i].string_offset = DICT_ENTRY_NONE;
                    195:                 dtemp->string_table[i].count = 0;
                    196:                 dtemp->string_table[i].next = DICT_ENTRY_NONE;
                    197:                 dtemp->string_table[i].flags = 0;
                    198:                 dtemp->string_table[i].hash_value = 0;
                    199:         }
                    200:
                    201:         /* no entries chained */
                    202:         for (i = 0; i < initial_hash_chains; i++)
                    203:                 dtemp->chains[i] = DICT_ENTRY_NONE;
                    204:
                    205:         /* Initialize hook to extended dictionary structure  */
                    206:         dtemp->ext = NULL;
                    207:
                    208:         /* Set up the parameter array */
                    209:         if ( dict_set_parm_ids(dtemp) == FALSE )
                    210:                 goto err_exit;
                    211:         if ( dict_set_parm_values(dtemp) == FALSE )
                    212:                 goto err_exit;
                    213:
                    214:         /* Set up the Table of Contents */
                    215:         strcpy( dtemp->toc[0].id , "PARM" );
                    216:         dtemp->toc[0].size = dtemp->sig->nparms * sizeof(DICT_PARM_ENTRY);
                    217:         dtemp->toc[0].ptr = dtemp->parm;
                    218:         dtemp->toc[0].type = 0;
                    219:
                    220:         strcpy( dtemp->toc[1].id , "HASH" );
                    221:         dtemp->toc[1].size = dtemp->table_size * sizeof(long);
                    222:         dtemp->toc[1].ptr = dtemp->chains;
                    223:         dtemp->toc[1].type = 0;
                    224:
                    225:         strcpy( dtemp->toc[2].id , "STTB" );
                    226:         dtemp->toc[2].size = dtemp->string_max * sizeof(char);
                    227:         dtemp->toc[2].ptr = dtemp->string_table;
                    228:         dtemp->toc[2].type = 0;
                    229:
                    230:         strcpy( dtemp->toc[3].id , "STAR" );
                    231:         dtemp->toc[3].size = dtemp->array_size * sizeof(STRING_ENTRY);
                    232:         dtemp->toc[3].ptr = dtemp->string_array;
                    233:         dtemp->toc[3].type = 0;
                    234:
                    235:         /* Set up the signature */
                    236:         dtemp->sig->check_value = DICT_VALIDATE;
                    237:         dtemp->sig->toc_size = tocsize;
                    238:         dtemp->sig->nparms = 14;
                    239:
                    240:         return dtemp;
                    241:
                    242:         /* error exit - saves duplicate code */
                    243: err_exit:
                    244:
                    245:         dict_destroy( dtemp );
                    246:         return NULL;
                    247: }
                    248:
                    249: /*************************************************************************
                    250: *       dict_destroy: discard a dictionary and its contents
                    251: *               multiple calls to destroy the same dictionary is safe
                    252: *
                    253: *       dict - pointer to a dictionary to discard
                    254: *
                    255: *       Returns: status code
                    256: *               TRUE - dictionary destroyed
                    257: *               FALSE - error when destroying dictionary
                    258: *       Note: Does free the dictionary structure itself.
                    259: *************************************************************************/
                    260: BOOLEANC dict_destroy(DICTIONARY *dict)
                    261: {
                    262:         int  i;
                    263:
                    264:         /* have to point to something */
                    265:         if (dict == NULL)
                    266:                 return FALSE;
                    267:
                    268:         /* check value has to be OK */
                    269:         if (dict->check_value != DICT_VALIDATE)
                    270:                 return FALSE;
                    271:
                    272:         if ( (dict->sig==NULL) || (dict->toc==NULL) || (dict->parm==NULL) )
                    273:                 return FALSE;
                    274:
                    275:         /* Free the type=0 tables */
                    276:         for ( i = 0 ; i < dict->sig->toc_size ; i++ ) {
                    277:            if ( dict->toc[i].ptr != NULL && dict->toc[i].type == 0 )
                    278:                 free( dict->toc[i].ptr );
                    279:         } /* endfor */
                    280:
                    281:         /* Free the Table of Contents and signature */
                    282:         free( dict->toc );
                    283:         free( dict->sig );
                    284:
                    285:         /* Free the dictionary structure itself */
                    286:         free( dict );
                    287:
                    288:         return TRUE;
                    289: }
                    290:
                    291: /*************************************************************************
                    292: *       dict_insert: add entries into the dictionary, growing as necessary
                    293: *
                    294: *       dict - pointer to dictionary to insert into
                    295: *       s - string to insert
                    296: *       count - count of occurences of string
                    297: *       flags - flag bits associated with the string
                    298: *       number - pointer to long to return word number
                    299: *
                    300: *       Returns: pointer to new entry in dictionary
                    301: *               NULL - error during insert
                    302: *               non-NULL - pointer to inserted or updated entry
                    303: *
                    304: *       Notes: if the entry already exists, increment the count.
                    305: *       If NULL is returned, the dictionary state can no longer
                    306: *       be trusted. Terminating with an error code would be
                    307: *       safest.
                    308: *************************************************************************/
                    309: STRING_ENTRY *dict_insert(DICTIONARY *dict, char *s,
                    310:         const long occurences, const unsigned long flags,
                    311:         void *any_ptr, long *number)
                    312: {
                    313:         unsigned long   hash_value;
                    314:         long    hash_index;
                    315:         long    string_index = -1;
                    316:         long    he2;
                    317:         int     chain_len;
                    318:
                    319:         /* have to point to something */
                    320:         if (dict == NULL)
                    321:                 return FALSE;
                    322:
                    323:         /* check value has to be OK */
                    324:         if (dict->check_value != DICT_VALIDATE)
                    325:                 return FALSE;
                    326:
                    327:         /* must have a string */
                    328:         if (s == NULL) {
                    329:                 *number = -1;
                    330:                 return FALSE;
                    331:         }
                    332:
                    333:         /* must be non-NULL */
                    334:         if (s[0] == '\0') {
                    335:                 *number = -1;
                    336:                 return FALSE;
                    337:         }
                    338:
                    339:         /* figure out which chain it should go into */
                    340:         hash_value = hash_string(s);
                    341:         hash_index = hash_value & dict->hash_mask;
                    342:
                    343:         /* look for the entry in the chain */
                    344:         for (he2 = dict->chains[hash_index], chain_len = 0; he2 != DICT_ENTRY_NONE;
                    345:                         he2 = dict->string_table[he2].next, chain_len++) {
                    346:                 char    *sa = &dict->string_array[dict->string_table[he2].string_offset];
                    347:
                    348:                 /* compare hash_value and then string, in that order, for speed */
                    349:                 if (dict->string_table[he2].hash_value == hash_value && !strcmp(s, sa)) {
                    350:                         string_index = he2;
                    351:                         break;
                    352:                 }
                    353:         }
                    354:
                    355:         /* string wasn't found */
                    356:         if (string_index < 0) {
                    357:                 /* make sure there is room in string entry table */
                    358:                 if (dict->entry_count + 1 > dict->string_max) {
                    359:                         /* make new table 20% bigger */
                    360:                         dict->string_max = (dict->string_max * 6) / 5;
                    361:                         dict->string_table = (STRING_ENTRY *)realloc(dict->string_table,
                    362:                                         sizeof(STRING_ENTRY) * dict->string_max);
                    363:                         if (dict->string_table == NULL)
                    364:                                 return NULL;
                    365:                         dict->string_growth_count++;
                    366:                 }
                    367:                 string_index = dict->entry_count;
                    368:                 dict->entry_count++;
                    369:
                    370:                 /* make sure there is room in the string array */
                    371:                 if (dict->array_used + (long)strlen(s) + 1 > dict->array_size) {
                    372:                         dict->array_size = (dict->array_size * 6) / 5;
                    373:                         dict->string_array = (char *) realloc(dict->string_array,
                    374:                                 dict->array_size);
                    375:                         if (dict->string_array == NULL)
                    376:                                 return NULL;
                    377:                 }
                    378:
                    379:                 /* fill in starting values */
                    380:                 strcpy(&dict->string_array[dict->array_used], s);
                    381:                 dict->string_table[string_index].string_offset = dict->array_used;
                    382:                 dict->array_used += (long) strlen(s) + 1;
                    383:                 dict->string_table[string_index].count = 0 ;
                    384:                 dict->string_table[string_index].flags = DICT_NONE;
                    385:                 dict->string_table[string_index].any_ptr = any_ptr; /* (AK:04/04/95) */
                    386:                 dict->string_table[string_index].hash_value = hash_value;
                    387:
                    388:                 /* hook entry at beginning of chain */
                    389:                 dict->string_table[string_index].next = dict->chains[hash_index];
                    390:                 dict->chains[hash_index] = string_index;
                    391:
                    392:                 /* record chain lengths */
                    393:                 chain_len++;
                    394:                 if (chain_len > dict->longest_chain_length)
                    395:                         dict->longest_chain_length = chain_len;
                    396:
                    397:                 /* if a chain is too long */
                    398:                 if (chain_len > dict->allowable_chain_length) {
                    399:                         long    new_size = dict->table_size * 4;
                    400:                         long    new_mask = new_size - 1;
                    401:                         long    *hetemp;
                    402:                         long    i;
                    403:                         int     longest_chain;
                    404:
                    405:                         if ((hetemp = (long *)malloc(sizeof(long) * new_size)) == NULL)
                    406:                                 return NULL;
                    407:
                    408:                         /* hash table chains are empty */
                    409:                         for (i = 0; i < new_size; i++)
                    410:                                 hetemp[i] = DICT_ENTRY_NONE;
                    411:
                    412:                         /* reset all chains */
                    413:                         for (i = 0; i < dict->entry_count; i++)
                    414:                                 dict->string_table[i].next = DICT_ENTRY_NONE;
                    415:
                    416:                         /* recreate hash table entries by reinserting all strings */
                    417:                         for (i = 0; i < dict->entry_count; i++) {
                    418:                                 long    he = dict->string_table[i].hash_value & new_mask;
                    419:
                    420:                                 dict->string_table[i].next = hetemp[he];
                    421:                                 hetemp[he] = i;
                    422:                         }
                    423:
                    424:                         /* find longest chain length */
                    425:                         for (i = 0, longest_chain = 0; i < new_size; i++) {
                    426:                                 int     len;
                    427:                                 long    cur;
                    428:
                    429:                                 for (cur = hetemp[i], len = 0; cur != DICT_ENTRY_NONE;
                    430:                                                 cur = dict->string_table[cur].next)
                    431:                                     len++;
                    432:                                         ;
                    433:                                 if (len > longest_chain)
                    434:                                         longest_chain = len;
                    435:                         }
                    436:
                    437:                         /* delete old table and attach new one */
                    438:                         free(dict->chains);
                    439:                         dict->chains = hetemp;
                    440:                         dict->longest_chain_length = longest_chain;
                    441:                         dict->table_size = new_size;
                    442:                         dict->hash_mask = new_mask;
                    443:
                    444:                         /* keep track of growth */
                    445:                         dict->hash_growth_count++;
                    446:                 }
                    447:         }
                    448:
                    449:         dict->string_table[string_index].count += occurences;
                    450:         dict->string_table[string_index].flags |= flags;
                    451:         *number = string_index;
                    452:
                    453:         return &dict->string_table[string_index];
                    454: }
                    455:
                    456: /*************************************************************************
                    457: *       dict_delete: deletes an entry from the dictionary
                    458: *       (Actually, only decrements the entry's count)
                    459: *
                    460: *       dict - pointer to dictionary to delete
                    461: *       s - string to find and delete
                    462: *       count - count to decrement entry by
                    463: *
                    464: *       Returns: status code
                    465: *               TRUE - entry has been deleted
                    466: *               FALSE - entry wasn't found or error occured
                    467: *************************************************************************/
                    468: BOOLEANC dict_delete(const DICTIONARY *dict, const char *s, const long count)
                    469: {
                    470:         STRING_ENTRY    *se;
                    471:         long    n;
                    472:
                    473:         /* find the string */
                    474:         if ((se = dict_search(dict, s, &n)) == NULL)
                    475:                 return FALSE;
                    476:
                    477:         /* decrement count and make sure it stays valid */
                    478:         se->count -= count;
                    479:         if (se->count < 0)
                    480:                 se->count = 0;
                    481:         return TRUE;
                    482: }
                    483:
                    484: /*************************************************************************
                    485: *       dict_search: look for entries in the dictionary
                    486: *
                    487: *       dict - pointer to dictionary to search
                    488: *       s - string to search for
                    489: *       number - pointer to long to return string number
                    490: *
                    491: *       Returns: pointer to string entry
                    492: *               NULL - entry not found
                    493: *               non-NULL - pointer to string entry
                    494: *************************************************************************/
                    495: STRING_ENTRY *dict_search(const DICTIONARY *dict, const char *s,
                    496:         long *number)
                    497: {
                    498:         unsigned long   hash_value;
                    499:         long    hash_index;
                    500:         long    string_index = -1;
                    501:         long    he;
                    502:
                    503:         /* have to point to something */
                    504:         if (dict == NULL)
                    505:                 return NULL;
                    506:
                    507:         /* check value has to be OK */
                    508:         if (dict->check_value != DICT_VALIDATE)
                    509:                 return NULL;
                    510:
                    511:         /* must have a string */
                    512:         if (s == NULL) {
                    513:                 *number = -1;
                    514:                 return NULL;
                    515:         }
                    516:
                    517:         /* must be non-NULL */
                    518:         if (s[0] == '\0') {
                    519:                 *number = -1;
                    520:                 return FALSE;
                    521:         }
                    522:
                    523:         /* figure out which chain it should be in */
                    524:         hash_value = hash_string(s);
                    525:         hash_index = hash_value & dict->hash_mask;
                    526:
                    527:         /* look for the entry in the chain */
                    528:         for (he = dict->chains[hash_index]; he != DICT_ENTRY_NONE;
                    529:                         he = dict->string_table[he].next) {
                    530:                 char    *sa = (char*)(&dict->string_array[dict->string_table[he].string_offset]);
                    531:
                    532:                 /* compare hash_value and then string, in that order, for speed */
                    533:                 if (dict->string_table[he].hash_value == hash_value && !strcmp(s, sa)) {
                    534:                         string_index = he;
                    535:                         break;
                    536:                 }
                    537:         }
                    538:         if (string_index < 0) {
                    539:                 *number = -1;
                    540:                 return NULL;
                    541:         }
                    542:
                    543:         *number = string_index;
                    544:         return dict->string_table+string_index;
                    545: }
                    546:
                    547: /*************************************************************************
                    548: *       dict_union: merges contents of 2 dictionaries into
                    549: *       a third
                    550: *
                    551: *       dict1 - pointer to dictionary 1
                    552: *       dict2 - pointer to dictionary 2
                    553: *
                    554: *       Returns: dictionary pointer
                    555: *               NULL - failed to merge
                    556: *               non-NULL - pointer to new dictionary
                    557: *
                    558: *       Notes: entries of the same string have their counts
                    559: *       added and their flags ORed together.
                    560: *************************************************************************/
                    561: DICTIONARY *dict_union( const DICTIONARY *dict1 , const DICTIONARY *dict2 )
                    562: {
                    563:         DICTIONARY    *dict = NULL;
                    564:         STRING_ENTRY  *se, *se2;
                    565:         long          initial_string_count, initial_hash_chains, max_chain_length;
                    566:         long          i, string_index;
                    567:
                    568:         /***********
                    569:         **  Initialize the new dictionary.
                    570:         ***********/
                    571:
                    572:         if ( dict1==NULL || dict2==NULL )
                    573:                 goto err_exit;
                    574:         if ((dict=(DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL)
                    575:                 goto err_exit;
                    576:
                    577:         initial_string_count = dict1->string_max;
                    578:         initial_hash_chains = dict1->table_size;
                    579:         max_chain_length = dict1->allowable_chain_length;
                    580:         dict = dict_create( 4,
                    581:                             initial_string_count,
                    582:                             initial_hash_chains,
                    583:                             max_chain_length );
                    584:
                    585:         /***********
                    586:         **  Copy the entries from dict1 into the new dictionary.
                    587:         ***********/
                    588:
                    589:         for ( i = 0 ; i < dict1->entry_count ; i++ ) {
                    590:                 se = (STRING_ENTRY*)&dict1->string_table[i];
                    591:                 if ( se->count > 0 ) {
                    592:                         se2 = dict_insert(
                    593:                                   dict,
                    594:                                   dict1->string_array+se->string_offset,
                    595:                                   se->count,
                    596:                                   se->flags,
                    597:                                   NULL,
                    598:                                   &string_index );
                    599:                         if ( se2 == NULL )
                    600:                                 goto err_exit;
                    601:                 } /* endif */
                    602:         } /* endfor */
                    603:
                    604:         /*  Merge the entries from dict2 into the new dictionary. */
                    605:         if ( dict_merge(dict,dict2,FALSE) == FALSE )
                    606:                 goto err_exit;
                    607:
                    608:         /*  Success. Return a pointer to the new dictionary.  */
                    609:         return( dict );
                    610:
                    611:         /*  Failure. Ignominiously erase our tracks and return NULL. */
                    612: err_exit:
                    613:         dict_destroy( dict );
                    614:         return NULL;
                    615: }
                    616:
                    617: /*************************************************************************
                    618: *       dict_merge: merges the contents of a dictionary into
                    619: *       another one, updating the contents of the destination
                    620: *
                    621: *       dst - dictionary to update
                    622: *       src - dictionary to add
                    623: *       move - boolean flag
                    624: *               TRUE - move to dest
                    625: *               FALSE - copy to dest
                    626: *
                    627: *       Returns: status code
                    628: *               TRUE - merge completed
                    629: *               FALSE - error on merge
                    630: *
                    631: *       Notes: entries of the same string have their counts
                    632: *       added. At the end of a move, src is empty. If there
                    633: *       is an error during a move, the contents of both
                    634: *       dictionaries cannot be trusted.
                    635: *************************************************************************/
                    636: BOOLEANC dict_merge(const DICTIONARY *dst, const DICTIONARY *src, const BOOLEANC move)
                    637: {
                    638:         STRING_ENTRY  *se, *se2;
                    639:         DICTIONARY    *dict1, *dict2;
                    640:         long          i, string_index, index;
                    641:
                    642:         dict1 = (DICTIONARY*)src;
                    643:         dict2 = (DICTIONARY*)dst;
                    644:
                    645:         /***********
                    646:         **  Copy the dictionary entries into the new dictionary.
                    647:         ***********/
                    648:
                    649:         for ( i = 0 ; i < dict1->entry_count ; i++ ) {
                    650:                 se = (STRING_ENTRY*)&dict1->string_table[i];
                    651:                 if ( se->count > 0 ) {
                    652:                         se2 = dict_insert(
                    653:                                   dict2,
                    654:                                   dict1->string_array+se->string_offset,
                    655:                                   se->count,
                    656:                                   se->flags,
                    657:                                   NULL,
                    658:                                   &string_index );
                    659:                         if ( se2 == NULL )
                    660:                                 goto err_exit;
                    661:                 } /* endif */
                    662:         } /* endfor */
                    663:
                    664:         /***********
                    665:         **  Set up the dictionary parameter vector.
                    666:         ***********/
                    667:
                    668:         if ( dict_set_parm_values(dict2) == FALSE )
                    669:                 return( FALSE );
                    670:
                    671:         /***********
                    672:         **  Update the table of contents.
                    673:         **     PARM HASH STTB STAR
                    674:         ***********/
                    675:
                    676:         if ( (index=dict_toc_index(dict2,"HASH")) == -1)
                    677:                 goto err_exit;
                    678:         dict2->toc[index].size = dict2->table_size * sizeof(long);
                    679:         dict2->toc[index].ptr = dict2->chains;
                    680:
                    681:         if ( (index=dict_toc_index(dict2,"STTB")) == -1)
                    682:                 goto err_exit;
                    683:         dict2->toc[index].size = dict2->string_max * sizeof(char);
                    684:         dict2->toc[index].ptr = dict2->string_table;
                    685:
                    686:         if ( (index=dict_toc_index(dict2,"STAR")) == -1)
                    687:                 goto err_exit;
                    688:         dict2->toc[index].size = dict2->array_size * sizeof(STRING_ENTRY);
                    689:         dict2->toc[index].ptr = dict2->string_array;
                    690:
                    691:         /***********
                    692:         **  Update the signature
                    693:         ***********/
                    694:
                    695:         dict2->sig->checksum =
                    696:                 compute_checksum( 4*sizeof(DICT_TOC_ENTRY) , (char*)(dict2->toc) );
                    697:
                    698:         /***********
                    699:         **  If this is a move, destroy the source dictionary
                    700:         ***********/
                    701:
                    702:         if ( move == TRUE )
                    703:                 if ( dict_destroy(dict1) == FALSE )
                    704:                         goto err_exit;
                    705:
                    706:         /***********
                    707:         **  Success
                    708:         ***********/
                    709:
                    710:         return( TRUE );
                    711:
                    712:         /***********
                    713:         **  Failure
                    714:         ***********/
                    715:
                    716: err_exit:
                    717:         dict_destroy( dict2 );
                    718:         return FALSE;
                    719: }
                    720:
                    721: /*************************************************************************
                    722: *       dict_string_by_number: return string pointer for
                    723: *       a given string number
                    724: *
                    725: *       number - string number
                    726: *
                    727: *       Returns: pointer to string entry
                    728: *               NULL - entry not found
                    729: *               non-NULL - pointer to string entry
                    730: *************************************************************************/
                    731: STRING_ENTRY *dict_string_by_number(const DICTIONARY *dict, const long number)
                    732: {
                    733:         if (dict == NULL)
                    734:                 return NULL;
                    735:
                    736:         /* check value has to be OK */
                    737:         if (dict->check_value != DICT_VALIDATE)
                    738:                 return NULL;
                    739:
                    740:         /* string number has to be within range */
                    741:         if (number < 0 || number >= dict->entry_count)
                    742:                 return NULL;
                    743:
                    744:         return dict->string_table+number;
                    745: }
                    746:
                    747: /*************************************************************************
                    748: *       dict_scan_begin: begin a scan of the dictionary
                    749: *
                    750: *       dict - pointer to dictionary to scan
                    751: *
                    752: *       Returns: status code
                    753: *               TRUE - scan initialized
                    754: *               FALSE - error
                    755: *
                    756: *       Notes: if a scan is already in progress, it is
                    757: *       abandoned and the scan is reset to the
                    758: *       beginning.
                    759: *************************************************************************/
                    760: BOOLEANC dict_scan_begin(DICTIONARY *dict)
                    761: {
                    762:         /* have to point to something */
                    763:         if (dict == NULL)
                    764:                 return FALSE;
                    765:
                    766:         /* check value has to be OK */
                    767:         if (dict->check_value != DICT_VALIDATE)
                    768:                 return FALSE;
                    769:
                    770:         /* point to first entry in string table */
                    771:         dict->scan_string_index = 0;
                    772:         return TRUE;
                    773: }
                    774:
                    775: /*************************************************************************
                    776: *       dict_scan_next: get the next entry in a scan
                    777: *
                    778: *       dict - pointer to dictionary to continue scanning
                    779: *
                    780: *       Returns: pointer to string entry
                    781: *               NULL - no more entries
                    782: *               non-NULL - next string entry
                    783: *************************************************************************/
                    784: STRING_ENTRY *dict_scan_next(DICTIONARY *dict)
                    785: {
                    786:         /* have to point to something */
                    787:         if (dict == NULL)
                    788:                 return NULL;
                    789:
                    790:         /* check value has to be OK */
                    791:         if (dict->check_value != DICT_VALIDATE)
                    792:                 return NULL;
                    793:
                    794:         /* scan index has to be within range */
                    795:         if (dict->scan_string_index < 0
                    796:                         || dict->scan_string_index > dict->entry_count)
                    797:                 return NULL;
                    798:
                    799:         /* for first non-empty table entry */
                    800:         while (dict->scan_string_index < dict->entry_count
                    801:                         && dict->string_table[dict->scan_string_index].count == 0)
                    802:                 dict->scan_string_index++;
                    803:
                    804:         /* past end of table? */
                    805:         if (dict->scan_string_index >= dict->entry_count)
                    806:                 return NULL;
                    807:
                    808:         return &dict->string_table[dict->scan_string_index++];
                    809: }
                    810:
                    811:
                    812: /*************************************************************************
                    813: *       dict_load - load a compiled dictionary into memory
                    814: *                   creates a new dictionary
                    815: *
                    816: *       fname - fully qualified file name
                    817: *
                    818: *       Returns: pointer to created dictionary structure
                    819: *                (NULL on failure)
                    820: *************************************************************************/
                    821: DICTIONARY *dict_load(const char *fname)
                    822: {
                    823:         DICTIONARY       *dict = NULL;
                    824:         int              code, index;
                    825:         FILE             *fi;
                    826:         int              ntoc;
                    827:
                    828:         if ( (fi=fopen((char*)fname,"rb")) == NULL ) {
                    829:                 signal_error( "dict_load: could not open file" , (char*)fname , 0 );
                    830:                 goto err_exit;
                    831:         } /* endif */
                    832:
                    833:         if ((dict = (DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL) {
                    834:                 /* signal_error( "dict_load: alloc failed" , "" , 0 ); */
                    835:                 goto err_exit;
                    836:         } /* endif */
                    837:
                    838:         /* Read the dictionary signature record */
                    839:         if ((dict->sig = (DICT_SIG *)malloc(sizeof(DICT_SIG))) == NULL) {
                    840:                 goto err_exit;
                    841:         } /* endif */
                    842:         code = block_read( fi , (char*)(dict->sig) , sizeof(DICT_SIG) , 0 );
                    843:         if ( code == -1 ) {
                    844:                 signal_error( "dict_load: could not read signature" , (char*)fname , 0 );
                    845:                 goto err_exit;
                    846:         } /* endif */
                    847:
                    848:         if ( dict->sig->check_value != DICT_VALIDATE ) {
                    849:                 signal_error( "dict_load: could not validate file" , (char*)fname , 0 );
                    850:                 goto err_exit;
                    851:         } /* endif */
                    852:         dict->check_value = dict->sig->check_value;
                    853:
                    854:         /* Read the dictionary Table of Contents */
                    855:         ntoc = dict->sig->toc_size;
                    856:         dict->toc = (DICT_TOC_ENTRY *) malloc( ntoc * sizeof(DICT_TOC_ENTRY) );
                    857:         if ( dict->toc == NULL ) {
                    858:                 signal_error( "dict_load: alloc of TOC failed" , "" , 0 );
                    859:                 goto err_exit;
                    860:         } /* endif */
                    861:         code = block_read( fi ,
                    862:                            (char*)(dict->toc) ,
                    863:                            ntoc * sizeof(DICT_TOC_ENTRY) ,
                    864:                            sizeof(DICT_SIG) );
                    865:         if ( code == -1 ) {
                    866:                 signal_error( "dict_load: could not read Table of Contents" , (char*)fname , 0 );
                    867:                 goto err_exit;
                    868:         } /* endif */
                    869:
                    870:         /* Read the dictionary parameters */
                    871:         dict->parm = (DICT_PARM_ENTRY *)
                    872:                          dict_load_block( dict , "PARM" , fi , NULL );
                    873:         if ( dict->parm == NULL ) {
                    874:                 signal_error( "dict_load: could not load parameter table" , "" , 0 );
                    875:                 goto err_exit;
                    876:         } /* endif */
                    877:         index = dict_toc_index( dict , "PARM" );
                    878:         dict->sig->nparms = dict->toc[index].size / sizeof(DICT_PARM_ENTRY);
                    879:
                    880:         /* Set the parameter values in the dictionary structure */
                    881:         if ( dict_set_parm_variables(dict) == FALSE )
                    882:                 goto err_exit;
                    883:
                    884:         /* Load the string array */
                    885:         dict->string_array = (char *)
                    886:                 dict_load_block( dict , "STAR" , fi , NULL );
                    887:         if ( dict->string_array == NULL ) {
                    888:                 signal_error( "dict_load: could not load string array" , (char*)fname , 0 );
                    889:                 goto err_exit;
                    890:         } /* endif */
                    891:
                    892:         /* Load the string table */
                    893:         dict->string_table = (STRING_ENTRY *)
                    894:                 dict_load_block( dict , "STTB" , fi , NULL );
                    895:         if ( dict->string_table == NULL ) {
                    896:                 signal_error( "dict_load: could not load string table" , (char*)fname , 0 );
                    897:                 goto err_exit;
                    898:         } /* endif */
                    899:
                    900:         /* Load the hash table */
                    901:         dict->chains = (long *)
                    902:                 dict_load_block( dict , "HASH" , fi , NULL );
                    903:         if ( dict->chains == NULL ) {
                    904:                 signal_error( "dict_load: could not load hash table" , (char*)fname , 0 );
                    905:                 goto err_exit;
                    906:         } /* endif */
                    907:
                    908:         /*  Initialize the hook for dictionary extensions  */
                    909:         dict->ext = NULL;
                    910:
                    911:         /*  Success  */
                    912:         fclose( fi );
                    913:         return( dict );
                    914:
                    915:         /*  Failure  */
                    916: err_exit:
                    917:         if ( fi != NULL )
                    918:                 fclose( fi );
                    919:         dict_destroy( dict );
                    920:         return NULL;
                    921: }
                    922:
                    923:
                    924: /*************************************************************************
                    925: *       dict_save - save a dictionary from memory into a file
                    926: *
                    927: *       dict - pointer to dictionary to save
                    928: *       fname - full qualified file name prefix of dictionary
                    929: *
                    930: *       Returns: status code
                    931: *               TRUE - dictionary was saved sucessfully
                    932: *               FALSE - error during save
                    933: *************************************************************************/
                    934: BOOLEANC dict_save( DICTIONARY *dict, const char *fname )
                    935: {
                    936:         int   index, ret_code;
                    937:         FILE  *fo = NULL;
                    938:
                    939:         /* Have to be pointing at a valid dictionary */
                    940:         if ( dict == NULL || dict->sig->check_value != DICT_VALIDATE )
                    941:                 goto err_exit;
                    942:
                    943:         /* Open the file for output */
                    944:         if ( (fo=fopen((char*)fname,"wb")) == NULL )
                    945:                 goto err_exit;
                    946:
                    947:         /*  Make the table of contents entries current  */
                    948:         /*  Note: This will not be necessary once the data is stored in EVECTORs  */
                    949:
                    950:         if ( (index=dict_toc_index(dict,"PARM")) == -1 )
                    951:                 goto err_exit;
                    952:         dict->toc[index].size = dict->sig->nparms * sizeof(DICT_PARM_ENTRY);
                    953:         dict->toc[index].ptr = dict->parm;
                    954:
                    955:         if ( (index=dict_toc_index(dict,"STAR")) == -1 )
                    956:                 goto err_exit;
                    957:         dict->toc[index].size = dict->array_size * sizeof(char);
                    958:         dict->toc[index].ptr = dict->string_array;
                    959:
                    960:         if ( (index=dict_toc_index(dict,"STTB")) == -1 )
                    961:                 goto err_exit;
                    962:         dict->toc[index].size = dict->string_max * sizeof(STRING_ENTRY);
                    963:         dict->toc[index].ptr = dict->string_table;
                    964:
                    965:         if ( (index=dict_toc_index(dict,"HASH")) == -1 )
                    966:                 goto err_exit;
                    967:         dict->toc[index].size = dict->table_size * sizeof(long);
                    968:         dict->toc[index].ptr = dict->chains;
                    969:
                    970:         /*  Reset the TOC offsets and checksums for ALL tables  */
                    971:         /*  (not just type=0 tables)                            */
                    972:         dict_reset_toc_offsets( dict );
                    973:
                    974:         /* Set the dictionary parm structure from the parameter values */
                    975:         if ( dict_set_parm_values(dict) == FALSE )
                    976:                 goto err_exit;
                    977:
                    978:         /* Save the signature */
                    979:         dict->sig->checksum = compute_checksum( sizeof(DICT_SIG) , (char*)(dict->sig) );
                    980:         ret_code = block_write( fo ,
                    981:                                 (char*)dict->sig ,
                    982:                                 sizeof(DICT_SIG) );
                    983:         if ( ret_code == -1 )
                    984:                 goto err_exit;
                    985:
                    986:         /* Save the table of contents */
                    987:         ret_code = block_write( fo,
                    988:                                 (char*)dict->toc,
                    989:                                 dict->sig->toc_size * sizeof(DICT_TOC_ENTRY) );
                    990:         if ( ret_code == -1 )
                    991:                 goto err_exit;
                    992:
                    993:         /* Save the tables */
                    994:         /* For now, only save type=0 tables */
                    995:         for ( index = 0 ; index < dict->sig->toc_size ; index++ ) {
                    996:                 if ( dict->toc[index].type == 0 ) {  /* Ordinary table */
                    997:                         ret_code = dict_save_block( dict , dict->toc[index].id , fo );
                    998:                         if ( ret_code == FALSE )
                    999:                                 goto err_exit;
                   1000:                      } /* endif */
                   1001:         } /* endfor */
                   1002:
                   1003:         /*  Success  */
                   1004:         fclose( fo );
                   1005:         return TRUE;
                   1006:
                   1007:         /*  Failure  */
                   1008: err_exit:
                   1009:         if ( fo != NULL )
                   1010:                 fclose( fo );
                   1011:         return FALSE;
                   1012: }
                   1013:
                   1014:
                   1015: /*************************************************************************
                   1016: *       dict_import: read in an ASCII dictionary.
                   1017: *
                   1018: *       dict_fname - name of dictionary file
                   1019: *       parameters to create a DICTIONARY structure (see dict_create)
                   1020: *
                   1021: *       Returns: pointer to created DICTIONARY structure
                   1022: *                (NULL on failure)
                   1023: *
                   1024: *************************************************************************/
                   1025: DICTIONARY *dict_import( const char *dict_fname ,
                   1026:                          const long initial_string_count ,
                   1027:                          const long initial_hash_entries ,
                   1028:                          const long max_chain_length )
                   1029: {
                   1030:         DICTIONARY     *dict;
                   1031:         char           buffer[BUFLEN], ch;
                   1032:         int            index, c, c0;
                   1033:         long           number;
                   1034:         FILE           *fi = NULL;
                   1035:
                   1036:         /***********
                   1037:         **  Dictionary setup.
                   1038:         ***********/
                   1039:
                   1040:         dict = dict_create( 4,
                   1041:                             initial_string_count ,
                   1042:                             initial_hash_entries ,
                   1043:                             max_chain_length );
                   1044:         if ( dict == NULL )
                   1045:            goto err_exit;
                   1046:
                   1047:         /***********
                   1048:         **  Read the dictionary file
                   1049:         **  Each line should have one word or a string delimited by '|'
                   1050:         ***********/
                   1051:
                   1052:         if ( (fi=fopen(dict_fname,"r")) == NULL )
                   1053:                 goto err_exit;
                   1054:         while( fgets(buffer,BUFLEN,fi) != NULL ) {
                   1055:                 c0 = 0;
                   1056:                    /*  Skip to non-blank  */
                   1057:                 while ( (c0<BUFLEN-2) && (buffer[c0]==' ') ) ++c0;
                   1058:                 if ( buffer[c0] == '|' ) {
                   1059:                         c = ++c0;
                   1060:                         ch = '|';
                   1061:                 } else {
                   1062:                         c = c0;
                   1063:                         ch = ' ';
                   1064:                 } /* endif */
                   1065:                    /*  Scan to blank or matching '|' */
                   1066:                 while ( (c<BUFLEN-1) && (buffer[c]!='\0') &&
                   1067:                         (buffer[c]!='\n') && (buffer[c]!=ch)  )
                   1068:                         ++c;
                   1069:                 buffer[c] = '\0';
                   1070:                    /*  Insert the word  */
                   1071:                 if ( dict_insert(dict,buffer+c0,1,0,NULL,&number) == NULL )
                   1072:                         goto err_exit;
                   1073:         } /* endwhile */
                   1074:
                   1075:         /***********
                   1076:         **  Fill in the dictionary parameter vector.
                   1077:         ***********/
                   1078:
                   1079:         if ( dict_set_parm_values(dict) == FALSE )
                   1080:                 goto err_exit;
                   1081:
                   1082:         /***********
                   1083:         **  Update the table of contents for HASH STTB STAR
                   1084:         ***********/
                   1085:
                   1086:         if ( (index=dict_toc_index(dict,"HASH")) == -1 )
                   1087:                 goto err_exit;
                   1088:         dict->toc[index].size = dict->table_size * sizeof(long);
                   1089:
                   1090:         if ( (index=dict_toc_index(dict,"STTB")) == -1 )
                   1091:                 goto err_exit;
                   1092:         dict->toc[index].size = dict->string_max * sizeof(char);
                   1093:
                   1094:         if ( (index=dict_toc_index(dict,"STAR")) == -1 )
                   1095:                 goto err_exit;
                   1096:         dict->toc[index].size = dict->array_size * sizeof(STRING_ENTRY);
                   1097:
                   1098:            /*  Success. Return a pointer to the new dictionary.  */
                   1099:         fclose(fi);
                   1100:         return( dict );
                   1101:
                   1102:            /*  Failure. Ignominiously erase our tracks and return NULL.  */
                   1103: err_exit:
                   1104:         if ( fi != NULL )
                   1105:            fclose(fi);
                   1106:         dict_destroy( dict );
                   1107:         return NULL;
                   1108: }
                   1109:
                   1110: /*************************************************************************
                   1111: *       dict_export - save an extended dictionary from memory into
                   1112: *                     an ASCII file
                   1113: *
                   1114: *       dict - pointer to dictionary to save
                   1115: *       fname - full qualified file name prefix of dictionary
                   1116: *
                   1117: *       Returns: status code
                   1118: *               TRUE - dictionary was saved sucessfully
                   1119: *               FALSE - error during save
                   1120: *
                   1121: *************************************************************************/
                   1122: BOOLEANC dict_export( DICTIONARY *dict , const char *fname )
                   1123: {
                   1124:         FILE          *fp = NULL;
                   1125:         STRING_ENTRY  *se;
                   1126:         int       i;
                   1127:
                   1128:         /* have to point to something */
                   1129:         if (dict == NULL)
                   1130:                 goto err_exit;
                   1131:
                   1132:         /* check value has to be OK */
                   1133:         if (dict->check_value != DICT_VALIDATE)
                   1134:                 goto err_exit;
                   1135:
                   1136:         /* must have a filename */
                   1137:         if (fname == NULL)
                   1138:                 goto err_exit;
                   1139:
                   1140:         fp = fopen( (char*)fname , "w" );
                   1141:         if ( fp == NULL )
                   1142:                 goto err_exit;
                   1143:
                   1144:         for ( i = 0 ; i < dict->entry_count ; i++ ) {
                   1145:            se = &dict->string_table[i];
                   1146:            fprintf( fp , "|%s|\n" , dict->string_array + se->string_offset );
                   1147:         } /* endfor */
                   1148:         fclose( fp );
                   1149:
                   1150:         /*  Success.  */
                   1151:         fclose(fp);
                   1152:         return TRUE;
                   1153:
                   1154:         /*  Failure.  */
                   1155: err_exit:
                   1156:         if ( fp != NULL )
                   1157:            fclose(fp);
                   1158:         dict_destroy( dict );
                   1159:         return FALSE;
                   1160: }
CVSweb