Annotation of early-roguelike/urogue/dict.c, Revision 1.1.1.1
1.1 rubenllo 1: /*
2: dict.c
3:
4: UltraRogue: The Ultimate Adventure in the Dungeons of Doom
5: Copyright (C) 1995 Herb Chong
6: All rights reserved.
7:
8: See the file LICENSE.TXT for full copyright and licensing information.
9: */
10:
11: /******************
12: ** Change history:
13: **
14: ** (AK:04/03/95) - In dict_create, initialize hook for extensions to dictionary structure.
15: ** (AK:04/04/95) - In dict_insert, only set any_ptr when a new string entry is created.
16: ** (AK:04/17/95) - Added dict_union and dict_merge.
17: ** (AK:04/18/95) - In dict_create, added code to create signature,
18: ** table of contents, and parameter array.
19: ** (AK:04/18/95) - Revised dict_load, dict_save
20: ** (AK:04/18/95) - Added dict_import
21: **
22: ******************/
23:
24: static char sccsid[] = "%W% %G%";
25:
26: #include <stdlib.h>
27: #include <string.h>
28: #if !defined(OS2) && !defined(_WIN32)
29: #include <unistd.h>
30: #endif
31:
32: #include "dict.h"
33: #include "dictutil.h"
34:
35: #define BUFLEN 300
36:
37: /*************************************************************************
38: * Generic dictionary and hash table functions for word and
39: * string storage.
40: *
41: * Implements generic dictionary management by using a
42: * hash table with direct chaining. All terms are stored in a
43: * separate, unsorted, string table. When inserting into the
44: * hash table exceeds the current size, allocate a new hash table
45: * and rehash the old contents into the new table. When the string
46: * table fills, append more empty entries onto the old table and
47: * continue adding. Inserting a string that already exists increments
48: * the count for the string. Deleting a string only decrements the
49: * count to no less than 0. This will allow a 0 occurence string to be
50: * retrieved. To remove the 0 occurence string table entries, you must
51: * rebuild the dictionary. Merging from one dictionary to a new one will
52: * do this because it only copies nonzero occurences.
53: *
54: * Assumptions:
55: * unsigned long >= 32 bits
56: * int >= 16 bits
57: * char == 8 bits
58: * number of entries < 2^28
59: *************************************************************************/
60:
61: /*************************************************************************
62: * hash_string: calculate hash value for string, modified
63: * version of hashpjw() from the new Dragon book.
64: *
65: * s - string to calculate hash for
66: *
67: * Returns: hash value
68: *************************************************************************/
69: static unsigned long hash_string(const char *s)
70: {
71: unsigned long h = 0;
72: unsigned long g = 0;
73:
74: while (*s) {
75: h <<= 4;
76: h += *s++;
77: if ((g = h & 0xf0000000) != 0)
78: h ^= g >> 24;
79: }
80: return h ^ g;
81: }
82:
83: /*************************************************************************
84: * dict_create: create a dictionary and initialize its structures
85: *
86: * toc_size - number of entries in table of contents ( >= 4 )
87: * initial_string_count - number of strings to hold as initial allocation
88: * initial_hash_chains - size of hash table, must be power of 4
89: * max_chain_length - max number of elements in hash chain before signalling growth
90: *
91: * Returns: pointer to dictionary
92: * NULL - error creating dictionary
93: * non-NULL - sucessful creation
94: *************************************************************************/
95:
96: DICTIONARY *dict_create(
97: const long toc_size,
98: const long initial_string_count,
99: const long initial_hash_chains,
100: const long max_chain_length )
101: {
102: DICTIONARY *dtemp = NULL;
103: STRING_ENTRY *stemp = NULL;
104: long *ltemp = NULL;
105: char *sltemp;
106: long i, j, tocsize;
107:
108: /* check for a power of 4 in number of initial hash entries */
109: switch(initial_hash_chains) {
110: case 0x00000004:
111: case 0x00000010:
112: case 0x00000040:
113: case 0x00000100:
114: case 0x00000400:
115: case 0x00001000:
116: case 0x00004000:
117: case 0x00010000:
118: case 0x00040000:
119: case 0x00100000:
120: case 0x00400000:
121: case 0x01000000:
122: case 0x04000000:
123: break;
124: default:
125: return NULL;
126: }
127:
128: /* Allocate the dictionary structure */
129: if ((dtemp = (DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL)
130: goto err_exit;
131:
132: /* string count has to be within range */
133: j = initial_string_count;
134: if (j > 0x04000000)
135: return NULL;
136:
137: /* force a reasonable value */
138: if (j < 100)
139: j = 100;
140:
141: /* Allocate the string table and string array */
142: if ((stemp = (STRING_ENTRY *)malloc(sizeof(STRING_ENTRY) * j)) == NULL)
143: goto err_exit;
144: if ((sltemp = (char *)malloc(8 * j)) == NULL)
145: goto err_exit;
146:
147: /* Allocate the hash table */
148: if ((ltemp = (long *)malloc(sizeof(long) * initial_hash_chains)) == NULL)
149: goto err_exit;
150:
151: /* Allocate the parameter array */
152: if ( (dtemp->parm = (DICT_PARM_ENTRY *)malloc(14*sizeof(DICT_PARM_ENTRY))) == NULL)
153: goto err_exit;
154:
155: /* Allocate the signature structure */
156: if ( (dtemp->sig=(DICT_SIG*)malloc(sizeof(DICT_SIG))) == NULL )
157: goto err_exit;
158:
159: /* Allocate the Table of Contents */
160: tocsize = toc_size;
161: if ( tocsize < 4 )
162: tocsize = 4;
163: dtemp->sig->toc_size = tocsize;
164: if ( (dtemp->toc=(DICT_TOC_ENTRY*)malloc(dtemp->sig->toc_size*sizeof(DICT_TOC_ENTRY))) == NULL )
165: goto err_exit;
166:
167: dtemp->check_value = DICT_VALIDATE; /* validation value */
168: dtemp->flags = DICT_NONE; /* no flags set */
169: dtemp->entry_count = 0; /* nothing in either table */
170:
171: dtemp->string_table = stemp; /* connect string table */
172: dtemp->string_max = j; /* size of string table */
173: dtemp->scan_string_index = DICT_ENTRY_NONE; /* not pointing to anything in scan */
174: dtemp->string_growth_count = 0; /* haven't grown any */
175:
176: dtemp->string_array = sltemp; /* character array for strings */
177: dtemp->array_size = j * 8; /* how many bytes available */
178: dtemp->array_used = 0; /* nothing used yet */
179: dtemp->array_growth_count = 0; /* haven't grown any */
180:
181: /* force maximum hash chain length to a reasonable value */
182: i = max_chain_length;
183: if (i < 1 || i > 200)
184: i = 200;
185: dtemp->chains = ltemp; /* connect hash table */
186: dtemp->longest_chain_length = 0; /* nothing in table yet */
187: dtemp->allowable_chain_length = i; /* chain length limit */
188: dtemp->table_size = initial_hash_chains; /* size of hash table */
189: dtemp->hash_mask = initial_hash_chains - 1; /* mask for mod() function */
190: dtemp->hash_growth_count = 0; /* haven't grown any */
191:
192: /* string table is empty */
193: for (i = 0 ; i < j ; i++) {
194: dtemp->string_table[i].string_offset = DICT_ENTRY_NONE;
195: dtemp->string_table[i].count = 0;
196: dtemp->string_table[i].next = DICT_ENTRY_NONE;
197: dtemp->string_table[i].flags = 0;
198: dtemp->string_table[i].hash_value = 0;
199: }
200:
201: /* no entries chained */
202: for (i = 0; i < initial_hash_chains; i++)
203: dtemp->chains[i] = DICT_ENTRY_NONE;
204:
205: /* Initialize hook to extended dictionary structure */
206: dtemp->ext = NULL;
207:
208: /* Set up the parameter array */
209: if ( dict_set_parm_ids(dtemp) == FALSE )
210: goto err_exit;
211: if ( dict_set_parm_values(dtemp) == FALSE )
212: goto err_exit;
213:
214: /* Set up the Table of Contents */
215: strcpy( dtemp->toc[0].id , "PARM" );
216: dtemp->toc[0].size = dtemp->sig->nparms * sizeof(DICT_PARM_ENTRY);
217: dtemp->toc[0].ptr = dtemp->parm;
218: dtemp->toc[0].type = 0;
219:
220: strcpy( dtemp->toc[1].id , "HASH" );
221: dtemp->toc[1].size = dtemp->table_size * sizeof(long);
222: dtemp->toc[1].ptr = dtemp->chains;
223: dtemp->toc[1].type = 0;
224:
225: strcpy( dtemp->toc[2].id , "STTB" );
226: dtemp->toc[2].size = dtemp->string_max * sizeof(char);
227: dtemp->toc[2].ptr = dtemp->string_table;
228: dtemp->toc[2].type = 0;
229:
230: strcpy( dtemp->toc[3].id , "STAR" );
231: dtemp->toc[3].size = dtemp->array_size * sizeof(STRING_ENTRY);
232: dtemp->toc[3].ptr = dtemp->string_array;
233: dtemp->toc[3].type = 0;
234:
235: /* Set up the signature */
236: dtemp->sig->check_value = DICT_VALIDATE;
237: dtemp->sig->toc_size = tocsize;
238: dtemp->sig->nparms = 14;
239:
240: return dtemp;
241:
242: /* error exit - saves duplicate code */
243: err_exit:
244:
245: dict_destroy( dtemp );
246: return NULL;
247: }
248:
249: /*************************************************************************
250: * dict_destroy: discard a dictionary and its contents
251: * multiple calls to destroy the same dictionary is safe
252: *
253: * dict - pointer to a dictionary to discard
254: *
255: * Returns: status code
256: * TRUE - dictionary destroyed
257: * FALSE - error when destroying dictionary
258: * Note: Does free the dictionary structure itself.
259: *************************************************************************/
260: BOOLEANC dict_destroy(DICTIONARY *dict)
261: {
262: int i;
263:
264: /* have to point to something */
265: if (dict == NULL)
266: return FALSE;
267:
268: /* check value has to be OK */
269: if (dict->check_value != DICT_VALIDATE)
270: return FALSE;
271:
272: if ( (dict->sig==NULL) || (dict->toc==NULL) || (dict->parm==NULL) )
273: return FALSE;
274:
275: /* Free the type=0 tables */
276: for ( i = 0 ; i < dict->sig->toc_size ; i++ ) {
277: if ( dict->toc[i].ptr != NULL && dict->toc[i].type == 0 )
278: free( dict->toc[i].ptr );
279: } /* endfor */
280:
281: /* Free the Table of Contents and signature */
282: free( dict->toc );
283: free( dict->sig );
284:
285: /* Free the dictionary structure itself */
286: free( dict );
287:
288: return TRUE;
289: }
290:
291: /*************************************************************************
292: * dict_insert: add entries into the dictionary, growing as necessary
293: *
294: * dict - pointer to dictionary to insert into
295: * s - string to insert
296: * count - count of occurences of string
297: * flags - flag bits associated with the string
298: * number - pointer to long to return word number
299: *
300: * Returns: pointer to new entry in dictionary
301: * NULL - error during insert
302: * non-NULL - pointer to inserted or updated entry
303: *
304: * Notes: if the entry already exists, increment the count.
305: * If NULL is returned, the dictionary state can no longer
306: * be trusted. Terminating with an error code would be
307: * safest.
308: *************************************************************************/
309: STRING_ENTRY *dict_insert(DICTIONARY *dict, char *s,
310: const long occurences, const unsigned long flags,
311: void *any_ptr, long *number)
312: {
313: unsigned long hash_value;
314: long hash_index;
315: long string_index = -1;
316: long he2;
317: int chain_len;
318:
319: /* have to point to something */
320: if (dict == NULL)
321: return FALSE;
322:
323: /* check value has to be OK */
324: if (dict->check_value != DICT_VALIDATE)
325: return FALSE;
326:
327: /* must have a string */
328: if (s == NULL) {
329: *number = -1;
330: return FALSE;
331: }
332:
333: /* must be non-NULL */
334: if (s[0] == '\0') {
335: *number = -1;
336: return FALSE;
337: }
338:
339: /* figure out which chain it should go into */
340: hash_value = hash_string(s);
341: hash_index = hash_value & dict->hash_mask;
342:
343: /* look for the entry in the chain */
344: for (he2 = dict->chains[hash_index], chain_len = 0; he2 != DICT_ENTRY_NONE;
345: he2 = dict->string_table[he2].next, chain_len++) {
346: char *sa = &dict->string_array[dict->string_table[he2].string_offset];
347:
348: /* compare hash_value and then string, in that order, for speed */
349: if (dict->string_table[he2].hash_value == hash_value && !strcmp(s, sa)) {
350: string_index = he2;
351: break;
352: }
353: }
354:
355: /* string wasn't found */
356: if (string_index < 0) {
357: /* make sure there is room in string entry table */
358: if (dict->entry_count + 1 > dict->string_max) {
359: /* make new table 20% bigger */
360: dict->string_max = (dict->string_max * 6) / 5;
361: dict->string_table = (STRING_ENTRY *)realloc(dict->string_table,
362: sizeof(STRING_ENTRY) * dict->string_max);
363: if (dict->string_table == NULL)
364: return NULL;
365: dict->string_growth_count++;
366: }
367: string_index = dict->entry_count;
368: dict->entry_count++;
369:
370: /* make sure there is room in the string array */
371: if (dict->array_used + (long)strlen(s) + 1 > dict->array_size) {
372: dict->array_size = (dict->array_size * 6) / 5;
373: dict->string_array = (char *) realloc(dict->string_array,
374: dict->array_size);
375: if (dict->string_array == NULL)
376: return NULL;
377: }
378:
379: /* fill in starting values */
380: strcpy(&dict->string_array[dict->array_used], s);
381: dict->string_table[string_index].string_offset = dict->array_used;
382: dict->array_used += (long) strlen(s) + 1;
383: dict->string_table[string_index].count = 0 ;
384: dict->string_table[string_index].flags = DICT_NONE;
385: dict->string_table[string_index].any_ptr = any_ptr; /* (AK:04/04/95) */
386: dict->string_table[string_index].hash_value = hash_value;
387:
388: /* hook entry at beginning of chain */
389: dict->string_table[string_index].next = dict->chains[hash_index];
390: dict->chains[hash_index] = string_index;
391:
392: /* record chain lengths */
393: chain_len++;
394: if (chain_len > dict->longest_chain_length)
395: dict->longest_chain_length = chain_len;
396:
397: /* if a chain is too long */
398: if (chain_len > dict->allowable_chain_length) {
399: long new_size = dict->table_size * 4;
400: long new_mask = new_size - 1;
401: long *hetemp;
402: long i;
403: int longest_chain;
404:
405: if ((hetemp = (long *)malloc(sizeof(long) * new_size)) == NULL)
406: return NULL;
407:
408: /* hash table chains are empty */
409: for (i = 0; i < new_size; i++)
410: hetemp[i] = DICT_ENTRY_NONE;
411:
412: /* reset all chains */
413: for (i = 0; i < dict->entry_count; i++)
414: dict->string_table[i].next = DICT_ENTRY_NONE;
415:
416: /* recreate hash table entries by reinserting all strings */
417: for (i = 0; i < dict->entry_count; i++) {
418: long he = dict->string_table[i].hash_value & new_mask;
419:
420: dict->string_table[i].next = hetemp[he];
421: hetemp[he] = i;
422: }
423:
424: /* find longest chain length */
425: for (i = 0, longest_chain = 0; i < new_size; i++) {
426: int len;
427: long cur;
428:
429: for (cur = hetemp[i], len = 0; cur != DICT_ENTRY_NONE;
430: cur = dict->string_table[cur].next)
431: len++;
432: ;
433: if (len > longest_chain)
434: longest_chain = len;
435: }
436:
437: /* delete old table and attach new one */
438: free(dict->chains);
439: dict->chains = hetemp;
440: dict->longest_chain_length = longest_chain;
441: dict->table_size = new_size;
442: dict->hash_mask = new_mask;
443:
444: /* keep track of growth */
445: dict->hash_growth_count++;
446: }
447: }
448:
449: dict->string_table[string_index].count += occurences;
450: dict->string_table[string_index].flags |= flags;
451: *number = string_index;
452:
453: return &dict->string_table[string_index];
454: }
455:
456: /*************************************************************************
457: * dict_delete: deletes an entry from the dictionary
458: * (Actually, only decrements the entry's count)
459: *
460: * dict - pointer to dictionary to delete
461: * s - string to find and delete
462: * count - count to decrement entry by
463: *
464: * Returns: status code
465: * TRUE - entry has been deleted
466: * FALSE - entry wasn't found or error occured
467: *************************************************************************/
468: BOOLEANC dict_delete(const DICTIONARY *dict, const char *s, const long count)
469: {
470: STRING_ENTRY *se;
471: long n;
472:
473: /* find the string */
474: if ((se = dict_search(dict, s, &n)) == NULL)
475: return FALSE;
476:
477: /* decrement count and make sure it stays valid */
478: se->count -= count;
479: if (se->count < 0)
480: se->count = 0;
481: return TRUE;
482: }
483:
484: /*************************************************************************
485: * dict_search: look for entries in the dictionary
486: *
487: * dict - pointer to dictionary to search
488: * s - string to search for
489: * number - pointer to long to return string number
490: *
491: * Returns: pointer to string entry
492: * NULL - entry not found
493: * non-NULL - pointer to string entry
494: *************************************************************************/
495: STRING_ENTRY *dict_search(const DICTIONARY *dict, const char *s,
496: long *number)
497: {
498: unsigned long hash_value;
499: long hash_index;
500: long string_index = -1;
501: long he;
502:
503: /* have to point to something */
504: if (dict == NULL)
505: return NULL;
506:
507: /* check value has to be OK */
508: if (dict->check_value != DICT_VALIDATE)
509: return NULL;
510:
511: /* must have a string */
512: if (s == NULL) {
513: *number = -1;
514: return NULL;
515: }
516:
517: /* must be non-NULL */
518: if (s[0] == '\0') {
519: *number = -1;
520: return FALSE;
521: }
522:
523: /* figure out which chain it should be in */
524: hash_value = hash_string(s);
525: hash_index = hash_value & dict->hash_mask;
526:
527: /* look for the entry in the chain */
528: for (he = dict->chains[hash_index]; he != DICT_ENTRY_NONE;
529: he = dict->string_table[he].next) {
530: char *sa = (char*)(&dict->string_array[dict->string_table[he].string_offset]);
531:
532: /* compare hash_value and then string, in that order, for speed */
533: if (dict->string_table[he].hash_value == hash_value && !strcmp(s, sa)) {
534: string_index = he;
535: break;
536: }
537: }
538: if (string_index < 0) {
539: *number = -1;
540: return NULL;
541: }
542:
543: *number = string_index;
544: return dict->string_table+string_index;
545: }
546:
547: /*************************************************************************
548: * dict_union: merges contents of 2 dictionaries into
549: * a third
550: *
551: * dict1 - pointer to dictionary 1
552: * dict2 - pointer to dictionary 2
553: *
554: * Returns: dictionary pointer
555: * NULL - failed to merge
556: * non-NULL - pointer to new dictionary
557: *
558: * Notes: entries of the same string have their counts
559: * added and their flags ORed together.
560: *************************************************************************/
561: DICTIONARY *dict_union( const DICTIONARY *dict1 , const DICTIONARY *dict2 )
562: {
563: DICTIONARY *dict = NULL;
564: STRING_ENTRY *se, *se2;
565: long initial_string_count, initial_hash_chains, max_chain_length;
566: long i, string_index;
567:
568: /***********
569: ** Initialize the new dictionary.
570: ***********/
571:
572: if ( dict1==NULL || dict2==NULL )
573: goto err_exit;
574: if ((dict=(DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL)
575: goto err_exit;
576:
577: initial_string_count = dict1->string_max;
578: initial_hash_chains = dict1->table_size;
579: max_chain_length = dict1->allowable_chain_length;
580: dict = dict_create( 4,
581: initial_string_count,
582: initial_hash_chains,
583: max_chain_length );
584:
585: /***********
586: ** Copy the entries from dict1 into the new dictionary.
587: ***********/
588:
589: for ( i = 0 ; i < dict1->entry_count ; i++ ) {
590: se = (STRING_ENTRY*)&dict1->string_table[i];
591: if ( se->count > 0 ) {
592: se2 = dict_insert(
593: dict,
594: dict1->string_array+se->string_offset,
595: se->count,
596: se->flags,
597: NULL,
598: &string_index );
599: if ( se2 == NULL )
600: goto err_exit;
601: } /* endif */
602: } /* endfor */
603:
604: /* Merge the entries from dict2 into the new dictionary. */
605: if ( dict_merge(dict,dict2,FALSE) == FALSE )
606: goto err_exit;
607:
608: /* Success. Return a pointer to the new dictionary. */
609: return( dict );
610:
611: /* Failure. Ignominiously erase our tracks and return NULL. */
612: err_exit:
613: dict_destroy( dict );
614: return NULL;
615: }
616:
617: /*************************************************************************
618: * dict_merge: merges the contents of a dictionary into
619: * another one, updating the contents of the destination
620: *
621: * dst - dictionary to update
622: * src - dictionary to add
623: * move - boolean flag
624: * TRUE - move to dest
625: * FALSE - copy to dest
626: *
627: * Returns: status code
628: * TRUE - merge completed
629: * FALSE - error on merge
630: *
631: * Notes: entries of the same string have their counts
632: * added. At the end of a move, src is empty. If there
633: * is an error during a move, the contents of both
634: * dictionaries cannot be trusted.
635: *************************************************************************/
636: BOOLEANC dict_merge(const DICTIONARY *dst, const DICTIONARY *src, const BOOLEANC move)
637: {
638: STRING_ENTRY *se, *se2;
639: DICTIONARY *dict1, *dict2;
640: long i, string_index, index;
641:
642: dict1 = (DICTIONARY*)src;
643: dict2 = (DICTIONARY*)dst;
644:
645: /***********
646: ** Copy the dictionary entries into the new dictionary.
647: ***********/
648:
649: for ( i = 0 ; i < dict1->entry_count ; i++ ) {
650: se = (STRING_ENTRY*)&dict1->string_table[i];
651: if ( se->count > 0 ) {
652: se2 = dict_insert(
653: dict2,
654: dict1->string_array+se->string_offset,
655: se->count,
656: se->flags,
657: NULL,
658: &string_index );
659: if ( se2 == NULL )
660: goto err_exit;
661: } /* endif */
662: } /* endfor */
663:
664: /***********
665: ** Set up the dictionary parameter vector.
666: ***********/
667:
668: if ( dict_set_parm_values(dict2) == FALSE )
669: return( FALSE );
670:
671: /***********
672: ** Update the table of contents.
673: ** PARM HASH STTB STAR
674: ***********/
675:
676: if ( (index=dict_toc_index(dict2,"HASH")) == -1)
677: goto err_exit;
678: dict2->toc[index].size = dict2->table_size * sizeof(long);
679: dict2->toc[index].ptr = dict2->chains;
680:
681: if ( (index=dict_toc_index(dict2,"STTB")) == -1)
682: goto err_exit;
683: dict2->toc[index].size = dict2->string_max * sizeof(char);
684: dict2->toc[index].ptr = dict2->string_table;
685:
686: if ( (index=dict_toc_index(dict2,"STAR")) == -1)
687: goto err_exit;
688: dict2->toc[index].size = dict2->array_size * sizeof(STRING_ENTRY);
689: dict2->toc[index].ptr = dict2->string_array;
690:
691: /***********
692: ** Update the signature
693: ***********/
694:
695: dict2->sig->checksum =
696: compute_checksum( 4*sizeof(DICT_TOC_ENTRY) , (char*)(dict2->toc) );
697:
698: /***********
699: ** If this is a move, destroy the source dictionary
700: ***********/
701:
702: if ( move == TRUE )
703: if ( dict_destroy(dict1) == FALSE )
704: goto err_exit;
705:
706: /***********
707: ** Success
708: ***********/
709:
710: return( TRUE );
711:
712: /***********
713: ** Failure
714: ***********/
715:
716: err_exit:
717: dict_destroy( dict2 );
718: return FALSE;
719: }
720:
721: /*************************************************************************
722: * dict_string_by_number: return string pointer for
723: * a given string number
724: *
725: * number - string number
726: *
727: * Returns: pointer to string entry
728: * NULL - entry not found
729: * non-NULL - pointer to string entry
730: *************************************************************************/
731: STRING_ENTRY *dict_string_by_number(const DICTIONARY *dict, const long number)
732: {
733: if (dict == NULL)
734: return NULL;
735:
736: /* check value has to be OK */
737: if (dict->check_value != DICT_VALIDATE)
738: return NULL;
739:
740: /* string number has to be within range */
741: if (number < 0 || number >= dict->entry_count)
742: return NULL;
743:
744: return dict->string_table+number;
745: }
746:
747: /*************************************************************************
748: * dict_scan_begin: begin a scan of the dictionary
749: *
750: * dict - pointer to dictionary to scan
751: *
752: * Returns: status code
753: * TRUE - scan initialized
754: * FALSE - error
755: *
756: * Notes: if a scan is already in progress, it is
757: * abandoned and the scan is reset to the
758: * beginning.
759: *************************************************************************/
760: BOOLEANC dict_scan_begin(DICTIONARY *dict)
761: {
762: /* have to point to something */
763: if (dict == NULL)
764: return FALSE;
765:
766: /* check value has to be OK */
767: if (dict->check_value != DICT_VALIDATE)
768: return FALSE;
769:
770: /* point to first entry in string table */
771: dict->scan_string_index = 0;
772: return TRUE;
773: }
774:
775: /*************************************************************************
776: * dict_scan_next: get the next entry in a scan
777: *
778: * dict - pointer to dictionary to continue scanning
779: *
780: * Returns: pointer to string entry
781: * NULL - no more entries
782: * non-NULL - next string entry
783: *************************************************************************/
784: STRING_ENTRY *dict_scan_next(DICTIONARY *dict)
785: {
786: /* have to point to something */
787: if (dict == NULL)
788: return NULL;
789:
790: /* check value has to be OK */
791: if (dict->check_value != DICT_VALIDATE)
792: return NULL;
793:
794: /* scan index has to be within range */
795: if (dict->scan_string_index < 0
796: || dict->scan_string_index > dict->entry_count)
797: return NULL;
798:
799: /* for first non-empty table entry */
800: while (dict->scan_string_index < dict->entry_count
801: && dict->string_table[dict->scan_string_index].count == 0)
802: dict->scan_string_index++;
803:
804: /* past end of table? */
805: if (dict->scan_string_index >= dict->entry_count)
806: return NULL;
807:
808: return &dict->string_table[dict->scan_string_index++];
809: }
810:
811:
812: /*************************************************************************
813: * dict_load - load a compiled dictionary into memory
814: * creates a new dictionary
815: *
816: * fname - fully qualified file name
817: *
818: * Returns: pointer to created dictionary structure
819: * (NULL on failure)
820: *************************************************************************/
821: DICTIONARY *dict_load(const char *fname)
822: {
823: DICTIONARY *dict = NULL;
824: int code, index;
825: FILE *fi;
826: int ntoc;
827:
828: if ( (fi=fopen((char*)fname,"rb")) == NULL ) {
829: signal_error( "dict_load: could not open file" , (char*)fname , 0 );
830: goto err_exit;
831: } /* endif */
832:
833: if ((dict = (DICTIONARY *)malloc(sizeof(DICTIONARY))) == NULL) {
834: /* signal_error( "dict_load: alloc failed" , "" , 0 ); */
835: goto err_exit;
836: } /* endif */
837:
838: /* Read the dictionary signature record */
839: if ((dict->sig = (DICT_SIG *)malloc(sizeof(DICT_SIG))) == NULL) {
840: goto err_exit;
841: } /* endif */
842: code = block_read( fi , (char*)(dict->sig) , sizeof(DICT_SIG) , 0 );
843: if ( code == -1 ) {
844: signal_error( "dict_load: could not read signature" , (char*)fname , 0 );
845: goto err_exit;
846: } /* endif */
847:
848: if ( dict->sig->check_value != DICT_VALIDATE ) {
849: signal_error( "dict_load: could not validate file" , (char*)fname , 0 );
850: goto err_exit;
851: } /* endif */
852: dict->check_value = dict->sig->check_value;
853:
854: /* Read the dictionary Table of Contents */
855: ntoc = dict->sig->toc_size;
856: dict->toc = (DICT_TOC_ENTRY *) malloc( ntoc * sizeof(DICT_TOC_ENTRY) );
857: if ( dict->toc == NULL ) {
858: signal_error( "dict_load: alloc of TOC failed" , "" , 0 );
859: goto err_exit;
860: } /* endif */
861: code = block_read( fi ,
862: (char*)(dict->toc) ,
863: ntoc * sizeof(DICT_TOC_ENTRY) ,
864: sizeof(DICT_SIG) );
865: if ( code == -1 ) {
866: signal_error( "dict_load: could not read Table of Contents" , (char*)fname , 0 );
867: goto err_exit;
868: } /* endif */
869:
870: /* Read the dictionary parameters */
871: dict->parm = (DICT_PARM_ENTRY *)
872: dict_load_block( dict , "PARM" , fi , NULL );
873: if ( dict->parm == NULL ) {
874: signal_error( "dict_load: could not load parameter table" , "" , 0 );
875: goto err_exit;
876: } /* endif */
877: index = dict_toc_index( dict , "PARM" );
878: dict->sig->nparms = dict->toc[index].size / sizeof(DICT_PARM_ENTRY);
879:
880: /* Set the parameter values in the dictionary structure */
881: if ( dict_set_parm_variables(dict) == FALSE )
882: goto err_exit;
883:
884: /* Load the string array */
885: dict->string_array = (char *)
886: dict_load_block( dict , "STAR" , fi , NULL );
887: if ( dict->string_array == NULL ) {
888: signal_error( "dict_load: could not load string array" , (char*)fname , 0 );
889: goto err_exit;
890: } /* endif */
891:
892: /* Load the string table */
893: dict->string_table = (STRING_ENTRY *)
894: dict_load_block( dict , "STTB" , fi , NULL );
895: if ( dict->string_table == NULL ) {
896: signal_error( "dict_load: could not load string table" , (char*)fname , 0 );
897: goto err_exit;
898: } /* endif */
899:
900: /* Load the hash table */
901: dict->chains = (long *)
902: dict_load_block( dict , "HASH" , fi , NULL );
903: if ( dict->chains == NULL ) {
904: signal_error( "dict_load: could not load hash table" , (char*)fname , 0 );
905: goto err_exit;
906: } /* endif */
907:
908: /* Initialize the hook for dictionary extensions */
909: dict->ext = NULL;
910:
911: /* Success */
912: fclose( fi );
913: return( dict );
914:
915: /* Failure */
916: err_exit:
917: if ( fi != NULL )
918: fclose( fi );
919: dict_destroy( dict );
920: return NULL;
921: }
922:
923:
924: /*************************************************************************
925: * dict_save - save a dictionary from memory into a file
926: *
927: * dict - pointer to dictionary to save
928: * fname - full qualified file name prefix of dictionary
929: *
930: * Returns: status code
931: * TRUE - dictionary was saved sucessfully
932: * FALSE - error during save
933: *************************************************************************/
934: BOOLEANC dict_save( DICTIONARY *dict, const char *fname )
935: {
936: int index, ret_code;
937: FILE *fo = NULL;
938:
939: /* Have to be pointing at a valid dictionary */
940: if ( dict == NULL || dict->sig->check_value != DICT_VALIDATE )
941: goto err_exit;
942:
943: /* Open the file for output */
944: if ( (fo=fopen((char*)fname,"wb")) == NULL )
945: goto err_exit;
946:
947: /* Make the table of contents entries current */
948: /* Note: This will not be necessary once the data is stored in EVECTORs */
949:
950: if ( (index=dict_toc_index(dict,"PARM")) == -1 )
951: goto err_exit;
952: dict->toc[index].size = dict->sig->nparms * sizeof(DICT_PARM_ENTRY);
953: dict->toc[index].ptr = dict->parm;
954:
955: if ( (index=dict_toc_index(dict,"STAR")) == -1 )
956: goto err_exit;
957: dict->toc[index].size = dict->array_size * sizeof(char);
958: dict->toc[index].ptr = dict->string_array;
959:
960: if ( (index=dict_toc_index(dict,"STTB")) == -1 )
961: goto err_exit;
962: dict->toc[index].size = dict->string_max * sizeof(STRING_ENTRY);
963: dict->toc[index].ptr = dict->string_table;
964:
965: if ( (index=dict_toc_index(dict,"HASH")) == -1 )
966: goto err_exit;
967: dict->toc[index].size = dict->table_size * sizeof(long);
968: dict->toc[index].ptr = dict->chains;
969:
970: /* Reset the TOC offsets and checksums for ALL tables */
971: /* (not just type=0 tables) */
972: dict_reset_toc_offsets( dict );
973:
974: /* Set the dictionary parm structure from the parameter values */
975: if ( dict_set_parm_values(dict) == FALSE )
976: goto err_exit;
977:
978: /* Save the signature */
979: dict->sig->checksum = compute_checksum( sizeof(DICT_SIG) , (char*)(dict->sig) );
980: ret_code = block_write( fo ,
981: (char*)dict->sig ,
982: sizeof(DICT_SIG) );
983: if ( ret_code == -1 )
984: goto err_exit;
985:
986: /* Save the table of contents */
987: ret_code = block_write( fo,
988: (char*)dict->toc,
989: dict->sig->toc_size * sizeof(DICT_TOC_ENTRY) );
990: if ( ret_code == -1 )
991: goto err_exit;
992:
993: /* Save the tables */
994: /* For now, only save type=0 tables */
995: for ( index = 0 ; index < dict->sig->toc_size ; index++ ) {
996: if ( dict->toc[index].type == 0 ) { /* Ordinary table */
997: ret_code = dict_save_block( dict , dict->toc[index].id , fo );
998: if ( ret_code == FALSE )
999: goto err_exit;
1000: } /* endif */
1001: } /* endfor */
1002:
1003: /* Success */
1004: fclose( fo );
1005: return TRUE;
1006:
1007: /* Failure */
1008: err_exit:
1009: if ( fo != NULL )
1010: fclose( fo );
1011: return FALSE;
1012: }
1013:
1014:
1015: /*************************************************************************
1016: * dict_import: read in an ASCII dictionary.
1017: *
1018: * dict_fname - name of dictionary file
1019: * parameters to create a DICTIONARY structure (see dict_create)
1020: *
1021: * Returns: pointer to created DICTIONARY structure
1022: * (NULL on failure)
1023: *
1024: *************************************************************************/
1025: DICTIONARY *dict_import( const char *dict_fname ,
1026: const long initial_string_count ,
1027: const long initial_hash_entries ,
1028: const long max_chain_length )
1029: {
1030: DICTIONARY *dict;
1031: char buffer[BUFLEN], ch;
1032: int index, c, c0;
1033: long number;
1034: FILE *fi = NULL;
1035:
1036: /***********
1037: ** Dictionary setup.
1038: ***********/
1039:
1040: dict = dict_create( 4,
1041: initial_string_count ,
1042: initial_hash_entries ,
1043: max_chain_length );
1044: if ( dict == NULL )
1045: goto err_exit;
1046:
1047: /***********
1048: ** Read the dictionary file
1049: ** Each line should have one word or a string delimited by '|'
1050: ***********/
1051:
1052: if ( (fi=fopen(dict_fname,"r")) == NULL )
1053: goto err_exit;
1054: while( fgets(buffer,BUFLEN,fi) != NULL ) {
1055: c0 = 0;
1056: /* Skip to non-blank */
1057: while ( (c0<BUFLEN-2) && (buffer[c0]==' ') ) ++c0;
1058: if ( buffer[c0] == '|' ) {
1059: c = ++c0;
1060: ch = '|';
1061: } else {
1062: c = c0;
1063: ch = ' ';
1064: } /* endif */
1065: /* Scan to blank or matching '|' */
1066: while ( (c<BUFLEN-1) && (buffer[c]!='\0') &&
1067: (buffer[c]!='\n') && (buffer[c]!=ch) )
1068: ++c;
1069: buffer[c] = '\0';
1070: /* Insert the word */
1071: if ( dict_insert(dict,buffer+c0,1,0,NULL,&number) == NULL )
1072: goto err_exit;
1073: } /* endwhile */
1074:
1075: /***********
1076: ** Fill in the dictionary parameter vector.
1077: ***********/
1078:
1079: if ( dict_set_parm_values(dict) == FALSE )
1080: goto err_exit;
1081:
1082: /***********
1083: ** Update the table of contents for HASH STTB STAR
1084: ***********/
1085:
1086: if ( (index=dict_toc_index(dict,"HASH")) == -1 )
1087: goto err_exit;
1088: dict->toc[index].size = dict->table_size * sizeof(long);
1089:
1090: if ( (index=dict_toc_index(dict,"STTB")) == -1 )
1091: goto err_exit;
1092: dict->toc[index].size = dict->string_max * sizeof(char);
1093:
1094: if ( (index=dict_toc_index(dict,"STAR")) == -1 )
1095: goto err_exit;
1096: dict->toc[index].size = dict->array_size * sizeof(STRING_ENTRY);
1097:
1098: /* Success. Return a pointer to the new dictionary. */
1099: fclose(fi);
1100: return( dict );
1101:
1102: /* Failure. Ignominiously erase our tracks and return NULL. */
1103: err_exit:
1104: if ( fi != NULL )
1105: fclose(fi);
1106: dict_destroy( dict );
1107: return NULL;
1108: }
1109:
1110: /*************************************************************************
1111: * dict_export - save an extended dictionary from memory into
1112: * an ASCII file
1113: *
1114: * dict - pointer to dictionary to save
1115: * fname - full qualified file name prefix of dictionary
1116: *
1117: * Returns: status code
1118: * TRUE - dictionary was saved sucessfully
1119: * FALSE - error during save
1120: *
1121: *************************************************************************/
1122: BOOLEANC dict_export( DICTIONARY *dict , const char *fname )
1123: {
1124: FILE *fp = NULL;
1125: STRING_ENTRY *se;
1126: int i;
1127:
1128: /* have to point to something */
1129: if (dict == NULL)
1130: goto err_exit;
1131:
1132: /* check value has to be OK */
1133: if (dict->check_value != DICT_VALIDATE)
1134: goto err_exit;
1135:
1136: /* must have a filename */
1137: if (fname == NULL)
1138: goto err_exit;
1139:
1140: fp = fopen( (char*)fname , "w" );
1141: if ( fp == NULL )
1142: goto err_exit;
1143:
1144: for ( i = 0 ; i < dict->entry_count ; i++ ) {
1145: se = &dict->string_table[i];
1146: fprintf( fp , "|%s|\n" , dict->string_array + se->string_offset );
1147: } /* endfor */
1148: fclose( fp );
1149:
1150: /* Success. */
1151: fclose(fp);
1152: return TRUE;
1153:
1154: /* Failure. */
1155: err_exit:
1156: if ( fp != NULL )
1157: fclose(fp);
1158: dict_destroy( dict );
1159: return FALSE;
1160: }
CVSweb