| 1 | /*************************************************************************/ | 
| 2 | /*                                                                       */ | 
| 3 | /*                  Language Technologies Institute                      */ | 
| 4 | /*                     Carnegie Mellon University                        */ | 
| 5 | /*                        Copyright (c) 1999                             */ | 
| 6 | /*                        All Rights Reserved.                           */ | 
| 7 | /*                                                                       */ | 
| 8 | /*  Permission is hereby granted, free of charge, to use and distribute  */ | 
| 9 | /*  this software and its documentation without restriction, including   */ | 
| 10 | /*  without limitation the rights to use, copy, modify, merge, publish,  */ | 
| 11 | /*  distribute, sublicense, and/or sell copies of this work, and to      */ | 
| 12 | /*  permit persons to whom this work is furnished to do so, subject to   */ | 
| 13 | /*  the following conditions:                                            */ | 
| 14 | /*   1. The code must retain the above copyright notice, this list of    */ | 
| 15 | /*      conditions and the following disclaimer.                         */ | 
| 16 | /*   2. Any modifications must be clearly marked as such.                */ | 
| 17 | /*   3. Original authors' names are not deleted.                         */ | 
| 18 | /*   4. The authors' names are not used to endorse or promote products   */ | 
| 19 | /*      derived from this software without specific prior written        */ | 
| 20 | /*      permission.                                                      */ | 
| 21 | /*                                                                       */ | 
| 22 | /*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */ | 
| 23 | /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */ | 
| 24 | /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */ | 
| 25 | /*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */ | 
| 26 | /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */ | 
| 27 | /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */ | 
| 28 | /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */ | 
| 29 | /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */ | 
| 30 | /*  THIS SOFTWARE.                                                       */ | 
| 31 | /*                                                                       */ | 
| 32 | /*************************************************************************/ | 
| 33 | /*             Author:  Alan W Black (awb@cs.cmu.edu)                    */ | 
| 34 | /*               Date:  December 1999                                    */ | 
| 35 | /*************************************************************************/ | 
| 36 | /*                                                                       */ | 
| 37 | /*  Lexicon related functions                                            */ | 
| 38 | /*                                                                       */ | 
| 39 | /*************************************************************************/ | 
| 40 | #ifndef _CST_LEXICON_H__ | 
| 41 | #define _CST_LEXICON_H__ | 
| 42 |  | 
| 43 | #include "cst_item.h" | 
| 44 | #include "cst_lts.h" | 
| 45 |  | 
| 46 | typedef struct lexicon_struct { | 
| 47 |     const char *name; | 
| 48 |     int num_entries; | 
| 49 |     /* Entries are centered around bytes with value 255 */ | 
| 50 |     /* entries and forward (compressed) pronunciations and backwards */ | 
| 51 |     /* each are terminated (preceeded in pron case) by 0 */ | 
| 52 |     /* This saves 4 bytes per entry for an index */ | 
| 53 |     unsigned char *data; /* the entries and phone strings */ | 
| 54 |     int num_bytes;       /* the number of bytes in the data */ | 
| 55 |     char **phone_table; | 
| 56 |  | 
| 57 |     cst_lts_rules *lts_rule_set; | 
| 58 |  | 
| 59 |     int (*syl_boundary)(const cst_item *i,const cst_val *p); | 
| 60 |      | 
| 61 |     cst_val *(*lts_function)(const struct lexicon_struct *l, const char *word, const char *pos, const cst_features *feats); | 
| 62 |  | 
| 63 |     char ***addenda; | 
| 64 |     /* ngram frequency table used for packed entries */ | 
| 65 |     const char * const *phone_hufftable; | 
| 66 |     const char * const *entry_hufftable; | 
| 67 |  | 
| 68 |     cst_utterance *(*postlex)(cst_utterance *u); | 
| 69 |  | 
| 70 |     cst_val *lex_addenda;  /* For pronunciations added at run time */ | 
| 71 |  | 
| 72 | } cst_lexicon; | 
| 73 |  | 
| 74 | cst_lexicon *new_lexicon(); | 
| 75 | void delete_lexicon(cst_lexicon *lex); | 
| 76 |  | 
| 77 | cst_val *cst_lex_make_entry(const cst_lexicon *lex,  | 
| 78 |                             const cst_string *entry); | 
| 79 | cst_val *cst_lex_load_addenda(const cst_lexicon *lex,  | 
| 80 |                               const char *lexfile); | 
| 81 |  | 
| 82 | cst_val *lex_lookup(const cst_lexicon *l, const char *word, const char *pos, | 
| 83 |                     const cst_features *feats); | 
| 84 | int in_lex(const cst_lexicon *l, const char *word, const char *pos, | 
| 85 |            const cst_features *feats); | 
| 86 |  | 
| 87 | CST_VAL_USER_TYPE_DCLS(lexicon,cst_lexicon) | 
| 88 |  | 
| 89 | #endif | 
| 90 |  |