| 1 | /*************************************************************************/ |
| 2 | /* */ |
| 3 | /* Language Technologies Institute */ |
| 4 | /* Carnegie Mellon University */ |
| 5 | /* Copyright (c) 1999 */ |
| 6 | /* All Rights Reserved. */ |
| 7 | /* */ |
| 8 | /* Permission is hereby granted, free of charge, to use and distribute */ |
| 9 | /* this software and its documentation without restriction, including */ |
| 10 | /* without limitation the rights to use, copy, modify, merge, publish, */ |
| 11 | /* distribute, sublicense, and/or sell copies of this work, and to */ |
| 12 | /* permit persons to whom this work is furnished to do so, subject to */ |
| 13 | /* the following conditions: */ |
| 14 | /* 1. The code must retain the above copyright notice, this list of */ |
| 15 | /* conditions and the following disclaimer. */ |
| 16 | /* 2. Any modifications must be clearly marked as such. */ |
| 17 | /* 3. Original authors' names are not deleted. */ |
| 18 | /* 4. The authors' names are not used to endorse or promote products */ |
| 19 | /* derived from this software without specific prior written */ |
| 20 | /* permission. */ |
| 21 | /* */ |
| 22 | /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */ |
| 23 | /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ |
| 24 | /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ |
| 25 | /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */ |
| 26 | /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ |
| 27 | /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ |
| 28 | /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ |
| 29 | /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ |
| 30 | /* THIS SOFTWARE. */ |
| 31 | /* */ |
| 32 | /*************************************************************************/ |
| 33 | /* Author: Alan W Black (awb@cs.cmu.edu) */ |
| 34 | /* Date: December 1999 */ |
| 35 | /*************************************************************************/ |
| 36 | /* */ |
| 37 | /* Lexicon related functions */ |
| 38 | /* */ |
| 39 | /*************************************************************************/ |
| 40 | #ifndef _CST_LEXICON_H__ |
| 41 | #define _CST_LEXICON_H__ |
| 42 | |
| 43 | #include "cst_item.h" |
| 44 | #include "cst_lts.h" |
| 45 | |
| 46 | typedef struct lexicon_struct { |
| 47 | const char *name; |
| 48 | int num_entries; |
| 49 | /* Entries are centered around bytes with value 255 */ |
| 50 | /* entries and forward (compressed) pronunciations and backwards */ |
| 51 | /* each are terminated (preceeded in pron case) by 0 */ |
| 52 | /* This saves 4 bytes per entry for an index */ |
| 53 | unsigned char *data; /* the entries and phone strings */ |
| 54 | int num_bytes; /* the number of bytes in the data */ |
| 55 | char **phone_table; |
| 56 | |
| 57 | cst_lts_rules *lts_rule_set; |
| 58 | |
| 59 | int (*syl_boundary)(const cst_item *i,const cst_val *p); |
| 60 | |
| 61 | cst_val *(*lts_function)(const struct lexicon_struct *l, const char *word, const char *pos, const cst_features *feats); |
| 62 | |
| 63 | char ***addenda; |
| 64 | /* ngram frequency table used for packed entries */ |
| 65 | const char * const *phone_hufftable; |
| 66 | const char * const *entry_hufftable; |
| 67 | |
| 68 | cst_utterance *(*postlex)(cst_utterance *u); |
| 69 | |
| 70 | cst_val *lex_addenda; /* For pronunciations added at run time */ |
| 71 | |
| 72 | } cst_lexicon; |
| 73 | |
| 74 | cst_lexicon *new_lexicon(); |
| 75 | void delete_lexicon(cst_lexicon *lex); |
| 76 | |
| 77 | cst_val *cst_lex_make_entry(const cst_lexicon *lex, |
| 78 | const cst_string *entry); |
| 79 | cst_val *cst_lex_load_addenda(const cst_lexicon *lex, |
| 80 | const char *lexfile); |
| 81 | |
| 82 | cst_val *lex_lookup(const cst_lexicon *l, const char *word, const char *pos, |
| 83 | const cst_features *feats); |
| 84 | int in_lex(const cst_lexicon *l, const char *word, const char *pos, |
| 85 | const cst_features *feats); |
| 86 | |
/*
 * Macro invocation for the "lexicon" name and the cst_lexicon type.
 * NOTE(review): CST_VAL_USER_TYPE_DCLS is defined outside this header;
 * presumably it declares the cst_val user-type accessors for cst_lexicon
 * -- confirm against the macro definition.
 */
CST_VAL_USER_TYPE_DCLS(lexicon,cst_lexicon)
| 88 | |
| 89 | #endif |
| 90 | |