Public Types | |
enum | representation_t { sparse , dense , backoff } |
enum | entry_t { frequencies , log_frequencies } |
Public Member Functions | |
EST_Ngrammar (int o, representation_t r, const EST_StrList &wordlist) | |
EST_Ngrammar (int o, representation_t r, const EST_StrList &wordlist, const EST_StrList &predlist) | |
EST_Ngrammar (int o, representation_t r, EST_Discrete &v) | |
void | default_values () |
void | clear () |
bool | init (int o, representation_t r, const EST_StrList &wordlist) |
bool | init (int o, representation_t r, const EST_StrList &wordlist, const EST_StrList &predlist) |
bool | init (int o, representation_t r, EST_Discrete &v) |
bool | init (int o, representation_t r, EST_Discrete &v, EST_Discrete &pv) |
int | num_states (void) const |
double | samples (void) const |
int | order () const |
int | get_vocab_length () const |
EST_String | get_vocab_word (int i) const |
int | get_vocab_word (const EST_String &s) const |
int | get_pred_vocab_length () const |
EST_String | get_pred_vocab_word (int i) const |
int | get_pred_vocab_word (const EST_String &s) const |
int | closed_vocab () const |
entry_t | entry_type () const |
representation_t | representation () const |
bool | build (const EST_StrList &filenames, const EST_String &prev=SENTENCE_START_MARKER, const EST_String &prev_prev=SENTENCE_END_MARKER, const EST_String &last=SENTENCE_END_MARKER, const EST_String &input_format="", const EST_String &oov_mode="", const int mincount=1, const int maxcount=10) |
void | accumulate (const EST_StrVector &words, const double count=1) |
void | accumulate (const EST_IVector &words, const double count=1) |
void | make_htk_compatible () |
EST_read_status | load (const EST_String &filename) |
EST_read_status | load (const EST_String &filename, const EST_StrList &wordlist) |
EST_write_status | save (const EST_String &filename, const EST_String type="cstr_ascii", const bool trace=false, double floor=0.0) |
int | wordlist_index (const EST_String &word, const bool report=true) const |
const EST_String & | wordlist_index (int i) const |
int | predlist_index (const EST_String &word) const |
const EST_String & | predlist_index (int i) const |
bool | set_entry_type (entry_t new_type) |
bool | set_representation (representation_t new_representation) |
double | probability (const EST_StrVector &words, bool force=false, const bool trace=false) const |
double | frequency (const EST_StrVector &words, bool force=false, const bool trace=false) const |
const EST_String & | predict (const EST_StrVector &words, double *prob, int *state) const |
const EST_String & | predict (const EST_StrVector &words) const |
const EST_String & | predict (const EST_StrVector &words, double *prob) const |
const EST_String & | predict (const EST_IVector &words, double *prob, int *state) const |
const EST_String & | predict (const EST_IVector &words) const |
const EST_String & | predict (const EST_IVector &words, double *prob) const |
int | find_state_id (const EST_StrVector &words) const |
int | find_state_id (const EST_IVector &words) const |
int | find_next_state_id (int state, int word) const |
double | reverse_probability (const EST_StrVector &words, bool force=false) const |
double | reverse_probability (const EST_IVector &words, bool force=false) const |
const EST_DiscreteProbDistribution & | prob_dist (const EST_StrVector &words) const |
const EST_DiscreteProbDistribution & | prob_dist (const EST_IVector &words) const |
const EST_DiscreteProbDistribution & | prob_dist (int state) const |
void | fill_window_start (EST_IVector &window, const EST_String &prev, const EST_String &prev_prev) const |
void | fill_window_start (EST_StrVector &window, const EST_String &prev, const EST_String &prev_prev) const |
bool | ngram_exists (const EST_StrVector &words) const |
bool | ngram_exists (const EST_StrVector &words, const double threshold) const |
const double | get_backoff_weight (const EST_StrVector &words) const |
bool | set_backoff_weight (const EST_StrVector &words, const double w) |
void | print_freqs (ostream &os, double floor=0.0) |
bool | compute_backoff_weights (const int mincount=1, const int maxcount=10) |
bool | merge (EST_Ngrammar &n, float weight) |
Protected Member Functions | |
bool | init_sparse_representation () |
bool | init_dense_representation () |
const double | get_backoff_discount (const int order, const double freq) const |
bool | init_backoff_representation () |
void | prune_backoff_representation (EST_BackoffNgrammarState *start_state=NULL) |
void | backoff_restore_unigram_states () |
int | find_dense_state_index (const EST_IVector &words, int index=0) const |
const EST_StrVector & | make_ngram_from_index (const int i) const |
bool | init_vocab (const EST_StrList &wordlist) |
bool | init_vocab (const EST_StrList &word_list, const EST_StrList &pred_list) |
bool | check_vocab (const EST_StrList &wordlist) |
const EST_String & | lastword (const EST_StrVector &words) const |
const int | lastword (const EST_IVector &words) const |
bool | sparse_to_dense () |
bool | dense_to_sparse () |
void | take_logs () |
void | take_exps () |
void | freqs_to_probs () |
bool | build_sparse (const EST_String &filename, const EST_String &prev, const EST_String &prev_prev, const EST_String &last) |
bool | build_ngram (const EST_String &filename, const EST_String &prev, const EST_String &prev_prev, const EST_String &last, const EST_String &input_format) |
void | iterate (EST_StrVector &words, void(*function)(EST_Ngrammar *n, EST_StrVector &words, void *params), void *params) |
void | const_iterate (EST_StrVector &words, void(*function)(const EST_Ngrammar *const n, EST_StrVector &words, void *params), void *params) const |
bool | p_init (int o, representation_t r) |
bool | oov_preprocess (const EST_String &filename, EST_String &new_filename, const EST_String &what) |
const EST_NgrammarState & | find_state_const (const EST_StrVector &words) const |
EST_NgrammarState & | find_state (const EST_StrVector &words) |
const EST_NgrammarState & | find_state_const (const EST_IVector &words) const |
EST_NgrammarState & | find_state (const EST_IVector &words) |
const EST_DiscreteProbDistribution & | backoff_prob_dist (const EST_StrVector &words) const |
const double | backoff_reverse_probability_sub (const EST_StrVector &words, const EST_BackoffNgrammarState *root) const |
const double | backoff_probability (const EST_StrVector &words, const bool trace=false) const |
const double | backoff_reverse_probability (const EST_StrVector &words) const |
const EST_String & | backoff_most_probable (const EST_StrVector &words, double *prob=NULL) const |
void | backoff_traverse (EST_BackoffNgrammarState *start_state, void(*function)(EST_BackoffNgrammarState *s, void *params), void *params) |
void | backoff_traverse (EST_BackoffNgrammarState *start_state, void(*function)(EST_BackoffNgrammarState *s, void *params), void *params, const int level) |
Protected Attributes | |
int | p_order |
int | p_num_samples |
double | p_number_of_sentences |
EST_String | p_sentence_start_marker |
EST_String | p_sentence_end_marker |
representation_t | p_representation |
entry_t | p_entry_type |
EST_PredictionSuffixTree | sparse_representation |
EST_BackoffNgrammarState * | backoff_representation |
double | backoff_threshold |
double | backoff_unigram_floor_freq |
EST_DVector * | backoff_discount |
int | p_num_states |
EST_NgrammarState * | p_states |
EST_Discrete * | vocab |
EST_Discrete * | pred_vocab |
EST_DiscreteProbDistribution | vocab_pdf |
bool | allow_oov |
Friends | |
class | EST_BackoffNgrammar |
ostream & | operator<< (ostream &s, EST_Ngrammar &n) |
EST_read_status | load_ngram_htk_ascii (const EST_String filename, EST_Ngrammar &n) |
EST_read_status | load_ngram_htk_binary (const EST_String filename, EST_Ngrammar &n) |
EST_read_status | load_ngram_arpa (const EST_String filename, EST_Ngrammar &n, const EST_StrList &vocab) |
EST_read_status | load_ngram_cstr_ascii (const EST_String filename, EST_Ngrammar &n) |
EST_read_status | load_ngram_cstr_bin (const EST_String filename, EST_Ngrammar &n) |
EST_write_status | save_ngram_htk_ascii_sub (const EST_String &word, ostream *ost, EST_Ngrammar &n, double floor) |
EST_write_status | save_ngram_htk_ascii (const EST_String filename, EST_Ngrammar &n, double floor) |
EST_write_status | save_ngram_cstr_ascii (const EST_String filename, EST_Ngrammar &n, const bool trace, double floor) |
EST_write_status | save_ngram_cstr_bin (const EST_String filename, EST_Ngrammar &n, const bool trace, double floor) |
EST_write_status | save_ngram_arpa (const EST_String filename, EST_Ngrammar &n) |
EST_write_status | save_ngram_arpa_sub (ostream *ost, EST_Ngrammar &n, const EST_StrVector &words) |
EST_write_status | save_ngram_wfst (const EST_String filename, EST_Ngrammar &n) |
void | frequency_of_frequencies (EST_DVector &ff, EST_Ngrammar &n, int this_order) |
void | map_frequencies (EST_Ngrammar &n, const EST_DVector &map, const int this_order) |
bool | Good_Turing_smooth (EST_Ngrammar &n, int maxcount, int mincount) |
void | Good_Turing_discount (EST_Ngrammar &ngrammar, const int maxcount, const double default_discount) |
void | fs_build_backoff_ngrams (EST_Ngrammar *backoff_ngrams, EST_Ngrammar &ngram) |
int | fs_backoff_smooth (EST_Ngrammar *backoff_ngrams, EST_Ngrammar &ngram, int smooth_thresh) |
Definition at line 209 of file EST_Ngrammar.h.
enum EST_Ngrammar::representation_t |
Definition at line 214 of file EST_Ngrammar.h.
enum EST_Ngrammar::entry_t |
Definition at line 219 of file EST_Ngrammar.h.
|
inline |
Definition at line 366 of file EST_Ngrammar.h.
|
inline |
Definition at line 368 of file EST_Ngrammar.h.
|
inline |
Definition at line 375 of file EST_Ngrammar.h.
|
inline |
Definition at line 382 of file EST_Ngrammar.h.
EST_Ngrammar::~EST_Ngrammar | ( | ) |
Definition at line 498 of file EST_Ngrammar.cc.
|
protected |
Definition at line 595 of file EST_Ngrammar.cc.
|
protected |
Definition at line 575 of file EST_Ngrammar.cc.
|
protected |
Definition at line 2394 of file EST_Ngrammar.cc.
|
protected |
Definition at line 611 of file EST_Ngrammar.cc.
|
protected |
Definition at line 1578 of file EST_Ngrammar.cc.
|
protected |
Definition at line 1558 of file EST_Ngrammar.cc.
|
protected |
Definition at line 1682 of file EST_Ngrammar.cc.
|
protected |
Definition at line 621 of file EST_Ngrammar.cc.
|
protected |
Definition at line 651 of file EST_Ngrammar.cc.
|
protected |
Definition at line 663 of file EST_Ngrammar.cc.
|
protected |
Definition at line 677 of file EST_Ngrammar.cc.
|
inline protected |
Definition at line 287 of file EST_Ngrammar.h.
|
inline protected |
Definition at line 289 of file EST_Ngrammar.h.
|
protected |
Definition at line 1668 of file EST_Ngrammar.cc.
|
protected |
Definition at line 1675 of file EST_Ngrammar.cc.
|
protected |
Definition at line 1020 of file EST_Ngrammar.cc.
|
protected |
Definition at line 1164 of file EST_Ngrammar.cc.
|
protected |
Definition at line 2236 of file EST_Ngrammar.cc.
|
protected |
Definition at line 2282 of file EST_Ngrammar.cc.
|
protected |
Definition at line 539 of file EST_Ngrammar.cc.
|
protected |
Definition at line 1052 of file EST_Ngrammar.cc.
|
protected |
Definition at line 1749 of file EST_Ngrammar.cc.
|
protected |
Definition at line 1711 of file EST_Ngrammar.cc.
|
protected |
Definition at line 1811 of file EST_Ngrammar.cc.
|
protected |
Definition at line 1785 of file EST_Ngrammar.cc.
|
protected |
Definition at line 2369 of file EST_Ngrammar.cc.
|
protected |
Definition at line 2531 of file EST_Ngrammar.cc.
|
protected |
Definition at line 2409 of file EST_Ngrammar.cc.
|
protected |
Definition at line 2604 of file EST_Ngrammar.cc.
|
protected |
Definition at line 2628 of file EST_Ngrammar.cc.
|
protected |
Definition at line 2667 of file EST_Ngrammar.cc.
|
protected |
Definition at line 2693 of file EST_Ngrammar.cc.
void EST_Ngrammar::default_values | ( | ) |
Definition at line 483 of file EST_Ngrammar.cc.
void EST_Ngrammar::clear | ( | ) |
Definition at line 503 of file EST_Ngrammar.cc.
bool EST_Ngrammar::init | ( | int | o, |
EST_Ngrammar::representation_t | r, | ||
const EST_StrList & | wordlist | ||
) |
Definition at line 508 of file EST_Ngrammar.cc.
bool EST_Ngrammar::init | ( | int | o, |
EST_Ngrammar::representation_t | r, | ||
const EST_StrList & | wordlist, | ||
const EST_StrList & | predlist | ||
) |
Definition at line 514 of file EST_Ngrammar.cc.
bool EST_Ngrammar::init | ( | int | o, |
EST_Ngrammar::representation_t | r, | ||
EST_Discrete & | v | ||
) |
Definition at line 521 of file EST_Ngrammar.cc.
bool EST_Ngrammar::init | ( | int | o, |
EST_Ngrammar::representation_t | r, | ||
EST_Discrete & | v, | ||
EST_Discrete & | pv | ||
) |
Definition at line 530 of file EST_Ngrammar.cc.
|
inline |
Definition at line 400 of file EST_Ngrammar.h.
|
inline |
Definition at line 401 of file EST_Ngrammar.h.
|
inline |
Definition at line 402 of file EST_Ngrammar.h.
|
inline |
Definition at line 403 of file EST_Ngrammar.h.
EST_String EST_Ngrammar::get_vocab_word | ( | int | i | ) | const |
Definition at line 1989 of file EST_Ngrammar.cc.
int EST_Ngrammar::get_vocab_word | ( | const EST_String & | s | ) | const |
Definition at line 1997 of file EST_Ngrammar.cc.
|
inline |
Definition at line 406 of file EST_Ngrammar.h.
|
inline |
Definition at line 407 of file EST_Ngrammar.h.
|
inline |
Definition at line 408 of file EST_Ngrammar.h.
|
inline |
Definition at line 410 of file EST_Ngrammar.h.
|
inline |
Definition at line 411 of file EST_Ngrammar.h.
|
inline |
Definition at line 412 of file EST_Ngrammar.h.
bool EST_Ngrammar::build | ( | const EST_StrList & | filenames, |
const EST_String & | prev = SENTENCE_START_MARKER , |
||
const EST_String & | prev_prev = SENTENCE_END_MARKER , |
||
const EST_String & | last = SENTENCE_END_MARKER , |
||
const EST_String & | input_format = "" , |
||
const EST_String & | oov_mode = "" , |
||
const int | mincount = 1 , |
||
const int | maxcount = 10 |
||
) |
Definition at line 759 of file EST_Ngrammar.cc.
void EST_Ngrammar::accumulate | ( | const EST_StrVector & | words, |
const double | count = 1 |
||
) |
Definition at line 884 of file EST_Ngrammar.cc.
void EST_Ngrammar::accumulate | ( | const EST_IVector & | words, |
const double | count = 1 |
||
) |
Definition at line 914 of file EST_Ngrammar.cc.
void EST_Ngrammar::make_htk_compatible | ( | ) |
Definition at line 2205 of file EST_Ngrammar.cc.
EST_read_status EST_Ngrammar::load | ( | const EST_String & | filename | ) |
Definition at line 2121 of file EST_Ngrammar.cc.
EST_read_status EST_Ngrammar::load | ( | const EST_String & | filename, |
const EST_StrList & | wordlist | ||
) |
Definition at line 2160 of file EST_Ngrammar.cc.
EST_write_status EST_Ngrammar::save | ( | const EST_String & | filename, |
const EST_String | type = "cstr_ascii" , |
||
const bool | trace = false , |
||
double | floor = 0.0 |
||
) |
Definition at line 2213 of file EST_Ngrammar.cc.
int EST_Ngrammar::wordlist_index | ( | const EST_String & | word, |
const bool | report = true |
||
) | const |
Definition at line 733 of file EST_Ngrammar.cc.
const EST_String & EST_Ngrammar::wordlist_index | ( | int | i | ) | const |
Definition at line 697 of file EST_Ngrammar.cc.
int EST_Ngrammar::predlist_index | ( | const EST_String & | word | ) | const |
Definition at line 702 of file EST_Ngrammar.cc.
const EST_String & EST_Ngrammar::predlist_index | ( | int | i | ) | const |
Definition at line 728 of file EST_Ngrammar.cc.
bool EST_Ngrammar::set_entry_type | ( | EST_Ngrammar::entry_t | new_type | ) |
Definition at line 1657 of file EST_Ngrammar.cc.
bool EST_Ngrammar::set_representation | ( | EST_Ngrammar::representation_t | new_representation | ) |
Definition at line 1836 of file EST_Ngrammar.cc.
double EST_Ngrammar::probability | ( | const EST_StrVector & | words, |
bool | force = false , |
||
const bool | trace = false |
||
) | const |
Definition at line 1853 of file EST_Ngrammar.cc.
double EST_Ngrammar::frequency | ( | const EST_StrVector & | words, |
bool | force = false , |
||
const bool | trace = false |
||
) | const |
Definition at line 1876 of file EST_Ngrammar.cc.
const EST_String & EST_Ngrammar::predict | ( | const EST_StrVector & | words, |
double * | prob, | ||
int * | state | ||
) | const |
Definition at line 1899 of file EST_Ngrammar.cc.
|
inline |
Definition at line 463 of file EST_Ngrammar.h.
|
inline |
Definition at line 465 of file EST_Ngrammar.h.
const EST_String & EST_Ngrammar::predict | ( | const EST_IVector & | words, |
double * | prob, | ||
int * | state | ||
) | const |
Definition at line 1927 of file EST_Ngrammar.cc.
|
inline |
Definition at line 469 of file EST_Ngrammar.h.
|
inline |
Definition at line 471 of file EST_Ngrammar.h.
int EST_Ngrammar::find_state_id | ( | const EST_StrVector & | words | ) | const |
Definition at line 1955 of file EST_Ngrammar.cc.
int EST_Ngrammar::find_state_id | ( | const EST_IVector & | words | ) | const |
Definition at line 1972 of file EST_Ngrammar.cc.
int EST_Ngrammar::find_next_state_id | ( | int | state, |
int | word | ||
) | const |
Definition at line 1699 of file EST_Ngrammar.cc.
double EST_Ngrammar::reverse_probability | ( | const EST_StrVector & | words, |
bool | force = false |
||
) | const |
Definition at line 2003 of file EST_Ngrammar.cc.
double EST_Ngrammar::reverse_probability | ( | const EST_IVector & | words, |
bool | force = false |
||
) | const |
Definition at line 2032 of file EST_Ngrammar.cc.
const EST_DiscreteProbDistribution & EST_Ngrammar::prob_dist | ( | const EST_StrVector & | words | ) | const |
Definition at line 2070 of file EST_Ngrammar.cc.
const EST_DiscreteProbDistribution & EST_Ngrammar::prob_dist | ( | const EST_IVector & | words | ) | const |
Definition at line 2095 of file EST_Ngrammar.cc.
const EST_DiscreteProbDistribution & EST_Ngrammar::prob_dist | ( | int | state | ) | const |
Definition at line 2064 of file EST_Ngrammar.cc.
void EST_Ngrammar::fill_window_start | ( | EST_IVector & | window, |
const EST_String & | prev, | ||
const EST_String & | prev_prev | ||
) | const |
Definition at line 1030 of file EST_Ngrammar.cc.
void EST_Ngrammar::fill_window_start | ( | EST_StrVector & | window, |
const EST_String & | prev, | ||
const EST_String & | prev_prev | ||
) | const |
Definition at line 1041 of file EST_Ngrammar.cc.
bool EST_Ngrammar::ngram_exists | ( | const EST_StrVector & | words | ) | const |
Definition at line 956 of file EST_Ngrammar.cc.
bool EST_Ngrammar::ngram_exists | ( | const EST_StrVector & | words, |
const double | threshold | ||
) | const |
Definition at line 984 of file EST_Ngrammar.cc.
const double EST_Ngrammar::get_backoff_weight | ( | const EST_StrVector & | words | ) | const |
Definition at line 997 of file EST_Ngrammar.cc.
bool EST_Ngrammar::set_backoff_weight | ( | const EST_StrVector & | words, |
const double | w | ||
) |
Definition at line 1008 of file EST_Ngrammar.cc.
void EST_Ngrammar::print_freqs | ( | ostream & | os, |
double | floor = 0.0 |
||
) |
Definition at line 2328 of file EST_Ngrammar.cc.
bool EST_Ngrammar::compute_backoff_weights | ( | const int | mincount = 1 , |
const int | maxcount = 10 |
||
) |
Definition at line 1489 of file EST_Ngrammar.cc.
bool EST_Ngrammar::merge | ( | EST_Ngrammar & | n, |
float | weight | ||
) |
Definition at line 2739 of file EST_Ngrammar.cc.
|
friend |
Definition at line 583 of file EST_Ngrammar.h.
|
friend |
Definition at line 1629 of file EST_Ngrammar.cc.
|
friend |
Definition at line 54 of file ngrammar_io.cc.
|
friend |
Definition at line 62 of file ngrammar_io.cc.
|
friend |
Definition at line 70 of file ngrammar_io.cc.
|
friend |
Definition at line 218 of file ngrammar_io.cc.
|
friend |
Definition at line 280 of file ngrammar_io.cc.
|
friend |
Definition at line 407 of file ngrammar_io.cc.
|
friend |
Definition at line 548 of file ngrammar_io.cc.
|
friend |
Definition at line 729 of file ngrammar_io.cc.
|
friend |
Definition at line 826 of file ngrammar_io.cc.
|
friend |
Definition at line 653 of file ngrammar_io.cc.
|
friend |
Definition at line 788 of file ngrammar_io.cc.
|
friend |
Definition at line 205 of file ngrammar_aux.cc.
|
friend |
Definition at line 304 of file ngrammar_aux.cc.
|
friend |
Definition at line 408 of file ngrammar_aux.cc.
|
friend |
Definition at line 539 of file ngrammar_aux.cc.
|
friend |
Definition at line 73 of file freqsmooth.cc.
|
friend |
Definition at line 109 of file freqsmooth.cc.
|
protected |
Definition at line 225 of file EST_Ngrammar.h.
|
protected |
Definition at line 226 of file EST_Ngrammar.h.
|
protected |
Definition at line 228 of file EST_Ngrammar.h.
|
protected |
Definition at line 231 of file EST_Ngrammar.h.
|
protected |
Definition at line 232 of file EST_Ngrammar.h.
|
protected |
Definition at line 235 of file EST_Ngrammar.h.
|
protected |
Definition at line 236 of file EST_Ngrammar.h.
|
protected |
Definition at line 240 of file EST_Ngrammar.h.
|
protected |
Definition at line 250 of file EST_Ngrammar.h.
|
protected |
Definition at line 252 of file EST_Ngrammar.h.
|
protected |
Definition at line 255 of file EST_Ngrammar.h.
|
protected |
Definition at line 262 of file EST_Ngrammar.h.
|
protected |
Definition at line 268 of file EST_Ngrammar.h.
|
protected |
Definition at line 269 of file EST_Ngrammar.h.
|
protected |
Definition at line 276 of file EST_Ngrammar.h.
|
protected |
Definition at line 277 of file EST_Ngrammar.h.
|
protected |
Definition at line 285 of file EST_Ngrammar.h.
|
protected |
Definition at line 292 of file EST_Ngrammar.h.