Edinburgh Speech Tools 2.4-release
rgcompile.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Alan W Black */
34/* Date : December 1997 */
35/*-----------------------------------------------------------------------*/
36/* */
37/* A Regular grammar compiler, its pretty free about the grammar */
38/* Actually it will take full context free grammars and convert them */
39/* up to a specified rewrite depth */
40/* */
41/* Based loosely on "Finite State Machines from Features Grammars" by */
42/* Black, International Workshop of Parsing Technologies, CMU 89 */
43/* */
44/*=======================================================================*/
45#include <iostream>
46#include "EST_cutils.h"
47#include "EST_WFST.h"
48
49static LISP find_rewrites(LISP rules, LISP terms, LISP nonterms);
50static LISP rg_find_nt_ts(LISP rules,LISP sets);
51static LISP prod_join(LISP n, LISP p);
52static int production_index(LISP state,
54 int proposed);
55
56void rgcompile(LISP rg, EST_WFST &all_wfst)
57{
58 // Build a transducer from given regular grammar.
59 LISP nt_ts,nonterms,terms,rewrites;
60 LISP sets=siod_nth(2,rg);
61 LISP rules=siod_nth(3,rg);
62
63 nt_ts = rg_find_nt_ts(rules,sets);
64 nonterms = car(nt_ts);
65 terms = cdr(nt_ts);
66
67 rewrites = find_rewrites(rules,terms,nonterms);
68
69 if (rewrites == NIL)
70 return; // left recursive or no rules
71
72 all_wfst.build_from_rg(terms,terms,
73 car(car(rules)), // distinguished symbol
74 rewrites,
75 sets,terms,25);
76
77}
78
79static LISP find_rewrites(LISP rules, LISP terms, LISP nonterms)
80{
81 // Find the full rewrites of each nonterminal until a terminal
82 // appears as the first item
83 LISP nt,r;
84 LISP rewrites = NIL;
85 (void)terms;
86
87 // got lazy and haven't done this recursively yet ...
88 for (nt=nonterms; nt != NIL; nt=cdr(nt))
89 {
90 LISP nn = NIL;
91 for (r=rules; r != NIL; r=cdr(r))
92 if (car(car(r)) == car(nt)) // depend on symbols being eq
93 nn = cons(cdr(cdr(car(r))),nn);
94 rewrites = cons(cons(car(nt),nn),rewrites);
95 }
96
97 return rewrites;
98}
99
100static LISP rg_find_nt_ts(LISP rules,LISP sets)
101{
102 // Find the alphabets used in the rules
103 LISP terms=NIL,nonterms=NIL,r,s,set,t;
104
105 for (r=rules; r != NIL; r=cdr(r))
106 if (!siod_member_str(get_c_string(car(car(r))),nonterms))
107 nonterms = cons(car(car(r)),nonterms);
108
109 for (r=rules; r != NIL; r=cdr(r))
110 for (s=cdr(cdr(car(r))); s != NIL; s=cdr(s))
111 if ((!siod_member_str(get_c_string(car(s)),terms)) &&
112 (!siod_member_str(get_c_string(car(s)),nonterms)) &&
113 (!siod_assoc_str(get_c_string(car(s)),sets)))
114 terms = cons(car(s),terms);
115 else if ((set=siod_assoc_str(get_c_string(car(s)),sets)))
116 {
117 for (t=cdr(set); t != 0; t=cdr(t))
118 if (!siod_member_str(get_c_string(car(t)),terms))
119 terms = cons(car(t),terms);
120 }
121
122 return cons(nonterms,terms);
123}
124
125
126void EST_WFST::build_from_rg(LISP inalpha, LISP outalpha,
127 LISP distinguished, LISP rewrites,
128 LISP sets, LISP terms,
129 int max_depth)
130{
131 // This is sort of similar to determinising in that the "state"
132 // is represented by a list of numbers, i.e. the remainder of
133 // of production
134 LISP current, start_state, remainder, set, new_prod;
135 int ns, current_state;
136 const char *current_sym;
137 LISP agenda = NIL;
138 EST_WFST_MultiStateIndex index(100);
139 (void)max_depth;
140 int c=0;
141
142 clear();
143 init(inalpha,outalpha);
144 int i_epsilon = in_epsilon();
145 int o_epsilon = out_epsilon();
146
147 // Create a starting state and add it to this WFST
148 p_start_state = add_state(wfst_nonfinal);
149 start_state = cons(flocons((double)p_start_state),
150 cons(distinguished,NIL));
151
152 production_index(start_state,index,p_start_state);
153
154 agenda = cons(start_state,agenda); // initialize agenda
155
156 while (agenda != NIL)
157 {
158 current = car(agenda);
159 agenda = cdr(agenda);
160 current_state = get_c_int(car(current));
161 current_sym = get_c_string(car(cdr(current)));
162 remainder = cdr(cdr(current));
163 if ((c % 1000)== 0)
164 cout << summary() << " Agenda " << siod_llength(agenda) << endl;
165 c++;
166
167 if ((set=siod_assoc_str(current_sym,sets)))
168 {
169 ns = production_index(remainder,index,p_num_states);
170 // add transitions for each member of set
171 for (LISP s=cdr(set); s != NIL; s=cdr(s))
172 p_states[current_state]
173 ->add_transition(0.0, // no weights
174 ns,
175 p_in_symbols.name(get_c_string(car(s))),
176 p_out_symbols.name(get_c_string(car(s))));
177 if (remainder == NIL)
178 add_state(wfst_final);
179 else if (ns == p_num_states) // its a new remainder
180 {
181 add_state(wfst_nonfinal);
182 agenda = cons(cons(flocons(ns),remainder),agenda);
183 }
184 }
185 else if (siod_member_str(current_sym,terms))
186 {
187 ns = production_index(remainder,index,p_num_states);
188 // Add transition for this terminal symbol
189 p_states[current_state]
190 ->add_transition(0.0, // no weights
191 ns,
192 p_in_symbols.name(current_sym),
193 p_out_symbols.name(current_sym));
194 if (remainder == NIL)
195 add_state(wfst_final);
196 else if (ns == p_num_states) // its a new remainder
197 {
198 add_state(wfst_nonfinal);
199 agenda = cons(cons(flocons(ns),remainder),agenda);
200 }
201 }
202 else // its a non-terminal so simply rewrite
203 {
204 for (LISP p=cdr(siod_assoc_str(current_sym,rewrites));
205 p != NIL;
206 p=cdr(p))
207 {
208 new_prod = prod_join(car(p),remainder);
209 ns = production_index(new_prod,index,p_num_states);
210 p_states[current_state]
211 ->add_transition(0.0, // no weights
212 ns,i_epsilon,o_epsilon);
213 if (ns == p_num_states) // its a new remainder
214 {
215 if (new_prod == NIL)
216 add_state(wfst_final);
217 else
218 {
219 add_state(wfst_nonfinal);
220 agenda = cons(cons(flocons(ns),new_prod),agenda);
221 }
222 }
223 }
224 }
225 }
226}
227
228static int production_index(LISP state,
230 int proposed)
231{
232 // Returns proposed if ms is not already in index, otherwise
233 // returns the value that was proposed when it was first new.
234
235 // I'll have to make this more efficient in future.
236 EST_String istring("");
237 LISP p;
238
239 for (p=state; p != NIL; p = cdr(p))
240 istring += EST_String(get_c_string(car(p))) + " ";
241
242 int ns,found;
243
244 for (p=state; p != NIL; p = cdr(p))
245 istring += EST_String(get_c_string(car(p))) + " ";
246
247 ns = index.val(istring,found);
248 if (found)
249 return ns;
250 else
251 {
252 index.add_item(istring,proposed);
253 return proposed;
254 }
255
256}
257
258static LISP prod_join(LISP n, LISP p)
259{
260 if (n == NIL)
261 return p;
262 else
263 return cons(car(n),prod_join(cdr(n),p));
264}
const EST_String & name(const int n) const
The name given the index.
int add_state(enum wfst_state_type state_type)
Add a new state, returns new name.
Definition: EST_WFST.cc:652
void init(int init_num_states=10)
Clear with (estimation of number of states required)
Definition: EST_WFST.cc:145
void clear()
clear removing existing states if any
Definition: EST_WFST.cc:115
int out_epsilon() const
Internal index for output epsilon.
Definition: EST_WFST.h:220
int in_epsilon() const
Internal index for input epsilon.
Definition: EST_WFST.h:218