Edinburgh Speech Tools 2.4-release
XML_Parser.cc
1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* */
34 /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35 /* -------------------------------------------------------------------- */
36 /* Recursive descent parsing skeleton. */
37 /* */
38 /*************************************************************************/
39
40#include "EST_error.h"
41#include "XML_Parser.h"
42#include "rxp.h"
43
45{
46}
47
49{
50 known_ids.add_item(id_pattern, directory);
51}
52
54{
55 EST_Litem *p;
56
57 for(p=known_ids.head(); p != 0; p= p->next())
58 {
59 EST_String re(known_ids.key(p).tostring());
60 EST_String &pattern = known_ids.val(p);
61
62 list.append(re);
63 list.append(pattern);
64 }
65}
66
67XML_Parser *XML_Parser_Class::make_parser(InputSource source, Entity ent, void *data)
68{
69 return new XML_Parser(*this, source, ent, data);
70}
71
72XML_Parser *XML_Parser_Class::make_parser(InputSource source, void *data)
73{
74 return new XML_Parser(*this, source, NULL, data);
75}
76
77
79 const EST_String desc,
80 void *data)
81{
82 Entity ent = NewExternalEntity(0,0,strdup8(desc),0,0);
83
84 FILE16 *input16=MakeFILE16FromFILE(input, "r");
85
86 if (input16==NULL)
87 EST_sys_error("Can't open 16 bit '%s'", (const char *)desc);
88
89 SetCloseUnderlying(input16, 0);
90
91 return make_parser(NewInputSource(ent, input16), ent, data);
92}
93
94
96 void *data)
97{
98 return make_parser(input, "<ANONYMOUS>", data);
99}
100
101
103 void *data)
104{
105 if ( filename == "-" )
106 return make_parser(stdin, data);
107
108 FILE *input = fopen(filename, "r");
109
110 if (input==NULL)
111 EST_sys_error("Can't open '%s'", (const char *)filename);
112
113 Entity ent = NewExternalEntity(0,0,strdup8(filename),0,0);
114
115 FILE16 *input16=MakeFILE16FromFILE(input, "r");
116
117 if (input16==NULL)
118 EST_sys_error("Can't open 16 bit '%s'", (const char *)filename);
119
120 SetCloseUnderlying(input16, 1);
121
122 return make_parser(NewInputSource(ent, input16), data);
123}
124
125InputSource XML_Parser_Class::try_and_open(Entity ent)
126
127{
128 EST_String id = ent->publicid?ent->publicid:ent->systemid;
129 EST_Litem *p;
130
131 int starts[EST_Regex_max_subexpressions];
132 int ends[EST_Regex_max_subexpressions];
133 for (p = known_ids.head(); p != 0; p = p->next())
134 {
135 EST_Regex &re = known_ids.key(p);
136 EST_String pattern(known_ids.val(p));
137
138 if (id.matches(re, 0, starts, ends))
139 {
140 EST_String res(pattern);
141 res.subst(id, starts, ends);
142
143 FILE *f;
144 FILE16 *f16;
145 if((f = fopen(res, "r")))
146 {
147 if(!(f16 = MakeFILE16FromFILE(f, "r")))
148 return 0;
149 SetCloseUnderlying(f16, 1);
150
151 return NewInputSource(ent, f16);
152 }
153 }
154 }
155
156 return EntityOpen(ent);
157}
158
159
160InputSource XML_Parser_Class::open_entity(Entity ent, void *arg)
161{
162 XML_Parser *parser = (XML_Parser *)arg;
163
164 return parser->open(ent);
165}
166
167// Default do-nothing callbacks.
168
170 XML_Parser &p,
171 void *data)
172{ (void)c; (void)p; (void)data; }
173
175 XML_Parser &p,
176 void *data)
177{ (void)c; (void)p; (void)data; }
178
180 XML_Parser &p,
181 void *data,
182 const char *name,
183 XML_Attribute_List &attributes)
184{ (void)c; (void)p; (void)data; (void)name; (void)attributes; }
185
187 XML_Parser &p,
188 void *data,
189 const char *name,
190 XML_Attribute_List &attributes)
191{ (void)c; (void)p; (void)data; (void)name; (void)attributes;
192 element_open(c, p, data, name, attributes);
193 element_close(c, p, data, name);
194}
195
197 XML_Parser &p,
198 void *data,
199 const char *name)
200{ (void)c; (void)p; (void)data; (void)name; }
201
203 XML_Parser &p,
204 void *data,
205 const char *chars)
206{ (void)c; (void)p; (void)data; (void)chars; }
207
209 XML_Parser &p,
210 void *data,
211 const char *chars)
212{ (void)c; (void)p; (void)data; (void)chars; }
213
215 XML_Parser &p,
216 void *data,
217 const char *instruction)
218{ (void)c; (void)p; (void)data; (void)instruction; }
219
221 XML_Parser &p,
222 void *data)
223{ (void)c; (void)p; (void)data; }
224
226{
227 return p.get_error();
228}
229
231 XML_Parser &p,
232 void *data,
233 EST_String message)
234{
235 if (p.current_bit != NULL)
236 p.current_bit->error_message = message;
237 error(c, p, data);
238}
239
240 /*************************************************************************/
241 /* */
242 /* An actual parser. */
243 /* */
244 /*************************************************************************/
245
247 InputSource s,
248 Entity ent,
249 void *d)
250{
251 pclass=&pc;
252 source=s;
253 initial_entity=ent;
254 data=d;
255 p = NewParser();
256 ParserSetEntityOpener(p, XML_Parser_Class::open_entity);
257 ParserSetFlag(p, ReturnDefaultedAttributes, 1);
258 ParserSetCallbackArg(p, (void *)this);
259}
260
262{
263 if (initial_entity)
264 FreeEntity(initial_entity);
265 FreeDtd(p->dtd);
266 FreeParser(p);
267}
268
269InputSource XML_Parser::open(Entity ent)
270{
271 return pclass->try_and_open(ent);
272}
273
275{
276
277 if (p_track_context)
279
280 if (ParserPush(p, source) == -1)
281 EST_error("XML Parser error in push");
282
283 pclass->document_open(*pclass, *this, data);
284
285 XBit bit;
286 while (1)
287 {
288 current_bit = bit = ReadXBit(p);
289 if (bit->type == XBIT_eof)
290 break;
291 else if (bit->type == XBIT_start || bit->type == XBIT_empty)
292 {
293 Attribute b;
294 XML_Attribute_List att(10);
295
296 for (b=bit->attributes; b; b=b->next)
297 {
298 att.add_item(EST_String(b->definition->name), EST_String(b->value));
299 }
300
301 if (bit->type == XBIT_start)
302 {
304 *this,
305 data,
306 bit->element_definition->name,
307 att
308 );
309 if (p_track_context)
310 {
311 EST_String nm(bit->element_definition->name);
312 p_context.push(nm);
313 }
314
315 }
316 else
318 *this,
319 data,
320 bit->element_definition->name,
321 att
322 );
323 }
324 else if (bit->type == XBIT_end)
325 {
326 if (p_track_context)
327 p_context.pop();
328
330 *this,
331 data,
332 bit->element_definition->name
333 );
334 }
335 else if (bit->type == XBIT_pcdata)
336 {
338 *this,
339 data,
340 bit->pcdata_chars
341 );
342 }
343 else if (bit->type == XBIT_cdsect)
344 {
346 *this,
347 data,
348 bit->cdsect_chars
349 );
350 }
351 else if (bit->type == XBIT_pi)
352 {
354 *this,
355 data,
356 bit->pi_chars
357 );
358 }
359 else if (bit->type == XBIT_error)
360 {
362 *this,
363 data);
364 break;
365 }
366 else
367 {
368 // ignore it
369 }
370 FreeXBit(bit);
371 current_bit=NULL;
372 }
373
374 if (current_bit!=NULL)
375 {
376 FreeXBit(bit);
377 current_bit=NULL;
378 }
379
380 pclass->document_close(*pclass, *this, data);
381}
382
384{
385 p_track_context=flag;
386}
387
389{
390 p_track_contents=flag;
391}
392
393
394// Stolen from xmlparser.c, will need to be tweaked for internal rxp changes.
396{
397 int linenum, charnum;
398 InputSource s;
399 XBit bit = current_bit;
400
401 if (!bit)
402 return "No Parse In Progress";
403
404 p_error_message =
406 bit->type == XBIT_error ? "Error" : "Warning",
407 ": ",
408 bit->error_message?bit->error_message:"non XML error"
409 );
410
411 for(s=p->source; s; s=s->parent)
412 {
413 if(s->entity->name)
414 {
415 p_error_message += " in entity \"";
416 p_error_message += s->entity->name;
417 p_error_message += "\"";
418 }
419 else
420 p_error_message += " in unnamed entity";
421
422 switch(SourceLineAndChar(s, &linenum, &charnum))
423 {
424 case 1:
425 p_error_message += EST_String::cat(" at line ",
426 EST_String::Number(linenum+1),
427 " char ",
428 EST_String::Number(charnum+1),
429 " of ");
430 break;
431 case 0:
432 p_error_message += EST_String::cat(" defined at line ",
433 EST_String::Number(linenum+1),
434 " char ",
435 EST_String::Number(charnum+1),
436 " of ");
437 break;
438 case -1:
439 p_error_message += " defined in ";
440 break;
441 }
442
443 p_error_message += EntityDescription(s->entity);
444 p_error_message += "\n";
445 }
446
447 return (const char *)p_error_message;
448}
449
451{
452 return p_context.nth(n);
453}
454
EST_String tostring(void) const
Get the expression as a string.
Definition: EST_Regex.h:96
static EST_String Number(int i, int base=10)
Build string from an integer.
Definition: EST_String.cc:1211
int subst(EST_String source, int(&starts)[EST_Regex_max_subexpressions], int(&ends)[EST_Regex_max_subexpressions])
Substitute the result of a match into a string.
Definition: EST_String.cc:467
static EST_String cat(const EST_String s1, const EST_String s2=Empty, const EST_String s3=Empty, const EST_String s4=Empty, const EST_String s5=Empty, const EST_String s6=Empty, const EST_String s7=Empty, const EST_String s8=Empty, const EST_String s9=Empty)
Definition: EST_String.cc:1096
void clear(void)
Empty it out.
Definition: EST_TDeque.cc:140
int add_item(const K &key, const V &value, int no_search=0)
Add an entry to the table.
Definition: EST_THash.cc:167
EST_Litem * head() const
Return First key value pair in list.
Definition: EST_TKVL.h:99
int add_item(const K &rkey, const V &rval, int no_search=0)
add key-val pair to list
Definition: EST_TKVL.cc:248
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
const K & key(EST_Litem *ptr, int m=1) const
find key, reference by ptr
Definition: EST_TKVL.cc:201
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:191
friend class XML_Parser
Definition: XML_Parser.h:231
const char * get_error(XML_Parser &p)
Get the error message for the last error.
Definition: XML_Parser.cc:225
virtual void element(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name, XML_Attribute_List &attributes)
Definition: XML_Parser.cc:186
XML_Parser * make_parser(InputSource source, void *data)
Create a parser for the RXP InputSource.
Definition: XML_Parser.cc:72
void register_id(EST_Regex id_pattern, EST_String directory)
Definition: XML_Parser.cc:48
virtual void error(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:220
InputSource try_and_open(Entity ent)
Definition: XML_Parser.cc:125
virtual void processing(XML_Parser_Class &c, XML_Parser &p, void *data, const char *instruction)
Definition: XML_Parser.cc:214
virtual void document_close(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:174
virtual void pcdata(XML_Parser_Class &c, XML_Parser &p, void *data, const char *chars)
Definition: XML_Parser.cc:202
virtual void document_open(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:169
virtual void element_open(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name, XML_Attribute_List &attributes)
Definition: XML_Parser.cc:179
virtual void cdata(XML_Parser_Class &c, XML_Parser &p, void *data, const char *chars)
Definition: XML_Parser.cc:208
void registered_ids(EST_TList< EST_String > &list)
Definition: XML_Parser.cc:53
virtual void element_close(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name)
Definition: XML_Parser.cc:196
static InputSource open_entity(Entity ent, void *arg)
Definition: XML_Parser.cc:160
Parser p
The RXP parser object.
Definition: XML_Parser.h:273
XBit current_bit
The piece of markup being processed.
Definition: XML_Parser.h:259
const char * get_error()
Get the error message for the last error.
Definition: XML_Parser.cc:395
XML_Parser(XML_Parser_Class &parent, InputSource source, Entity initial_entity, void *data)
Creator used by XML_Parser_Class::make_parser()
Definition: XML_Parser.cc:246
void * data
Arbitrary data which can be used by callbacks.
Definition: XML_Parser.h:270
~XML_Parser()
Destructor, may close input if required.
Definition: XML_Parser.cc:261
InputSource open(Entity ent)
Open. Asks the parser class to do the work.
Definition: XML_Parser.cc:269
void track_contents(bool flag)
Definition: XML_Parser.cc:388
void go()
Run the parser.
Definition: XML_Parser.cc:274
EST_String context(int n)
Definition: XML_Parser.cc:450
EST_TDeque< EST_String > p_context
If context is being tracked, this is a stack of element names.
Definition: XML_Parser.h:276
void track_context(bool flag)
Definition: XML_Parser.cc:383
XML_Parser_Class * pclass
Definition: XML_Parser.h:256
InputSource source
Where we are reading from.
Definition: XML_Parser.h:262
Entity initial_entity
Definition: XML_Parser.h:267