Edinburgh Speech Tools 2.4-release
XML_Parser.h
1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33
34
35#ifndef __XML_PARSER_H__
36#define __XML_PARSER_H__
37
38#if !defined(CHAR_SIZE)
39# define CHAR_SIZE 8
40#endif
41
42#if (CHAR_SIZE!=8)
43# error EST can only handle 8 bit characters
44#endif
45
46#include "EST_String.h"
47#include "EST_Regex.h"
48#include "EST_TKVL.h"
49#include "EST_THash.h"
50#include "EST_TDeque.h"
51#include "EST_TList.h"
52#include "rxp/rxp.h"
53
54// We only use types and functions from rxp.h, so we can throw away
55// some of the macros which cause problems.
56
57#undef get
58
59
60/**@name XML Parser
61 * Recursive descent parsing skeleton with hooks for processing.
62 * A C++ wrapper around the rxp parser.
63 *
64 * @author Richard Caley <rjc@cstr.ed.ac.uk>
65 * @version $Id: XML_Parser.h,v 1.3 2004/05/04 00:00:17 awb Exp $
66 */
67//@{
68
69class XML_Parser;
71
72/// Nice name for list of attribute-value pairs.
74
75 /** A class of parsers. All parsers of a class share callbacks and a
76 * list of known public IDs.
77 */
79
80private:
81
82 /** Map PUBLIC and SYSTEM IDs to places on the local system.
83 */
85
86protected:
87 /** Do any necessary remappings and open a stream which reads the given
88 * entity.
89 */
90 static InputSource open_entity(Entity ent, void *arg);
91
92
93 /**@name The callbacks.
94 *
95 * These methods can be overridden in a subclass to create a class
96 * of parsers to do whatever you want.
97 */
98 //@{
99
100 /** Called when starting a document.
101 */
102 virtual void document_open(XML_Parser_Class &c,
103 XML_Parser &p,
104 void *data);
105
106 /** Called at the end of a document.
107 */
108 virtual void document_close(XML_Parser_Class &c,
109 XML_Parser &p,
110 void *data);
111
112 /** Called when an element starts.
113 */
114 virtual void element_open(XML_Parser_Class &c,
115 XML_Parser &p,
116 void *data,
117 const char *name,
118 XML_Attribute_List &attributes);
119
120 /** Called when an element ends.
121 */
122 virtual void element_close(XML_Parser_Class &c,
123 XML_Parser &p,
124 void *data,
125 const char *name);
126
127 /** Called for empty elements.
128 *
129 * Defaults to element_open(...) followed by element_close(...).
130 */
131 virtual void element(XML_Parser_Class &c,
132 XML_Parser &p,
133 void *data,
134 const char *name,
135 XML_Attribute_List &attributes);
136
137 /** Called for parsed character data sequences.
138 */
139 virtual void pcdata(XML_Parser_Class &c,
140 XML_Parser &p,
141 void *data,
142 const char *chars);
143 /** Called for unparsed character data sequences.
144 */
145 virtual void cdata(XML_Parser_Class &c,
146 XML_Parser &p,
147 void *data,
148 const char *chars);
149
150 /** Called for processing instructions.
151 */
152 virtual void processing(XML_Parser_Class &c,
153 XML_Parser &p,
154 void *data,
155 const char *instruction);
156
157 /** Called when there is an error in parsing.
158 */
159 virtual void error(XML_Parser_Class &c,
160 XML_Parser &p,
161 void *data);
162 //@}
163
164 /** This can be called from any of the callbacks to present "message"
165 * as an error through the error callback, thus getting filename and
166 * line information into the message.
167 */
168 void error(XML_Parser_Class &c,
169 XML_Parser &p,
170 void *data,
171 EST_String message);
172
173 /// Get the error message for the last error.
174 const char *get_error(XML_Parser &p);
175
176public:
177
178 /** Create an object representing the class of parsers.
179 */
181
182 virtual ~XML_Parser_Class() { }
183
184 /** Add a mapping from entity ID (SYSTEM or PUBLIC) to filename.
185 *
186 * The string can contain escapes like \2 which are replaced by
187 * the text matching the Nth bracketed part of the regular expression.
188 */
189 void register_id(EST_Regex id_pattern, EST_String directory);
190
191 /** Fill in the list with the known entity ID mappings.
192 */
193
195
196 /**@name Creating a parser
197 *
198 * Each of these methods creates a one-shot parser which will run over the
199 * indicated text.
200 */
201 //@{
202
203 /// Create a parser for the RXP InputSource.
204 XML_Parser *make_parser(InputSource source, void *data);
205
206 /// Create a parser for the RXP InputSource with an explicit initial entity.
207 XML_Parser *make_parser(InputSource source, Entity initial_entity, void *data);
208
209 /// Create a parser for a stdio input stream.
210 XML_Parser *make_parser(FILE *input, void *data);
211
212 /** Create a parser for a stdio input stream, giving a description for
213 * use in errors.
214 */
215 XML_Parser *make_parser(FILE *input, const EST_String desc, void *data);
216
217 /// Create a parser for the named file.
218 XML_Parser *make_parser(const EST_String filename, void *data);
219
220 //@}
221
222 /** Utility which tries to open an entity called ID at places
223 * specified in the mapping of this parser class.
224 */
225
226 InputSource try_and_open(Entity ent);
227
228 /** XML_Parser defines the behaviour of an individual one-shot
229 * parser.
230 */
231 friend class XML_Parser;
232};
233
234/** An actual parser. Each such instance parses just one stream which is
235 * given when the parser is created.
236 *
237 * The behaviour of the parser is given by the class to which it belongs.
238 */
239
241
242private:
243 /// Last error message from the parser.
244 EST_String p_error_message;
245
246 /// Set true when context is being remembered.
247 bool p_track_context;
248
249 /// Set true when contents are being remembered. (not yet implemented)
250 bool p_track_contents;
251
252protected:
253 /** The class to which this parser belongs. Defines the behaviour of
254 * the parser.
255 */
257
258 /// The piece of markup being processed.
260
261 /// Where we are reading from.
262 InputSource source;
263
264 /** The entity we started from. May need to be freed at the end of the
265 * parse.
266 */
268
269 /// Arbitrary data which can be used by callbacks.
270 void *data;
271
272 /// The RXP parser object.
273 Parser p;
274
275 /// If context is being tracked, this is a stack of element names.
277
278
279 /// Creator used by XML_Parser_Class::make_parser()
281 InputSource source,
282 Entity initial_entity,
283 void *data);
284
285 /// Open. Asks the parser class to do the work.
286 InputSource open(Entity ent);
287
288 /// Get the error message for the last error.
289 const char *get_error();
290
291public:
292
293 /// Destructor, may close input if required.
294 ~XML_Parser();
295
296 /** Request that parser keep track of the currently open elements.
297 *
298 * These are recorded on a stack. Use context() to access the information.
299 */
300 void track_context(bool flag);
301 /** Keep track of the content of open elements.
302 *
303 * Not yet implemented.
304 */
305 void track_contents(bool flag);
306
307 /** Get the name of the nth enclosing element.
308 *
309 * context(0) is the element we are directly inside.
310 */
311 EST_String context(int n);
312
313 /// Run the parser.
314 void go();
315
316 friend class XML_Parser_Class;
317};
318
319//@}
320
321#endif
322
const char * get_error(XML_Parser &p)
Get the error message for the last error.
Definition: XML_Parser.cc:225
virtual void element(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name, XML_Attribute_List &attributes)
Definition: XML_Parser.cc:186
XML_Parser * make_parser(InputSource source, void *data)
Create a parser for the RXP InputSource.
Definition: XML_Parser.cc:72
void register_id(EST_Regex id_pattern, EST_String directory)
Definition: XML_Parser.cc:48
virtual void error(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:220
InputSource try_and_open(Entity ent)
Definition: XML_Parser.cc:125
virtual void processing(XML_Parser_Class &c, XML_Parser &p, void *data, const char *instruction)
Definition: XML_Parser.cc:214
virtual void document_close(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:174
virtual void pcdata(XML_Parser_Class &c, XML_Parser &p, void *data, const char *chars)
Definition: XML_Parser.cc:202
virtual void document_open(XML_Parser_Class &c, XML_Parser &p, void *data)
Definition: XML_Parser.cc:169
virtual void element_open(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name, XML_Attribute_List &attributes)
Definition: XML_Parser.cc:179
virtual void cdata(XML_Parser_Class &c, XML_Parser &p, void *data, const char *chars)
Definition: XML_Parser.cc:208
void registered_ids(EST_TList< EST_String > &list)
Definition: XML_Parser.cc:53
virtual void element_close(XML_Parser_Class &c, XML_Parser &p, void *data, const char *name)
Definition: XML_Parser.cc:196
static InputSource open_entity(Entity ent, void *arg)
Definition: XML_Parser.cc:160
Parser p
The RXP parser object.
Definition: XML_Parser.h:273
XBit current_bit
The piece of markup being processed.
Definition: XML_Parser.h:259
const char * get_error()
Get the error message for the last error.
Definition: XML_Parser.cc:395
XML_Parser(XML_Parser_Class &parent, InputSource source, Entity initial_entity, void *data)
Creator used by XML_Parser_Class::make_parser()
Definition: XML_Parser.cc:246
void * data
Arbitrary data which can be used by callbacks.
Definition: XML_Parser.h:270
~XML_Parser()
Destructor, may close input if required.
Definition: XML_Parser.cc:261
InputSource open(Entity ent)
Open. Asks the parser class to do the work.
Definition: XML_Parser.cc:269
void track_contents(bool flag)
Definition: XML_Parser.cc:388
void go()
Run the parser.
Definition: XML_Parser.cc:274
EST_String context(int n)
Definition: XML_Parser.cc:450
EST_TDeque< EST_String > p_context
If context is being tracked, this is a stack of element names.
Definition: XML_Parser.h:276
void track_context(bool flag)
Definition: XML_Parser.cc:383
XML_Parser_Class * pclass
Definition: XML_Parser.h:256
InputSource source
Where we are reading from.
Definition: XML_Parser.h:262
Entity initial_entity
Definition: XML_Parser.h:267