1:
37:
38: package ;
39:
40: import ;
41:
42: import ;
43: import ;
44: import ;
45: import ;
46: import ;
47: import ;
48:
49:
53:
54:
59: public class XPathTokenizer
60: implements XPathParser.yyInput
61:
62: {
63:
64: static class XPathToken
65:
66: {
67:
68: int type;
69: String val;
70:
71: XPathToken (int type)
72: {
73: this (type, null);
74: }
75:
76: XPathToken (int type, String val)
77: {
78:
79: this.type = type;
80: this.val = val;
81: }
82:
83: public String getText ()
84: {
85: return val;
86: }
87:
88: public String toString ()
89: {
90: return val;
91: }
92:
93: }
94:
95: static final Map<String,Integer> keywords = new TreeMap<String,Integer> ();
96: static
97: {
98: keywords.put ("ancestor", new Integer (XPathParser.ANCESTOR));
99: keywords.put ("ancestor-or-self", new Integer (XPathParser.ANCESTOR_OR_SELF));
100: keywords.put ("attribute", new Integer (XPathParser.ATTRIBUTE));
101: keywords.put ("child", new Integer (XPathParser.CHILD));
102: keywords.put ("descendant", new Integer (XPathParser.DESCENDANT));
103: keywords.put ("descendant-or-self", new Integer (XPathParser.DESCENDANT_OR_SELF));
104: keywords.put ("following", new Integer (XPathParser.FOLLOWING));
105: keywords.put ("following-sibling", new Integer (XPathParser.FOLLOWING_SIBLING));
106: keywords.put ("namespace", new Integer (XPathParser.NAMESPACE));
107: keywords.put ("parent", new Integer (XPathParser.PARENT));
108: keywords.put ("preceding", new Integer (XPathParser.PRECEDING));
109: keywords.put ("preceding-sibling", new Integer (XPathParser.PRECEDING_SIBLING));
110: keywords.put ("self", new Integer (XPathParser.SELF));
111: keywords.put ("div", new Integer (XPathParser.DIV));
112: keywords.put ("mod", new Integer (XPathParser.MOD));
113: keywords.put ("or", new Integer (XPathParser.OR));
114: keywords.put ("and", new Integer (XPathParser.AND));
115: keywords.put ("comment", new Integer (XPathParser.COMMENT));
116: keywords.put ("processing-instruction", new Integer (XPathParser.PROCESSING_INSTRUCTION));
117: keywords.put ("text", new Integer (XPathParser.TEXT));
118: keywords.put ("node", new Integer (XPathParser.NODE));
119: }
120:
121: Reader in;
122: XPathToken token;
123: XPathToken lastToken;
124:
125: public XPathTokenizer (String expr)
126: {
127: this (new StringReader (expr));
128: }
129:
130: XPathTokenizer (Reader in)
131: {
132: this.in = in.markSupported () ? in : new BufferedReader (in);
133: }
134:
135:
156:
157: public boolean advance ()
158: throws IOException
159: {
160: lastToken = token;
161: int c = in.read ();
162: switch (c)
163: {
164: case -1:
165: return false;
166: case 0x20:
167: case 0x09:
168: case 0x0d:
169: case 0x0a:
170: return advance ();
171: case 0x22:
172: case 0x27:
173: token = consume_literal (c);
174: break;
175: case 0x28:
176: token = new XPathToken (XPathParser.LP);
177: break;
178: case 0x29:
179: token = new XPathToken (XPathParser.RP);
180: break;
181: case 0x5b:
182: token = new XPathToken (XPathParser.LB);
183: break;
184: case 0x5d:
185: token = new XPathToken (XPathParser.RB);
186: break;
187: case 0x2c:
188: token = new XPathToken (XPathParser.COMMA);
189: break;
190: case 0x7c:
191: token = new XPathToken (XPathParser.PIPE);
192: break;
193: case 0x2f:
194: in.mark (1);
195: int d1 = in.read ();
196: if (d1 == 0x2f)
197: {
198: token = new XPathToken (XPathParser.DOUBLE_SLASH);
199: }
200: else
201: {
202: in.reset ();
203: token = new XPathToken (XPathParser.SLASH);
204: }
205: break;
206: case 0x3d:
207: token = new XPathToken (XPathParser.EQ);
208: break;
209: case 0x21:
210: in.mark (1);
211: int d2 = in.read ();
212: if (d2 == 0x3d)
213: {
214: token = new XPathToken (XPathParser.NE);
215: }
216: else
217: {
218: in.reset ();
219: token = new XPathToken (XPathParser.yyErrorCode);
220: }
221: break;
222: case 0x3e:
223: in.mark (1);
224: int d3 = in.read ();
225: if (d3 == 0x3d)
226: {
227: token = new XPathToken (XPathParser.GTE);
228: }
229: else
230: {
231: in.reset ();
232: token = new XPathToken (XPathParser.GT);
233: }
234: break;
235: case 0x3c:
236: in.mark (1);
237: int d4 = in.read ();
238: if (d4 == 0x3d)
239: {
240: token = new XPathToken (XPathParser.LTE);
241: }
242: else
243: {
244: in.reset ();
245: token = new XPathToken (XPathParser.LT);
246: }
247: break;
248: case 0x2b:
249: token = new XPathToken (XPathParser.PLUS);
250: break;
251: case 0x2d:
252: token = new XPathToken (XPathParser.MINUS);
253: break;
254: case 0x40:
255: token = new XPathToken (XPathParser.AT);
256: break;
257: case 0x2a:
258: token = new XPathToken (XPathParser.STAR);
259: break;
260: case 0x24:
261: token = new XPathToken (XPathParser.DOLLAR);
262: break;
263: case 0x3a:
264: in.mark (1);
265: int d5 = in.read ();
266: if (d5 == 0x3a)
267: {
268: token = new XPathToken (XPathParser.DOUBLE_COLON);
269: }
270: else
271: {
272: in.reset ();
273: token = new XPathToken (XPathParser.COLON);
274: }
275: break;
276: case 0x2e:
277: in.mark (1);
278: int d6 = in.read ();
279: if (d6 == 0x2e)
280: {
281: token = new XPathToken (XPathParser.DOUBLE_DOT);
282: }
283: else
284: {
285: in.reset ();
286: token = new XPathToken (XPathParser.DOT);
287: }
288: break;
289: default:
290: if (c >= 0x30 && c <= 0x39)
291: {
292: token = consume_digits (c);
293: }
294: else if (c == 0x5f || Character.isLetter ((char) c))
295: {
296: token = consume_name (c);
297: }
298: else
299: {
300: token = new XPathToken (XPathParser.yyErrorCode);
301: }
302: }
303: return true;
304: }
305:
306: public int token ()
307: {
308: return token.type;
309: }
310:
311: public Object value ()
312: {
313: return token.val;
314: }
315:
316: XPathToken consume_literal (int delimiter)
317: throws IOException
318: {
319: CPStringBuilder buf = new CPStringBuilder ();
320: while (true)
321: {
322: int c = in.read ();
323: if (c == -1)
324: {
325: return new XPathToken (XPathParser.yyErrorCode);
326: }
327: else if (c == delimiter)
328: {
329: return new XPathToken (XPathParser.LITERAL, buf.toString ());
330: }
331: else
332: {
333: buf.append ((char) c);
334: }
335: }
336: }
337:
338: XPathToken consume_digits (int c)
339: throws IOException
340: {
341: CPStringBuilder buf = new CPStringBuilder ();
342: buf.append ((char) c);
343: while (true)
344: {
345: in.mark (1);
346: c = in.read ();
347: if (c >= 0x30 && c <= 0x39)
348: {
349: buf.append ((char) c);
350: }
351: else
352: {
353: in.reset ();
354: return new XPathToken (XPathParser.DIGITS, buf.toString ());
355: }
356: }
357: }
358:
359: XPathToken consume_name (int c)
360: throws IOException
361: {
362: CPStringBuilder buf = new CPStringBuilder ();
363: buf.append ((char) c);
364: while (true)
365: {
366: in.mark (1);
367: c = in.read ();
368: if (isNameChar (c))
369: {
370: buf.append ((char) c);
371: }
372: else
373: {
374: in.reset ();
375: String name = buf.toString ();
376: Integer keyword = (Integer) keywords.get (name);
377: if (keyword == null)
378: {
379: return new XPathToken (XPathParser.NAME, name);
380: }
381: else
382: {
383: int val = keyword.intValue ();
384: switch (val)
385: {
386: case XPathParser.NODE:
387: case XPathParser.COMMENT:
388: case XPathParser.TEXT:
389: case XPathParser.PROCESSING_INSTRUCTION:
390:
391: in.mark (1);
392: do
393: {
394: c = in.read ();
395: }
396: while (c == 0x20 || c == 0x09);
397: if (c != 0x28)
398: {
399: in.reset ();
400: return new XPathToken (XPathParser.NAME, name);
401: }
402: break;
403: case XPathParser.CHILD:
404: case XPathParser.PARENT:
405: case XPathParser.SELF:
406: case XPathParser.DESCENDANT:
407: case XPathParser.ANCESTOR:
408: case XPathParser.DESCENDANT_OR_SELF:
409: case XPathParser.ANCESTOR_OR_SELF:
410: case XPathParser.ATTRIBUTE:
411: case XPathParser.NAMESPACE:
412: case XPathParser.FOLLOWING:
413: case XPathParser.FOLLOWING_SIBLING:
414: case XPathParser.PRECEDING:
415: case XPathParser.PRECEDING_SIBLING:
416:
417: in.mark(1);
418: do
419: {
420: c = in.read();
421: }
422: while (c == 0x20 || c == 0x09);
423: if (c == 0x3a)
424: {
425: c = in.read();
426: if (c == 0x3a)
427: {
428: in.reset();
429: return new XPathToken(val);
430: }
431: }
432: in.reset();
433: return new XPathToken(XPathParser.NAME, name);
434: case XPathParser.DIV:
435: case XPathParser.MOD:
436:
437: if (lastToken == null)
438: {
439: return new XPathToken(XPathParser.NAME, name);
440: }
441: switch (lastToken.type)
442: {
443: case XPathParser.LP:
444: case XPathParser.LB:
445: case XPathParser.COMMA:
446: case XPathParser.PIPE:
447: case XPathParser.EQ:
448: case XPathParser.NE:
449: case XPathParser.GT:
450: case XPathParser.LT:
451: case XPathParser.GTE:
452: case XPathParser.LTE:
453: case XPathParser.PLUS:
454: case XPathParser.MINUS:
455: case XPathParser.STAR:
456: case XPathParser.AT:
457: case XPathParser.DOLLAR:
458: case XPathParser.COLON:
459: case XPathParser.DOUBLE_COLON:
460: case XPathParser.DIV:
461: case XPathParser.MOD:
462: case XPathParser.OR:
463: case XPathParser.AND:
464: case XPathParser.SLASH:
465: return new XPathToken(XPathParser.NAME, name);
466: }
467: break;
468: }
469: return new XPathToken (val);
470: }
471: }
472: }
473: }
474:
475: boolean isNameChar (int c)
476: {
477:
478: return (c == 0x5f
479: || c == 0x2d
480: || c == 0x2e
481: || (c >= 0x30 && c <= 0x39)
482:
483: || (c >= 0x0300 && c <= 0x0345)
484: || (c >= 0x0360 && c <= 0x0361)
485: || (c >= 0x0483 && c <= 0x0486)
486: || (c >= 0x0591 && c <= 0x05A1)
487: || (c >= 0x05A3 && c <= 0x05B9)
488: || (c >= 0x05BB && c <= 0x05BD)
489: || c == 0x05BF
490: || (c >= 0x05C1 && c <= 0x05C2)
491: || c == 0x05C4
492: || (c >= 0x064B && c <= 0x0652)
493: || c == 0x0670
494: || (c >= 0x06D6 && c <= 0x06DC)
495: || (c >= 0x06DD && c <= 0x06DF)
496: || (c >= 0x06E0 && c <= 0x06E4)
497: || (c >= 0x06E7 && c <= 0x06E8)
498: || (c >= 0x06EA && c <= 0x06ED)
499: || (c >= 0x0901 && c <= 0x0903)
500: || c == 0x093C
501: || (c >= 0x093E && c <= 0x094C)
502: || c == 0x094D
503: || (c >= 0x0951 && c <= 0x0954)
504: || (c >= 0x0962 && c <= 0x0963)
505: || (c >= 0x0981 && c <= 0x0983)
506: || c == 0x09BC
507: || c == 0x09BE
508: || c == 0x09BF
509: || (c >= 0x09C0 && c <= 0x09C4)
510: || (c >= 0x09C7 && c <= 0x09C8)
511: || (c >= 0x09CB && c <= 0x09CD)
512: || c == 0x09D7
513: || (c >= 0x09E2 && c <= 0x09E3)
514: || c == 0x0A02
515: || c == 0x0A3C
516: || c == 0x0A3E
517: || c == 0x0A3F
518: || (c >= 0x0A40 && c <= 0x0A42)
519: || (c >= 0x0A47 && c <= 0x0A48)
520: || (c >= 0x0A4B && c <= 0x0A4D)
521: || (c >= 0x0A70 && c <= 0x0A71)
522: || (c >= 0x0A81 && c <= 0x0A83)
523: || c == 0x0ABC
524: || (c >= 0x0ABE && c <= 0x0AC5)
525: || (c >= 0x0AC7 && c <= 0x0AC9)
526: || (c >= 0x0ACB && c <= 0x0ACD)
527: || (c >= 0x0B01 && c <= 0x0B03)
528: || c == 0x0B3C
529: || (c >= 0x0B3E && c <= 0x0B43)
530: || (c >= 0x0B47 && c <= 0x0B48)
531: || (c >= 0x0B4B && c <= 0x0B4D)
532: || (c >= 0x0B56 && c <= 0x0B57)
533: || (c >= 0x0B82 && c <= 0x0B83)
534: || (c >= 0x0BBE && c <= 0x0BC2)
535: || (c >= 0x0BC6 && c <= 0x0BC8)
536: || (c >= 0x0BCA && c <= 0x0BCD)
537: || c == 0x0BD7
538: || (c >= 0x0C01 && c <= 0x0C03)
539: || (c >= 0x0C3E && c <= 0x0C44)
540: || (c >= 0x0C46 && c <= 0x0C48)
541: || (c >= 0x0C4A && c <= 0x0C4D)
542: || (c >= 0x0C55 && c <= 0x0C56)
543: || (c >= 0x0C82 && c <= 0x0C83)
544: || (c >= 0x0CBE && c <= 0x0CC4)
545: || (c >= 0x0CC6 && c <= 0x0CC8)
546: || (c >= 0x0CCA && c <= 0x0CCD)
547: || (c >= 0x0CD5 && c <= 0x0CD6)
548: || (c >= 0x0D02 && c <= 0x0D03)
549: || (c >= 0x0D3E && c <= 0x0D43)
550: || (c >= 0x0D46 && c <= 0x0D48)
551: || (c >= 0x0D4A && c <= 0x0D4D)
552: || c == 0x0D57
553: || c == 0x0E31
554: || (c >= 0x0E34 && c <= 0x0E3A)
555: || (c >= 0x0E47 && c <= 0x0E4E)
556: || c == 0x0EB1
557: || (c >= 0x0EB4 && c <= 0x0EB9)
558: || (c >= 0x0EBB && c <= 0x0EBC)
559: || (c >= 0x0EC8 && c <= 0x0ECD)
560: || (c >= 0x0F18 && c <= 0x0F19)
561: || c == 0x0F35
562: || c == 0x0F37
563: || c == 0x0F39
564: || c == 0x0F3E
565: || c == 0x0F3F
566: || (c >= 0x0F71 && c <= 0x0F84)
567: || (c >= 0x0F86 && c <= 0x0F8B)
568: || (c >= 0x0F90 && c <= 0x0F95)
569: || c == 0x0F97
570: || (c >= 0x0F99 && c <= 0x0FAD)
571: || (c >= 0x0FB1 && c <= 0x0FB7)
572: || c == 0x0FB9
573: || (c >= 0x20D0 && c <= 0x20DC)
574: || c == 0x20E1
575: || (c >= 0x302A && c <= 0x302F)
576: || c == 0x3099
577: || c == 0x309A
578:
579: || c == 0x00B7
580: || c == 0x02D0
581: || c == 0x02D1
582: || c == 0x0387
583: || c == 0x0640
584: || c == 0x0E46
585: || c == 0x0EC6
586: || c == 0x3005
587: || (c >= 0x3031 && c <= 0x3035)
588: || (c >= 0x309D && c <= 0x309E)
589: || (c >= 0x30FC && c <= 0x30FE)
590:
591: || Character.isLetter ((char) c));
592: }
593:
594: }