1:
53:
54: package ;
55:
56: import ;
57:
58: import ;
59: import ;
60: import ;
61: import ;
62: import ;
63: import ;
64: import ;
65: import ;
66: import ;
67: import ;
68: import ;
69: import ;
70: import ;
71: import ;
72: import ;
73: import ;
74: import ;
75: import ;
76: import ;
77: import ;
78: import ;
79: import ;
80:
81: import ;
82: import ;
83: import ;
84: import ;
85: import ;
86: import ;
87: import ;
88: import ;
89: import ;
90: import ;
91:
92: import ;
93: import ;
94: import ;
95:
96:
119: public class XMLParser
120: implements XMLStreamReader, NamespaceContext
121: {
122:
123:
// -- Parser states held in 'state'; next() dispatches on them. --

// Nothing consumed yet: next() looks for the XML declaration and emits START_DOCUMENT.
private static final int INIT = 0;
// Before the root element: DOCTYPE, comments, PIs or the root start tag are accepted.
private static final int PROLOG = 1;
// Inside element content.
private static final int CONTENT = 2;
// An END_ELEMENT still has to be synthesized from the element stack on the next call.
private static final int EMPTY_ELEMENT = 3;
// After the root element has been closed: only comments and PIs are accepted.
private static final int MISC = 4;

// -- Bit flags (distinct powers of two). They are not used in this part of the
// file; presumably options for literal parsing -- confirm against their users. --
private final static int LIT_ENTITY_REF = 2;
private final static int LIT_NORMALIZE = 4;
private final static int LIT_ATTRIBUTE = 8;
private final static int LIT_DISABLE_PE = 16;
private final static int LIT_DISABLE_CREF = 32;
private final static int LIT_DISABLE_EREF = 64;
private final static int LIT_PUBID = 256;

// -- Attribute default kinds. Not used in this part of the file; presumably they
// mirror the DTD default declarations -- confirm against their users. --
final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
final static int ATTRIBUTE_DEFAULT_FIXED = 34;

// -- Extra event codes that next() reports for entity boundaries when
// extendedEventTypes is set. --
final static int START_ENTITY = 50;
final static int END_ENTITY = 51;

// The input currently being read; replaced by pushInput(Input).
private Input input;

// All inputs pushed so far; pushInput(Input) appends to the end.
private LinkedList inputStack = new LinkedList();

// Names of entities whose START_ENTITY event has not been delivered yet.
private LinkedList startEntityStack = new LinkedList();

// Names of entities whose END_ENTITY event has not been delivered yet.
private LinkedList endEntityStack = new LinkedList();

// Current parser state, one of INIT, PROLOG, CONTENT, EMPTY_ELEMENT, MISC.
private int state = INIT;

// Type of the most recent event produced by next().
private int event;

// Names of the open elements; the last entry is the current element.
private LinkedList stack = new LinkedList();

// Namespace contexts (LinkedHashMap of prefix to URI); the first entry is the
// one consulted for the current element and is dropped after END_ELEMENT.
private LinkedList namespaces = new LinkedList();

// Base URI entries; getXMLBase() returns the first non-null one.
private LinkedList bases = new LinkedList();

// Attribute objects of the current element.
private ArrayList attrs = new ArrayList();

// Holds the text or name associated with the current event.
private StringBuffer buf = new StringBuffer();

// Scratch buffer; not used in this part of the file (name suggests NMTOKEN parsing).
private StringBuffer nmtokenBuf = new StringBuffer();

// Scratch buffer; not used in this part of the file (name suggests literal parsing).
private StringBuffer literalBuf = new StringBuffer();

// Scratch array that require(String) and tryRead(String) read look-ahead into.
private int[] tmpBuf = new int[1024];

// Not used in this part of the file; presumably the content model of the
// element being validated -- confirm.
private ContentModel currentContentModel;

// Only allocated when validating.
private LinkedList validationStack;

// Only allocated when validating; names suggest seen IDs and IDREFs.
private HashSet ids, idrefs;

// Values returned by getPITarget() and getPIData().
private String piTarget, piData;

// Declared XML version; getVersion() falls back to "1.0" when null.
private String xmlVersion;

// Value returned by getCharacterEncodingScheme().
private String xmlEncoding;

// Standalone flag; null means it was not set (see standaloneSet()).
private Boolean xmlStandalone;

// The document type, or null if none has been read.
Doctype doctype;

// expandPE: readCh() expands a '%' as a parameter-entity reference.
// peIsError: such a reference is reported as an error instead.
private boolean expandPE, peIsError;

// Constructor option; enables the validation hooks and data structures.
private final boolean validating;

// Constructor option; names returned to the caller are interned.
private final boolean stringInterning;

// Constructor option, reported through getProperty(IS_COALESCING).
private final boolean coalescing;

// Constructor option; general entity references in content are expanded
// rather than reported as ENTITY_REFERENCE events.
private final boolean replaceERefs;

// Constructor option; when false, external entities are not read.
private final boolean externalEntities;

// Constructor option, reported through getProperty(SUPPORT_DTD).
private final boolean supportDTD;

// Constructor option; enables namespace context tracking.
private final boolean namespaceAware;

// Constructor option; enables base URI tracking.
private final boolean baseAware;

// Constructor option; START_ENTITY and END_ENTITY are reported by next().
private final boolean extendedEventTypes;

// Reporter supplied to the constructor (may be null).
final XMLReporter reporter;

// Resolver consulted first when loading external entities (may be null).
final XMLResolver resolver;

// Markup delimiters handed to tryRead(String).
private static final String TEST_START_ELEMENT = "<";
private static final String TEST_END_ELEMENT = "</";
private static final String TEST_COMMENT = "<!--";
private static final String TEST_PI = "<?";
private static final String TEST_CDATA = "<![CDATA[";
private static final String TEST_XML_DECL = "<?xml";
private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE";
private static final String TEST_ELEMENT_DECL = "<!ELEMENT";
private static final String TEST_ATTLIST_DECL = "<!ATTLIST";
private static final String TEST_ENTITY_DECL = "<!ENTITY";
private static final String TEST_NOTATION_DECL = "<!NOTATION";
private static final String TEST_KET = ">";
private static final String TEST_END_COMMENT = "--";
private static final String TEST_END_PI = "?>";
private static final String TEST_END_CDATA = "]]>";

// The five entities every XML document may reference without declaring them,
// mapped to their replacement text.
private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap();
static
{
  PREDEFINED_ENTITIES.put("amp", "&");
  PREDEFINED_ENTITIES.put("lt", "<");
  PREDEFINED_ENTITIES.put("gt", ">");
  PREDEFINED_ENTITIES.put("apos", "'");
  PREDEFINED_ENTITIES.put("quot", "\"");
}
370:
371:
394: public XMLParser(InputStream in, String systemId,
395: boolean validating,
396: boolean namespaceAware,
397: boolean coalescing,
398: boolean replaceERefs,
399: boolean externalEntities,
400: boolean supportDTD,
401: boolean baseAware,
402: boolean stringInterning,
403: boolean extendedEventTypes,
404: XMLReporter reporter,
405: XMLResolver resolver)
406: {
407: this.validating = validating;
408: this.namespaceAware = namespaceAware;
409: this.coalescing = coalescing;
410: this.replaceERefs = replaceERefs;
411: this.externalEntities = externalEntities;
412: this.supportDTD = supportDTD;
413: this.baseAware = baseAware;
414: this.stringInterning = stringInterning;
415: this.extendedEventTypes = extendedEventTypes;
416: this.reporter = reporter;
417: this.resolver = resolver;
418: if (validating)
419: {
420: validationStack = new LinkedList();
421: ids = new HashSet();
422: idrefs = new HashSet();
423: }
424: String debug = System.getProperty("gnu.xml.debug.input");
425: if (debug != null)
426: {
427: try
428: {
429: File file = File.createTempFile(debug, ".xml");
430: in = new TeeInputStream(in, new FileOutputStream(file));
431: }
432: catch (IOException e)
433: {
434: RuntimeException e2 = new RuntimeException();
435: e2.initCause(e);
436: throw e2;
437: }
438: }
439: systemId = canonicalize(systemId);
440: pushInput(new Input(in, null, null, systemId, null, null, false, true));
441: }
442:
443:
469: public XMLParser(Reader reader, String systemId,
470: boolean validating,
471: boolean namespaceAware,
472: boolean coalescing,
473: boolean replaceERefs,
474: boolean externalEntities,
475: boolean supportDTD,
476: boolean baseAware,
477: boolean stringInterning,
478: boolean extendedEventTypes,
479: XMLReporter reporter,
480: XMLResolver resolver)
481: {
482: this.validating = validating;
483: this.namespaceAware = namespaceAware;
484: this.coalescing = coalescing;
485: this.replaceERefs = replaceERefs;
486: this.externalEntities = externalEntities;
487: this.supportDTD = supportDTD;
488: this.baseAware = baseAware;
489: this.stringInterning = stringInterning;
490: this.extendedEventTypes = extendedEventTypes;
491: this.reporter = reporter;
492: this.resolver = resolver;
493: if (validating)
494: {
495: validationStack = new LinkedList();
496: ids = new HashSet();
497: idrefs = new HashSet();
498: }
499: String debug = System.getProperty("gnu.xml.debug.input");
500: if (debug != null)
501: {
502: try
503: {
504: File file = File.createTempFile(debug, ".xml");
505: reader = new TeeReader(reader, new FileWriter(file));
506: }
507: catch (IOException e)
508: {
509: RuntimeException e2 = new RuntimeException();
510: e2.initCause(e);
511: throw e2;
512: }
513: }
514: systemId = canonicalize(systemId);
515: pushInput(new Input(null, reader, null, systemId, null, null, false, true));
516: }
517:
518:
519:
520: public String getNamespaceURI(String prefix)
521: {
522: if (XMLConstants.XML_NS_PREFIX.equals(prefix))
523: return XMLConstants.XML_NS_URI;
524: if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix))
525: return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
526: for (Iterator i = namespaces.iterator(); i.hasNext(); )
527: {
528: LinkedHashMap ctx = (LinkedHashMap) i.next();
529: String namespaceURI = (String) ctx.get(prefix);
530: if (namespaceURI != null)
531: return namespaceURI;
532: }
533: return null;
534: }
535:
536: public String getPrefix(String namespaceURI)
537: {
538: if (XMLConstants.XML_NS_URI.equals(namespaceURI))
539: return XMLConstants.XML_NS_PREFIX;
540: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
541: return XMLConstants.XMLNS_ATTRIBUTE;
542: for (Iterator i = namespaces.iterator(); i.hasNext(); )
543: {
544: LinkedHashMap ctx = (LinkedHashMap) i.next();
545: if (ctx.containsValue(namespaceURI))
546: {
547: for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
548: {
549: Map.Entry entry = (Map.Entry) i.next();
550: String uri = (String) entry.getValue();
551: if (uri.equals(namespaceURI))
552: return (String) entry.getKey();
553: }
554: }
555: }
556: return null;
557: }
558:
559: public Iterator getPrefixes(String namespaceURI)
560: {
561: if (XMLConstants.XML_NS_URI.equals(namespaceURI))
562: return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator();
563: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
564: return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator();
565: LinkedList acc = new LinkedList();
566: for (Iterator i = namespaces.iterator(); i.hasNext(); )
567: {
568: LinkedHashMap ctx = (LinkedHashMap) i.next();
569: if (ctx.containsValue(namespaceURI))
570: {
571: for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
572: {
573: Map.Entry entry = (Map.Entry) i.next();
574: String uri = (String) entry.getValue();
575: if (uri.equals(namespaceURI))
576: acc.add(entry.getKey());
577: }
578: }
579: }
580: return acc.iterator();
581: }
582:
583:
584:
585: public void close()
586: throws XMLStreamException
587: {
588: stack = null;
589: namespaces = null;
590: bases = null;
591: buf = null;
592: attrs = null;
593: doctype = null;
594:
595: inputStack = null;
596: validationStack = null;
597: ids = null;
598: idrefs = null;
599: }
600:
/**
 * Returns the namespace context, which is this parser itself.
 *
 * @return this parser
 */
public NamespaceContext getNamespaceContext()
{
  return this;
}
605:
606: public int getAttributeCount()
607: {
608: return attrs.size();
609: }
610:
611: public String getAttributeLocalName(int index)
612: {
613: Attribute a = (Attribute) attrs.get(index);
614: return a.localName;
615: }
616:
617: public String getAttributeNamespace(int index)
618: {
619: String prefix = getAttributePrefix(index);
620: return getNamespaceURI(prefix);
621: }
622:
623: public String getAttributePrefix(int index)
624: {
625: Attribute a = (Attribute) attrs.get(index);
626: return a.prefix;
627: }
628:
629: public QName getAttributeName(int index)
630: {
631: Attribute a = (Attribute) attrs.get(index);
632: String namespaceURI = getNamespaceURI(a.prefix);
633: return new QName(namespaceURI, a.localName, a.prefix);
634: }
635:
636: public String getAttributeType(int index)
637: {
638: Attribute a = (Attribute) attrs.get(index);
639: return a.type;
640: }
641:
642: private String getAttributeType(String elementName, String attName)
643: {
644: if (doctype != null)
645: {
646: AttributeDecl att = doctype.getAttributeDecl(elementName, attName);
647: if (att != null)
648: return att.type;
649: }
650: return "CDATA";
651: }
652:
653: public String getAttributeValue(int index)
654: {
655: Attribute a = (Attribute) attrs.get(index);
656: return a.value;
657: }
658:
659: public String getAttributeValue(String namespaceURI, String localName)
660: {
661: for (Iterator i = attrs.iterator(); i.hasNext(); )
662: {
663: Attribute a = (Attribute) i.next();
664: if (a.localName.equals(localName))
665: {
666: String uri = getNamespaceURI(a.prefix);
667: if ((uri == null && namespaceURI == null) ||
668: (uri != null && uri.equals(namespaceURI)))
669: return a.value;
670: }
671: }
672: return null;
673: }
674:
675: boolean isAttributeDeclared(int index)
676: {
677: if (doctype == null)
678: return false;
679: Attribute a = (Attribute) attrs.get(index);
680: String qn = ("".equals(a.prefix)) ? a.localName :
681: a.prefix + ":" + a.localName;
682: String elementName = buf.toString();
683: return doctype.isAttributeDeclared(elementName, qn);
684: }
685:
686: public String getCharacterEncodingScheme()
687: {
688: return xmlEncoding;
689: }
690:
691: public String getElementText()
692: throws XMLStreamException
693: {
694: if (event != XMLStreamConstants.START_ELEMENT)
695: throw new XMLStreamException("current event must be START_ELEMENT");
696: CPStringBuilder elementText = new CPStringBuilder();
697: int depth = stack.size();
698: while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth)
699: {
700: switch (next())
701: {
702: case XMLStreamConstants.CHARACTERS:
703: case XMLStreamConstants.SPACE:
704: elementText.append(buf.toString());
705: }
706: }
707: return elementText.toString();
708: }
709:
710: public String getEncoding()
711: {
712: return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding;
713: }
714:
715: public int getEventType()
716: {
717: return event;
718: }
719:
720: public String getLocalName()
721: {
722: switch (event)
723: {
724: case XMLStreamConstants.START_ELEMENT:
725: case XMLStreamConstants.END_ELEMENT:
726: String qName = buf.toString();
727: int ci = qName.indexOf(':');
728: String localName = (ci == -1) ? qName : qName.substring(ci + 1);
729: if (stringInterning)
730: localName = localName.intern();
731: return localName;
732: default:
733: return null;
734: }
735: }
736:
737: public Location getLocation()
738: {
739: return input;
740: }
741:
742: public QName getName()
743: {
744: switch (event)
745: {
746: case XMLStreamConstants.START_ELEMENT:
747: case XMLStreamConstants.END_ELEMENT:
748: String qName = buf.toString();
749: int ci = qName.indexOf(':');
750: String localName = (ci == -1) ? qName : qName.substring(ci + 1);
751: if (stringInterning)
752: localName = localName.intern();
753: String prefix = (ci == -1) ?
754: (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
755: qName.substring(0, ci);
756: if (stringInterning && prefix != null)
757: prefix = prefix.intern();
758: String namespaceURI = getNamespaceURI(prefix);
759: return new QName(namespaceURI, localName, prefix);
760: default:
761: return null;
762: }
763: }
764:
765: public int getNamespaceCount()
766: {
767: if (!namespaceAware || namespaces.isEmpty())
768: return 0;
769: switch (event)
770: {
771: case XMLStreamConstants.START_ELEMENT:
772: case XMLStreamConstants.END_ELEMENT:
773: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
774: return ctx.size();
775: default:
776: return 0;
777: }
778: }
779:
780: public String getNamespacePrefix(int index)
781: {
782: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
783: int count = 0;
784: for (Iterator i = ctx.keySet().iterator(); i.hasNext(); )
785: {
786: String prefix = (String) i.next();
787: if (count++ == index)
788: return prefix;
789: }
790: return null;
791: }
792:
793: public String getNamespaceURI()
794: {
795: switch (event)
796: {
797: case XMLStreamConstants.START_ELEMENT:
798: case XMLStreamConstants.END_ELEMENT:
799: String qName = buf.toString();
800: int ci = qName.indexOf(':');
801: if (ci == -1)
802: return null;
803: String prefix = qName.substring(0, ci);
804: return getNamespaceURI(prefix);
805: default:
806: return null;
807: }
808: }
809:
810: public String getNamespaceURI(int index)
811: {
812: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
813: int count = 0;
814: for (Iterator i = ctx.values().iterator(); i.hasNext(); )
815: {
816: String uri = (String) i.next();
817: if (count++ == index)
818: return uri;
819: }
820: return null;
821: }
822:
823: public String getPIData()
824: {
825: return piData;
826: }
827:
828: public String getPITarget()
829: {
830: return piTarget;
831: }
832:
833: public String getPrefix()
834: {
835: switch (event)
836: {
837: case XMLStreamConstants.START_ELEMENT:
838: case XMLStreamConstants.END_ELEMENT:
839: String qName = buf.toString();
840: int ci = qName.indexOf(':');
841: String prefix = (ci == -1) ?
842: (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
843: qName.substring(0, ci);
844: if (stringInterning && prefix != null)
845: prefix = prefix.intern();
846: return prefix;
847: default:
848: return null;
849: }
850: }
851:
852: public Object getProperty(String name)
853: throws IllegalArgumentException
854: {
855: if (name == null)
856: throw new IllegalArgumentException("name is null");
857: if (XMLInputFactory.ALLOCATOR.equals(name))
858: return null;
859: if (XMLInputFactory.IS_COALESCING.equals(name))
860: return coalescing ? Boolean.TRUE : Boolean.FALSE;
861: if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name))
862: return namespaceAware ? Boolean.TRUE : Boolean.FALSE;
863: if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name))
864: return replaceERefs ? Boolean.TRUE : Boolean.FALSE;
865: if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name))
866: return externalEntities ? Boolean.TRUE : Boolean.FALSE;
867: if (XMLInputFactory.IS_VALIDATING.equals(name))
868: return Boolean.FALSE;
869: if (XMLInputFactory.REPORTER.equals(name))
870: return reporter;
871: if (XMLInputFactory.RESOLVER.equals(name))
872: return resolver;
873: if (XMLInputFactory.SUPPORT_DTD.equals(name))
874: return supportDTD ? Boolean.TRUE : Boolean.FALSE;
875: if ("gnu.xml.stream.stringInterning".equals(name))
876: return stringInterning ? Boolean.TRUE : Boolean.FALSE;
877: if ("gnu.xml.stream.xmlBase".equals(name))
878: return baseAware ? Boolean.TRUE : Boolean.FALSE;
879: if ("gnu.xml.stream.baseURI".equals(name))
880: return getXMLBase();
881: return null;
882: }
883:
884: public String getText()
885: {
886: return buf.toString();
887: }
888:
889: public char[] getTextCharacters()
890: {
891: return buf.toString().toCharArray();
892: }
893:
894: public int getTextCharacters(int sourceStart, char[] target,
895: int targetStart, int length)
896: throws XMLStreamException
897: {
898: length = Math.min(sourceStart + buf.length(), length);
899: int sourceEnd = sourceStart + length;
900: buf.getChars(sourceStart, sourceEnd, target, targetStart);
901: return length;
902: }
903:
904: public int getTextLength()
905: {
906: return buf.length();
907: }
908:
/**
 * Returns the offset of the text within the character array.
 *
 * @return always 0
 */
public int getTextStart()
{
  return 0;
}
913:
914: public String getVersion()
915: {
916: return (xmlVersion == null) ? "1.0" : xmlVersion;
917: }
918:
919: public boolean hasName()
920: {
921: switch (event)
922: {
923: case XMLStreamConstants.START_ELEMENT:
924: case XMLStreamConstants.END_ELEMENT:
925: return true;
926: default:
927: return false;
928: }
929: }
930:
931: public boolean hasText()
932: {
933: switch (event)
934: {
935: case XMLStreamConstants.CHARACTERS:
936: case XMLStreamConstants.SPACE:
937: return true;
938: default:
939: return false;
940: }
941: }
942:
943: public boolean isAttributeSpecified(int index)
944: {
945: Attribute a = (Attribute) attrs.get(index);
946: return a.specified;
947: }
948:
949: public boolean isCharacters()
950: {
951: return (event == XMLStreamConstants.CHARACTERS);
952: }
953:
954: public boolean isEndElement()
955: {
956: return (event == XMLStreamConstants.END_ELEMENT);
957: }
958:
959: public boolean isStandalone()
960: {
961: return Boolean.TRUE.equals(xmlStandalone);
962: }
963:
964: public boolean isStartElement()
965: {
966: return (event == XMLStreamConstants.START_ELEMENT);
967: }
968:
969: public boolean isWhiteSpace()
970: {
971: return (event == XMLStreamConstants.SPACE);
972: }
973:
974: public int nextTag()
975: throws XMLStreamException
976: {
977: do
978: {
979: switch (next())
980: {
981: case XMLStreamConstants.START_ELEMENT:
982: case XMLStreamConstants.END_ELEMENT:
983: case XMLStreamConstants.CHARACTERS:
984: case XMLStreamConstants.SPACE:
985: case XMLStreamConstants.COMMENT:
986: case XMLStreamConstants.PROCESSING_INSTRUCTION:
987: break;
988: default:
989: throw new XMLStreamException("Unexpected event type: " + event);
990: }
991: }
992: while (event != XMLStreamConstants.START_ELEMENT &&
993: event != XMLStreamConstants.END_ELEMENT);
994: return event;
995: }
996:
997: public void require(int type, String namespaceURI, String localName)
998: throws XMLStreamException
999: {
1000: if (event != type)
1001: throw new XMLStreamException("Current event type is " + event);
1002: if (event == XMLStreamConstants.START_ELEMENT ||
1003: event == XMLStreamConstants.END_ELEMENT)
1004: {
1005: String ln = getLocalName();
1006: if (!ln.equals(localName))
1007: throw new XMLStreamException("Current local-name is " + ln);
1008: String uri = getNamespaceURI();
1009: if ((uri == null && namespaceURI != null) ||
1010: (uri != null && !uri.equals(namespaceURI)))
1011: throw new XMLStreamException("Current namespace URI is " + uri);
1012: }
1013: }
1014:
1015: public boolean standaloneSet()
1016: {
1017: return (xmlStandalone != null);
1018: }
1019:
1020: public boolean hasNext()
1021: throws XMLStreamException
1022: {
1023: return (event != XMLStreamConstants.END_DOCUMENT && event != -1);
1024: }
1025:
/**
 * Advances the parser by one event and returns its type.
 * Pending entity boundary events are delivered first; otherwise the next
 * construct is read according to the current parser state. The event type is
 * also stored in the <code>event</code> field.
 *
 * @return the type of the new current event, or -1 if the parser state is
 *         not one of the known states
 * @throws XMLStreamException if the input is not well-formed, or wrapping
 *         any IOException raised while reading
 * @throws NoSuchElementException if called again after END_DOCUMENT has been
 *         reached
 */
public int next()
  throws XMLStreamException
{
  if (event == XMLStreamConstants.END_ELEMENT)
  {
    // The previous event closed an element: its namespace context and base
    // entry stayed available for that event and are dropped only now.
    if (namespaceAware && !namespaces.isEmpty())
      namespaces.removeFirst();
    if (baseAware && !bases.isEmpty())
      bases.removeFirst();
  }
  if (!startEntityStack.isEmpty())
  {
    // A pushed input asked for a START_ENTITY report; the entity name is
    // placed in buf. Without extended event types the event is skipped.
    String entityName = (String) startEntityStack.removeFirst();
    buf.setLength(0);
    buf.append(entityName);
    event = START_ENTITY;
    return extendedEventTypes ? event : next();
  }
  else if (!endEntityStack.isEmpty())
  {
    // Same handling for a pending END_ENTITY report.
    String entityName = (String) endEntityStack.removeFirst();
    buf.setLength(0);
    buf.append(entityName);
    event = END_ENTITY;
    return extendedEventTypes ? event : next();
  }
  try
  {
    if (!input.initialized)
      input.init();
    switch (state)
    {
      case CONTENT:
        // Longer delimiters are tested before "<", which is a prefix of
        // all of them.
        if (tryRead(TEST_END_ELEMENT))
        {
          readEndElement();
          // Closing the last open element ends the document content.
          if (stack.isEmpty())
            state = MISC;
          event = XMLStreamConstants.END_ELEMENT;
        }
        else if (tryRead(TEST_COMMENT))
        {
          readComment(false);
          event = XMLStreamConstants.COMMENT;
        }
        else if (tryRead(TEST_PI))
        {
          readPI(false);
          event = XMLStreamConstants.PROCESSING_INSTRUCTION;
        }
        else if (tryRead(TEST_CDATA))
        {
          readCDSect();
          event = XMLStreamConstants.CDATA;
        }
        else if (tryRead(TEST_START_ELEMENT))
        {
          // readStartElement() returns the state to continue in.
          state = readStartElement();
          event = XMLStreamConstants.START_ELEMENT;
        }
        else
        {
          // Not markup: look ahead to tell references from plain text.
          mark(8);
          int c = readCh();
          if (c == 0x26) // '&'
          {
            c = readCh();
            if (c == 0x23) // '#': character reference, left to readCharData
            {
              reset();
              event = readCharData(null);
            }
            else
            {
              // Entity reference: rewind, consume the '&' again and read
              // the reference; its name is then taken from buf.
              reset();
              readCh();
              readReference();
              String ref = buf.toString();
              String text = (String) PREDEFINED_ENTITIES.get(ref);
              if (text != null)
              {
                // Predefined entity: its replacement text starts the
                // character data.
                event = readCharData(text);
              }
              else if (replaceERefs && !isUnparsedEntity(ref))
              {
                // Expand the entity and report whatever comes next.
                boolean external = false;
                if (doctype != null)
                {
                  Object entity = doctype.getEntity(ref);
                  if (entity instanceof ExternalIds)
                    external = true;
                }
                expandEntity(ref, false, external);
                event = next();
              }
              else
              {
                // Leave the reference unexpanded; buf holds its name.
                event = XMLStreamConstants.ENTITY_REFERENCE;
              }
            }
          }
          else
          {
            // Plain character data.
            reset();
            event = readCharData(null);
            if (validating && doctype != null)
              validatePCData(buf.toString());
          }
        }
        break;
      case EMPTY_ELEMENT:
        // Synthesize the END_ELEMENT for the element on top of the stack,
        // putting its name back into buf.
        String elementName = (String) stack.removeLast();
        buf.setLength(0);
        buf.append(elementName);
        state = stack.isEmpty() ? MISC : CONTENT;
        event = XMLStreamConstants.END_ELEMENT;
        if (validating && doctype != null)
          endElementValidationHook();
        break;
      case INIT:
        // The XML declaration, if present, is consumed before the encoding
        // is finalized.
        if (tryRead(TEST_XML_DECL))
          readXMLDecl();
        input.finalizeEncoding();
        event = XMLStreamConstants.START_DOCUMENT;
        state = PROLOG;
        break;
      case PROLOG:
        skipWhitespace();
        // A DOCTYPE declaration is only looked for while none has been read.
        if (doctype == null && tryRead(TEST_DOCTYPE_DECL))
        {
          readDoctypeDecl();
          event = XMLStreamConstants.DTD;
        }
        else if (tryRead(TEST_COMMENT))
        {
          readComment(false);
          event = XMLStreamConstants.COMMENT;
        }
        else if (tryRead(TEST_PI))
        {
          readPI(false);
          event = XMLStreamConstants.PROCESSING_INSTRUCTION;
        }
        else if (tryRead(TEST_START_ELEMENT))
        {
          state = readStartElement();
          event = XMLStreamConstants.START_ELEMENT;
        }
        else
        {
          int c = readCh();
          error("no root element: U+" + Integer.toHexString(c));
        }
        break;
      case MISC:
        skipWhitespace();
        if (tryRead(TEST_COMMENT))
        {
          readComment(false);
          event = XMLStreamConstants.COMMENT;
        }
        else if (tryRead(TEST_PI))
        {
          readPI(false);
          event = XMLStreamConstants.PROCESSING_INSTRUCTION;
        }
        else
        {
          // END_DOCUMENT was already delivered: there is nothing left.
          if (event == XMLStreamConstants.END_DOCUMENT)
            throw new NoSuchElementException();
          // Anything other than end of input is an error here.
          int c = readCh();
          if (c != -1)
            error("Only comments and PIs may appear after " +
                  "the root element");
          event = XMLStreamConstants.END_DOCUMENT;
        }
        break;
      default:
        event = -1;
    }
    return event;
  }
  catch (IOException e)
  {
    // Surface I/O failures as stream exceptions, keeping the cause.
    XMLStreamException e2 = new XMLStreamException();
    e2.initCause(e);
    throw e2;
  }
}
1220:
1221:
1222:
1223:
1226: String getCurrentElement()
1227: {
1228: return (String) stack.getLast();
1229: }
1230:
1231:
1232:
1233: private void mark(int limit)
1234: throws IOException
1235: {
1236: input.mark(limit);
1237: }
1238:
1239: private void reset()
1240: throws IOException
1241: {
1242: input.reset();
1243: }
1244:
1245: private int read()
1246: throws IOException
1247: {
1248: return input.read();
1249: }
1250:
1251: private int read(int[] b, int off, int len)
1252: throws IOException
1253: {
1254: return input.read(b, off, len);
1255: }
1256:
1257:
1260: private int readCh()
1261: throws IOException, XMLStreamException
1262: {
1263: int c = read();
1264: if (expandPE && c == 0x25)
1265: {
1266: if (peIsError)
1267: error("PE reference within decl in internal subset.");
1268: expandPEReference();
1269: return readCh();
1270: }
1271: return c;
1272: }
1273:
1274:
1280: private void require(char delim)
1281: throws IOException, XMLStreamException
1282: {
1283: mark(1);
1284: int c = readCh();
1285: if (delim != c)
1286: {
1287: reset();
1288: error("required character (got U+" + Integer.toHexString(c) + ")",
1289: new Character(delim));
1290: }
1291: }
1292:
1293:
1299: private void require(String delim)
1300: throws IOException, XMLStreamException
1301: {
1302: char[] chars = delim.toCharArray();
1303: int len = chars.length;
1304: mark(len);
1305: int off = 0;
1306: do
1307: {
1308: int l2 = read(tmpBuf, off, len - off);
1309: if (l2 == -1)
1310: {
1311: reset();
1312: error("EOF before required string", delim);
1313: }
1314: off += l2;
1315: }
1316: while (off < len);
1317: for (int i = 0; i < chars.length; i++)
1318: {
1319: if (chars[i] != tmpBuf[i])
1320: {
1321: reset();
1322: error("required string", delim);
1323: }
1324: }
1325: }
1326:
1327:
1332: private boolean tryRead(char delim)
1333: throws IOException, XMLStreamException
1334: {
1335: mark(1);
1336: int c = readCh();
1337: if (delim != c)
1338: {
1339: reset();
1340: return false;
1341: }
1342: return true;
1343: }
1344:
1345:
1352: private boolean tryRead(String test)
1353: throws IOException
1354: {
1355: char[] chars = test.toCharArray();
1356: int len = chars.length;
1357: mark(len);
1358: int count = 0;
1359: int l2 = read(tmpBuf, 0, len);
1360: if (l2 == -1)
1361: {
1362: reset();
1363: return false;
1364: }
1365: count += l2;
1366:
1367: for (int i = 0; i < count; i++)
1368: {
1369: if (chars[i] != tmpBuf[i])
1370: {
1371: reset();
1372: return false;
1373: }
1374: }
1375: while (count < len)
1376: {
1377:
1378: int c = read();
1379: if (c == -1)
1380: {
1381: reset();
1382: return false;
1383: }
1384: tmpBuf[count] = (char) c;
1385:
1386: if (chars[count] != tmpBuf[count])
1387: {
1388: reset();
1389: return false;
1390: }
1391: count++;
1392: }
1393: return true;
1394: }
1395:
1396:
1400: private void readUntil(String delim)
1401: throws IOException, XMLStreamException
1402: {
1403: int startLine = input.line;
1404: try
1405: {
1406: while (!tryRead(delim))
1407: {
1408: int c = readCh();
1409: if (c == -1)
1410: throw new EOFException();
1411: else if (input.xml11)
1412: {
1413: if (!isXML11Char(c) || isXML11RestrictedChar(c))
1414: error("illegal XML 1.1 character",
1415: "U+" + Integer.toHexString(c));
1416: }
1417: else if (!isChar(c))
1418: error("illegal XML character",
1419: "U+" + Integer.toHexString(c));
1420: buf.append(Character.toChars(c));
1421: }
1422: }
1423: catch (EOFException e)
1424: {
1425: error("end of input while looking for delimiter "+
1426: "(started on line " + startLine + ')', delim);
1427: }
1428: }
1429:
1430:
1434: private boolean tryWhitespace()
1435: throws IOException, XMLStreamException
1436: {
1437: boolean white;
1438: boolean ret = false;
1439: do
1440: {
1441: mark(1);
1442: int c = readCh();
1443: while (c == -1 && inputStack.size() > 1)
1444: {
1445: popInput();
1446: c = readCh();
1447: }
1448: white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1449: if (white)
1450: ret = true;
1451: }
1452: while (white);
1453: reset();
1454: return ret;
1455: }
1456:
1457:
1460: private void skipWhitespace()
1461: throws IOException, XMLStreamException
1462: {
1463: boolean white;
1464: do
1465: {
1466: mark(1);
1467: int c = readCh();
1468: while (c == -1 && inputStack.size() > 1)
1469: {
1470: popInput();
1471: c = readCh();
1472: }
1473: white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1474: }
1475: while (white);
1476: reset();
1477: }
1478:
1479:
1483: private void requireWhitespace()
1484: throws IOException, XMLStreamException
1485: {
1486: if (!tryWhitespace())
1487: error("whitespace required");
1488: }
1489:
1490:
1493: String getXMLBase()
1494: {
1495: if (baseAware)
1496: {
1497: for (Iterator i = bases.iterator(); i.hasNext(); )
1498: {
1499: String base = (String) i.next();
1500: if (base != null)
1501: return base;
1502: }
1503: }
1504: return input.systemId;
1505: }
1506:
1507:
1510: private void pushInput(String name, String text, boolean report,
1511: boolean normalize)
1512: throws IOException, XMLStreamException
1513: {
1514:
1515: if (name != null && !"".equals(name))
1516: {
1517: for (Iterator i = inputStack.iterator(); i.hasNext(); )
1518: {
1519: Input ctx = (Input) i.next();
1520: if (name.equals(ctx.name))
1521: error("entities may not be self-recursive", name);
1522: }
1523: }
1524: else
1525: report = false;
1526: pushInput(new Input(null, new StringReader(text), input.publicId,
1527: input.systemId, name, input.inputEncoding, report,
1528: normalize));
1529: }
1530:
1531:
1534: private void pushInput(String name, ExternalIds ids, boolean report,
1535: boolean normalize)
1536: throws IOException, XMLStreamException
1537: {
1538: if (!externalEntities)
1539: return;
1540: String url = canonicalize(absolutize(input.systemId, ids.systemId));
1541:
1542: for (Iterator i = inputStack.iterator(); i.hasNext(); )
1543: {
1544: Input ctx = (Input) i.next();
1545: if (url.equals(ctx.systemId))
1546: error("entities may not be self-recursive", url);
1547: if (name != null && !"".equals(name) && name.equals(ctx.name))
1548: error("entities may not be self-recursive", name);
1549: }
1550: if (name == null || "".equals(name))
1551: report = false;
1552: InputStream in = null;
1553: if (resolver != null)
1554: {
1555: Object obj = resolver.resolveEntity(ids.publicId, url, getXMLBase(),
1556: null);
1557: if (obj instanceof InputStream)
1558: in = (InputStream) obj;
1559: }
1560: if (in == null)
1561: in = resolve(url);
1562: if (in == null)
1563: error("unable to resolve external entity",
1564: (ids.systemId != null) ? ids.systemId : ids.publicId);
1565: pushInput(new Input(in, null, ids.publicId, url, name, null, report,
1566: normalize));
1567: input.init();
1568: if (tryRead(TEST_XML_DECL))
1569: readTextDecl();
1570: input.finalizeEncoding();
1571: }
1572:
1573:
1576: private void pushInput(Input input)
1577: {
1578: if (input.report)
1579: startEntityStack.addFirst(input.name);
1580: inputStack.addLast(input);
1581: if (this.input != null)
1582: input.xml11 = this.input.xml11;
1583: this.input = input;
1584: }
1585:
1586:
1591: static String canonicalize(String url)
1592: {
1593: if (url == null)
1594: return null;
1595: if (url.startsWith("file:") && !url.startsWith("file://"))
1596: url = "file://" + url.substring(5);
1597: return url;
1598: }
1599:
1600:
1605: public static String absolutize(String base, String href)
1606: {
1607: if (href == null)
1608: return null;
1609: int ci = href.indexOf(':');
1610: if (ci > 1 && isURLScheme(href.substring(0, ci)))
1611: {
1612:
1613: return href;
1614: }
1615: if (base == null)
1616: base = "";
1617: else
1618: {
1619: int i = base.lastIndexOf('/');
1620: if (i != -1)
1621: base = base.substring(0, i + 1);
1622: else
1623: base = "";
1624: }
1625: if ("".equals(base))
1626: {
1627:
1628: base = System.getProperty("user.dir");
1629: if (base.charAt(0) == '/')
1630: base = base.substring(1);
1631: base = "file:///" + base.replace(File.separatorChar, '/');
1632: if (!base.endsWith("/"))
1633: base += "/";
1634: }
1635:
1636:
1637:
1638:
1639:
1640: String basePrefix = null;
1641: ci = base.indexOf(':');
1642: if (ci > 1 && isURLScheme(base.substring(0, ci)))
1643: {
1644: if (base.length() > (ci + 3) &&
1645: base.charAt(ci + 1) == '/' &&
1646: base.charAt(ci + 2) == '/')
1647: {
1648: int si = base.indexOf('/', ci + 3);
1649: if (si == -1)
1650: base = null;
1651: else
1652: {
1653: basePrefix = base.substring(0, si);
1654: base = base.substring(si);
1655: }
1656: }
1657: else
1658: base = null;
1659: }
1660: if (base == null)
1661: return href;
1662: if (href.startsWith("/"))
1663: return (basePrefix == null) ? href : basePrefix + href;
1664:
1665: if (!base.endsWith("/"))
1666: {
1667: int lsi = base.lastIndexOf('/');
1668: if (lsi == -1)
1669: base = "/";
1670: else
1671: base = base.substring(0, lsi + 1);
1672: }
1673: while (href.startsWith("../") || href.startsWith("./"))
1674: {
1675: if (href.startsWith("../"))
1676: {
1677:
1678: int lsi = base.lastIndexOf('/', base.length() - 2);
1679: if (lsi > -1)
1680: base = base.substring(0, lsi + 1);
1681: href = href.substring(3);
1682: }
1683: else
1684: {
1685: href = href.substring(2);
1686: }
1687: }
1688: return (basePrefix == null) ? base + href : basePrefix + base + href;
1689: }
1690:
1691:
1696: private static boolean isURLScheme(String text)
1697: {
1698: int len = text.length();
1699: for (int i = 0; i < len; i++)
1700: {
1701: char c = text.charAt(i);
1702: if (c == '+' || c == '.' || c == '-')
1703: continue;
1704: if (c < 65 || (c > 90 && c < 97) || c > 122)
1705: return false;
1706: }
1707: return true;
1708: }
1709:
1710:
1713: static InputStream resolve(String url)
1714: throws IOException
1715: {
1716: try
1717: {
1718: return new URL(url).openStream();
1719: }
1720: catch (MalformedURLException e)
1721: {
1722: return null;
1723: }
1724: catch (IOException e)
1725: {
1726: IOException e2 = new IOException("error resolving " + url);
1727: e2.initCause(e);
1728: throw e2;
1729: }
1730: }
1731:
1732:
1735: private void popInput()
1736: {
1737: Input old = (Input) inputStack.removeLast();
1738: if (old.report)
1739: endEntityStack.addFirst(old.name);
1740: input = (Input) inputStack.getLast();
1741: }
1742:
1743:
1746: private void readTextDecl()
1747: throws IOException, XMLStreamException
1748: {
1749: final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1750: requireWhitespace();
1751: if (tryRead("version"))
1752: {
1753: readEq();
1754: String v = readLiteral(flags, false);
1755: if ("1.0".equals(v))
1756: input.xml11 = false;
1757: else if ("1.1".equals(v))
1758: {
1759: Input i1 = (Input) inputStack.getFirst();
1760: if (!i1.xml11)
1761: error("external entity specifies later version number");
1762: input.xml11 = true;
1763: }
1764: else
1765: throw new XMLStreamException("illegal XML version: " + v);
1766: requireWhitespace();
1767: }
1768: require("encoding");
1769: readEq();
1770: String enc = readLiteral(flags, false);
1771: skipWhitespace();
1772: require("?>");
1773: input.setInputEncoding(enc);
1774: }
1775:
1776:
1779: private void readXMLDecl()
1780: throws IOException, XMLStreamException
1781: {
1782: final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1783:
1784: requireWhitespace();
1785: require("version");
1786: readEq();
1787: xmlVersion = readLiteral(flags, false);
1788: if ("1.0".equals(xmlVersion))
1789: input.xml11 = false;
1790: else if ("1.1".equals(xmlVersion))
1791: input.xml11 = true;
1792: else
1793: throw new XMLStreamException("illegal XML version: " + xmlVersion);
1794:
1795: boolean white = tryWhitespace();
1796:
1797: if (tryRead("encoding"))
1798: {
1799: if (!white)
1800: error("whitespace required before 'encoding='");
1801: readEq();
1802: xmlEncoding = readLiteral(flags, false);
1803: white = tryWhitespace();
1804: }
1805:
1806: if (tryRead("standalone"))
1807: {
1808: if (!white)
1809: error("whitespace required before 'standalone='");
1810: readEq();
1811: String standalone = readLiteral(flags, false);
1812: if ("yes".equals(standalone))
1813: xmlStandalone = Boolean.TRUE;
1814: else if ("no".equals(standalone))
1815: xmlStandalone = Boolean.FALSE;
1816: else
1817: error("standalone flag must be 'yes' or 'no'", standalone);
1818: }
1819:
1820: skipWhitespace();
1821: require("?>");
1822: if (xmlEncoding != null)
1823: input.setInputEncoding(xmlEncoding);
1824: }
1825:
1826:
1829: private void readDoctypeDecl()
1830: throws IOException, XMLStreamException
1831: {
1832: if (!supportDTD)
1833: error("parser was configured not to support DTDs");
1834: requireWhitespace();
1835: String rootName = readNmtoken(true);
1836: skipWhitespace();
1837: ExternalIds ids = readExternalIds(false, true);
1838: doctype =
1839: this.new Doctype(rootName, ids.publicId, ids.systemId);
1840:
1841:
1842: skipWhitespace();
1843: if (tryRead('['))
1844: {
1845: while (true)
1846: {
1847: expandPE = true;
1848: skipWhitespace();
1849: expandPE = false;
1850: if (tryRead(']'))
1851: break;
1852: else
1853: readMarkupdecl(false);
1854: }
1855: }
1856: skipWhitespace();
1857: require('>');
1858:
1859:
1860: if (ids.systemId != null && externalEntities)
1861: {
1862: pushInput("", ">", false, false);
1863: pushInput("[dtd]", ids, true, true);
1864:
1865: while (true)
1866: {
1867: expandPE = true;
1868: skipWhitespace();
1869: expandPE = false;
1870: mark(1);
1871: int c = readCh();
1872: if (c == 0x3e)
1873: break;
1874: else if (c == -1)
1875: popInput();
1876: else
1877: {
1878: reset();
1879: expandPE = true;
1880: readMarkupdecl(true);
1881: expandPE = true;
1882: }
1883: }
1884: if (inputStack.size() != 2)
1885: error("external subset has unmatched '>'");
1886: popInput();
1887: }
1888: checkDoctype();
1889: if (validating)
1890: validateDoctype();
1891:
1892:
1893: buf.setLength(0);
1894: buf.append(rootName);
1895: }
1896:
1897:
1900: private void checkDoctype()
1901: throws XMLStreamException
1902: {
1903:
1904: }
1905:
1906:
1909: private void readMarkupdecl(boolean inExternalSubset)
1910: throws IOException, XMLStreamException
1911: {
1912: boolean saved = expandPE;
1913: mark(1);
1914: require('<');
1915: reset();
1916: expandPE = false;
1917: if (tryRead(TEST_ELEMENT_DECL))
1918: {
1919: expandPE = saved;
1920: readElementDecl();
1921: }
1922: else if (tryRead(TEST_ATTLIST_DECL))
1923: {
1924: expandPE = saved;
1925: readAttlistDecl();
1926: }
1927: else if (tryRead(TEST_ENTITY_DECL))
1928: {
1929: expandPE = saved;
1930: readEntityDecl(inExternalSubset);
1931: }
1932: else if (tryRead(TEST_NOTATION_DECL))
1933: {
1934: expandPE = saved;
1935: readNotationDecl(inExternalSubset);
1936: }
1937: else if (tryRead(TEST_PI))
1938: {
1939: readPI(true);
1940: expandPE = saved;
1941: }
1942: else if (tryRead(TEST_COMMENT))
1943: {
1944: readComment(true);
1945: expandPE = saved;
1946: }
1947: else if (tryRead("<!["))
1948: {
1949:
1950: expandPE = saved;
1951: if (inputStack.size() < 2)
1952: error("conditional sections illegal in internal subset");
1953: skipWhitespace();
1954: if (tryRead("INCLUDE"))
1955: {
1956: skipWhitespace();
1957: require('[');
1958: skipWhitespace();
1959: while (!tryRead("]]>"))
1960: {
1961: readMarkupdecl(inExternalSubset);
1962: skipWhitespace();
1963: }
1964: }
1965: else if (tryRead("IGNORE"))
1966: {
1967: skipWhitespace();
1968: require('[');
1969: expandPE = false;
1970: for (int nesting = 1; nesting > 0; )
1971: {
1972: int c = readCh();
1973: switch (c)
1974: {
1975: case 0x3c:
1976: if (tryRead("!["))
1977: nesting++;
1978: break;
1979: case 0x5d:
1980: if (tryRead("]>"))
1981: nesting--;
1982: break;
1983: case -1:
1984: throw new EOFException();
1985: }
1986: }
1987: expandPE = saved;
1988: }
1989: else
1990: error("conditional section must begin with INCLUDE or IGNORE");
1991: }
1992: else
1993: error("expected markup declaration");
1994: }
1995:
1996:
1999: private void readElementDecl()
2000: throws IOException, XMLStreamException
2001: {
2002: requireWhitespace();
2003: boolean saved = expandPE;
2004: expandPE = (inputStack.size() > 1);
2005: String name = readNmtoken(true);
2006: expandPE = saved;
2007: requireWhitespace();
2008: readContentspec(name);
2009: skipWhitespace();
2010: require('>');
2011: }
2012:
2013:
2016: private void readContentspec(String elementName)
2017: throws IOException, XMLStreamException
2018: {
2019: if (tryRead("EMPTY"))
2020: doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel());
2021: else if (tryRead("ANY"))
2022: doctype.addElementDecl(elementName, "ANY", new AnyContentModel());
2023: else
2024: {
2025: ContentModel model;
2026: CPStringBuilder acc = new CPStringBuilder();
2027: require('(');
2028: acc.append('(');
2029: skipWhitespace();
2030: if (tryRead("#PCDATA"))
2031: {
2032:
2033: acc.append("#PCDATA");
2034: MixedContentModel mm = new MixedContentModel();
2035: model = mm;
2036: skipWhitespace();
2037: if (tryRead(')'))
2038: {
2039: acc.append(")");
2040: if (tryRead('*'))
2041: {
2042: mm.min = 0;
2043: mm.max = -1;
2044: }
2045: }
2046: else
2047: {
2048: while (!tryRead(")"))
2049: {
2050: require('|');
2051: acc.append('|');
2052: skipWhitespace();
2053: String name = readNmtoken(true);
2054: acc.append(name);
2055: mm.addName(name);
2056: skipWhitespace();
2057: }
2058: require('*');
2059: acc.append(")*");
2060: mm.min = 0;
2061: mm.max = -1;
2062: }
2063: }
2064: else
2065: model = readElements(acc);
2066: doctype.addElementDecl(elementName, acc.toString(), model);
2067: }
2068: }
2069:
2070:
2073: private ElementContentModel readElements(CPStringBuilder acc)
2074: throws IOException, XMLStreamException
2075: {
2076: int separator;
2077: ElementContentModel model = new ElementContentModel();
2078:
2079:
2080: skipWhitespace();
2081: model.addContentParticle(readContentParticle(acc));
2082:
2083: skipWhitespace();
2084: int c = readCh();
2085: switch (c)
2086: {
2087: case 0x29:
2088: acc.append(')');
2089: mark(1);
2090: c = readCh();
2091: switch (c)
2092: {
2093: case 0x3f:
2094: acc.append('?');
2095: model.min = 0;
2096: model.max = 1;
2097: break;
2098: case 0x2a:
2099: acc.append('*');
2100: model.min = 0;
2101: model.max = -1;
2102: break;
2103: case 0x2b:
2104: acc.append('+');
2105: model.min = 1;
2106: model.max = -1;
2107: break;
2108: default:
2109: reset();
2110: }
2111: return model;
2112: case 0x7c:
2113: model.or = true;
2114:
2115: case 0x2c:
2116: separator = c;
2117: acc.append(Character.toChars(c));
2118: break;
2119: default:
2120: error("bad separator in content model",
2121: "U+" + Integer.toHexString(c));
2122: return model;
2123: }
2124:
2125: while (true)
2126: {
2127: skipWhitespace();
2128: model.addContentParticle(readContentParticle(acc));
2129: skipWhitespace();
2130: c = readCh();
2131: if (c == 0x29)
2132: {
2133: acc.append(')');
2134: break;
2135: }
2136: else if (c != separator)
2137: {
2138: error("bad separator in content model",
2139: "U+" + Integer.toHexString(c));
2140: return model;
2141: }
2142: else
2143: acc.append(c);
2144: }
2145:
2146: mark(1);
2147: c = readCh();
2148: switch (c)
2149: {
2150: case 0x3f:
2151: acc.append('?');
2152: model.min = 0;
2153: model.max = 1;
2154: break;
2155: case 0x2a:
2156: acc.append('*');
2157: model.min = 0;
2158: model.max = -1;
2159: break;
2160: case 0x2b:
2161: acc.append('+');
2162: model.min = 1;
2163: model.max = -1;
2164: break;
2165: default:
2166: reset();
2167: }
2168: return model;
2169: }
2170:
2171:
2174: private ContentParticle readContentParticle(CPStringBuilder acc)
2175: throws IOException, XMLStreamException
2176: {
2177: ContentParticle cp = new ContentParticle();
2178: if (tryRead('('))
2179: {
2180: acc.append('(');
2181: cp.content = readElements(acc);
2182: }
2183: else
2184: {
2185: String name = readNmtoken(true);
2186: acc.append(name);
2187: cp.content = name;
2188: mark(1);
2189: int c = readCh();
2190: switch (c)
2191: {
2192: case 0x3f:
2193: acc.append('?');
2194: cp.min = 0;
2195: cp.max = 1;
2196: break;
2197: case 0x2a:
2198: acc.append('*');
2199: cp.min = 0;
2200: cp.max = -1;
2201: break;
2202: case 0x2b:
2203: acc.append('+');
2204: cp.min = 1;
2205: cp.max = -1;
2206: break;
2207: default:
2208: reset();
2209: }
2210: }
2211: return cp;
2212: }
2213:
2214:
2217: private void readAttlistDecl()
2218: throws IOException, XMLStreamException
2219: {
2220: requireWhitespace();
2221: boolean saved = expandPE;
2222: expandPE = (inputStack.size() > 1);
2223: String elementName = readNmtoken(true);
2224: expandPE = saved;
2225: boolean white = tryWhitespace();
2226: while (!tryRead('>'))
2227: {
2228: if (!white)
2229: error("whitespace required before attribute definition");
2230: readAttDef(elementName);
2231: white = tryWhitespace();
2232: }
2233: }
2234:
2235:
2238: private void readAttDef(String elementName)
2239: throws IOException, XMLStreamException
2240: {
2241: String name = readNmtoken(true);
2242: requireWhitespace();
2243: CPStringBuilder acc = new CPStringBuilder();
2244: HashSet values = new HashSet();
2245: String type = readAttType(acc, values);
2246: if (validating)
2247: {
2248: if ("ID".equals(type))
2249: {
2250:
2251: for (Iterator i = doctype.attlistIterator(elementName);
2252: i.hasNext(); )
2253: {
2254: Map.Entry entry = (Map.Entry) i.next();
2255: AttributeDecl decl = (AttributeDecl) entry.getValue();
2256: if ("ID".equals(decl.type))
2257: error("element types must not have more than one ID " +
2258: "attribute");
2259: }
2260: }
2261: else if ("NOTATION".equals(type))
2262: {
2263:
2264: for (Iterator i = doctype.attlistIterator(elementName);
2265: i.hasNext(); )
2266: {
2267: Map.Entry entry = (Map.Entry) i.next();
2268: AttributeDecl decl = (AttributeDecl) entry.getValue();
2269: if ("NOTATION".equals(decl.type))
2270: error("element types must not have more than one NOTATION " +
2271: "attribute");
2272: }
2273:
2274: ContentModel model = doctype.getElementModel(elementName);
2275: if (model != null && model.type == ContentModel.EMPTY)
2276: error("attributes of type NOTATION must not be declared on an " +
2277: "element declared EMPTY");
2278: }
2279: }
2280: String enumer = null;
2281: if ("ENUMERATION".equals(type) || "NOTATION".equals(type))
2282: enumer = acc.toString();
2283: else
2284: values = null;
2285: requireWhitespace();
2286: readDefault(elementName, name, type, enumer, values);
2287: }
2288:
2289:
2292: private String readAttType(CPStringBuilder acc, HashSet values)
2293: throws IOException, XMLStreamException
2294: {
2295: if (tryRead('('))
2296: {
2297: readEnumeration(false, acc, values);
2298: return "ENUMERATION";
2299: }
2300: else
2301: {
2302: String typeString = readNmtoken(true);
2303: if ("NOTATION".equals(typeString))
2304: {
2305: readNotationType(acc, values);
2306: return typeString;
2307: }
2308: else if ("CDATA".equals(typeString) ||
2309: "ID".equals(typeString) ||
2310: "IDREF".equals(typeString) ||
2311: "IDREFS".equals(typeString) ||
2312: "ENTITY".equals(typeString) ||
2313: "ENTITIES".equals(typeString) ||
2314: "NMTOKEN".equals(typeString) ||
2315: "NMTOKENS".equals(typeString))
2316: return typeString;
2317: else
2318: {
2319: error("illegal attribute type", typeString);
2320: return null;
2321: }
2322: }
2323: }
2324:
2325:
2328: private void readEnumeration(boolean isNames, CPStringBuilder acc,
2329: HashSet values)
2330: throws IOException, XMLStreamException
2331: {
2332: acc.append('(');
2333:
2334: skipWhitespace();
2335: String token = readNmtoken(isNames);
2336: acc.append(token);
2337: values.add(token);
2338:
2339: skipWhitespace();
2340: while (!tryRead(')'))
2341: {
2342: require('|');
2343: acc.append('|');
2344: skipWhitespace();
2345: token = readNmtoken(isNames);
2346:
2347: if (validating && values.contains(token))
2348: error("duplicate token", token);
2349: acc.append(token);
2350: values.add(token);
2351: skipWhitespace();
2352: }
2353: acc.append(')');
2354: }
2355:
2356:
2359: private void readNotationType(CPStringBuilder acc, HashSet values)
2360: throws IOException, XMLStreamException
2361: {
2362: requireWhitespace();
2363: require('(');
2364: readEnumeration(true, acc, values);
2365: }
2366:
2367:
2370: private void readDefault(String elementName, String name,
2371: String type, String enumeration, HashSet values)
2372: throws IOException, XMLStreamException
2373: {
2374: int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
2375: int flags = LIT_ATTRIBUTE;
2376: String value = null, defaultType = null;
2377: boolean saved = expandPE;
2378:
2379: if (!"CDATA".equals(type))
2380: flags |= LIT_NORMALIZE;
2381:
2382: expandPE = false;
2383: if (tryRead('#'))
2384: {
2385: if (tryRead("FIXED"))
2386: {
2387: defaultType = "#FIXED";
2388: valueType = ATTRIBUTE_DEFAULT_FIXED;
2389: requireWhitespace();
2390: value = readLiteral(flags, false);
2391: }
2392: else if (tryRead("REQUIRED"))
2393: {
2394: defaultType = "#REQUIRED";
2395: valueType = ATTRIBUTE_DEFAULT_REQUIRED;
2396: }
2397: else if (tryRead("IMPLIED"))
2398: {
2399: defaultType = "#IMPLIED";
2400: valueType = ATTRIBUTE_DEFAULT_IMPLIED;
2401: }
2402: else
2403: error("illegal keyword for attribute default value");
2404: }
2405: else
2406: value = readLiteral(flags, false);
2407: expandPE = saved;
2408: if (validating)
2409: {
2410: if ("ID".equals(type))
2411: {
2412:
2413: if (value != null && !isNmtoken(value, true))
2414: error("default value must match Name production", value);
2415:
2416: if (valueType != ATTRIBUTE_DEFAULT_REQUIRED &&
2417: valueType != ATTRIBUTE_DEFAULT_IMPLIED)
2418: error("ID attributes must have a declared default of " +
2419: "#IMPLIED or #REQUIRED");
2420: }
2421: else if (value != null)
2422: {
2423:
2424: if ("IDREF".equals(type) || "ENTITY".equals(type))
2425: {
2426: if (!isNmtoken(value, true))
2427: error("default value must match Name production", value);
2428: }
2429: else if ("IDREFS".equals(type) || "ENTITIES".equals(type))
2430: {
2431: StringTokenizer st = new StringTokenizer(value);
2432: while (st.hasMoreTokens())
2433: {
2434: String token = st.nextToken();
2435: if (!isNmtoken(token, true))
2436: error("default value must match Name production", token);
2437: }
2438: }
2439: else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type))
2440: {
2441: if (!isNmtoken(value, false))
2442: error("default value must match Nmtoken production", value);
2443: }
2444: else if ("NMTOKENS".equals(type))
2445: {
2446: StringTokenizer st = new StringTokenizer(value);
2447: while (st.hasMoreTokens())
2448: {
2449: String token = st.nextToken();
2450: if (!isNmtoken(token, false))
2451: error("default value must match Nmtoken production",
2452: token);
2453: }
2454: }
2455: }
2456: }
2457:
2458: AttributeDecl attribute =
2459: new AttributeDecl(type, value, valueType, enumeration, values,
2460: inputStack.size() != 1);
2461: doctype.addAttributeDecl(elementName, name, attribute);
2462: }
2463:
2464:
2467: private void readEntityDecl(boolean inExternalSubset)
2468: throws IOException, XMLStreamException
2469: {
2470: int flags = 0;
2471:
2472: boolean peFlag = false;
2473: expandPE = false;
2474: requireWhitespace();
2475: if (tryRead('%'))
2476: {
2477: peFlag = true;
2478: requireWhitespace();
2479: }
2480: expandPE = true;
2481:
2482: String name = readNmtoken(true);
2483: if (name.indexOf(':') != -1)
2484: error("illegal character ':' in entity name", name);
2485: if (peFlag)
2486: name = "%" + name;
2487: requireWhitespace();
2488: mark(1);
2489: int c = readCh();
2490: reset();
2491: if (c == 0x22 || c == 0x27)
2492: {
2493:
2494: String value = readLiteral(flags | LIT_DISABLE_EREF, true);
2495: int ai = value.indexOf('&');
2496: while (ai != -1)
2497: {
2498: int sci = value.indexOf(';', ai);
2499: if (sci == -1)
2500: error("malformed reference in entity value", value);
2501: String ref = value.substring(ai + 1, sci);
2502: int[] cp = UnicodeReader.toCodePointArray(ref);
2503: if (cp.length == 0)
2504: error("malformed reference in entity value", value);
2505: if (cp[0] == 0x23)
2506: {
2507: if (cp.length == 1)
2508: error("malformed reference in entity value", value);
2509: if (cp[1] == 0x78)
2510: {
2511: if (cp.length == 2)
2512: error("malformed reference in entity value", value);
2513: for (int i = 2; i < cp.length; i++)
2514: {
2515: int x = cp[i];
2516: if (x < 0x30 ||
2517: (x > 0x39 && x < 0x41) ||
2518: (x > 0x46 && x < 0x61) ||
2519: x > 0x66)
2520: error("malformed character reference in entity value",
2521: value);
2522: }
2523: }
2524: else
2525: {
2526: for (int i = 1; i < cp.length; i++)
2527: {
2528: int x = cp[i];
2529: if (x < 0x30 || x > 0x39)
2530: error("malformed character reference in entity value",
2531: value);
2532: }
2533: }
2534: }
2535: else
2536: {
2537: if (!isNameStartCharacter(cp[0], input.xml11))
2538: error("malformed reference in entity value", value);
2539: for (int i = 1; i < cp.length; i++)
2540: {
2541: if (!isNameCharacter(cp[i], input.xml11))
2542: error("malformed reference in entity value", value);
2543: }
2544: }
2545: ai = value.indexOf('&', sci);
2546: }
2547: doctype.addEntityDecl(name, value, inExternalSubset);
2548: }
2549: else
2550: {
2551: ExternalIds ids = readExternalIds(false, false);
2552:
2553: boolean white = tryWhitespace();
2554: if (!peFlag && tryRead("NDATA"))
2555: {
2556: if (!white)
2557: error("whitespace required before NDATA");
2558: requireWhitespace();
2559: ids.notationName = readNmtoken(true);
2560: }
2561: doctype.addEntityDecl(name, ids, inExternalSubset);
2562: }
2563:
2564: skipWhitespace();
2565: require('>');
2566: }
2567:
2568:
2571: private void readNotationDecl(boolean inExternalSubset)
2572: throws IOException, XMLStreamException
2573: {
2574: requireWhitespace();
2575: String notationName = readNmtoken(true);
2576: if (notationName.indexOf(':') != -1)
2577: error("illegal character ':' in notation name", notationName);
2578: if (validating)
2579: {
2580:
2581: ExternalIds notation = doctype.getNotation(notationName);
2582: if (notation != null)
2583: error("duplicate notation name", notationName);
2584: }
2585: requireWhitespace();
2586: ExternalIds ids = readExternalIds(true, false);
2587: ids.notationName = notationName;
2588: doctype.addNotationDecl(notationName, ids, inExternalSubset);
2589: skipWhitespace();
2590: require('>');
2591: }
2592:
2593:
2596: private ExternalIds readExternalIds(boolean inNotation, boolean isSubset)
2597: throws IOException, XMLStreamException
2598: {
2599: int c;
2600: int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
2601: ExternalIds ids = new ExternalIds();
2602:
2603: if (tryRead("PUBLIC"))
2604: {
2605: requireWhitespace();
2606: ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false);
2607: if (inNotation)
2608: {
2609: skipWhitespace();
2610: mark(1);
2611: c = readCh();
2612: reset();
2613: if (c == 0x22 || c == 0x27)
2614: {
2615: String href = readLiteral(flags, false);
2616: ids.systemId = absolutize(input.systemId, href);
2617: }
2618: }
2619: else
2620: {
2621: requireWhitespace();
2622: String href = readLiteral(flags, false);
2623: ids.systemId = absolutize(input.systemId, href);
2624: }
2625:
2626: for (int i = 0; i < ids.publicId.length(); i++)
2627: {
2628: char d = ids.publicId.charAt(i);
2629: if (d >= 'a' && d <= 'z')
2630: continue;
2631: if (d >= 'A' && d <= 'Z')
2632: continue;
2633: if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1)
2634: continue;
2635: error("illegal PUBLIC id character",
2636: "U+" + Integer.toHexString(d));
2637: }
2638: }
2639: else if (tryRead("SYSTEM"))
2640: {
2641: requireWhitespace();
2642: String href = readLiteral(flags, false);
2643: ids.systemId = absolutize(input.systemId, href);
2644: }
2645: else if (!isSubset)
2646: {
2647: error("missing SYSTEM or PUBLIC keyword");
2648: }
2649: if (ids.systemId != null && !inNotation)
2650: {
2651: if (ids.systemId.indexOf('#') != -1)
2652: error("SYSTEM id has a URI fragment", ids.systemId);
2653: }
2654: return ids;
2655: }
2656:
2657:
2661: private int readStartElement()
2662: throws IOException, XMLStreamException
2663: {
2664:
2665: String elementName = readNmtoken(true);
2666: attrs.clear();
2667:
2668: if (namespaceAware)
2669: {
2670: if (elementName.charAt(0) == ':' ||
2671: elementName.charAt(elementName.length() - 1) == ':')
2672: error("not a QName", elementName);
2673: namespaces.addFirst(new LinkedHashMap());
2674: }
2675:
2676: boolean white = tryWhitespace();
2677: mark(1);
2678: int c = readCh();
2679: while (c != 0x2f && c != 0x3e)
2680: {
2681:
2682: reset();
2683: if (!white)
2684: error("need whitespace between attributes");
2685: readAttribute(elementName);
2686: white = tryWhitespace();
2687: mark(1);
2688: c = readCh();
2689: }
2690:
2691: if (doctype != null)
2692: {
2693: for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); )
2694: {
2695: Map.Entry entry = (Map.Entry) i.next();
2696: String attName = (String) entry.getKey();
2697: AttributeDecl decl = (AttributeDecl) entry.getValue();
2698: if (validating)
2699: {
2700: switch (decl.valueType)
2701: {
2702: case ATTRIBUTE_DEFAULT_REQUIRED:
2703:
2704: if (decl.value == null && !attributeSpecified(attName))
2705: error("value for " + attName + " attribute is required");
2706: break;
2707: case ATTRIBUTE_DEFAULT_FIXED:
2708:
2709: for (Iterator j = attrs.iterator(); j.hasNext(); )
2710: {
2711: Attribute a = (Attribute) j.next();
2712: if (attName.equals(a.name) &&
2713: !decl.value.equals(a.value))
2714: error("value for " + attName + " attribute must be " +
2715: decl.value);
2716: }
2717: break;
2718: }
2719: }
2720: if (namespaceAware && attName.equals("xmlns"))
2721: {
2722: LinkedHashMap ctx =
2723: (LinkedHashMap) namespaces.getFirst();
2724: if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2725: continue;
2726: }
2727: else if (namespaceAware && attName.startsWith("xmlns:"))
2728: {
2729: LinkedHashMap ctx =
2730: (LinkedHashMap) namespaces.getFirst();
2731: if (ctx.containsKey(attName.substring(6)))
2732: continue;
2733: }
2734: else if (attributeSpecified(attName))
2735: continue;
2736: if (decl.value == null)
2737: continue;
2738:
2739: if (validating && decl.external && xmlStandalone == Boolean.TRUE)
2740: error("standalone must be 'no' if attributes inherit values " +
2741: "from externally declared markup declarations");
2742: Attribute attr =
2743: new Attribute(attName, decl.type, false, decl.value);
2744: if (namespaceAware)
2745: {
2746: if (!addNamespace(attr))
2747: attrs.add(attr);
2748: }
2749: else
2750: attrs.add(attr);
2751: }
2752: }
2753: if (baseAware)
2754: {
2755: String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base");
2756: String base = getXMLBase();
2757: bases.addFirst(absolutize(base, uri));
2758: }
2759: if (namespaceAware)
2760: {
2761:
2762: int ci = elementName.indexOf(':');
2763: if (ci != -1)
2764: {
2765: String prefix = elementName.substring(0, ci);
2766: String uri = getNamespaceURI(prefix);
2767: if (uri == null)
2768: error("unbound element prefix", prefix);
2769: else if (input.xml11 && "".equals(uri))
2770: error("XML 1.1 unbound element prefix", prefix);
2771: }
2772: for (Iterator i = attrs.iterator(); i.hasNext(); )
2773: {
2774: Attribute attr = (Attribute) i.next();
2775: if (attr.prefix != null &&
2776: !XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix))
2777: {
2778: String uri = getNamespaceURI(attr.prefix);
2779: if (uri == null)
2780: error("unbound attribute prefix", attr.prefix);
2781: else if (input.xml11 && "".equals(uri))
2782: error("XML 1.1 unbound attribute prefix", attr.prefix);
2783: }
2784: }
2785: }
2786: if (validating && doctype != null)
2787: {
2788: validateStartElement(elementName);
2789: currentContentModel = doctype.getElementModel(elementName);
2790: if (currentContentModel == null)
2791: error("no element declaration", elementName);
2792: validationStack.add(new LinkedList());
2793: }
2794:
2795: buf.setLength(0);
2796: buf.append(elementName);
2797:
2798: stack.addLast(elementName);
2799: switch (c)
2800: {
2801: case 0x3e:
2802: return CONTENT;
2803: case 0x2f:
2804: require('>');
2805: return EMPTY_ELEMENT;
2806: }
2807: return -1;
2808: }
2809:
2810:
2814: private boolean attributeSpecified(String attName)
2815: {
2816: for (Iterator j = attrs.iterator(); j.hasNext(); )
2817: {
2818: Attribute a = (Attribute) j.next();
2819: if (attName.equals(a.name))
2820: return true;
2821: }
2822: return false;
2823: }
2824:
2825:
2828: private void readAttribute(String elementName)
2829: throws IOException, XMLStreamException
2830: {
2831:
2832: String attributeName = readNmtoken(true);
2833: String type = getAttributeType(elementName, attributeName);
2834: readEq();
2835:
2836: final int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF;
2837: String value = (type == null || "CDATA".equals(type)) ?
2838: readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false);
2839:
2840: Attribute attr = this.new Attribute(attributeName, type, true, value);
2841: if (namespaceAware)
2842: {
2843: if (attributeName.charAt(0) == ':' ||
2844: attributeName.charAt(attributeName.length() - 1) == ':')
2845: error("not a QName", attributeName);
2846: else if (attributeName.equals("xmlns"))
2847: {
2848: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2849: if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2850: error("duplicate default namespace");
2851: }
2852: else if (attributeName.startsWith("xmlns:"))
2853: {
2854: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2855: if (ctx.containsKey(attributeName.substring(6)))
2856: error("duplicate namespace", attributeName.substring(6));
2857: }
2858: else if (attrs.contains(attr))
2859: error("duplicate attribute", attributeName);
2860: }
2861: else if (attrs.contains(attr))
2862: error("duplicate attribute", attributeName);
2863: if (validating && doctype != null)
2864: {
2865:
2866: AttributeDecl decl =
2867: doctype.getAttributeDecl(elementName, attributeName);
2868: if (decl == null)
2869: error("attribute must be declared", attributeName);
2870: if ("ENUMERATION".equals(decl.type))
2871: {
2872:
2873: if (!decl.values.contains(value))
2874: error("value does not match enumeration " + decl.enumeration,
2875: value);
2876: }
2877: else if ("ID".equals(decl.type))
2878: {
2879:
2880: if (!isNmtoken(value, true))
2881: error("ID values must match the Name production");
2882: if (ids.contains(value))
2883: error("Duplicate ID", value);
2884: ids.add(value);
2885: }
2886: else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type))
2887: {
2888: StringTokenizer st = new StringTokenizer(value);
2889: while (st.hasMoreTokens())
2890: {
2891: String token = st.nextToken();
2892:
2893: if (!isNmtoken(token, true))
2894: error("IDREF values must match the Name production");
2895: idrefs.add(token);
2896: }
2897: }
2898: else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type))
2899: {
2900: StringTokenizer st = new StringTokenizer(value);
2901: while (st.hasMoreTokens())
2902: {
2903: String token = st.nextToken();
2904:
2905: if (!isNmtoken(token, false))
2906: error("NMTOKEN values must match the Nmtoken production");
2907: }
2908: }
2909: else if ("ENTITY".equals(decl.type))
2910: {
2911:
2912: if (!isNmtoken(value, true))
2913: error("ENTITY values must match the Name production");
2914: Object entity = doctype.getEntity(value);
2915: if (entity == null || !(entity instanceof ExternalIds) ||
2916: ((ExternalIds) entity).notationName == null)
2917: error("ENTITY values must match the name of an unparsed " +
2918: "entity declared in the DTD");
2919: }
2920: else if ("NOTATION".equals(decl.type))
2921: {
2922: if (!decl.values.contains(value))
2923: error("NOTATION values must match a declared notation name",
2924: value);
2925:
2926: ExternalIds notation = doctype.getNotation(value);
2927: if (notation == null)
2928: error("NOTATION values must match the name of a notation " +
2929: "declared in the DTD", value);
2930: }
2931: }
2932: if (namespaceAware)
2933: {
2934: if (!addNamespace(attr))
2935: attrs.add(attr);
2936: }
2937: else
2938: attrs.add(attr);
2939: }
2940:
2941:
2946: private boolean addNamespace(Attribute attr)
2947: throws XMLStreamException
2948: {
2949: if ("xmlns".equals(attr.name))
2950: {
2951: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2952: if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null)
2953: error("Duplicate default namespace declaration");
2954: if (XMLConstants.XML_NS_URI.equals(attr.value))
2955: error("can't bind XML namespace");
2956: ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value);
2957: return true;
2958: }
2959: else if ("xmlns".equals(attr.prefix))
2960: {
2961: LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2962: if (ctx.get(attr.localName) != null)
2963: error("Duplicate namespace declaration for prefix",
2964: attr.localName);
2965: if (XMLConstants.XML_NS_PREFIX.equals(attr.localName))
2966: {
2967: if (!XMLConstants.XML_NS_URI.equals(attr.value))
2968: error("can't redeclare xml prefix");
2969: else
2970: return false;
2971: }
2972: if (XMLConstants.XML_NS_URI.equals(attr.value))
2973: error("can't bind non-xml prefix to XML namespace");
2974: if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName))
2975: error("can't redeclare xmlns prefix");
2976: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value))
2977: error("can't bind non-xmlns prefix to XML Namespace namespace");
2978: if ("".equals(attr.value) && !input.xml11)
2979: error("illegal use of 1.1-style prefix unbinding in 1.0 document");
2980: ctx.put(attr.localName, attr.value);
2981: return true;
2982: }
2983: return false;
2984: }
2985:
2986:
2989: private void readEndElement()
2990: throws IOException, XMLStreamException
2991: {
2992:
2993: String expected = (String) stack.removeLast();
2994: require(expected);
2995: skipWhitespace();
2996: require('>');
2997:
2998: buf.setLength(0);
2999: buf.append(expected);
3000: if (validating && doctype != null)
3001: endElementValidationHook();
3002: }
3003:
3004:
3008: private void endElementValidationHook()
3009: throws XMLStreamException
3010: {
3011: validateEndElement();
3012: validationStack.removeLast();
3013: if (stack.isEmpty())
3014: currentContentModel = null;
3015: else
3016: {
3017: String parent = (String) stack.getLast();
3018: currentContentModel = doctype.getElementModel(parent);
3019: }
3020: }
3021:
3022:
3025: private void readComment(boolean inDTD)
3026: throws IOException, XMLStreamException
3027: {
3028: boolean saved = expandPE;
3029: expandPE = false;
3030: buf.setLength(0);
3031: readUntil(TEST_END_COMMENT);
3032: require('>');
3033: expandPE = saved;
3034: if (inDTD)
3035: doctype.addComment(buf.toString());
3036: }
3037:
3038:
3041: private void readPI(boolean inDTD)
3042: throws IOException, XMLStreamException
3043: {
3044: boolean saved = expandPE;
3045: expandPE = false;
3046: piTarget = readNmtoken(true);
3047: if (piTarget.indexOf(':') != -1)
3048: error("illegal character in PI target", new Character(':'));
3049: if ("xml".equalsIgnoreCase(piTarget))
3050: error("illegal PI target", piTarget);
3051: if (tryRead(TEST_END_PI))
3052: piData = null;
3053: else
3054: {
3055: if (!tryWhitespace())
3056: error("whitespace required between PI target and data");
3057: buf.setLength(0);
3058: readUntil(TEST_END_PI);
3059: piData = buf.toString();
3060: }
3061: expandPE = saved;
3062: if (inDTD)
3063: doctype.addPI(piTarget, piData);
3064: }
3065:
3066:
3069: private void readReference()
3070: throws IOException, XMLStreamException
3071: {
3072: buf.setLength(0);
3073: String entityName = readNmtoken(true);
3074: require(';');
3075: buf.setLength(0);
3076: buf.append(entityName);
3077: }
3078:
3079:
/**
 * Reads the body of a CDATA section into <code>buf</code>.
 * The buffer is cleared first; reading continues until
 * <code>readUntil</code> finds the <code>TEST_END_CDATA</code> pattern.
 */
private void readCDSect()
  throws IOException, XMLStreamException
{
  buf.setLength(0);
  readUntil(TEST_END_CDATA);
}
3088:
3089:
3093: private int readCharData(String prefix)
3094: throws IOException, XMLStreamException
3095: {
3096: boolean white = true;
3097: buf.setLength(0);
3098: if (prefix != null)
3099: buf.append(prefix);
3100: boolean done = false;
3101: boolean entities = false;
3102: while (!done)
3103: {
3104:
3105: mark(tmpBuf.length);
3106: int len = read(tmpBuf, 0, tmpBuf.length);
3107: if (len == -1)
3108: {
3109: if (inputStack.size() > 1)
3110: {
3111: popInput();
3112:
3113: done = true;
3114: }
3115: else
3116: throw new EOFException();
3117: }
3118: for (int i = 0; i < len && !done; i++)
3119: {
3120: int c = tmpBuf[i];
3121: switch (c)
3122: {
3123: case 0x20:
3124: case 0x09:
3125: case 0x0a:
3126: case 0x0d:
3127: buf.append(Character.toChars(c));
3128: break;
3129: case 0x26:
3130: reset();
3131: read(tmpBuf, 0, i);
3132:
3133: mark(3);
3134: c = readCh();
3135: c = readCh();
3136: if (c == 0x23)
3137: {
3138: mark(1);
3139: c = readCh();
3140: boolean hex = (c == 0x78);
3141: if (!hex)
3142: reset();
3143: char[] ch = readCharacterRef(hex ? 16 : 10);
3144: buf.append(ch, 0, ch.length);
3145: for (int j = 0; j < ch.length; j++)
3146: {
3147: switch (ch[j])
3148: {
3149: case 0x20:
3150: case 0x09:
3151: case 0x0a:
3152: case 0x0d:
3153: break;
3154: default:
3155: white = false;
3156: }
3157: }
3158: }
3159: else
3160: {
3161:
3162: reset();
3163: c = readCh();
3164: String entityName = readNmtoken(true);
3165: require(';');
3166: String text =
3167: (String) PREDEFINED_ENTITIES.get(entityName);
3168: if (text != null)
3169: buf.append(text);
3170: else
3171: {
3172: pushInput("", "&" + entityName + ";", false, false);
3173: done = true;
3174: break;
3175: }
3176: }
3177:
3178: i = -1;
3179: mark(tmpBuf.length);
3180: len = read(tmpBuf, 0, tmpBuf.length);
3181: if (len == -1)
3182: {
3183: if (inputStack.size() > 1)
3184: {
3185: popInput();
3186: done = true;
3187: }
3188: else
3189: throw new EOFException();
3190: }
3191: entities = true;
3192: break;
3193: case 0x3e:
3194: int l = buf.length();
3195: if (l > 1 &&
3196: buf.charAt(l - 1) == ']' &&
3197: buf.charAt(l - 2) == ']')
3198: error("Character data may not contain unescaped ']]>'");
3199: buf.append(Character.toChars(c));
3200: break;
3201: case 0x3c:
3202: reset();
3203:
3204: int count = 0, remaining = i;
3205: do
3206: {
3207: int r = read(tmpBuf, 0, remaining);
3208: count += r;
3209: remaining -= r;
3210: }
3211: while (count < i);
3212: i = len;
3213: if (coalescing && tryRead(TEST_CDATA))
3214: readUntil(TEST_END_CDATA);
3215: else
3216: done = true;
3217: break;
3218: default:
3219: if (input.xml11)
3220: {
3221: if (!isXML11Char(c) || isXML11RestrictedChar(c))
3222: error("illegal XML 1.1 character",
3223: "U+" + Integer.toHexString(c));
3224: }
3225: else if (!isChar(c))
3226: error("illegal XML character",
3227: "U+" + Integer.toHexString(c));
3228: white = false;
3229: buf.append(Character.toChars(c));
3230: }
3231: }
3232:
3233:
3234: if (buf.length() >= 2097152)
3235: done = true;
3236: }
3237: if (entities)
3238: normalizeCRLF(buf);
3239: return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS;
3240: }
3241:
3242:
3245: private void expandEntity(String name, boolean inAttr, boolean normalize)
3246: throws IOException, XMLStreamException
3247: {
3248: if (doctype != null)
3249: {
3250: Object value = doctype.getEntity(name);
3251: if (value != null)
3252: {
3253: if (xmlStandalone == Boolean.TRUE)
3254: {
3255:
3256: if (doctype.isEntityExternal(name))
3257: error("reference to external entity in standalone document");
3258: else if (value instanceof ExternalIds)
3259: {
3260: ExternalIds ids = (ExternalIds) value;
3261: if (ids.notationName != null &&
3262: doctype.isNotationExternal(ids.notationName))
3263: error("reference to external notation in " +
3264: "standalone document");
3265: }
3266: }
3267: if (value instanceof String)
3268: {
3269: String text = (String) value;
3270: if (inAttr && text.indexOf('<') != -1)
3271: error("< in attribute value");
3272: pushInput(name, text, !inAttr, normalize);
3273: }
3274: else if (inAttr)
3275: error("reference to external entity in attribute value", name);
3276: else
3277: pushInput(name, (ExternalIds) value, !inAttr, normalize);
3278: return;
3279: }
3280: }
3281: error("reference to undeclared entity", name);
3282: }
3283:
3284:
3287: private boolean isUnparsedEntity(String name)
3288: {
3289: if (doctype != null)
3290: {
3291: Object value = doctype.getEntity(name);
3292: if (value != null && value instanceof ExternalIds)
3293: return ((ExternalIds) value).notationName != null;
3294: }
3295: return false;
3296: }
3297:
3298:
/**
 * Reads an equals sign, allowing whitespace on either side of it.
 */
private void readEq()
  throws IOException, XMLStreamException
{
  skipWhitespace();
  require('=');
  skipWhitespace();
}
3308:
3309:
3313: private int literalReadCh(boolean recognizePEs)
3314: throws IOException, XMLStreamException
3315: {
3316: int c = recognizePEs ? readCh() : read();
3317: while (c == -1)
3318: {
3319: if (inputStack.size() > 1)
3320: {
3321: inputStack.removeLast();
3322: input = (Input) inputStack.getLast();
3323:
3324: c = recognizePEs ? readCh() : read();
3325: }
3326: else
3327: throw new EOFException();
3328: }
3329: return c;
3330: }
3331:
3332:
/**
 * Reads a quoted literal (delimited by <code>'</code> or
 * <code>"</code>) and returns its content with references expanded
 * according to <code>flags</code>.
 * @param flags a combination of the <code>LIT_*</code> constants. This
 * method consults LIT_DISABLE_PE, LIT_ATTRIBUTE, LIT_PUBID,
 * LIT_DISABLE_CREF, LIT_DISABLE_EREF and LIT_NORMALIZE.
 * @param recognizePEs passed to <code>literalReadCh</code> for every
 * character of the literal body
 * @return the literal's value
 */
private String readLiteral(int flags, boolean recognizePEs)
  throws IOException, XMLStreamException
{
  boolean saved = expandPE;
  int delim = readCh();
  if (delim != 0x27 && delim != 0x22) // neither ' nor "
    error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim));
  literalBuf.setLength(0);
  if ((flags & LIT_DISABLE_PE) != 0)
    expandPE = false;
  boolean entities = false;
  // Remember the input nesting depth at the opening delimiter.
  int inputStackSize = inputStack.size();
  do
    {
      int c = literalReadCh(recognizePEs);
      // A delimiter only closes the literal at the depth where it was
      // opened; one met inside a pushed input is ordinary content.
      if (c == delim && inputStackSize == inputStack.size())
        break;
      switch (c)
        {
        case 0x0a:
        case 0x0d:
          // Line ends become a space in attribute and public id literals.
          if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
            c = 0x20; // normalize to space
          break;
        case 0x09:
          if ((flags & LIT_ATTRIBUTE) != 0)
            c = 0x20; // normalize to space
          break;
        case 0x26: // '&'
          mark(2);
          c = readCh();
          if (c == 0x23) // '#'
            {
              if ((flags & LIT_DISABLE_CREF) != 0)
                {
                  // Keep the reference as literal text.
                  reset();
                  c = 0x26; // '&'
                }
              else
                {
                  mark(1);
                  c = readCh();
                  boolean hex = (c == 0x78); // 'x'
                  if (!hex)
                    reset();
                  char[] ref = readCharacterRef(hex ? 16 : 10);
                  for (int i = 0; i < ref.length; i++)
                    literalBuf.append(ref[i]);
                  entities = true;
                  continue;
                }
            }
          else
            {
              if ((flags & LIT_DISABLE_EREF) != 0)
                {
                  // Keep the reference as literal text.
                  reset();
                  c = 0x26; // '&'
                }
              else
                {
                  reset();
                  String entityName = readNmtoken(true);
                  require(';');
                  String text =
                    (String) PREDEFINED_ENTITIES.get(entityName);
                  if (text != null)
                    literalBuf.append(text);
                  else
                    // Other entities are expanded by pushing a new
                    // input, which the following iterations read.
                    expandEntity(entityName,
                                 (flags & LIT_ATTRIBUTE) != 0,
                                 true);
                  entities = true;
                  continue;
                }
            }
          break;
        case 0x3c: // '<'
          if ((flags & LIT_ATTRIBUTE) != 0)
            error("attribute values may not contain '<'");
          break;
        case -1:
          // NOTE(review): literalReadCh as written in this file never
          // returns -1, so this branch looks unreachable - confirm.
          if (inputStack.size() > 1)
            {
              popInput();
              continue;
            }
          throw new EOFException();
        default:
          // NOTE(review): 'c > 0xfffd' also rejects code points above
          // the BMP - confirm this is intended for literals.
          if ((c < 0x0020 || c > 0xfffd) ||
              (c >= 0xd800 && c < 0xdc00) ||
              (input.xml11 && (c >= 0x007f) &&
               (c <= 0x009f) && (c != 0x0085)))
            error("illegal character", "U+" + Integer.toHexString(c));
        }
      literalBuf.append(Character.toChars(c));
    }
  while (true);
  expandPE = saved;
  // Only literals that contained references go through normalizeCRLF.
  if (entities)
    normalizeCRLF(literalBuf);
  if ((flags & LIT_NORMALIZE) > 0)
    literalBuf = normalize(literalBuf);
  return literalBuf.toString();
}
3440:
3441:
3446: private StringBuffer normalize(StringBuffer buf)
3447: {
3448: StringBuffer acc = new StringBuffer();
3449: int len = buf.length();
3450: int avState = 0;
3451: for (int i = 0; i < len; i++)
3452: {
3453: char c = buf.charAt(i);
3454: if (c == ' ')
3455: avState = (avState == 0) ? 0 : 1;
3456: else
3457: {
3458: if (avState == 1)
3459: acc.append(' ');
3460: acc.append(c);
3461: avState = 2;
3462: }
3463: }
3464: return acc;
3465: }
3466:
3467:
3472: private void normalizeCRLF(StringBuffer buf)
3473: {
3474: int len = buf.length() - 1;
3475: for (int i = 0; i < len; i++)
3476: {
3477: char c = buf.charAt(i);
3478: if (c == '\r' && buf.charAt(i + 1) == '\n')
3479: {
3480: buf.deleteCharAt(i--);
3481: len--;
3482: }
3483: }
3484: }
3485:
3486:
3489: private void expandPEReference()
3490: throws IOException, XMLStreamException
3491: {
3492: String name = readNmtoken(true, new StringBuffer());
3493: require(';');
3494: mark(1);
3495: if (doctype != null)
3496: {
3497: String entityName = "%" + name;
3498: Object entity = doctype.getEntity(entityName);
3499: if (entity != null)
3500: {
3501: if (xmlStandalone == Boolean.TRUE)
3502: {
3503: if (doctype.isEntityExternal(entityName))
3504: error("reference to external parameter entity in " +
3505: "standalone document");
3506: }
3507: if (entity instanceof String)
3508: {
3509: pushInput(name, (String) entity, false, input.normalize);
3510:
3511: }
3512: else
3513: {
3514:
3515: pushInput(name, (ExternalIds) entity, false, input.normalize);
3516:
3517: }
3518: }
3519: else
3520: error("reference to undeclared parameter entity", name);
3521: }
3522: else
3523: error("reference to parameter entity without doctype", name);
3524: }
3525:
3526:
3530: private char[] readCharacterRef(int base)
3531: throws IOException, XMLStreamException
3532: {
3533: CPStringBuilder b = new CPStringBuilder();
3534: for (int c = readCh(); c != 0x3b && c != -1; c = readCh())
3535: b.append(Character.toChars(c));
3536: try
3537: {
3538: int ord = Integer.parseInt(b.toString(), base);
3539: if (input.xml11)
3540: {
3541: if (!isXML11Char(ord))
3542: error("illegal XML 1.1 character reference " +
3543: "U+" + Integer.toHexString(ord));
3544: }
3545: else
3546: {
3547: if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d))
3548: || (ord >= 0xd800 && ord <= 0xdfff)
3549: || ord == 0xfffe || ord == 0xffff
3550: || ord > 0x0010ffff)
3551: error("illegal XML character reference " +
3552: "U+" + Integer.toHexString(ord));
3553: }
3554: return Character.toChars(ord);
3555: }
3556: catch (NumberFormatException e)
3557: {
3558: error("illegal characters in character reference", b.toString());
3559: return null;
3560: }
3561: }
3562:
3563:
/**
 * Reads a name or name token using the shared <code>nmtokenBuf</code>
 * buffer.
 * @param isName if true the first character must be a name start
 * character, otherwise any name character is accepted there
 * @return the token read
 */
private String readNmtoken(boolean isName)
  throws IOException, XMLStreamException
{
  return readNmtoken(isName, nmtokenBuf);
}
3572:
3573:
3578: private String readNmtoken(boolean isName, StringBuffer buf)
3579: throws IOException, XMLStreamException
3580: {
3581: buf.setLength(0);
3582: int c = readCh();
3583: if (isName)
3584: {
3585: if (!isNameStartCharacter(c, input.xml11))
3586: error("not a name start character",
3587: "U+" + Integer.toHexString(c));
3588: }
3589: else
3590: {
3591: if (!isNameCharacter(c, input.xml11))
3592: error("not a name character",
3593: "U+" + Integer.toHexString(c));
3594: }
3595: buf.append(Character.toChars(c));
3596: do
3597: {
3598: mark(1);
3599: c = readCh();
3600: switch (c)
3601: {
3602: case 0x25:
3603: case 0x3c:
3604: case 0x3e:
3605: case 0x26:
3606: case 0x2c:
3607: case 0x7c:
3608: case 0x2a:
3609: case 0x2b:
3610: case 0x3f:
3611: case 0x29:
3612: case 0x3d:
3613: case 0x27:
3614: case 0x22:
3615: case 0x5b:
3616: case 0x20:
3617: case 0x09:
3618: case 0x0a:
3619: case 0x0d:
3620: case 0x3b:
3621: case 0x2f:
3622: case -1:
3623: reset();
3624: return intern(buf.toString());
3625: default:
3626: if (!isNameCharacter(c, input.xml11))
3627: error("not a name character",
3628: "U+" + Integer.toHexString(c));
3629: else
3630: buf.append(Character.toChars(c));
3631: }
3632: }
3633: while (true);
3634: }
3635:
3636:
3639: public static boolean isXML11Char(int c)
3640: {
3641: return ((c >= 0x0001 && c <= 0xD7FF) ||
3642: (c >= 0xE000 && c < 0xFFFE) ||
3643: (c >= 0x10000 && c <= 0x10FFFF));
3644: }
3645:
3646:
3650: public static boolean isXML11RestrictedChar(int c)
3651: {
3652: return ((c >= 0x0001 && c <= 0x0008) ||
3653: (c >= 0x000B && c <= 0x000C) ||
3654: (c >= 0x000E && c <= 0x001F) ||
3655: (c >= 0x007F && c <= 0x0084) ||
3656: (c >= 0x0086 && c <= 0x009F));
3657: }
3658:
3659:
3663: private boolean isNmtoken(String text, boolean isName)
3664: {
3665: try
3666: {
3667: int[] cp = UnicodeReader.toCodePointArray(text);
3668: if (cp.length == 0)
3669: return false;
3670: if (isName)
3671: {
3672: if (!isNameStartCharacter(cp[0], input.xml11))
3673: return false;
3674: }
3675: else
3676: {
3677: if (!isNameCharacter(cp[0], input.xml11))
3678: return false;
3679: }
3680: for (int i = 1; i < cp.length; i++)
3681: {
3682: if (!isNameCharacter(cp[i], input.xml11))
3683: return false;
3684: }
3685: return true;
3686: }
3687: catch (IOException e)
3688: {
3689: return false;
3690: }
3691: }
3692:
3693:
3697: public static boolean isNameStartCharacter(int c, boolean xml11)
3698: {
3699: if (xml11)
3700: return ((c >= 0x0041 && c <= 0x005a) ||
3701: (c >= 0x0061 && c <= 0x007a) ||
3702: c == 0x3a |
3703: c == 0x5f |
3704: (c >= 0xC0 && c <= 0xD6) ||
3705: (c >= 0xD8 && c <= 0xF6) ||
3706: (c >= 0xF8 && c <= 0x2FF) ||
3707: (c >= 0x370 && c <= 0x37D) ||
3708: (c >= 0x37F && c <= 0x1FFF) ||
3709: (c >= 0x200C && c <= 0x200D) ||
3710: (c >= 0x2070 && c <= 0x218F) ||
3711: (c >= 0x2C00 && c <= 0x2FEF) ||
3712: (c >= 0x3001 && c <= 0xD7FF) ||
3713: (c >= 0xF900 && c <= 0xFDCF) ||
3714: (c >= 0xFDF0 && c <= 0xFFFD) ||
3715: (c >= 0x10000 && c <= 0xEFFFF));
3716: else
3717: return (c == 0x5f || c == 0x3a || isLetter(c));
3718: }
3719:
3720:
3724: public static boolean isNameCharacter(int c, boolean xml11)
3725: {
3726: if (xml11)
3727: return ((c >= 0x0041 && c <= 0x005a) ||
3728: (c >= 0x0061 && c <= 0x007a) ||
3729: (c >= 0x0030 && c <= 0x0039) ||
3730: c == 0x3a |
3731: c == 0x5f |
3732: c == 0x2d |
3733: c == 0x2e |
3734: c == 0xB7 |
3735: (c >= 0xC0 && c <= 0xD6) ||
3736: (c >= 0xD8 && c <= 0xF6) ||
3737: (c >= 0xF8 && c <= 0x2FF) ||
3738: (c >= 0x300 && c <= 0x37D) ||
3739: (c >= 0x37F && c <= 0x1FFF) ||
3740: (c >= 0x200C && c <= 0x200D) ||
3741: (c >= 0x203F && c <= 0x2040) ||
3742: (c >= 0x2070 && c <= 0x218F) ||
3743: (c >= 0x2C00 && c <= 0x2FEF) ||
3744: (c >= 0x3001 && c <= 0xD7FF) ||
3745: (c >= 0xF900 && c <= 0xFDCF) ||
3746: (c >= 0xFDF0 && c <= 0xFFFD) ||
3747: (c >= 0x10000 && c <= 0xEFFFF));
3748: else
3749: return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a ||
3750: isLetter(c) || isDigit(c) ||
3751: isCombiningChar(c) || isExtender(c));
3752: }
3753:
3754:
/**
 * Indicates whether the specified Unicode character matches the Letter
 * production, tested as two groups of fixed code point ranges: the
 * first <code>if</code> covers the alphabetic and syllabic ranges
 * (BaseChar), the second the ideographic ones.
 * @param c the code point to test
 */
public static boolean isLetter(int c)
{
  // BaseChar
  if ((c >= 0x0041 && c <= 0x005A) ||
      (c >= 0x0061 && c <= 0x007A) ||
      (c >= 0x00C0 && c <= 0x00D6) ||
      (c >= 0x00D8 && c <= 0x00F6) ||
      (c >= 0x00F8 && c <= 0x00FF) ||
      (c >= 0x0100 && c <= 0x0131) ||
      (c >= 0x0134 && c <= 0x013E) ||
      (c >= 0x0141 && c <= 0x0148) ||
      (c >= 0x014A && c <= 0x017E) ||
      (c >= 0x0180 && c <= 0x01C3) ||
      (c >= 0x01CD && c <= 0x01F0) ||
      (c >= 0x01F4 && c <= 0x01F5) ||
      (c >= 0x01FA && c <= 0x0217) ||
      (c >= 0x0250 && c <= 0x02A8) ||
      (c >= 0x02BB && c <= 0x02C1) ||
      c == 0x0386 ||
      (c >= 0x0388 && c <= 0x038A) ||
      c == 0x038C ||
      (c >= 0x038E && c <= 0x03A1) ||
      (c >= 0x03A3 && c <= 0x03CE) ||
      (c >= 0x03D0 && c <= 0x03D6) ||
      c == 0x03DA ||
      c == 0x03DC ||
      c == 0x03DE ||
      c == 0x03E0 ||
      (c >= 0x03E2 && c <= 0x03F3) ||
      (c >= 0x0401 && c <= 0x040C) ||
      (c >= 0x040E && c <= 0x044F) ||
      (c >= 0x0451 && c <= 0x045C) ||
      (c >= 0x045E && c <= 0x0481) ||
      (c >= 0x0490 && c <= 0x04C4) ||
      (c >= 0x04C7 && c <= 0x04C8) ||
      (c >= 0x04CB && c <= 0x04CC) ||
      (c >= 0x04D0 && c <= 0x04EB) ||
      (c >= 0x04EE && c <= 0x04F5) ||
      (c >= 0x04F8 && c <= 0x04F9) ||
      (c >= 0x0531 && c <= 0x0556) ||
      c == 0x0559 ||
      (c >= 0x0561 && c <= 0x0586) ||
      (c >= 0x05D0 && c <= 0x05EA) ||
      (c >= 0x05F0 && c <= 0x05F2) ||
      (c >= 0x0621 && c <= 0x063A) ||
      (c >= 0x0641 && c <= 0x064A) ||
      (c >= 0x0671 && c <= 0x06B7) ||
      (c >= 0x06BA && c <= 0x06BE) ||
      (c >= 0x06C0 && c <= 0x06CE) ||
      (c >= 0x06D0 && c <= 0x06D3) ||
      c == 0x06D5 ||
      (c >= 0x06E5 && c <= 0x06E6) ||
      (c >= 0x0905 && c <= 0x0939) ||
      c == 0x093D ||
      (c >= 0x0958 && c <= 0x0961) ||
      (c >= 0x0985 && c <= 0x098C) ||
      (c >= 0x098F && c <= 0x0990) ||
      (c >= 0x0993 && c <= 0x09A8) ||
      (c >= 0x09AA && c <= 0x09B0) ||
      c == 0x09B2 ||
      (c >= 0x09B6 && c <= 0x09B9) ||
      (c >= 0x09DC && c <= 0x09DD) ||
      (c >= 0x09DF && c <= 0x09E1) ||
      (c >= 0x09F0 && c <= 0x09F1) ||
      (c >= 0x0A05 && c <= 0x0A0A) ||
      (c >= 0x0A0F && c <= 0x0A10) ||
      (c >= 0x0A13 && c <= 0x0A28) ||
      (c >= 0x0A2A && c <= 0x0A30) ||
      (c >= 0x0A32 && c <= 0x0A33) ||
      (c >= 0x0A35 && c <= 0x0A36) ||
      (c >= 0x0A38 && c <= 0x0A39) ||
      (c >= 0x0A59 && c <= 0x0A5C) ||
      c == 0x0A5E ||
      (c >= 0x0A72 && c <= 0x0A74) ||
      (c >= 0x0A85 && c <= 0x0A8B) ||
      c == 0x0A8D ||
      (c >= 0x0A8F && c <= 0x0A91) ||
      (c >= 0x0A93 && c <= 0x0AA8) ||
      (c >= 0x0AAA && c <= 0x0AB0) ||
      (c >= 0x0AB2 && c <= 0x0AB3) ||
      (c >= 0x0AB5 && c <= 0x0AB9) ||
      c == 0x0ABD ||
      c == 0x0AE0 ||
      (c >= 0x0B05 && c <= 0x0B0C) ||
      (c >= 0x0B0F && c <= 0x0B10) ||
      (c >= 0x0B13 && c <= 0x0B28) ||
      (c >= 0x0B2A && c <= 0x0B30) ||
      (c >= 0x0B32 && c <= 0x0B33) ||
      (c >= 0x0B36 && c <= 0x0B39) ||
      c == 0x0B3D ||
      (c >= 0x0B5C && c <= 0x0B5D) ||
      (c >= 0x0B5F && c <= 0x0B61) ||
      (c >= 0x0B85 && c <= 0x0B8A) ||
      (c >= 0x0B8E && c <= 0x0B90) ||
      (c >= 0x0B92 && c <= 0x0B95) ||
      (c >= 0x0B99 && c <= 0x0B9A) ||
      c == 0x0B9C ||
      (c >= 0x0B9E && c <= 0x0B9F) ||
      (c >= 0x0BA3 && c <= 0x0BA4) ||
      (c >= 0x0BA8 && c <= 0x0BAA) ||
      (c >= 0x0BAE && c <= 0x0BB5) ||
      (c >= 0x0BB7 && c <= 0x0BB9) ||
      (c >= 0x0C05 && c <= 0x0C0C) ||
      (c >= 0x0C0E && c <= 0x0C10) ||
      (c >= 0x0C12 && c <= 0x0C28) ||
      (c >= 0x0C2A && c <= 0x0C33) ||
      (c >= 0x0C35 && c <= 0x0C39) ||
      (c >= 0x0C60 && c <= 0x0C61) ||
      (c >= 0x0C85 && c <= 0x0C8C) ||
      (c >= 0x0C8E && c <= 0x0C90) ||
      (c >= 0x0C92 && c <= 0x0CA8) ||
      (c >= 0x0CAA && c <= 0x0CB3) ||
      (c >= 0x0CB5 && c <= 0x0CB9) ||
      c == 0x0CDE ||
      (c >= 0x0CE0 && c <= 0x0CE1) ||
      (c >= 0x0D05 && c <= 0x0D0C) ||
      (c >= 0x0D0E && c <= 0x0D10) ||
      (c >= 0x0D12 && c <= 0x0D28) ||
      (c >= 0x0D2A && c <= 0x0D39) ||
      (c >= 0x0D60 && c <= 0x0D61) ||
      (c >= 0x0E01 && c <= 0x0E2E) ||
      c == 0x0E30 ||
      (c >= 0x0E32 && c <= 0x0E33) ||
      (c >= 0x0E40 && c <= 0x0E45) ||
      (c >= 0x0E81 && c <= 0x0E82) ||
      c == 0x0E84 ||
      (c >= 0x0E87 && c <= 0x0E88) ||
      c == 0x0E8A ||
      c == 0x0E8D ||
      (c >= 0x0E94 && c <= 0x0E97) ||
      (c >= 0x0E99 && c <= 0x0E9F) ||
      (c >= 0x0EA1 && c <= 0x0EA3) ||
      c == 0x0EA5 ||
      c == 0x0EA7 ||
      (c >= 0x0EAA && c <= 0x0EAB) ||
      (c >= 0x0EAD && c <= 0x0EAE) ||
      c == 0x0EB0 ||
      (c >= 0x0EB2 && c <= 0x0EB3) ||
      c == 0x0EBD ||
      (c >= 0x0EC0 && c <= 0x0EC4) ||
      (c >= 0x0F40 && c <= 0x0F47) ||
      (c >= 0x0F49 && c <= 0x0F69) ||
      (c >= 0x10A0 && c <= 0x10C5) ||
      (c >= 0x10D0 && c <= 0x10F6) ||
      c == 0x1100 ||
      (c >= 0x1102 && c <= 0x1103) ||
      (c >= 0x1105 && c <= 0x1107) ||
      c == 0x1109 ||
      (c >= 0x110B && c <= 0x110C) ||
      (c >= 0x110E && c <= 0x1112) ||
      c == 0x113C ||
      c == 0x113E ||
      c == 0x1140 ||
      c == 0x114C ||
      c == 0x114E ||
      c == 0x1150 ||
      (c >= 0x1154 && c <= 0x1155) ||
      c == 0x1159 ||
      (c >= 0x115F && c <= 0x1161) ||
      c == 0x1163 ||
      c == 0x1165 ||
      c == 0x1167 ||
      c == 0x1169 ||
      (c >= 0x116D && c <= 0x116E) ||
      (c >= 0x1172 && c <= 0x1173) ||
      c == 0x1175 ||
      c == 0x119E ||
      c == 0x11A8 ||
      c == 0x11AB ||
      (c >= 0x11AE && c <= 0x11AF) ||
      (c >= 0x11B7 && c <= 0x11B8) ||
      c == 0x11BA ||
      (c >= 0x11BC && c <= 0x11C2) ||
      c == 0x11EB ||
      c == 0x11F0 ||
      c == 0x11F9 ||
      (c >= 0x1E00 && c <= 0x1E9B) ||
      (c >= 0x1EA0 && c <= 0x1EF9) ||
      (c >= 0x1F00 && c <= 0x1F15) ||
      (c >= 0x1F18 && c <= 0x1F1D) ||
      (c >= 0x1F20 && c <= 0x1F45) ||
      (c >= 0x1F48 && c <= 0x1F4D) ||
      (c >= 0x1F50 && c <= 0x1F57) ||
      c == 0x1F59 ||
      c == 0x1F5B ||
      c == 0x1F5D ||
      (c >= 0x1F5F && c <= 0x1F7D) ||
      (c >= 0x1F80 && c <= 0x1FB4) ||
      (c >= 0x1FB6 && c <= 0x1FBC) ||
      c == 0x1FBE ||
      (c >= 0x1FC2 && c <= 0x1FC4) ||
      (c >= 0x1FC6 && c <= 0x1FCC) ||
      (c >= 0x1FD0 && c <= 0x1FD3) ||
      (c >= 0x1FD6 && c <= 0x1FDB) ||
      (c >= 0x1FE0 && c <= 0x1FEC) ||
      (c >= 0x1FF2 && c <= 0x1FF4) ||
      (c >= 0x1FF6 && c <= 0x1FFC) ||
      c == 0x2126 ||
      (c >= 0x212A && c <= 0x212B) ||
      c == 0x212E ||
      (c >= 0x2180 && c <= 0x2182) ||
      (c >= 0x3041 && c <= 0x3094) ||
      (c >= 0x30A1 && c <= 0x30FA) ||
      (c >= 0x3105 && c <= 0x312C) ||
      (c >= 0xAC00 && c <= 0xD7A3))
    return true;
  // Ideographic
  if ((c >= 0x4e00 && c <= 0x9fa5) ||
      c == 0x3007 ||
      (c >= 0x3021 && c <= 0x3029))
    return true;
  return false;
}
3969:
3970:
3974: public static boolean isDigit(int c)
3975: {
3976: return ((c >= 0x0030 && c <= 0x0039) ||
3977: (c >= 0x0660 && c <= 0x0669) ||
3978: (c >= 0x06F0 && c <= 0x06F9) ||
3979: (c >= 0x0966 && c <= 0x096F) ||
3980: (c >= 0x09E6 && c <= 0x09EF) ||
3981: (c >= 0x0A66 && c <= 0x0A6F) ||
3982: (c >= 0x0AE6 && c <= 0x0AEF) ||
3983: (c >= 0x0B66 && c <= 0x0B6F) ||
3984: (c >= 0x0BE7 && c <= 0x0BEF) ||
3985: (c >= 0x0C66 && c <= 0x0C6F) ||
3986: (c >= 0x0CE6 && c <= 0x0CEF) ||
3987: (c >= 0x0D66 && c <= 0x0D6F) ||
3988: (c >= 0x0E50 && c <= 0x0E59) ||
3989: (c >= 0x0ED0 && c <= 0x0ED9) ||
3990: (c >= 0x0F20 && c <= 0x0F29));
3991: }
3992:
3993:
/**
 * Indicates whether the specified Unicode character matches the
 * CombiningChar production, tested as one fixed table of code points and
 * code point ranges.
 * @param c the code point to test
 */
public static boolean isCombiningChar(int c)
{
  return ((c >= 0x0300 && c <= 0x0345) ||
          (c >= 0x0360 && c <= 0x0361) ||
          (c >= 0x0483 && c <= 0x0486) ||
          (c >= 0x0591 && c <= 0x05A1) ||
          (c >= 0x05A3 && c <= 0x05B9) ||
          (c >= 0x05BB && c <= 0x05BD) ||
          c == 0x05BF ||
          (c >= 0x05C1 && c <= 0x05C2) ||
          c == 0x05C4 ||
          (c >= 0x064B && c <= 0x0652) ||
          c == 0x0670 ||
          (c >= 0x06D6 && c <= 0x06DC) ||
          (c >= 0x06DD && c <= 0x06DF) ||
          (c >= 0x06E0 && c <= 0x06E4) ||
          (c >= 0x06E7 && c <= 0x06E8) ||
          (c >= 0x06EA && c <= 0x06ED) ||
          (c >= 0x0901 && c <= 0x0903) ||
          c == 0x093C ||
          (c >= 0x093E && c <= 0x094C) ||
          c == 0x094D ||
          (c >= 0x0951 && c <= 0x0954) ||
          (c >= 0x0962 && c <= 0x0963) ||
          (c >= 0x0981 && c <= 0x0983) ||
          c == 0x09BC ||
          c == 0x09BE ||
          c == 0x09BF ||
          (c >= 0x09C0 && c <= 0x09C4) ||
          (c >= 0x09C7 && c <= 0x09C8) ||
          (c >= 0x09CB && c <= 0x09CD) ||
          c == 0x09D7 ||
          (c >= 0x09E2 && c <= 0x09E3) ||
          c == 0x0A02 ||
          c == 0x0A3C ||
          c == 0x0A3E ||
          c == 0x0A3F ||
          (c >= 0x0A40 && c <= 0x0A42) ||
          (c >= 0x0A47 && c <= 0x0A48) ||
          (c >= 0x0A4B && c <= 0x0A4D) ||
          (c >= 0x0A70 && c <= 0x0A71) ||
          (c >= 0x0A81 && c <= 0x0A83) ||
          c == 0x0ABC ||
          (c >= 0x0ABE && c <= 0x0AC5) ||
          (c >= 0x0AC7 && c <= 0x0AC9) ||
          (c >= 0x0ACB && c <= 0x0ACD) ||
          (c >= 0x0B01 && c <= 0x0B03) ||
          c == 0x0B3C ||
          (c >= 0x0B3E && c <= 0x0B43) ||
          (c >= 0x0B47 && c <= 0x0B48) ||
          (c >= 0x0B4B && c <= 0x0B4D) ||
          (c >= 0x0B56 && c <= 0x0B57) ||
          (c >= 0x0B82 && c <= 0x0B83) ||
          (c >= 0x0BBE && c <= 0x0BC2) ||
          (c >= 0x0BC6 && c <= 0x0BC8) ||
          (c >= 0x0BCA && c <= 0x0BCD) ||
          c == 0x0BD7 ||
          (c >= 0x0C01 && c <= 0x0C03) ||
          (c >= 0x0C3E && c <= 0x0C44) ||
          (c >= 0x0C46 && c <= 0x0C48) ||
          (c >= 0x0C4A && c <= 0x0C4D) ||
          (c >= 0x0C55 && c <= 0x0C56) ||
          (c >= 0x0C82 && c <= 0x0C83) ||
          (c >= 0x0CBE && c <= 0x0CC4) ||
          (c >= 0x0CC6 && c <= 0x0CC8) ||
          (c >= 0x0CCA && c <= 0x0CCD) ||
          (c >= 0x0CD5 && c <= 0x0CD6) ||
          (c >= 0x0D02 && c <= 0x0D03) ||
          (c >= 0x0D3E && c <= 0x0D43) ||
          (c >= 0x0D46 && c <= 0x0D48) ||
          (c >= 0x0D4A && c <= 0x0D4D) ||
          c == 0x0D57 ||
          c == 0x0E31 ||
          (c >= 0x0E34 && c <= 0x0E3A) ||
          (c >= 0x0E47 && c <= 0x0E4E) ||
          c == 0x0EB1 ||
          (c >= 0x0EB4 && c <= 0x0EB9) ||
          (c >= 0x0EBB && c <= 0x0EBC) ||
          (c >= 0x0EC8 && c <= 0x0ECD) ||
          (c >= 0x0F18 && c <= 0x0F19) ||
          c == 0x0F35 ||
          c == 0x0F37 ||
          c == 0x0F39 ||
          c == 0x0F3E ||
          c == 0x0F3F ||
          (c >= 0x0F71 && c <= 0x0F84) ||
          (c >= 0x0F86 && c <= 0x0F8B) ||
          (c >= 0x0F90 && c <= 0x0F95) ||
          c == 0x0F97 ||
          (c >= 0x0F99 && c <= 0x0FAD) ||
          (c >= 0x0FB1 && c <= 0x0FB7) ||
          c == 0x0FB9 ||
          (c >= 0x20D0 && c <= 0x20DC) ||
          c == 0x20E1 ||
          (c >= 0x302A && c <= 0x302F) ||
          c == 0x3099 ||
          c == 0x309A);
}
4095:
4096:
4100: public static boolean isExtender(int c)
4101: {
4102: return (c == 0x00B7 ||
4103: c == 0x02D0 ||
4104: c == 0x02D1 ||
4105: c == 0x0387 ||
4106: c == 0x0640 ||
4107: c == 0x0E46 ||
4108: c == 0x0EC6 ||
4109: c == 0x3005 ||
4110: (c >= 0x3031 && c <= 0x3035) ||
4111: (c >= 0x309D && c <= 0x309E) ||
4112: (c >= 0x30FC && c <= 0x30FE));
4113: }
4114:
4115:
4119: public static boolean isChar(int c)
4120: {
4121: return (c >= 0x20 && c < 0xd800) ||
4122: (c >= 0xe00 && c < 0xfffe) ||
4123: (c >= 0x10000 && c < 0x110000) ||
4124: c == 0xa || c == 0x9 || c == 0xd;
4125: }
4126:
4127:
4131: private String intern(String text)
4132: {
4133: return stringInterning ? text.intern() : text;
4134: }
4135:
4136:
/**
 * Reports an error without additional detail by delegating to
 * {@link #error(String,Object)} with a <code>null</code> info argument.
 * @param message the error message
 * @exception XMLStreamException always
 */
private void error(String message)
  throws XMLStreamException
{
  error(message, null);
}
4144:
4145:
4148: private void error(String message, Object info)
4149: throws XMLStreamException
4150: {
4151: if (info != null)
4152: {
4153: if (info instanceof String)
4154: message += ": \"" + ((String) info) + "\"";
4155: else if (info instanceof Character)
4156: message += ": '" + ((Character) info) + "'";
4157: }
4158: throw new XMLStreamException(message);
4159: }
4160:
4161:
4164: private void validateStartElement(String elementName)
4165: throws XMLStreamException
4166: {
4167: if (currentContentModel == null)
4168: {
4169:
4170:
4171: if (!elementName.equals(doctype.rootName))
4172: error("root element name must match name in DTD");
4173: return;
4174: }
4175:
4176: switch (currentContentModel.type)
4177: {
4178: case ContentModel.EMPTY:
4179: error("child element found in empty element", elementName);
4180: break;
4181: case ContentModel.ELEMENT:
4182: LinkedList ctx = (LinkedList) validationStack.getLast();
4183: ctx.add(elementName);
4184: break;
4185: case ContentModel.MIXED:
4186: MixedContentModel mm = (MixedContentModel) currentContentModel;
4187: if (!mm.containsName(elementName))
4188: error("illegal element for content model", elementName);
4189: break;
4190: }
4191: }
4192:
4193:
4196: private void validateEndElement()
4197: throws XMLStreamException
4198: {
4199: if (currentContentModel == null)
4200: {
4201:
4202:
4203: if (!idrefs.containsAll(ids))
4204: error("IDREF values must match the value of some ID attribute");
4205: return;
4206: }
4207:
4208: switch (currentContentModel.type)
4209: {
4210: case ContentModel.ELEMENT:
4211: LinkedList ctx = (LinkedList) validationStack.getLast();
4212: ElementContentModel ecm = (ElementContentModel) currentContentModel;
4213: validateElementContent(ecm, ctx);
4214: break;
4215: }
4216: }
4217:
4218:
4221: private void validatePCData(String text)
4222: throws XMLStreamException
4223: {
4224:
4225: switch (currentContentModel.type)
4226: {
4227: case ContentModel.EMPTY:
4228: error("character data found in empty element", text);
4229: break;
4230: case ContentModel.ELEMENT:
4231: boolean white = true;
4232: int len = text.length();
4233: for (int i = 0; i < len; i++)
4234: {
4235: char c = text.charAt(i);
4236: if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
4237: {
4238: white = false;
4239: break;
4240: }
4241: }
4242: if (!white)
4243: error("character data found in element with element content", text);
4244: else if (xmlStandalone == Boolean.TRUE && currentContentModel.external)
4245:
4246: error("whitespace in element content of externally declared " +
4247: "element in standalone document");
4248: break;
4249: }
4250: }
4251:
4252:
4256: private void validateElementContent(ElementContentModel model,
4257: LinkedList children)
4258: throws XMLStreamException
4259: {
4260:
4261: CPStringBuilder buf = new CPStringBuilder();
4262: for (Iterator i = children.iterator(); i.hasNext(); )
4263: {
4264: buf.append((String) i.next());
4265: buf.append(' ');
4266: }
4267: String c = buf.toString();
4268: String regex = createRegularExpression(model);
4269: if (!c.matches(regex))
4270: error("element content "+model.text+" does not match expression "+regex, c);
4271: }
4272:
4273:
4277: private String createRegularExpression(ElementContentModel model)
4278: {
4279: if (model.regex == null)
4280: {
4281: CPStringBuilder buf = new CPStringBuilder();
4282: buf.append('(');
4283: for (Iterator i = model.contentParticles.iterator(); i.hasNext(); )
4284: {
4285: ContentParticle cp = (ContentParticle) i.next();
4286: if (cp.content instanceof String)
4287: {
4288: buf.append('(');
4289: buf.append((String) cp.content);
4290: buf.append(' ');
4291: buf.append(')');
4292: if (cp.max == -1)
4293: {
4294: if (cp.min == 0)
4295: buf.append('*');
4296: else
4297: buf.append('+');
4298: }
4299: else if (cp.min == 0)
4300: buf.append('?');
4301: }
4302: else
4303: {
4304: ElementContentModel ecm = (ElementContentModel) cp.content;
4305: buf.append(createRegularExpression(ecm));
4306: }
4307: if (model.or && i.hasNext())
4308: buf.append('|');
4309: }
4310: buf.append(')');
4311: if (model.max == -1)
4312: {
4313: if (model.min == 0)
4314: buf.append('*');
4315: else
4316: buf.append('+');
4317: }
4318: else if (model.min == 0)
4319: buf.append('?');
4320: model.regex = buf.toString();
4321: }
4322: return model.regex;
4323: }
4324:
4325:
4328: void validateDoctype()
4329: throws XMLStreamException
4330: {
4331: for (Iterator i = doctype.entityIterator(); i.hasNext(); )
4332: {
4333: Map.Entry entry = (Map.Entry) i.next();
4334: Object entity = entry.getValue();
4335: if (entity instanceof ExternalIds)
4336: {
4337: ExternalIds ids = (ExternalIds) entity;
4338: if (ids.notationName != null)
4339: {
4340:
4341: ExternalIds notation = doctype.getNotation(ids.notationName);
4342: if (notation == null)
4343: error("Notation name must match the declared name of a " +
4344: "notation", ids.notationName);
4345: }
4346: }
4347: }
4348: }
4349:
4350:
4355: public static void main(String[] args)
4356: throws Exception
4357: {
4358: boolean validating = false;
4359: boolean namespaceAware = false;
4360: boolean xIncludeAware = false;
4361: int pos = 0;
4362: while (pos < args.length && args[pos].startsWith("-"))
4363: {
4364: if ("-x".equals(args[pos]))
4365: xIncludeAware = true;
4366: else if ("-v".equals(args[pos]))
4367: validating = true;
4368: else if ("-n".equals(args[pos]))
4369: namespaceAware = true;
4370: pos++;
4371: }
4372: if (pos >= args.length)
4373: {
4374: System.out.println("Syntax: XMLParser [-n] [-v] [-x] <file> [<file2> [...]]");
4375: System.out.println("\t-n: use namespace aware mode");
4376: System.out.println("\t-v: use validating parser");
4377: System.out.println("\t-x: use XInclude aware mode");
4378: System.exit(2);
4379: }
4380: while (pos < args.length)
4381: {
4382: XMLParser p = new XMLParser(new java.io.FileInputStream(args[pos]),
4383: absolutize(null, args[pos]),
4384: validating,
4385: namespaceAware,
4386: true,
4387: true,
4388: true,
4389: true,
4390: true,
4391: true,
4392: true,
4393: null,
4394: null);
4395: XMLStreamReader reader = p;
4396: if (xIncludeAware)
4397: reader = new XIncludeFilter(p, args[pos], true, true, true);
4398: try
4399: {
4400: int event;
4401:
4402: while (reader.hasNext())
4403: {
4404: event = reader.next();
4405: Location loc = reader.getLocation();
4406: System.out.print(loc.getLineNumber() + ":" +
4407: loc.getColumnNumber() + " ");
4408: switch (event)
4409: {
4410: case XMLStreamConstants.START_DOCUMENT:
4411: System.out.println("START_DOCUMENT version=" +
4412: reader.getVersion() +
4413: " encoding=" +
4414: reader.getEncoding());
4415: break;
4416: case XMLStreamConstants.END_DOCUMENT:
4417: System.out.println("END_DOCUMENT");
4418: break;
4419: case XMLStreamConstants.START_ELEMENT:
4420: System.out.println("START_ELEMENT " +
4421: reader.getName());
4422: int l = reader.getNamespaceCount();
4423: for (int i = 0; i < l; i++)
4424: System.out.println("\tnamespace " +
4425: reader.getNamespacePrefix(i) + "='" +
4426: reader.getNamespaceURI(i)+"'");
4427: l = reader.getAttributeCount();
4428: for (int i = 0; i < l; i++)
4429: System.out.println("\tattribute " +
4430: reader.getAttributeName(i) + "='" +
4431: reader.getAttributeValue(i) + "'");
4432: break;
4433: case XMLStreamConstants.END_ELEMENT:
4434: System.out.println("END_ELEMENT " + reader.getName());
4435: break;
4436: case XMLStreamConstants.CHARACTERS:
4437: System.out.println("CHARACTERS '" +
4438: encodeText(reader.getText()) + "'");
4439: break;
4440: case XMLStreamConstants.CDATA:
4441: System.out.println("CDATA '" +
4442: encodeText(reader.getText()) + "'");
4443: break;
4444: case XMLStreamConstants.SPACE:
4445: System.out.println("SPACE '" +
4446: encodeText(reader.getText()) + "'");
4447: break;
4448: case XMLStreamConstants.DTD:
4449: System.out.println("DTD " + reader.getText());
4450: break;
4451: case XMLStreamConstants.ENTITY_REFERENCE:
4452: System.out.println("ENTITY_REFERENCE " + reader.getText());
4453: break;
4454: case XMLStreamConstants.COMMENT:
4455: System.out.println("COMMENT '" +
4456: encodeText(reader.getText()) + "'");
4457: break;
4458: case XMLStreamConstants.PROCESSING_INSTRUCTION:
4459: System.out.println("PROCESSING_INSTRUCTION " +
4460: reader.getPITarget() + " " +
4461: reader.getPIData());
4462: break;
4463: case START_ENTITY:
4464: System.out.println("START_ENTITY " + reader.getText());
4465: break;
4466: case END_ENTITY:
4467: System.out.println("END_ENTITY " + reader.getText());
4468: break;
4469: default:
4470: System.out.println("Unknown event: " + event);
4471: }
4472: }
4473: }
4474: catch (XMLStreamException e)
4475: {
4476: Location l = reader.getLocation();
4477: System.out.println("At line "+l.getLineNumber()+
4478: ", column "+l.getColumnNumber()+
4479: " of "+l.getSystemId());
4480: throw e;
4481: }
4482: pos++;
4483: }
4484: }
4485:
4486:
4489: private static String encodeText(String text)
4490: {
4491: CPStringBuilder b = new CPStringBuilder();
4492: int len = text.length();
4493: for (int i = 0; i < len; i++)
4494: {
4495: char c = text.charAt(i);
4496: switch (c)
4497: {
4498: case '\t':
4499: b.append("\\t");
4500: break;
4501: case '\n':
4502: b.append("\\n");
4503: break;
4504: case '\r':
4505: b.append("\\r");
4506: break;
4507: default:
4508: b.append(c);
4509: }
4510: }
4511: return b.toString();
4512: }
4513:
4514:
4517: class Attribute
4518: {
4519:
4520:
4523: final String name;
4524:
4525:
4528: final String type;
4529:
4530:
4533: final boolean specified;
4534:
4535:
4538: final String value;
4539:
4540:
4543: final String prefix;
4544:
4545:
4548: final String localName;
4549:
4550: Attribute(String name, String type, boolean specified, String value)
4551: {
4552: this.name = name;
4553: this.type = type;
4554: this.specified = specified;
4555: this.value = value;
4556: int ci = name.indexOf(':');
4557: if (ci == -1)
4558: {
4559: prefix = null;
4560: localName = intern(name);
4561: }
4562: else
4563: {
4564: prefix = intern(name.substring(0, ci));
4565: localName = intern(name.substring(ci + 1));
4566: }
4567: }
4568:
4569: public boolean equals(Object other)
4570: {
4571: if (other instanceof Attribute)
4572: {
4573: Attribute a = (Attribute) other;
4574: if (namespaceAware)
4575: {
4576: if (!a.localName.equals(localName))
4577: return false;
4578: String auri = getNamespaceURI(a.prefix);
4579: String uri = getNamespaceURI(prefix);
4580: if (uri == null && (auri == null ||
4581: (input.xml11 && "".equals(auri))))
4582: return true;
4583: if (uri != null)
4584: {
4585: if ("".equals(uri) && input.xml11 && "".equals(auri))
4586: return true;
4587: return uri.equals(auri);
4588: }
4589: return false;
4590: }
4591: else
4592: return a.name.equals(name);
4593: }
4594: return false;
4595: }
4596:
4597: public String toString()
4598: {
4599: CPStringBuilder buf = new CPStringBuilder(getClass().getName());
4600: buf.append('[');
4601: buf.append("name=");
4602: buf.append(name);
4603: if (value != null)
4604: {
4605: buf.append(",value=");
4606: buf.append(value);
4607: }
4608: if (type != null)
4609: {
4610: buf.append(",type=");
4611: buf.append(type);
4612: }
4613: if (specified)
4614: buf.append(",specified");
4615: buf.append(']');
4616: return buf.toString();
4617: }
4618:
4619: }
4620:
4621:
4624: class Doctype
4625: {
4626:
4627:
4630: final String rootName;
4631:
4632:
4635: final String publicId;
4636:
4637:
4640: final String systemId;
4641:
4642:
4645: private final LinkedHashMap elements = new LinkedHashMap();
4646:
4647:
4650: private final LinkedHashMap attlists = new LinkedHashMap();
4651:
4652:
4655: private final LinkedHashMap entities = new LinkedHashMap();
4656:
4657:
4660: private final LinkedHashMap notations = new LinkedHashMap();
4661:
4662:
4665: private final LinkedHashMap comments = new LinkedHashMap();
4666:
4667:
4671: private final LinkedHashMap pis = new LinkedHashMap();
4672:
4673:
4676: private final LinkedList entries = new LinkedList();
4677:
4678:
4681: private final HashSet externalEntities = new HashSet();
4682:
4683:
4686: private final HashSet externalNotations = new HashSet();
4687:
4688:
4691: private int anon = 1;
4692:
4693:
4696: Doctype(String rootName, String publicId, String systemId)
4697: {
4698: this.rootName = rootName;
4699: this.publicId = publicId;
4700: this.systemId = systemId;
4701: }
4702:
4703:
4709: void addElementDecl(String name, String text, ContentModel model)
4710: {
4711: if (elements.containsKey(name))
4712: return;
4713: model.text = text;
4714: model.external = (inputStack.size() != 1);
4715: elements.put(name, model);
4716: entries.add("E" + name);
4717: }
4718:
4719:
4725: void addAttributeDecl(String ename, String aname, AttributeDecl decl)
4726: {
4727: LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4728: if (attlist == null)
4729: {
4730: attlist = new LinkedHashMap();
4731: attlists.put(ename, attlist);
4732: }
4733: else if (attlist.containsKey(aname))
4734: return;
4735: attlist.put(aname, decl);
4736: String key = "A" + ename;
4737: if (!entries.contains(key))
4738: entries.add(key);
4739: }
4740:
4741:
4747: void addEntityDecl(String name, String text, boolean inExternalSubset)
4748: {
4749: if (entities.containsKey(name))
4750: return;
4751: entities.put(name, text);
4752: entries.add("e" + name);
4753: if (inExternalSubset)
4754: externalEntities.add(name);
4755: }
4756:
4757:
4763: void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset)
4764: {
4765: if (entities.containsKey(name))
4766: return;
4767: entities.put(name, ids);
4768: entries.add("e" + name);
4769: if (inExternalSubset)
4770: externalEntities.add(name);
4771: }
4772:
4773:
4779: void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset)
4780: {
4781: if (notations.containsKey(name))
4782: return;
4783: notations.put(name, ids);
4784: entries.add("n" + name);
4785: if (inExternalSubset)
4786: externalNotations.add(name);
4787: }
4788:
4789:
4792: void addComment(String text)
4793: {
4794: String key = Integer.toString(anon++);
4795: comments.put(key, text);
4796: entries.add("c" + key);
4797: }
4798:
4799:
4802: void addPI(String target, String data)
4803: {
4804: String key = Integer.toString(anon++);
4805: pis.put(key, new String[] {target, data});
4806: entries.add("p" + key);
4807: }
4808:
4809:
4813: ContentModel getElementModel(String name)
4814: {
4815: return (ContentModel) elements.get(name);
4816: }
4817:
4818:
4823: AttributeDecl getAttributeDecl(String ename, String aname)
4824: {
4825: LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4826: return (attlist == null) ? null : (AttributeDecl) attlist.get(aname);
4827: }
4828:
4829:
4834: boolean isAttributeDeclared(String ename, String aname)
4835: {
4836: LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4837: return (attlist == null) ? false : attlist.containsKey(aname);
4838: }
4839:
4840:
4845: Iterator attlistIterator(String ename)
4846: {
4847: LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4848: return (attlist == null) ? Collections.EMPTY_LIST.iterator() :
4849: attlist.entrySet().iterator();
4850: }
4851:
4852:
4855: Object getEntity(String name)
4856: {
4857: return entities.get(name);
4858: }
4859:
4860:
4864: boolean isEntityExternal(String name)
4865: {
4866: return externalEntities.contains(name);
4867: }
4868:
4869:
4872: Iterator entityIterator()
4873: {
4874: return entities.entrySet().iterator();
4875: }
4876:
4877:
4880: ExternalIds getNotation(String name)
4881: {
4882: return (ExternalIds) notations.get(name);
4883: }
4884:
4885:
4889: boolean isNotationExternal(String name)
4890: {
4891: return externalNotations.contains(name);
4892: }
4893:
4894:
4897: String getComment(String key)
4898: {
4899: return (String) comments.get(key);
4900: }
4901:
4902:
4906: String[] getPI(String key)
4907: {
4908: return (String[]) pis.get(key);
4909: }
4910:
4911:
4915: Iterator entryIterator()
4916: {
4917: return entries.iterator();
4918: }
4919:
4920: }
4921:
4922:
/**
 * Plain holder for the identifiers of an externally defined entity or
 * notation. All fields are mutable and default to null.
 */
class ExternalIds
{

  /**
   * The public identifier.
   */
  String publicId;

  /**
   * The system identifier.
   */
  String systemId;

  /**
   * The notation name associated with an entity; when non-null it is
   * checked against the declared notations during doctype validation.
   */
  String notationName;

}
4943:
4944:
/**
 * Base class of the content models that can be declared for an element.
 */
abstract class ContentModel
{
  /** Model kind: no content allowed. */
  static final int EMPTY = 0;
  /** Model kind: any content allowed. */
  static final int ANY = 1;
  /** Model kind: element content. */
  static final int ELEMENT = 2;
  /** Model kind: mixed content. */
  static final int MIXED = 3;

  /** Minimum occurrence count; one unless a subclass changes it. */
  int min = 1;
  /** Maximum occurrence count; one unless a subclass changes it. */
  int max = 1;
  /** One of EMPTY, ANY, ELEMENT or MIXED. */
  final int type;
  /** Textual form of the declaration; set when the model is registered. */
  String text;
  /** External-declaration flag; set when the model is registered. */
  boolean external;

  /**
   * Creates a model of the given kind with occurrence exactly one.
   *
   * @param type the model kind
   */
  ContentModel(int type)
  {
    this.type = type;
  }

}
4968:
4969:
4972: class EmptyContentModel
4973: extends ContentModel
4974: {
4975:
4976: EmptyContentModel()
4977: {
4978: super(ContentModel.EMPTY);
4979: min = 0;
4980: max = 0;
4981: }
4982:
4983: }
4984:
4985:
4988: class AnyContentModel
4989: extends ContentModel
4990: {
4991:
4992: AnyContentModel()
4993: {
4994: super(ContentModel.ANY);
4995: min = 0;
4996: max = -1;
4997: }
4998:
4999: }
5000:
5001:
5004: class ElementContentModel
5005: extends ContentModel
5006: {
5007:
5008: LinkedList contentParticles;
5009: boolean or;
5010: String regex;
5011:
5012: ElementContentModel()
5013: {
5014: super(ContentModel.ELEMENT);
5015: contentParticles = new LinkedList();
5016: }
5017:
5018: void addContentParticle(ContentParticle cp)
5019: {
5020: contentParticles.add(cp);
5021: }
5022:
5023: }
5024:
/**
 * One item of an element content model together with its occurrence
 * bounds.
 */
class ContentParticle
{

  /** Minimum occurrence count; defaults to one. */
  int min = 1;
  /** Maximum occurrence count; defaults to one, -1 means unbounded. */
  int max = 1;
  /**
   * Either an element name (String) or a nested ElementContentModel.
   */
  Object content;

}
5033:
5034:
5037: class MixedContentModel
5038: extends ContentModel
5039: {
5040:
5041: private HashSet names;
5042:
5043: MixedContentModel()
5044: {
5045: super(ContentModel.MIXED);
5046: names = new HashSet();
5047: }
5048:
5049: void addName(String name)
5050: {
5051: names.add(name);
5052: }
5053:
5054: boolean containsName(String name)
5055: {
5056: return names.contains(name);
5057: }
5058:
5059: }
5060:
5061:
/**
 * Immutable record of one attribute declaration.
 */
class AttributeDecl
{

  /**
   * The declared attribute type.
   */
  final String type;

  /**
   * The value given in the declaration, if any.
   */
  final String value;

  /**
   * The kind of default; presumably one of the ATTRIBUTE_DEFAULT_*
   * constants of the enclosing class.
   */
  final int valueType;

  /**
   * The enumeration text supplied to the constructor, if any.
   */
  final String enumeration;

  /**
   * The set of values supplied to the constructor, if any.
   */
  final HashSet values;

  /**
   * The external-declaration flag supplied to the constructor.
   */
  final boolean external;

  /**
   * Creates a declaration from its components, stored as given.
   *
   * @param type the attribute type
   * @param value the declared value
   * @param valueType the kind of default
   * @param enumeration the enumeration text
   * @param values the set of values
   * @param external the external-declaration flag
   */
  AttributeDecl(String type, String value,
                int valueType, String enumeration,
                HashSet values, boolean external)
  {
    this.external = external;
    this.values = values;
    this.enumeration = enumeration;
    this.valueType = valueType;
    this.value = value;
    this.type = type;
  }

}
5110:
5111:
5114: static class Input
5115: implements Location
5116: {
5117:
5118: int line = 1, markLine;
5119: int column, markColumn;
5120: int offset, markOffset;
5121: final String publicId, systemId, name;
5122: final boolean report;
5123: final boolean normalize;
5124:
5125: InputStream in;
5126: Reader reader;
5127: UnicodeReader unicodeReader;
5128: boolean initialized;
5129: boolean encodingDetected;
5130: String inputEncoding;
5131: boolean xml11;
5132:
5133: Input(InputStream in, Reader reader, String publicId, String systemId,
5134: String name, String inputEncoding, boolean report,
5135: boolean normalize)
5136: {
5137: if (inputEncoding == null)
5138: inputEncoding = "UTF-8";
5139: this.inputEncoding = inputEncoding;
5140: this.publicId = publicId;
5141: this.systemId = systemId;
5142: this.name = name;
5143: this.report = report;
5144: this.normalize = normalize;
5145: if (in != null)
5146: {
5147: if (reader != null)
5148: throw new IllegalStateException("both byte and char streams "+
5149: "specified");
5150: if (normalize)
5151: in = new CRLFInputStream(in);
5152: in = new BufferedInputStream(in);
5153: this.in = in;
5154: }
5155: else
5156: {
5157: this.reader = normalize ? new CRLFReader(reader) : reader;
5158: unicodeReader = new UnicodeReader(this.reader);
5159: }
5160: initialized = false;
5161: }
5162:
5163:
5164:
5165: public int getCharacterOffset()
5166: {
5167: return offset;
5168: }
5169:
5170: public int getColumnNumber()
5171: {
5172: return column;
5173: }
5174:
5175: public int getLineNumber()
5176: {
5177: return line;
5178: }
5179:
5180: public String getPublicId()
5181: {
5182: return publicId;
5183: }
5184:
5185: public String getSystemId()
5186: {
5187: return systemId;
5188: }
5189:
5190: void init()
5191: throws IOException
5192: {
5193: if (initialized)
5194: return;
5195: if (in != null)
5196: detectEncoding();
5197: initialized = true;
5198: }
5199:
5200: void mark(int len)
5201: throws IOException
5202: {
5203: markOffset = offset;
5204: markLine = line;
5205: markColumn = column;
5206: if (unicodeReader != null)
5207: unicodeReader.mark(len);
5208: else
5209: in.mark(len);
5210: }
5211:
5212:
5215: int read()
5216: throws IOException
5217: {
5218: offset++;
5219: int ret = (unicodeReader != null) ? unicodeReader.read() : in.read();
5220: if (normalize &&
5221: (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))))
5222: {
5223:
5224: ret = 0x0a;
5225: }
5226:
5227: if (ret == 0x0a)
5228: {
5229: line++;
5230: column = 0;
5231: }
5232: else
5233: column++;
5234: return ret;
5235: }
5236:
5237:
5240: int read(int[] b, int off, int len)
5241: throws IOException
5242: {
5243: int ret;
5244: if (unicodeReader != null)
5245: {
5246: ret = unicodeReader.read(b, off, len);
5247: }
5248: else
5249: {
5250: byte[] b2 = new byte[len];
5251: ret = in.read(b2, 0, len);
5252: if (ret != -1)
5253: {
5254: String s = new String(b2, 0, ret, inputEncoding);
5255: int[] c = UnicodeReader.toCodePointArray(s);
5256: ret = c.length;
5257: System.arraycopy(c, 0, b, off, ret);
5258: }
5259: }
5260: if (ret != -1)
5261: {
5262:
5263: for (int i = 0; i < ret; i++)
5264: {
5265: int c = b[off + i];
5266: if (normalize &&
5267: (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))))
5268: {
5269:
5270: c = 0x0a;
5271: b[off + i] = c;
5272: }
5273: if (c == 0x0a)
5274: {
5275: line++;
5276: column = 0;
5277: }
5278: else
5279: column++;
5280: }
5281: }
5282: return ret;
5283: }
5284:
5285: void reset()
5286: throws IOException
5287: {
5288: if (unicodeReader != null)
5289: unicodeReader.reset();
5290: else
5291: in.reset();
5292: offset = markOffset;
5293: line = markLine;
5294: column = markColumn;
5295: }
5296:
5297:
5298:
5299: private static final int[] SIGNATURE_UCS_4_1234 =
5300: new int[] { 0x00, 0x00, 0x00, 0x3c };
5301: private static final int[] SIGNATURE_UCS_4_4321 =
5302: new int[] { 0x3c, 0x00, 0x00, 0x00 };
5303: private static final int[] SIGNATURE_UCS_4_2143 =
5304: new int[] { 0x00, 0x00, 0x3c, 0x00 };
5305: private static final int[] SIGNATURE_UCS_4_3412 =
5306: new int[] { 0x00, 0x3c, 0x00, 0x00 };
5307: private static final int[] SIGNATURE_UCS_2_12 =
5308: new int[] { 0xfe, 0xff };
5309: private static final int[] SIGNATURE_UCS_2_21 =
5310: new int[] { 0xff, 0xfe };
5311: private static final int[] SIGNATURE_UCS_2_12_NOBOM =
5312: new int[] { 0x00, 0x3c, 0x00, 0x3f };
5313: private static final int[] SIGNATURE_UCS_2_21_NOBOM =
5314: new int[] { 0x3c, 0x00, 0x3f, 0x00 };
5315: private static final int[] SIGNATURE_UTF_8 =
5316: new int[] { 0x3c, 0x3f, 0x78, 0x6d };
5317: private static final int[] SIGNATURE_UTF_8_BOM =
5318: new int[] { 0xef, 0xbb, 0xbf };
5319:
5320:
5323: private void detectEncoding()
5324: throws IOException
5325: {
5326: int[] signature = new int[4];
5327: in.mark(4);
5328: for (int i = 0; i < 4; i++)
5329: signature[i] = in.read();
5330: in.reset();
5331:
5332:
5333: if (equals(SIGNATURE_UCS_4_1234, signature))
5334: {
5335: in.read();
5336: in.read();
5337: in.read();
5338: in.read();
5339: setInputEncoding("UTF-32BE");
5340: encodingDetected = true;
5341: }
5342: else if (equals(SIGNATURE_UCS_4_4321, signature))
5343: {
5344: in.read();
5345: in.read();
5346: in.read();
5347: in.read();
5348: setInputEncoding("UTF-32LE");
5349: encodingDetected = true;
5350: }
5351: else if (equals(SIGNATURE_UCS_4_2143, signature) ||
5352: equals(SIGNATURE_UCS_4_3412, signature))
5353: throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
5354:
5355:
5356: else if (equals(SIGNATURE_UCS_2_12, signature))
5357: {
5358: in.read();
5359: in.read();
5360: setInputEncoding("UTF-16BE");
5361: encodingDetected = true;
5362: }
5363: else if (equals(SIGNATURE_UCS_2_21, signature))
5364: {
5365: in.read();
5366: in.read();
5367: setInputEncoding("UTF-16LE");
5368: encodingDetected = true;
5369: }
5370: else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
5371: {
5372:
5373: throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5374: }
5375: else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
5376: {
5377:
5378: throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5379: }
5380:
5381: else if (equals(SIGNATURE_UTF_8, signature))
5382: {
5383:
5384: }
5385: else if (equals(SIGNATURE_UTF_8_BOM, signature))
5386: {
5387: in.read();
5388: in.read();
5389: in.read();
5390: setInputEncoding("UTF-8");
5391: encodingDetected = true;
5392: }
5393: }
5394:
5395: private static boolean equals(int[] b1, int[] b2)
5396: {
5397: for (int i = 0; i < b1.length; i++)
5398: {
5399: if (b1[i] != b2[i])
5400: return false;
5401: }
5402: return true;
5403: }
5404:
5405: void setInputEncoding(String encoding)
5406: throws IOException
5407: {
5408: if (encoding.equals(inputEncoding))
5409: return;
5410: if ("UTF-16".equalsIgnoreCase(encoding) &&
5411: inputEncoding.startsWith("UTF-16"))
5412: return;
5413: if (encodingDetected)
5414: throw new UnsupportedEncodingException("document is not in its " +
5415: "declared encoding " +
5416: inputEncoding +
5417: ": " + encoding);
5418: inputEncoding = encoding;
5419: finalizeEncoding();
5420: }
5421:
5422: void finalizeEncoding()
5423: throws IOException
5424: {
5425: if (reader != null)
5426: return;
5427: reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
5428: unicodeReader = new UnicodeReader(reader);
5429: mark(1);
5430: }
5431:
5432: }
5433:
5434: }