Source for gnu.xml.stream.XMLParser

   1: /* XMLParser.java --
   2:    Copyright (C) 2005  Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version.
  37: 
  38: Partly derived from code which carried the following notice:
  39: 
  40:   Copyright (c) 1997, 1998 by Microstar Software Ltd.
  41: 
  42:   AElfred is free for both commercial and non-commercial use and
  43:   redistribution, provided that Microstar's copyright and disclaimer are
  44:   retained intact.  You are free to modify AElfred for your own use and
  45:   to redistribute AElfred with your modifications, provided that the
  46:   modifications are clearly documented.
  47: 
  48:   This program is distributed in the hope that it will be useful, but
  49:   WITHOUT ANY WARRANTY; without even the implied warranty of
  50:   merchantability or fitness for a particular purpose.  Please use it AT
  51:   YOUR OWN RISK.
  52: */
  53: 
  54: package gnu.xml.stream;
  55: 
  56: import gnu.java.lang.CPStringBuilder;
  57: 
  58: import java.io.BufferedInputStream;
  59: import java.io.EOFException;
  60: import java.io.File;
  61: import java.io.FileOutputStream;
  62: import java.io.FileWriter;
  63: import java.io.InputStream;
  64: import java.io.InputStreamReader;
  65: import java.io.IOException;
  66: import java.io.Reader;
  67: import java.io.StringReader;
  68: import java.io.UnsupportedEncodingException;
  69: import java.net.MalformedURLException;
  70: import java.net.URL;
  71: import java.util.ArrayList;
  72: import java.util.Collections;
  73: import java.util.HashSet;
  74: import java.util.Iterator;
  75: import java.util.LinkedHashMap;
  76: import java.util.LinkedList;
  77: import java.util.Map;
  78: import java.util.NoSuchElementException;
  79: import java.util.StringTokenizer;
  80: 
  81: import javax.xml.XMLConstants;
  82: import javax.xml.namespace.NamespaceContext;
  83: import javax.xml.namespace.QName;
  84: import javax.xml.stream.Location;
  85: import javax.xml.stream.XMLInputFactory;
  86: import javax.xml.stream.XMLReporter;
  87: import javax.xml.stream.XMLResolver;
  88: import javax.xml.stream.XMLStreamConstants;
  89: import javax.xml.stream.XMLStreamException;
  90: import javax.xml.stream.XMLStreamReader;
  91: 
  92: import gnu.java.net.CRLFInputStream;
  93: import gnu.classpath.debug.TeeInputStream;
  94: import gnu.classpath.debug.TeeReader;
  95: 
  96: /**
  97:  * An XML parser.
  98:  * This parser supports the following additional StAX properties:
  99:  * <table>
 100:  * <tr><td>gnu.xml.stream.stringInterning</td>
 101:  * <td>Boolean</td>
 102:  * <td>Indicates whether markup strings will be interned</td></tr>
 103:  * <tr><td>gnu.xml.stream.xmlBase</td>
 104:  * <td>Boolean</td>
 105:  * <td>Indicates whether XML Base processing will be performed</td></tr>
 106:  * <tr><td>gnu.xml.stream.baseURI</td>
 107:  * <td>String</td>
 108:  * <td>Returns the base URI of the current event</td></tr>
 109:  * </table>
 110:  *
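  111:  * @see <a href="http://www.w3.org/TR/REC-xml/">Extensible Markup Language (XML) 1.0</a>
  112:  * @see <a href="http://www.w3.org/TR/xml11/">Extensible Markup Language (XML) 1.1</a>
  113:  * @see <a href="http://www.w3.org/TR/REC-xml-names">Namespaces in XML 1.0</a>
  114:  * @see <a href="http://www.w3.org/TR/xml-names11">Namespaces in XML 1.1</a>
  115:  * @see <a href="http://www.w3.org/TR/xmlbase/">XML Base</a>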
 116:  *
 117:  * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
 118:  */
 119: public class XMLParser
 120:   implements XMLStreamReader, NamespaceContext
 121: {
 122: 
  123:   // -- parser state machine states (the values taken by the state field) --
  124:   private static final int INIT = 0; // start state
  125:   private static final int PROLOG = 1; // in prolog
  126:   private static final int CONTENT = 2; // in content
  127:   private static final int EMPTY_ELEMENT = 3; // empty element state
  128:   private static final int MISC = 4; // in Misc (after root element)
  129: 
  130:   // -- parameters for parsing literals; each value is a distinct single bit (presumably combined as a bit mask - confirm at the call sites) --
  131:   private final static int LIT_ENTITY_REF = 2;
  132:   private final static int LIT_NORMALIZE = 4;
  133:   private final static int LIT_ATTRIBUTE = 8;
  134:   private final static int LIT_DISABLE_PE = 16;
  135:   private final static int LIT_DISABLE_CREF = 32;
  136:   private final static int LIT_DISABLE_EREF = 64;
  137:   private final static int LIT_PUBID = 256;
  138: 
  139:   // -- types of attribute values (declared without an access modifier, so visible throughout the package) --
  140:   final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
  141:   final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
  142:   final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
  143:   final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
  144:   final static int ATTRIBUTE_DEFAULT_FIXED = 34;
  145: 
  146:   // -- additional event types, reported in addition to the standard ones when extendedEventTypes is set --
  147:   final static int START_ENTITY = 50;
  148:   final static int END_ENTITY = 51;
 149: 
 150:   /**
 151:    * The current input.
 152:    */
 153:   private Input input;
 154: 
 155:   /**
 156:    * Stack of inputs representing XML general entities.
 157:    * The input representing the XML input stream or reader is always the
 158:    * first element in this stack.
 159:    */
 160:   private LinkedList inputStack = new LinkedList();
 161: 
 162:   /**
 163:    * Stack of start-entity events to be reported.
 164:    */
 165:   private LinkedList startEntityStack = new LinkedList();
 166: 
 167:   /**
 168:    * Stack of end-entity events to be reported.
 169:    */
 170:   private LinkedList endEntityStack = new LinkedList();
 171: 
 172:   /**
 173:    * Current parser state within the main state machine.
 174:    */
 175:   private int state = INIT;
 176: 
 177:   /**
 178:    * The (type of the) current event.
 179:    */
 180:   private int event;
 181: 
 182:   /**
 183:    * The element name stack. The first element in this stack will be the
 184:    * root element.
 185:    */
 186:   private LinkedList stack = new LinkedList();
 187: 
 188:   /**
 189:    * Stack of namespace contexts. These are maps specifying prefix-to-URI
 190:    * mappings. The first element in this stack is the most recent namespace
 191:    * context (i.e. the other way around from the element name stack).
 192:    */
 193:   private LinkedList namespaces = new LinkedList();
 194: 
 195:   /**
 196:    * The base-URI stack. This holds the base URI context for each element.
 197:    * The first element in this stack is the most recent context (i.e. the
 198:    * other way around from the element name stack).
 199:    */
 200:   private LinkedList bases = new LinkedList();
 201: 
 202:   /**
 203:    * The list of attributes for the current element, in the order defined in
 204:    * the XML stream.
 205:    */
 206:   private ArrayList attrs = new ArrayList();
 207: 
 208:   /**
 209:    * Buffer for text and character data.
 210:    */
 211:   private StringBuffer buf = new StringBuffer();
 212: 
 213:   /**
 214:    * Buffer for NMTOKEN strings (markup).
 215:    */
 216:   private StringBuffer nmtokenBuf = new StringBuffer();
 217: 
 218:   /**
 219:    * Buffer for string literals. (e.g. attribute values)
 220:    */
 221:   private StringBuffer literalBuf = new StringBuffer();
 222: 
 223:   /**
 224:    * Temporary Unicode character buffer used during character data reads.
 225:    */
 226:   private int[] tmpBuf = new int[1024];
 227: 
 228:   /**
 229:    * The element content model for the current element.
 230:    */
 231:   private ContentModel currentContentModel;
 232: 
 233:   /**
 234:    * The validation stack. This holds lists of the elements seen for each
 235:    * element, in order to determine whether the names and order of these
 236:    * elements match the content model for the element. The last entry in
 237:    * this stack represents the current element.
 238:    */
 239:   private LinkedList validationStack;
 240: 
 241:   /**
 242:    * These sets contain the IDs and the IDREFs seen in the document, to
 243:    * ensure that IDs are unique and that each IDREF refers to an ID in the
 244:    * document.
 245:    */
 246:   private HashSet ids, idrefs;
 247: 
 248:   /**
 249:    * The target and data associated with the current processing instruction
 250:    * event.
 251:    */
 252:   private String piTarget, piData;
 253: 
 254:   /**
 255:    * The XML version declared in the XML declaration.
 256:    */
 257:   private String xmlVersion;
 258: 
 259:   /**
 260:    * The encoding declared in the XML declaration.
 261:    */
 262:   private String xmlEncoding;
 263: 
 264:   /**
 265:    * The standalone value declared in the XML declaration.
 266:    */
 267:   private Boolean xmlStandalone;
 268: 
 269:   /**
 270:    * The document type definition.
 271:    */
 272:   Doctype doctype;
 273: 
 274:   /**
 275:    * State variables for determining parameter-entity expansion.
 276:    */
 277:   private boolean expandPE, peIsError;
 278: 
 279:   /**
 280:    * Whether this is a validating parser.
 281:    */
 282:   private final boolean validating;
 283: 
 284:   /**
 285:    * Whether strings representing markup will be interned.
 286:    */
 287:   private final boolean stringInterning;
 288: 
 289:   /**
 290:    * If true, CDATA sections will be merged with adjacent text nodes into a
 291:    * single event.
 292:    */
 293:   private final boolean coalescing;
 294: 
 295:   /**
 296:    * Whether to replace general entity references with their replacement
 297:    * text automatically during parsing.
 298:    * Otherwise entity-reference events will be issued.
 299:    */
 300:   private final boolean replaceERefs;
 301: 
 302:   /**
 303:    * Whether to support external entities.
 304:    */
 305:   private final boolean externalEntities;
 306: 
 307:   /**
 308:    * Whether to support DTDs.
 309:    */
 310:   private final boolean supportDTD;
 311: 
 312:   /**
 313:    * Whether to support XML namespaces. If true, namespace information will
 314:    * be available. Otherwise namespaces will simply be reported as ordinary
 315:    * attributes.
 316:    */
 317:   private final boolean namespaceAware;
 318: 
 319:   /**
 320:    * Whether to support XML Base. If true, URIs specified in xml:base
 321:    * attributes will be honoured when resolving external entities.
 322:    */
 323:   private final boolean baseAware;
 324: 
 325:   /**
 326:    * Whether to report extended event types (START_ENTITY and END_ENTITY)
 327:    * in addition to the standard event types. Used by the SAX parser.
 328:    */
 329:   private final boolean extendedEventTypes;
 330: 
 331:   /**
 332:    * The reporter to receive parsing warnings.
 333:    */
 334:   final XMLReporter reporter;
 335: 
 336:   /**
 337:    * Callback interface for resolving external entities.
 338:    */
 339:   final XMLResolver resolver;
 340: 
 341:   // -- Constants for testing the next kind of markup event --
 342:   private static final String TEST_START_ELEMENT = "<";
 343:   private static final String TEST_END_ELEMENT = "</";
 344:   private static final String TEST_COMMENT = "<!--";
 345:   private static final String TEST_PI = "<?";
 346:   private static final String TEST_CDATA = "<![CDATA[";
 347:   private static final String TEST_XML_DECL = "<?xml";
 348:   private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE";
 349:   private static final String TEST_ELEMENT_DECL = "<!ELEMENT";
 350:   private static final String TEST_ATTLIST_DECL = "<!ATTLIST";
 351:   private static final String TEST_ENTITY_DECL = "<!ENTITY";
 352:   private static final String TEST_NOTATION_DECL = "<!NOTATION";
 353:   private static final String TEST_KET = ">";
 354:   private static final String TEST_END_COMMENT = "--";
 355:   private static final String TEST_END_PI = "?>";
 356:   private static final String TEST_END_CDATA = "]]>";
 357: 
 358:   /**
 359:    * The general entities predefined by the XML specification.
 360:    */
 361:   private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap();
 362:   static
 363:   {
 364:     PREDEFINED_ENTITIES.put("amp", "&");
 365:     PREDEFINED_ENTITIES.put("lt", "<");
 366:     PREDEFINED_ENTITIES.put("gt", ">");
 367:     PREDEFINED_ENTITIES.put("apos", "'");
 368:     PREDEFINED_ENTITIES.put("quot", "\"");
 369:   }
 370: 
 371:   /**
 372:    * Creates a new XML parser for the given input stream.
 373:    * This constructor should be used where possible, as it allows the
 374:    * encoding of the XML data to be correctly determined from the stream.
 375:    * @param in the input stream
 376:    * @param systemId the URL from which the input stream was retrieved
 377:    * (necessary if there are external entities to be resolved)
 378:    * @param validating if the parser is to be a validating parser
 379:    * @param namespaceAware if the parser should support XML Namespaces
 380:    * @param coalescing if CDATA sections should be merged into adjacent text
 381:    * nodes
 382:    * @param replaceERefs if entity references should be automatically
 383:    * replaced by their replacement text (otherwise they will be reported as
 384:    * entity-reference events)
 385:    * @param externalEntities if external entities should be loaded
 386:    * @param supportDTD if support for the XML DTD should be enabled
 387:    * @param baseAware if the parser should support XML Base to resolve
 388:    * external entities
 389:    * @param stringInterning whether strings will be interned during parsing
 390:    * @param reporter the reporter to receive warnings during processing
 391:    * @param resolver the callback interface used to resolve external
 392:    * entities
 393:    */
 394:   public XMLParser(InputStream in, String systemId,
 395:                    boolean validating,
 396:                    boolean namespaceAware,
 397:                    boolean coalescing,
 398:                    boolean replaceERefs,
 399:                    boolean externalEntities,
 400:                    boolean supportDTD,
 401:                    boolean baseAware,
 402:                    boolean stringInterning,
 403:                    boolean extendedEventTypes,
 404:                    XMLReporter reporter,
 405:                    XMLResolver resolver)
 406:   {
 407:     this.validating = validating;
 408:     this.namespaceAware = namespaceAware;
 409:     this.coalescing = coalescing;
 410:     this.replaceERefs = replaceERefs;
 411:     this.externalEntities = externalEntities;
 412:     this.supportDTD = supportDTD;
 413:     this.baseAware = baseAware;
 414:     this.stringInterning = stringInterning;
 415:     this.extendedEventTypes = extendedEventTypes;
 416:     this.reporter = reporter;
 417:     this.resolver = resolver;
 418:     if (validating)
 419:       {
 420:         validationStack = new LinkedList();
 421:         ids = new HashSet();
 422:         idrefs = new HashSet();
 423:       }
 424:     String debug = System.getProperty("gnu.xml.debug.input");
 425:     if (debug != null)
 426:       {
 427:         try
 428:           {
 429:             File file = File.createTempFile(debug, ".xml");
 430:             in = new TeeInputStream(in, new FileOutputStream(file));
 431:           }
 432:         catch (IOException e)
 433:           {
 434:             RuntimeException e2 = new RuntimeException();
 435:             e2.initCause(e);
 436:             throw e2;
 437:           }
 438:       }
 439:     systemId = canonicalize(systemId);
 440:     pushInput(new Input(in, null, null, systemId, null, null, false, true));
 441:   }
 442: 
 443:   /**
 444:    * Creates a new XML parser for the given character stream.
 445:    * This constructor is only available for compatibility with the JAXP
 446:    * APIs, which permit XML to be parsed from a character stream. Because
 447:    * the encoding specified by the character stream may conflict with that
 448:    * specified in the XML declaration, this method should be avoided where
 449:    * possible.
 450:    * @param in the input stream
 451:    * @param systemId the URL from which the input stream was retrieved
 452:    * (necessary if there are external entities to be resolved)
 453:    * @param validating if the parser is to be a validating parser
 454:    * @param namespaceAware if the parser should support XML Namespaces
 455:    * @param coalescing if CDATA sections should be merged into adjacent text
 456:    * nodes
 457:    * @param replaceERefs if entity references should be automatically
 458:    * replaced by their replacement text (otherwise they will be reported as
 459:    * entity-reference events)
 460:    * @param externalEntities if external entities should be loaded
 461:    * @param supportDTD if support for the XML DTD should be enabled
 462:    * @param baseAware if the parser should support XML Base to resolve
 463:    * external entities
 464:    * @param stringInterning whether strings will be interned during parsing
 465:    * @param reporter the reporter to receive warnings during processing
 466:    * @param resolver the callback interface used to resolve external
 467:    * entities
 468:    */
 469:   public XMLParser(Reader reader, String systemId,
 470:                    boolean validating,
 471:                    boolean namespaceAware,
 472:                    boolean coalescing,
 473:                    boolean replaceERefs,
 474:                    boolean externalEntities,
 475:                    boolean supportDTD,
 476:                    boolean baseAware,
 477:                    boolean stringInterning,
 478:                    boolean extendedEventTypes,
 479:                    XMLReporter reporter,
 480:                    XMLResolver resolver)
 481:   {
 482:     this.validating = validating;
 483:     this.namespaceAware = namespaceAware;
 484:     this.coalescing = coalescing;
 485:     this.replaceERefs = replaceERefs;
 486:     this.externalEntities = externalEntities;
 487:     this.supportDTD = supportDTD;
 488:     this.baseAware = baseAware;
 489:     this.stringInterning = stringInterning;
 490:     this.extendedEventTypes = extendedEventTypes;
 491:     this.reporter = reporter;
 492:     this.resolver = resolver;
 493:     if (validating)
 494:       {
 495:         validationStack = new LinkedList();
 496:         ids = new HashSet();
 497:         idrefs = new HashSet();
 498:       }
 499:     String debug = System.getProperty("gnu.xml.debug.input");
 500:     if (debug != null)
 501:       {
 502:         try
 503:           {
 504:             File file = File.createTempFile(debug, ".xml");
 505:             reader = new TeeReader(reader, new FileWriter(file));
 506:           }
 507:         catch (IOException e)
 508:           {
 509:             RuntimeException e2 = new RuntimeException();
 510:             e2.initCause(e);
 511:             throw e2;
 512:           }
 513:       }
 514:     systemId = canonicalize(systemId);
 515:     pushInput(new Input(null, reader, null, systemId, null, null, false, true));
 516:   }
 517: 
 518:   // -- NamespaceContext --
 519: 
 520:   public String getNamespaceURI(String prefix)
 521:   {
 522:     if (XMLConstants.XML_NS_PREFIX.equals(prefix))
 523:       return XMLConstants.XML_NS_URI;
 524:     if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix))
 525:       return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
 526:     for (Iterator i = namespaces.iterator(); i.hasNext(); )
 527:       {
 528:         LinkedHashMap ctx = (LinkedHashMap) i.next();
 529:         String namespaceURI = (String) ctx.get(prefix);
 530:         if (namespaceURI != null)
 531:           return namespaceURI;
 532:       }
 533:     return null;
 534:   }
 535: 
 536:   public String getPrefix(String namespaceURI)
 537:   {
 538:     if (XMLConstants.XML_NS_URI.equals(namespaceURI))
 539:       return XMLConstants.XML_NS_PREFIX;
 540:     if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
 541:       return XMLConstants.XMLNS_ATTRIBUTE;
 542:     for (Iterator i = namespaces.iterator(); i.hasNext(); )
 543:       {
 544:         LinkedHashMap ctx = (LinkedHashMap) i.next();
 545:         if (ctx.containsValue(namespaceURI))
 546:           {
 547:             for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
 548:               {
 549:                 Map.Entry entry = (Map.Entry) i.next();
 550:                 String uri = (String) entry.getValue();
 551:                 if (uri.equals(namespaceURI))
 552:                   return (String) entry.getKey();
 553:               }
 554:           }
 555:       }
 556:     return null;
 557:   }
 558: 
 559:   public Iterator getPrefixes(String namespaceURI)
 560:   {
 561:     if (XMLConstants.XML_NS_URI.equals(namespaceURI))
 562:       return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator();
 563:     if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
 564:       return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator();
 565:     LinkedList acc = new LinkedList();
 566:     for (Iterator i = namespaces.iterator(); i.hasNext(); )
 567:       {
 568:         LinkedHashMap ctx = (LinkedHashMap) i.next();
 569:         if (ctx.containsValue(namespaceURI))
 570:           {
 571:             for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
 572:               {
 573:                 Map.Entry entry = (Map.Entry) i.next();
 574:                 String uri = (String) entry.getValue();
 575:                 if (uri.equals(namespaceURI))
 576:                   acc.add(entry.getKey());
 577:               }
 578:           }
 579:       }
 580:     return acc.iterator();
 581:   }
 582: 
 583:   // -- XMLStreamReader --
 584: 
 585:   public void close()
 586:     throws XMLStreamException
 587:   {
 588:     stack = null;
 589:     namespaces = null;
 590:     bases = null;
 591:     buf = null;
 592:     attrs = null;
 593:     doctype = null;
 594: 
 595:     inputStack = null;
 596:     validationStack = null;
 597:     ids = null;
 598:     idrefs = null;
 599:   }
 600: 
 601:   public NamespaceContext getNamespaceContext()
 602:   {
 603:     return this;
 604:   }
 605: 
 606:   public int getAttributeCount()
 607:   {
 608:     return attrs.size();
 609:   }
 610: 
 611:   public String getAttributeLocalName(int index)
 612:   {
 613:     Attribute a = (Attribute) attrs.get(index);
 614:     return a.localName;
 615:   }
 616: 
 617:   public String getAttributeNamespace(int index)
 618:   {
 619:     String prefix = getAttributePrefix(index);
 620:     return getNamespaceURI(prefix);
 621:   }
 622: 
 623:   public String getAttributePrefix(int index)
 624:   {
 625:     Attribute a = (Attribute) attrs.get(index);
 626:     return a.prefix;
 627:   }
 628: 
 629:   public QName getAttributeName(int index)
 630:   {
 631:     Attribute a = (Attribute) attrs.get(index);
 632:     String namespaceURI = getNamespaceURI(a.prefix);
 633:     return new QName(namespaceURI, a.localName, a.prefix);
 634:   }
 635: 
 636:   public String getAttributeType(int index)
 637:   {
 638:     Attribute a = (Attribute) attrs.get(index);
 639:     return a.type;
 640:   }
 641: 
 642:   private String getAttributeType(String elementName, String attName)
 643:   {
 644:     if (doctype != null)
 645:       {
 646:         AttributeDecl att = doctype.getAttributeDecl(elementName, attName);
 647:         if (att != null)
 648:           return att.type;
 649:       }
 650:     return "CDATA";
 651:   }
 652: 
 653:   public String getAttributeValue(int index)
 654:   {
 655:     Attribute a = (Attribute) attrs.get(index);
 656:     return a.value;
 657:   }
 658: 
 659:   public String getAttributeValue(String namespaceURI, String localName)
 660:   {
 661:     for (Iterator i = attrs.iterator(); i.hasNext(); )
 662:       {
 663:         Attribute a = (Attribute) i.next();
 664:         if (a.localName.equals(localName))
 665:           {
 666:             String uri = getNamespaceURI(a.prefix);
 667:             if ((uri == null && namespaceURI == null) ||
 668:                 (uri != null && uri.equals(namespaceURI)))
 669:               return a.value;
 670:           }
 671:       }
 672:     return null;
 673:   }
 674: 
 675:   boolean isAttributeDeclared(int index)
 676:   {
 677:     if (doctype == null)
 678:       return false;
 679:     Attribute a = (Attribute) attrs.get(index);
 680:     String qn = ("".equals(a.prefix)) ? a.localName :
 681:       a.prefix + ":" + a.localName;
 682:     String elementName = buf.toString();
 683:     return doctype.isAttributeDeclared(elementName, qn);
 684:   }
 685: 
 686:   public String getCharacterEncodingScheme()
 687:   {
 688:     return xmlEncoding;
 689:   }
 690: 
 691:   public String getElementText()
 692:     throws XMLStreamException
 693:   {
 694:     if (event != XMLStreamConstants.START_ELEMENT)
 695:       throw new XMLStreamException("current event must be START_ELEMENT");
 696:     CPStringBuilder elementText = new CPStringBuilder();
 697:     int depth = stack.size();
 698:     while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth)
 699:       {
 700:         switch (next())
 701:           {
 702:           case XMLStreamConstants.CHARACTERS:
 703:           case XMLStreamConstants.SPACE:
 704:             elementText.append(buf.toString());
 705:           }
 706:       }
 707:     return elementText.toString();
 708:   }
 709: 
 710:   public String getEncoding()
 711:   {
 712:     return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding;
 713:   }
 714: 
 715:   public int getEventType()
 716:   {
 717:     return event;
 718:   }
 719: 
 720:   public String getLocalName()
 721:   {
 722:     switch (event)
 723:       {
 724:       case XMLStreamConstants.START_ELEMENT:
 725:       case XMLStreamConstants.END_ELEMENT:
 726:         String qName = buf.toString();
 727:         int ci = qName.indexOf(':');
 728:         String localName = (ci == -1) ? qName : qName.substring(ci + 1);
 729:         if (stringInterning)
 730:           localName = localName.intern();
 731:         return localName;
 732:       default:
 733:         return null;
 734:       }
 735:   }
 736: 
 737:   public Location getLocation()
 738:   {
 739:     return input;
 740:   }
 741: 
 742:   public QName getName()
 743:   {
 744:     switch (event)
 745:       {
 746:       case XMLStreamConstants.START_ELEMENT:
 747:       case XMLStreamConstants.END_ELEMENT:
 748:         String qName = buf.toString();
 749:         int ci = qName.indexOf(':');
 750:         String localName = (ci == -1) ? qName : qName.substring(ci + 1);
 751:         if (stringInterning)
 752:           localName = localName.intern();
 753:         String prefix = (ci == -1) ?
 754:           (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
 755:           qName.substring(0, ci);
 756:         if (stringInterning && prefix != null)
 757:           prefix = prefix.intern();
 758:         String namespaceURI = getNamespaceURI(prefix);
 759:         return new QName(namespaceURI, localName, prefix);
 760:       default:
 761:         return null;
 762:       }
 763:   }
 764: 
 765:   public int getNamespaceCount()
 766:   {
 767:     if (!namespaceAware || namespaces.isEmpty())
 768:       return 0;
 769:     switch (event)
 770:       {
 771:       case XMLStreamConstants.START_ELEMENT:
 772:       case XMLStreamConstants.END_ELEMENT:
 773:         LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
 774:         return ctx.size();
 775:       default:
 776:         return 0;
 777:       }
 778:   }
 779: 
 780:   public String getNamespacePrefix(int index)
 781:   {
 782:     LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
 783:     int count = 0;
 784:     for (Iterator i = ctx.keySet().iterator(); i.hasNext(); )
 785:       {
 786:         String prefix = (String) i.next();
 787:         if (count++ == index)
 788:           return prefix;
 789:       }
 790:     return null;
 791:   }
 792: 
 793:   public String getNamespaceURI()
 794:   {
 795:     switch (event)
 796:       {
 797:       case XMLStreamConstants.START_ELEMENT:
 798:       case XMLStreamConstants.END_ELEMENT:
 799:         String qName = buf.toString();
 800:         int ci = qName.indexOf(':');
 801:         if (ci == -1)
 802:           return null;
 803:         String prefix = qName.substring(0, ci);
 804:         return getNamespaceURI(prefix);
 805:       default:
 806:         return null;
 807:       }
 808:   }
 809: 
 810:   public String getNamespaceURI(int index)
 811:   {
 812:     LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
 813:     int count = 0;
 814:     for (Iterator i = ctx.values().iterator(); i.hasNext(); )
 815:       {
 816:         String uri = (String) i.next();
 817:         if (count++ == index)
 818:           return uri;
 819:       }
 820:     return null;
 821:   }
 822: 
 823:   public String getPIData()
 824:   {
 825:     return piData;
 826:   }
 827: 
 828:   public String getPITarget()
 829:   {
 830:     return piTarget;
 831:   }
 832: 
 833:   public String getPrefix()
 834:   {
 835:     switch (event)
 836:       {
 837:       case XMLStreamConstants.START_ELEMENT:
 838:       case XMLStreamConstants.END_ELEMENT:
 839:         String qName = buf.toString();
 840:         int ci = qName.indexOf(':');
 841:         String prefix = (ci == -1) ?
 842:           (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
 843:           qName.substring(0, ci);
 844:         if (stringInterning && prefix != null)
 845:           prefix = prefix.intern();
 846:         return prefix;
 847:       default:
 848:         return null;
 849:       }
 850:   }
 851: 
 852:   public Object getProperty(String name)
 853:     throws IllegalArgumentException
 854:   {
 855:     if (name == null)
 856:       throw new IllegalArgumentException("name is null");
 857:     if (XMLInputFactory.ALLOCATOR.equals(name))
 858:       return null;
 859:     if (XMLInputFactory.IS_COALESCING.equals(name))
 860:       return coalescing ? Boolean.TRUE : Boolean.FALSE;
 861:     if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name))
 862:       return namespaceAware ? Boolean.TRUE : Boolean.FALSE;
 863:     if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name))
 864:       return replaceERefs ? Boolean.TRUE : Boolean.FALSE;
 865:     if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name))
 866:       return externalEntities ? Boolean.TRUE : Boolean.FALSE;
 867:     if (XMLInputFactory.IS_VALIDATING.equals(name))
 868:       return Boolean.FALSE;
 869:     if (XMLInputFactory.REPORTER.equals(name))
 870:       return reporter;
 871:     if (XMLInputFactory.RESOLVER.equals(name))
 872:       return resolver;
 873:     if (XMLInputFactory.SUPPORT_DTD.equals(name))
 874:       return supportDTD ? Boolean.TRUE : Boolean.FALSE;
 875:     if ("gnu.xml.stream.stringInterning".equals(name))
 876:       return stringInterning ? Boolean.TRUE : Boolean.FALSE;
 877:     if ("gnu.xml.stream.xmlBase".equals(name))
 878:       return baseAware ? Boolean.TRUE : Boolean.FALSE;
 879:     if ("gnu.xml.stream.baseURI".equals(name))
 880:       return getXMLBase();
 881:     return null;
 882:   }
 883: 
 884:   public String getText()
 885:   {
 886:     return buf.toString();
 887:   }
 888: 
 889:   public char[] getTextCharacters()
 890:   {
 891:     return buf.toString().toCharArray();
 892:   }
 893: 
 894:   public int getTextCharacters(int sourceStart, char[] target,
 895:                                int targetStart, int length)
 896:     throws XMLStreamException
 897:   {
 898:     length = Math.min(sourceStart + buf.length(), length);
 899:     int sourceEnd = sourceStart + length;
 900:     buf.getChars(sourceStart, sourceEnd, target, targetStart);
 901:     return length;
 902:   }
 903: 
 904:   public int getTextLength()
 905:   {
 906:     return buf.length();
 907:   }
 908: 
 909:   public int getTextStart()
 910:   {
 911:     return 0;
 912:   }
 913: 
 914:   public String getVersion()
 915:   {
 916:     return (xmlVersion == null) ? "1.0" : xmlVersion;
 917:   }
 918: 
 919:   public boolean hasName()
 920:   {
 921:     switch (event)
 922:       {
 923:       case XMLStreamConstants.START_ELEMENT:
 924:       case XMLStreamConstants.END_ELEMENT:
 925:         return true;
 926:       default:
 927:         return false;
 928:       }
 929:   }
 930: 
 931:   public boolean hasText()
 932:   {
 933:     switch (event)
 934:       {
 935:       case XMLStreamConstants.CHARACTERS:
 936:       case XMLStreamConstants.SPACE:
 937:         return true;
 938:       default:
 939:         return false;
 940:       }
 941:   }
 942: 
 943:   public boolean isAttributeSpecified(int index)
 944:   {
 945:     Attribute a = (Attribute) attrs.get(index);
 946:     return a.specified;
 947:   }
 948: 
 949:   public boolean isCharacters()
 950:   {
 951:     return (event == XMLStreamConstants.CHARACTERS);
 952:   }
 953: 
 954:   public boolean isEndElement()
 955:   {
 956:     return (event == XMLStreamConstants.END_ELEMENT);
 957:   }
 958: 
 959:   public boolean isStandalone()
 960:   {
 961:     return Boolean.TRUE.equals(xmlStandalone);
 962:   }
 963: 
 964:   public boolean isStartElement()
 965:   {
 966:     return (event == XMLStreamConstants.START_ELEMENT);
 967:   }
 968: 
 969:   public boolean isWhiteSpace()
 970:   {
 971:     return (event == XMLStreamConstants.SPACE);
 972:   }
 973: 
 974:   public int nextTag()
 975:     throws XMLStreamException
 976:   {
 977:     do
 978:       {
 979:         switch (next())
 980:           {
 981:           case XMLStreamConstants.START_ELEMENT:
 982:           case XMLStreamConstants.END_ELEMENT:
 983:           case XMLStreamConstants.CHARACTERS:
 984:           case XMLStreamConstants.SPACE:
 985:           case XMLStreamConstants.COMMENT:
 986:           case XMLStreamConstants.PROCESSING_INSTRUCTION:
 987:             break;
 988:           default:
 989:             throw new XMLStreamException("Unexpected event type: " + event);
 990:           }
 991:       }
 992:     while (event != XMLStreamConstants.START_ELEMENT &&
 993:            event != XMLStreamConstants.END_ELEMENT);
 994:     return event;
 995:   }
 996: 
 997:   public void require(int type, String namespaceURI, String localName)
 998:     throws XMLStreamException
 999:   {
1000:     if (event != type)
1001:       throw new XMLStreamException("Current event type is " + event);
1002:     if (event == XMLStreamConstants.START_ELEMENT ||
1003:         event == XMLStreamConstants.END_ELEMENT)
1004:       {
1005:         String ln = getLocalName();
1006:         if (!ln.equals(localName))
1007:           throw new XMLStreamException("Current local-name is " + ln);
1008:         String uri = getNamespaceURI();
1009:         if ((uri == null && namespaceURI != null) ||
1010:             (uri != null && !uri.equals(namespaceURI)))
1011:           throw new XMLStreamException("Current namespace URI is " + uri);
1012:       }
1013:   }
1014: 
1015:   public boolean standaloneSet()
1016:   {
1017:     return (xmlStandalone != null);
1018:   }
1019: 
1020:   public boolean hasNext()
1021:     throws XMLStreamException
1022:   {
1023:     return (event != XMLStreamConstants.END_DOCUMENT && event != -1);
1024:   }
1025: 
1026:   public int next()
1027:     throws XMLStreamException
1028:   {
1029:     if (event == XMLStreamConstants.END_ELEMENT)
1030:       {
1031:         // Pop namespace context
1032:         if (namespaceAware && !namespaces.isEmpty())
1033:           namespaces.removeFirst();
1034:         // Pop base context
1035:         if (baseAware && !bases.isEmpty())
1036:           bases.removeFirst();
1037:       }
1038:     if (!startEntityStack.isEmpty())
1039:       {
1040:         String entityName = (String) startEntityStack.removeFirst();
1041:         buf.setLength(0);
1042:         buf.append(entityName);
1043:         event = START_ENTITY;
1044:         return extendedEventTypes ? event : next();
1045:       }
1046:     else if (!endEntityStack.isEmpty())
1047:       {
1048:         String entityName = (String) endEntityStack.removeFirst();
1049:         buf.setLength(0);
1050:         buf.append(entityName);
1051:         event = END_ENTITY;
1052:         return extendedEventTypes ? event : next();
1053:       }
1054:     try
1055:       {
1056:         if (!input.initialized)
1057:           input.init();
1058:         switch (state)
1059:           {
1060:           case CONTENT:
1061:             if (tryRead(TEST_END_ELEMENT))
1062:               {
1063:                 readEndElement();
1064:                 if (stack.isEmpty())
1065:                   state = MISC;
1066:                 event = XMLStreamConstants.END_ELEMENT;
1067:               }
1068:             else if (tryRead(TEST_COMMENT))
1069:               {
1070:                 readComment(false);
1071:                 event = XMLStreamConstants.COMMENT;
1072:               }
1073:             else if (tryRead(TEST_PI))
1074:               {
1075:                 readPI(false);
1076:                 event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1077:               }
1078:             else if (tryRead(TEST_CDATA))
1079:               {
1080:                 readCDSect();
1081:                 event = XMLStreamConstants.CDATA;
1082:               }
1083:             else if (tryRead(TEST_START_ELEMENT))
1084:               {
1085:                 state = readStartElement();
1086:                 event = XMLStreamConstants.START_ELEMENT;
1087:               }
1088:             else
1089:               {
1090:                 // Check for character reference or predefined entity
1091:                 mark(8);
1092:                 int c = readCh();
1093:                 if (c == 0x26) // '&'
1094:                   {
1095:                     c = readCh();
1096:                     if (c == 0x23) // '#'
1097:                       {
1098:                         reset();
1099:                         event = readCharData(null);
1100:                       }
1101:                     else
1102:                       {
1103:                         // entity reference
1104:                         reset();
1105:                         readCh(); // &
1106:                         readReference();
1107:                         String ref = buf.toString();
1108:                         String text = (String) PREDEFINED_ENTITIES.get(ref);
1109:                         if (text != null)
1110:                           {
1111:                             event = readCharData(text);
1112:                           }
1113:                         else if (replaceERefs && !isUnparsedEntity(ref))
1114:                           {
1115:                             // this will report a start-entity event
1116:                             boolean external = false;
1117:                             if (doctype != null)
1118:                               {
1119:                                 Object entity = doctype.getEntity(ref);
1120:                                 if (entity instanceof ExternalIds)
1121:                                   external = true;
1122:                               }
1123:                             expandEntity(ref, false, external);
1124:                             event = next();
1125:                           }
1126:                         else
1127:                           {
1128:                             event = XMLStreamConstants.ENTITY_REFERENCE;
1129:                           }
1130:                       }
1131:                   }
1132:                 else
1133:                   {
1134:                     reset();
1135:                     event = readCharData(null);
1136:                     if (validating && doctype != null)
1137:                       validatePCData(buf.toString());
1138:                   }
1139:               }
1140:             break;
1141:           case EMPTY_ELEMENT:
1142:             String elementName = (String) stack.removeLast();
1143:             buf.setLength(0);
1144:             buf.append(elementName);
1145:             state = stack.isEmpty() ? MISC : CONTENT;
1146:             event = XMLStreamConstants.END_ELEMENT;
1147:             if (validating && doctype != null)
1148:               endElementValidationHook();
1149:             break;
1150:           case INIT: // XMLDecl?
1151:             if (tryRead(TEST_XML_DECL))
1152:               readXMLDecl();
1153:             input.finalizeEncoding();
1154:             event = XMLStreamConstants.START_DOCUMENT;
1155:             state = PROLOG;
1156:             break;
1157:           case PROLOG: // Misc* (doctypedecl Misc*)?
1158:             skipWhitespace();
1159:             if (doctype == null && tryRead(TEST_DOCTYPE_DECL))
1160:               {
1161:                 readDoctypeDecl();
1162:                 event = XMLStreamConstants.DTD;
1163:               }
1164:             else if (tryRead(TEST_COMMENT))
1165:               {
1166:                 readComment(false);
1167:                 event = XMLStreamConstants.COMMENT;
1168:               }
1169:             else if (tryRead(TEST_PI))
1170:               {
1171:                 readPI(false);
1172:                 event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1173:               }
1174:             else if (tryRead(TEST_START_ELEMENT))
1175:               {
1176:                 state = readStartElement();
1177:                 event = XMLStreamConstants.START_ELEMENT;
1178:               }
1179:             else
1180:               {
1181:                 int c = readCh();
1182:                 error("no root element: U+" + Integer.toHexString(c));
1183:               }
1184:             break;
1185:           case MISC: // Comment | PI | S
1186:             skipWhitespace();
1187:             if (tryRead(TEST_COMMENT))
1188:               {
1189:                 readComment(false);
1190:                 event = XMLStreamConstants.COMMENT;
1191:               }
1192:             else if (tryRead(TEST_PI))
1193:               {
1194:                 readPI(false);
1195:                 event = XMLStreamConstants.PROCESSING_INSTRUCTION;
1196:               }
1197:             else
1198:               {
1199:                 if (event == XMLStreamConstants.END_DOCUMENT)
1200:                   throw new NoSuchElementException();
1201:                 int c = readCh();
1202:                 if (c != -1)
1203:                   error("Only comments and PIs may appear after " +
1204:                         "the root element");
1205:                 event = XMLStreamConstants.END_DOCUMENT;
1206:               }
1207:             break;
1208:           default:
1209:             event = -1;
1210:           }
1211:         return event;
1212:       }
1213:     catch (IOException e)
1214:       {
1215:         XMLStreamException e2 = new XMLStreamException();
1216:         e2.initCause(e);
1217:         throw e2;
1218:       }
1219:   }
1220: 
1221:   // package private
1222: 
1223:   /**
1224:    * Returns the current element name.
1225:    */
1226:   String getCurrentElement()
1227:   {
1228:     return (String) stack.getLast();
1229:   }
1230: 
1231:   // private
1232: 
1233:   private void mark(int limit)
1234:     throws IOException
1235:   {
1236:     input.mark(limit);
1237:   }
1238: 
1239:   private void reset()
1240:     throws IOException
1241:   {
1242:     input.reset();
1243:   }
1244: 
1245:   private int read()
1246:     throws IOException
1247:   {
1248:     return input.read();
1249:   }
1250: 
1251:   private int read(int[] b, int off, int len)
1252:     throws IOException
1253:   {
1254:     return input.read(b, off, len);
1255:   }
1256: 
1257:   /**
1258:    * Parsed character read.
1259:    */
1260:   private int readCh()
1261:     throws IOException, XMLStreamException
1262:   {
1263:     int c = read();
1264:     if (expandPE && c == 0x25) // '%'
1265:       {
1266:         if (peIsError)
1267:           error("PE reference within decl in internal subset.");
1268:         expandPEReference();
1269:         return readCh();
1270:       }
1271:     return c;
1272:   }
1273: 
1274:   /**
1275:    * Reads the next character, ensuring it is the character specified.
1276:    * @param delim the character to match
1277:    * @exception XMLStreamException if the next character is not the
1278:    * specified one
1279:    */
1280:   private void require(char delim)
1281:     throws IOException, XMLStreamException
1282:   {
1283:     mark(1);
1284:     int c = readCh();
1285:     if (delim != c)
1286:       {
1287:         reset();
1288:         error("required character (got U+" + Integer.toHexString(c) + ")",
1289:               new Character(delim));
1290:       }
1291:   }
1292: 
1293:   /**
1294:    * Reads the next few characters, ensuring they match the string specified.
1295:    * @param delim the string to match
1296:    * @exception XMLStreamException if the next characters do not match the
1297:    * specified string
1298:    */
1299:   private void require(String delim)
1300:     throws IOException, XMLStreamException
1301:   {
1302:     char[] chars = delim.toCharArray();
1303:     int len = chars.length;
1304:     mark(len);
1305:     int off = 0;
1306:     do
1307:       {
1308:         int l2 = read(tmpBuf, off, len - off);
1309:         if (l2 == -1)
1310:           {
1311:             reset();
1312:             error("EOF before required string", delim);
1313:           }
1314:         off += l2;
1315:       }
1316:     while (off < len);
1317:     for (int i = 0; i < chars.length; i++)
1318:       {
1319:         if (chars[i] != tmpBuf[i])
1320:           {
1321:             reset();
1322:             error("required string", delim);
1323:           }
1324:       }
1325:   }
1326: 
1327:   /**
1328:    * Try to read a single character. On failure, reset the stream.
1329:    * @param delim the character to test
1330:    * @return true if the character matched delim, false otherwise.
1331:    */
1332:   private boolean tryRead(char delim)
1333:     throws IOException, XMLStreamException
1334:   {
1335:     mark(1);
1336:     int c = readCh();
1337:     if (delim != c)
1338:       {
1339:         reset();
1340:         return false;
1341:       }
1342:     return true;
1343:   }
1344: 
1345:   /**
1346:    * Tries to read the specified characters.
1347:    * If successful, the stream is positioned after the last character,
1348:    * otherwise it is reset.
1349:    * @param test the string to test
1350:    * @return true if the characters matched the test string, false otherwise.
1351:    */
1352:   private boolean tryRead(String test)
1353:     throws IOException
1354:   {
1355:     char[] chars = test.toCharArray();
1356:     int len = chars.length;
1357:     mark(len);
1358:     int count = 0;
1359:     int l2 = read(tmpBuf, 0, len);
1360:     if (l2 == -1)
1361:       {
1362:         reset();
1363:         return false;
1364:       }
1365:     count += l2;
1366:     // check the characters we received first before doing additional reads
1367:     for (int i = 0; i < count; i++)
1368:       {
1369:         if (chars[i] != tmpBuf[i])
1370:           {
1371:             reset();
1372:             return false;
1373:           }
1374:       }
1375:     while (count < len)
1376:       {
1377:         // force read
1378:         int c = read();
1379:         if (c == -1)
1380:           {
1381:             reset();
1382:             return false;
1383:           }
1384:         tmpBuf[count] = (char) c;
1385:         // check each character as it is read
1386:         if (chars[count] != tmpBuf[count])
1387:           {
1388:             reset();
1389:             return false;
1390:           }
1391:         count++;
1392:       }
1393:     return true;
1394:   }
1395: 
1396:   /**
1397:    * Reads characters until the specified test string is encountered.
1398:    * @param delim the string delimiting the end of the characters
1399:    */
1400:   private void readUntil(String delim)
1401:     throws IOException, XMLStreamException
1402:   {
1403:     int startLine = input.line;
1404:     try
1405:       {
1406:         while (!tryRead(delim))
1407:           {
1408:             int c = readCh();
1409:             if (c == -1)
1410:               throw new EOFException();
1411:             else if (input.xml11)
1412:               {
1413:                 if (!isXML11Char(c) || isXML11RestrictedChar(c))
1414:                   error("illegal XML 1.1 character",
1415:                         "U+" + Integer.toHexString(c));
1416:               }
1417:             else if (!isChar(c))
1418:               error("illegal XML character",
1419:                     "U+" + Integer.toHexString(c));
1420:             buf.append(Character.toChars(c));
1421:           }
1422:       }
1423:     catch (EOFException e)
1424:       {
1425:         error("end of input while looking for delimiter "+
1426:               "(started on line " + startLine + ')', delim);
1427:       }
1428:   }
1429: 
1430:   /**
1431:    * Reads any whitespace characters.
1432:    * @return true if whitespace characters were read, false otherwise
1433:    */
1434:   private boolean tryWhitespace()
1435:     throws IOException, XMLStreamException
1436:   {
1437:     boolean white;
1438:     boolean ret = false;
1439:     do
1440:       {
1441:         mark(1);
1442:         int c = readCh();
1443:         while (c == -1 && inputStack.size() > 1)
1444:           {
1445:             popInput();
1446:             c = readCh();
1447:           }
1448:         white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1449:         if (white)
1450:           ret = true;
1451:       }
1452:     while (white);
1453:     reset();
1454:     return ret;
1455:   }
1456: 
1457:   /**
1458:    * Skip over any whitespace characters.
1459:    */
1460:   private void skipWhitespace()
1461:     throws IOException, XMLStreamException
1462:   {
1463:     boolean white;
1464:     do
1465:       {
1466:         mark(1);
1467:         int c = readCh();
1468:         while (c == -1 && inputStack.size() > 1)
1469:           {
1470:             popInput();
1471:             c = readCh();
1472:           }
1473:         white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1474:       }
1475:     while (white);
1476:     reset();
1477:   }
1478: 
1479:   /**
1480:    * Try to read as many whitespace characters as are available.
1481:    * @exception XMLStreamException if no whitespace characters were seen
1482:    */
1483:   private void requireWhitespace()
1484:     throws IOException, XMLStreamException
1485:   {
1486:     if (!tryWhitespace())
1487:       error("whitespace required");
1488:   }
1489: 
1490:   /**
1491:    * Returns the current base URI for resolving external entities.
1492:    */
1493:   String getXMLBase()
1494:   {
1495:     if (baseAware)
1496:       {
1497:         for (Iterator i = bases.iterator(); i.hasNext(); )
1498:           {
1499:             String base = (String) i.next();
1500:             if (base != null)
1501:               return base;
1502:           }
1503:       }
1504:     return input.systemId;
1505:   }
1506: 
1507:   /**
1508:    * Push the specified text input source.
1509:    */
1510:   private void pushInput(String name, String text, boolean report,
1511:                          boolean normalize)
1512:     throws IOException, XMLStreamException
1513:   {
1514:     // Check for recursion
1515:     if (name != null && !"".equals(name))
1516:       {
1517:         for (Iterator i = inputStack.iterator(); i.hasNext(); )
1518:           {
1519:             Input ctx = (Input) i.next();
1520:             if (name.equals(ctx.name))
1521:               error("entities may not be self-recursive", name);
1522:           }
1523:       }
1524:     else
1525:       report = false;
1526:     pushInput(new Input(null, new StringReader(text), input.publicId,
1527:                         input.systemId, name, input.inputEncoding, report,
1528:                         normalize));
1529:   }
1530: 
1531:   /**
1532:    * Push the specified external input source.
1533:    */
1534:   private void pushInput(String name, ExternalIds ids, boolean report,
1535:                          boolean normalize)
1536:     throws IOException, XMLStreamException
1537:   {
1538:     if (!externalEntities)
1539:       return;
1540:     String url = canonicalize(absolutize(input.systemId, ids.systemId));
1541:     // Check for recursion
1542:     for (Iterator i = inputStack.iterator(); i.hasNext(); )
1543:       {
1544:         Input ctx = (Input) i.next();
1545:         if (url.equals(ctx.systemId))
1546:           error("entities may not be self-recursive", url);
1547:         if (name != null && !"".equals(name) && name.equals(ctx.name))
1548:           error("entities may not be self-recursive", name);
1549:       }
1550:     if (name == null || "".equals(name))
1551:       report = false;
1552:     InputStream in = null;
1553:     if (resolver != null)
1554:       {
1555:         Object obj = resolver.resolveEntity(ids.publicId, url, getXMLBase(),
1556:                                             null);
1557:         if (obj instanceof InputStream)
1558:           in = (InputStream) obj;
1559:       }
1560:     if (in == null)
1561:       in = resolve(url);
1562:     if (in == null)
1563:       error("unable to resolve external entity",
1564:             (ids.systemId != null) ? ids.systemId : ids.publicId);
1565:     pushInput(new Input(in, null, ids.publicId, url, name, null, report,
1566:                         normalize));
1567:     input.init();
1568:     if (tryRead(TEST_XML_DECL))
1569:       readTextDecl();
1570:     input.finalizeEncoding();
1571:   }
1572: 
1573:   /**
1574:    * Push the specified input source (general entity) onto the input stack.
1575:    */
1576:   private void pushInput(Input input)
1577:   {
1578:     if (input.report)
1579:       startEntityStack.addFirst(input.name);
1580:     inputStack.addLast(input);
1581:     if (this.input != null)
1582:       input.xml11 = this.input.xml11;
1583:     this.input = input;
1584:   }
1585: 
1586:   /**
1587:    * Returns a canonicalized version of the specified URL.
1588:    * This is largely to work around a problem with the specification of
1589:    * file URLs.
1590:    */
1591:   static String canonicalize(String url)
1592:   {
1593:     if (url == null)
1594:       return null;
1595:     if (url.startsWith("file:") && !url.startsWith("file://"))
1596:       url = "file://" + url.substring(5);
1597:     return url;
1598:   }
1599: 
1600:   /**
1601:    * "Absolutize" a URL. This resolves a relative URL into an absolute one.
1602:    * @param base the current base URL
1603:    * @param href the (absolute or relative) URL to resolve
1604:    */
1605:   public static String absolutize(String base, String href)
1606:   {
1607:     if (href == null)
1608:       return null;
1609:     int ci = href.indexOf(':');
1610:     if (ci > 1 && isURLScheme(href.substring(0, ci)))
1611:       {
1612:         // href is absolute already
1613:         return href;
1614:       }
1615:     if (base == null)
1616:       base = "";
1617:     else
1618:       {
1619:         int i = base.lastIndexOf('/');
1620:         if (i != -1)
1621:           base = base.substring(0, i + 1);
1622:         else
1623:           base = "";
1624:       }
1625:     if ("".equals(base))
1626:       {
1627:         // assume file URL relative to current directory
1628:         base = System.getProperty("user.dir");
1629:         if (base.charAt(0) == '/')
1630:           base = base.substring(1);
1631:         base = "file:///" + base.replace(File.separatorChar, '/');
1632:         if (!base.endsWith("/"))
1633:           base += "/";
1634:       }
1635:     // We can't use java.net.URL here to do the parsing, as it searches for
1636:     // a protocol handler. A protocol handler may not be registered for the
1637:     // URL scheme here. Do it manually.
1638:     //
1639:     // Set aside scheme and host portion of base URL
1640:     String basePrefix = null;
1641:     ci = base.indexOf(':');
1642:     if (ci > 1 && isURLScheme(base.substring(0, ci)))
1643:       {
1644:           if (base.length() > (ci + 3)  &&
1645:               base.charAt(ci + 1) == '/' &&
1646:               base.charAt(ci + 2) == '/')
1647:             {
1648:               int si = base.indexOf('/', ci + 3);
1649:               if (si == -1)
1650:                 base = null;
1651:               else
1652:                 {
1653:                   basePrefix = base.substring(0, si);
1654:                   base = base.substring(si);
1655:                 }
1656:             }
1657:           else
1658:             base = null;
1659:       }
1660:     if (base == null) // unknown or malformed base URL, use href
1661:       return href;
1662:     if (href.startsWith("/")) // absolute href pathname
1663:       return (basePrefix == null) ? href : basePrefix + href;
1664:     // relative href pathname
1665:     if (!base.endsWith("/"))
1666:       {
1667:         int lsi = base.lastIndexOf('/');
1668:         if (lsi == -1)
1669:           base = "/";
1670:         else
1671:           base = base.substring(0, lsi + 1);
1672:       }
1673:     while (href.startsWith("../") || href.startsWith("./"))
1674:       {
1675:         if (href.startsWith("../"))
1676:           {
1677:             // strip last path component from base
1678:             int lsi = base.lastIndexOf('/', base.length() - 2);
1679:             if (lsi > -1)
1680:               base = base.substring(0, lsi + 1);
1681:             href = href.substring(3); // strip ../ prefix
1682:           }
1683:         else
1684:           {
1685:             href = href.substring(2); // strip ./ prefix
1686:           }
1687:       }
1688:     return (basePrefix == null) ? base + href : basePrefix + base + href;
1689:   }
1690: 
1691:   /**
1692:    * Indicates whether the specified characters match the scheme portion of
1693:    * a URL.
1694:    * @see RFC 1738 section 2.1
1695:    */
1696:   private static boolean isURLScheme(String text)
1697:   {
1698:     int len = text.length();
1699:     for (int i = 0; i < len; i++)
1700:       {
1701:         char c = text.charAt(i);
1702:         if (c == '+' || c == '.' || c == '-')
1703:           continue;
1704:         if (c < 65 || (c > 90 && c < 97) || c > 122)
1705:           return false;
1706:       }
1707:     return true;
1708:   }
1709: 
1710:   /**
1711:    * Returns an input stream for the given URL.
1712:    */
1713:   static InputStream resolve(String url)
1714:     throws IOException
1715:   {
1716:     try
1717:       {
1718:         return new URL(url).openStream();
1719:       }
1720:     catch (MalformedURLException e)
1721:       {
1722:         return null;
1723:       }
1724:     catch (IOException e)
1725:       {
1726:         IOException e2 = new IOException("error resolving " + url);
1727:         e2.initCause(e);
1728:         throw e2;
1729:       }
1730:   }
1731: 
1732:   /**
1733:    * Pops the current input source (general entity) off the stack.
1734:    */
1735:   private void popInput()
1736:   {
1737:     Input old = (Input) inputStack.removeLast();
1738:     if (old.report)
1739:       endEntityStack.addFirst(old.name);
1740:     input = (Input) inputStack.getLast();
1741:   }
1742: 
1743:   /**
1744:    * Parse an entity text declaration.
1745:    */
1746:   private void readTextDecl()
1747:     throws IOException, XMLStreamException
1748:   {
1749:     final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1750:     requireWhitespace();
1751:     if (tryRead("version"))
1752:       {
1753:         readEq();
1754:         String v = readLiteral(flags, false);
1755:         if ("1.0".equals(v))
1756:           input.xml11 = false;
1757:         else if ("1.1".equals(v))
1758:           {
1759:             Input i1 = (Input) inputStack.getFirst();
1760:             if (!i1.xml11)
1761:               error("external entity specifies later version number");
1762:             input.xml11 = true;
1763:           }
1764:         else
1765:           throw new XMLStreamException("illegal XML version: " + v);
1766:         requireWhitespace();
1767:       }
1768:     require("encoding");
1769:     readEq();
1770:     String enc = readLiteral(flags, false);
1771:     skipWhitespace();
1772:     require("?>");
1773:     input.setInputEncoding(enc);
1774:   }
1775: 
1776:   /**
1777:    * Parse the XML declaration.
1778:    */
1779:   private void readXMLDecl()
1780:     throws IOException, XMLStreamException
1781:   {
1782:     final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1783: 
1784:     requireWhitespace();
1785:     require("version");
1786:     readEq();
1787:     xmlVersion = readLiteral(flags, false);
1788:     if ("1.0".equals(xmlVersion))
1789:       input.xml11 = false;
1790:     else if ("1.1".equals(xmlVersion))
1791:       input.xml11 = true;
1792:     else
1793:       throw new XMLStreamException("illegal XML version: " + xmlVersion);
1794: 
1795:     boolean white = tryWhitespace();
1796: 
1797:     if (tryRead("encoding"))
1798:       {
1799:         if (!white)
1800:           error("whitespace required before 'encoding='");
1801:         readEq();
1802:         xmlEncoding = readLiteral(flags, false);
1803:         white = tryWhitespace();
1804:       }
1805: 
1806:     if (tryRead("standalone"))
1807:       {
1808:         if (!white)
1809:           error("whitespace required before 'standalone='");
1810:         readEq();
1811:         String standalone = readLiteral(flags, false);
1812:         if ("yes".equals(standalone))
1813:           xmlStandalone = Boolean.TRUE;
1814:         else if ("no".equals(standalone))
1815:           xmlStandalone = Boolean.FALSE;
1816:         else
1817:           error("standalone flag must be 'yes' or 'no'", standalone);
1818:       }
1819: 
1820:     skipWhitespace();
1821:     require("?>");
1822:     if (xmlEncoding != null)
1823:       input.setInputEncoding(xmlEncoding);
1824:   }
1825: 
1826:   /**
1827:    * Parse the DOCTYPE declaration.
1828:    */
1829:   private void readDoctypeDecl()
1830:     throws IOException, XMLStreamException
1831:   {
1832:     if (!supportDTD)
1833:       error("parser was configured not to support DTDs");
1834:     requireWhitespace();
1835:     String rootName = readNmtoken(true);
1836:     skipWhitespace();
1837:     ExternalIds ids = readExternalIds(false, true);
1838:     doctype =
1839:       this.new Doctype(rootName, ids.publicId, ids.systemId);
1840: 
1841:     // Parse internal subset first
1842:     skipWhitespace();
1843:     if (tryRead('['))
1844:       {
1845:         while (true)
1846:           {
1847:             expandPE = true;
1848:             skipWhitespace();
1849:             expandPE = false;
1850:             if (tryRead(']'))
1851:               break;
1852:             else
1853:               readMarkupdecl(false);
1854:           }
1855:       }
1856:     skipWhitespace();
1857:     require('>');
1858: 
1859:     // Parse external subset
1860:     if (ids.systemId != null && externalEntities)
1861:       {
1862:         pushInput("", ">", false, false);
1863:         pushInput("[dtd]", ids, true, true);
1864:         // loop until we get back to ">"
1865:         while (true)
1866:           {
1867:             expandPE = true;
1868:             skipWhitespace();
1869:             expandPE = false;
1870:             mark(1);
1871:             int c = readCh();
1872:             if (c == 0x3e) // '>'
1873:               break;
1874:             else if (c == -1)
1875:               popInput();
1876:             else
1877:               {
1878:                 reset();
1879:                 expandPE = true;
1880:                 readMarkupdecl(true);
1881:                 expandPE = true;
1882:               }
1883:           }
1884:         if (inputStack.size() != 2)
1885:           error("external subset has unmatched '>'");
1886:         popInput();
1887:       }
1888:     checkDoctype();
1889:     if (validating)
1890:       validateDoctype();
1891: 
1892:     // Make rootName available for reading
1893:     buf.setLength(0);
1894:     buf.append(rootName);
1895:   }
1896: 
1897:   /**
1898:    * Checks the well-formedness of the DTD.
1899:    */
1900:   private void checkDoctype()
1901:     throws XMLStreamException
1902:   {
1903:     // TODO check entity recursion
1904:   }
1905: 
1906:   /**
1907:    * Parse the markupdecl production.
1908:    */
1909:   private void readMarkupdecl(boolean inExternalSubset)
1910:     throws IOException, XMLStreamException
1911:   {
1912:     boolean saved = expandPE;
1913:     mark(1);
1914:     require('<');
1915:     reset();
1916:     expandPE = false;
1917:     if (tryRead(TEST_ELEMENT_DECL))
1918:       {
1919:         expandPE = saved;
1920:         readElementDecl();
1921:       }
1922:     else if (tryRead(TEST_ATTLIST_DECL))
1923:       {
1924:         expandPE = saved;
1925:         readAttlistDecl();
1926:       }
1927:     else if (tryRead(TEST_ENTITY_DECL))
1928:       {
1929:         expandPE = saved;
1930:         readEntityDecl(inExternalSubset);
1931:       }
1932:     else if (tryRead(TEST_NOTATION_DECL))
1933:       {
1934:         expandPE = saved;
1935:         readNotationDecl(inExternalSubset);
1936:       }
1937:     else if (tryRead(TEST_PI))
1938:       {
1939:         readPI(true);
1940:         expandPE = saved;
1941:       }
1942:     else if (tryRead(TEST_COMMENT))
1943:       {
1944:         readComment(true);
1945:         expandPE = saved;
1946:       }
1947:     else if (tryRead("<!["))
1948:       {
1949:         // conditional section
1950:         expandPE = saved;
1951:         if (inputStack.size() < 2)
1952:           error("conditional sections illegal in internal subset");
1953:         skipWhitespace();
1954:         if (tryRead("INCLUDE"))
1955:           {
1956:             skipWhitespace();
1957:             require('[');
1958:             skipWhitespace();
1959:             while (!tryRead("]]>"))
1960:               {
1961:                 readMarkupdecl(inExternalSubset);
1962:                 skipWhitespace();
1963:               }
1964:           }
1965:         else if (tryRead("IGNORE"))
1966:           {
1967:             skipWhitespace();
1968:             require('[');
1969:             expandPE = false;
1970:             for (int nesting = 1; nesting > 0; )
1971:               {
1972:                 int c = readCh();
1973:                 switch (c)
1974:                   {
1975:                   case 0x3c: // '<'
1976:                     if (tryRead("!["))
1977:                       nesting++;
1978:                     break;
1979:                   case 0x5d: // ']'
1980:                     if (tryRead("]>"))
1981:                       nesting--;
1982:                     break;
1983:                   case -1:
1984:                     throw new EOFException();
1985:                   }
1986:               }
1987:             expandPE = saved;
1988:           }
1989:         else
1990:           error("conditional section must begin with INCLUDE or IGNORE");
1991:       }
1992:     else
1993:       error("expected markup declaration");
1994:   }
1995: 
1996:   /**
1997:    * Parse the elementdecl production.
1998:    */
1999:   private void readElementDecl()
2000:     throws IOException, XMLStreamException
2001:   {
2002:     requireWhitespace();
2003:     boolean saved = expandPE;
2004:     expandPE = (inputStack.size() > 1);
2005:     String name = readNmtoken(true);
2006:     expandPE = saved;
2007:     requireWhitespace();
2008:     readContentspec(name);
2009:     skipWhitespace();
2010:     require('>');
2011:   }
2012: 
2013:   /**
2014:    * Parse the contentspec production.
2015:    */
2016:   private void readContentspec(String elementName)
2017:     throws IOException, XMLStreamException
2018:   {
2019:     if (tryRead("EMPTY"))
2020:       doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel());
2021:     else if (tryRead("ANY"))
2022:       doctype.addElementDecl(elementName, "ANY", new AnyContentModel());
2023:     else
2024:       {
2025:         ContentModel model;
2026:         CPStringBuilder acc = new CPStringBuilder();
2027:         require('(');
2028:         acc.append('(');
2029:         skipWhitespace();
2030:         if (tryRead("#PCDATA"))
2031:           {
2032:             // mixed content
2033:             acc.append("#PCDATA");
2034:             MixedContentModel mm = new MixedContentModel();
2035:             model = mm;
2036:             skipWhitespace();
2037:             if (tryRead(')'))
2038:               {
2039:                 acc.append(")");
2040:                 if (tryRead('*'))
2041:                   {
2042:                     mm.min = 0;
2043:                     mm.max = -1;
2044:                   }
2045:               }
2046:             else
2047:               {
2048:                 while (!tryRead(")"))
2049:                   {
2050:                     require('|');
2051:                     acc.append('|');
2052:                     skipWhitespace();
2053:                     String name = readNmtoken(true);
2054:                     acc.append(name);
2055:                     mm.addName(name);
2056:                     skipWhitespace();
2057:                   }
2058:                 require('*');
2059:                 acc.append(")*");
2060:                 mm.min = 0;
2061:                 mm.max = -1;
2062:               }
2063:           }
2064:         else
2065:           model = readElements(acc);
2066:         doctype.addElementDecl(elementName, acc.toString(), model);
2067:       }
2068:   }
2069: 
2070:   /**
2071:    * Parses an element content model.
2072:    */
2073:   private ElementContentModel readElements(CPStringBuilder acc)
2074:     throws IOException, XMLStreamException
2075:   {
2076:     int separator;
2077:     ElementContentModel model = new ElementContentModel();
2078: 
2079:     // Parse first content particle
2080:     skipWhitespace();
2081:     model.addContentParticle(readContentParticle(acc));
2082:     // End or separator
2083:     skipWhitespace();
2084:     int c = readCh();
2085:     switch (c)
2086:       {
2087:       case 0x29: // ')'
2088:         acc.append(')');
2089:         mark(1);
2090:         c = readCh();
2091:         switch (c)
2092:           {
2093:           case 0x3f: // '?'
2094:             acc.append('?');
2095:             model.min = 0;
2096:             model.max = 1;
2097:             break;
2098:           case 0x2a: // '*'
2099:             acc.append('*');
2100:             model.min = 0;
2101:             model.max = -1;
2102:             break;
2103:           case 0x2b: // '+'
2104:             acc.append('+');
2105:             model.min = 1;
2106:             model.max = -1;
2107:             break;
2108:           default:
2109:             reset();
2110:           }
2111:         return model; // done
2112:       case 0x7c: // '|'
2113:         model.or = true;
2114:         // fall through
2115:       case 0x2c: // ','
2116:         separator = c;
2117:         acc.append(Character.toChars(c));
2118:         break;
2119:       default:
2120:         error("bad separator in content model",
2121:               "U+" + Integer.toHexString(c));
2122:         return model;
2123:       }
2124:     // Parse subsequent content particles
2125:     while (true)
2126:       {
2127:         skipWhitespace();
2128:         model.addContentParticle(readContentParticle(acc));
2129:         skipWhitespace();
2130:         c = readCh();
2131:         if (c == 0x29) // ')'
2132:           {
2133:             acc.append(')');
2134:             break;
2135:           }
2136:         else if (c != separator)
2137:           {
2138:             error("bad separator in content model",
2139:                   "U+" + Integer.toHexString(c));
2140:             return model;
2141:           }
2142:         else
2143:           acc.append(c);
2144:       }
2145:     // Check for occurrence indicator
2146:     mark(1);
2147:     c = readCh();
2148:     switch (c)
2149:       {
2150:       case 0x3f: // '?'
2151:         acc.append('?');
2152:         model.min = 0;
2153:         model.max = 1;
2154:         break;
2155:       case 0x2a: // '*'
2156:         acc.append('*');
2157:         model.min = 0;
2158:         model.max = -1;
2159:         break;
2160:       case 0x2b: // '+'
2161:         acc.append('+');
2162:         model.min = 1;
2163:         model.max = -1;
2164:         break;
2165:       default:
2166:         reset();
2167:       }
2168:     return model;
2169:   }
2170: 
2171:   /**
2172:    * Parse a cp production.
2173:    */
2174:   private ContentParticle readContentParticle(CPStringBuilder acc)
2175:     throws IOException, XMLStreamException
2176:   {
2177:     ContentParticle cp = new ContentParticle();
2178:     if (tryRead('('))
2179:       {
2180:         acc.append('(');
2181:         cp.content = readElements(acc);
2182:       }
2183:     else
2184:       {
2185:         String name = readNmtoken(true);
2186:         acc.append(name);
2187:         cp.content = name;
2188:         mark(1);
2189:         int c = readCh();
2190:         switch (c)
2191:           {
2192:           case 0x3f: // '?'
2193:             acc.append('?');
2194:             cp.min = 0;
2195:             cp.max = 1;
2196:             break;
2197:           case 0x2a: // '*'
2198:             acc.append('*');
2199:             cp.min = 0;
2200:             cp.max = -1;
2201:             break;
2202:           case 0x2b: // '+'
2203:             acc.append('+');
2204:             cp.min = 1;
2205:             cp.max = -1;
2206:             break;
2207:           default:
2208:             reset();
2209:           }
2210:       }
2211:     return cp;
2212:   }
2213: 
2214:   /**
2215:    * Parse an attribute-list definition.
2216:    */
2217:   private void readAttlistDecl()
2218:     throws IOException, XMLStreamException
2219:   {
2220:     requireWhitespace();
2221:     boolean saved = expandPE;
2222:     expandPE = (inputStack.size() > 1);
2223:     String elementName = readNmtoken(true);
2224:     expandPE = saved;
2225:     boolean white = tryWhitespace();
2226:     while (!tryRead('>'))
2227:       {
2228:         if (!white)
2229:           error("whitespace required before attribute definition");
2230:         readAttDef(elementName);
2231:         white = tryWhitespace();
2232:       }
2233:   }
2234: 
2235:   /**
2236:    * Parse a single attribute definition.
2237:    */
2238:   private void readAttDef(String elementName)
2239:     throws IOException, XMLStreamException
2240:   {
2241:     String name = readNmtoken(true);
2242:     requireWhitespace();
2243:     CPStringBuilder acc = new CPStringBuilder();
2244:     HashSet values = new HashSet();
2245:     String type = readAttType(acc, values);
2246:     if (validating)
2247:       {
2248:         if ("ID".equals(type))
2249:           {
2250:             // VC: One ID per Element Type
2251:             for (Iterator i = doctype.attlistIterator(elementName);
2252:                  i.hasNext(); )
2253:               {
2254:                 Map.Entry entry = (Map.Entry) i.next();
2255:                 AttributeDecl decl = (AttributeDecl) entry.getValue();
2256:                 if ("ID".equals(decl.type))
2257:                   error("element types must not have more than one ID " +
2258:                         "attribute");
2259:               }
2260:           }
2261:         else if ("NOTATION".equals(type))
2262:           {
2263:             // VC: One Notation Per Element Type
2264:             for (Iterator i = doctype.attlistIterator(elementName);
2265:                  i.hasNext(); )
2266:               {
2267:                 Map.Entry entry = (Map.Entry) i.next();
2268:                 AttributeDecl decl = (AttributeDecl) entry.getValue();
2269:                 if ("NOTATION".equals(decl.type))
2270:                   error("element types must not have more than one NOTATION " +
2271:                         "attribute");
2272:               }
2273:             // VC: No Notation on Empty Element
2274:             ContentModel model = doctype.getElementModel(elementName);
2275:             if (model != null && model.type == ContentModel.EMPTY)
2276:               error("attributes of type NOTATION must not be declared on an " +
2277:                     "element declared EMPTY");
2278:           }
2279:       }
2280:     String enumer = null;
2281:     if ("ENUMERATION".equals(type) || "NOTATION".equals(type))
2282:       enumer = acc.toString();
2283:     else
2284:       values = null;
2285:     requireWhitespace();
2286:     readDefault(elementName, name, type, enumer, values);
2287:   }
2288: 
2289:   /**
2290:    * Parse an attribute type.
2291:    */
2292:   private String readAttType(CPStringBuilder acc, HashSet values)
2293:     throws IOException, XMLStreamException
2294:   {
2295:     if (tryRead('('))
2296:       {
2297:         readEnumeration(false, acc, values);
2298:         return "ENUMERATION";
2299:       }
2300:     else
2301:       {
2302:         String typeString = readNmtoken(true);
2303:         if ("NOTATION".equals(typeString))
2304:           {
2305:             readNotationType(acc, values);
2306:             return typeString;
2307:           }
2308:         else if ("CDATA".equals(typeString) ||
2309:                  "ID".equals(typeString) ||
2310:                  "IDREF".equals(typeString) ||
2311:                  "IDREFS".equals(typeString) ||
2312:                  "ENTITY".equals(typeString) ||
2313:                  "ENTITIES".equals(typeString) ||
2314:                  "NMTOKEN".equals(typeString) ||
2315:                  "NMTOKENS".equals(typeString))
2316:           return typeString;
2317:         else
2318:           {
2319:             error("illegal attribute type", typeString);
2320:             return null;
2321:           }
2322:       }
2323:   }
2324: 
2325:   /**
2326:    * Parse an enumeration.
2327:    */
2328:   private void readEnumeration(boolean isNames, CPStringBuilder acc,
2329:                                HashSet values)
2330:     throws IOException, XMLStreamException
2331:   {
2332:     acc.append('(');
2333:     // first token
2334:     skipWhitespace();
2335:     String token = readNmtoken(isNames);
2336:     acc.append(token);
2337:     values.add(token);
2338:     // subsequent tokens
2339:     skipWhitespace();
2340:     while (!tryRead(')'))
2341:       {
2342:         require('|');
2343:         acc.append('|');
2344:         skipWhitespace();
2345:         token = readNmtoken(isNames);
2346:         // VC: No Duplicate Tokens
2347:         if (validating && values.contains(token))
2348:           error("duplicate token", token);
2349:         acc.append(token);
2350:         values.add(token);
2351:         skipWhitespace();
2352:       }
2353:     acc.append(')');
2354:   }
2355: 
2356:   /**
2357:    * Parse a notation type for an attribute.
2358:    */
2359:   private void readNotationType(CPStringBuilder acc, HashSet values)
2360:     throws IOException, XMLStreamException
2361:   {
2362:     requireWhitespace();
2363:     require('(');
2364:     readEnumeration(true, acc, values);
2365:   }
2366: 
2367:   /**
2368:    * Parse the default value for an attribute.
2369:    */
2370:   private void readDefault(String elementName, String name,
2371:                            String type, String enumeration, HashSet values)
2372:     throws IOException, XMLStreamException
2373:   {
2374:     int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
2375:     int flags = LIT_ATTRIBUTE;
2376:     String value = null, defaultType = null;
2377:     boolean saved = expandPE;
2378: 
2379:     if (!"CDATA".equals(type))
2380:       flags |= LIT_NORMALIZE;
2381: 
2382:     expandPE = false;
2383:     if (tryRead('#'))
2384:       {
2385:         if (tryRead("FIXED"))
2386:           {
2387:             defaultType = "#FIXED";
2388:             valueType = ATTRIBUTE_DEFAULT_FIXED;
2389:             requireWhitespace();
2390:             value = readLiteral(flags, false);
2391:           }
2392:         else if (tryRead("REQUIRED"))
2393:           {
2394:             defaultType = "#REQUIRED";
2395:             valueType = ATTRIBUTE_DEFAULT_REQUIRED;
2396:           }
2397:         else if (tryRead("IMPLIED"))
2398:           {
2399:             defaultType = "#IMPLIED";
2400:             valueType = ATTRIBUTE_DEFAULT_IMPLIED;
2401:           }
2402:         else
2403:           error("illegal keyword for attribute default value");
2404:       }
2405:     else
2406:       value = readLiteral(flags, false);
2407:     expandPE = saved;
2408:     if (validating)
2409:       {
2410:         if ("ID".equals(type))
2411:           {
2412:             // VC: Attribute Default Value Syntactically Correct
2413:             if (value != null && !isNmtoken(value, true))
2414:               error("default value must match Name production", value);
2415:             // VC: ID Attribute Default
2416:             if (valueType != ATTRIBUTE_DEFAULT_REQUIRED &&
2417:                 valueType != ATTRIBUTE_DEFAULT_IMPLIED)
2418:               error("ID attributes must have a declared default of " +
2419:                     "#IMPLIED or #REQUIRED");
2420:           }
2421:         else if (value != null)
2422:           {
2423:             // VC: Attribute Default Value Syntactically Correct
2424:             if ("IDREF".equals(type) || "ENTITY".equals(type))
2425:               {
2426:                 if (!isNmtoken(value, true))
2427:                   error("default value must match Name production", value);
2428:               }
2429:             else if ("IDREFS".equals(type) || "ENTITIES".equals(type))
2430:               {
2431:                 StringTokenizer st = new StringTokenizer(value);
2432:                 while (st.hasMoreTokens())
2433:                   {
2434:                     String token = st.nextToken();
2435:                     if (!isNmtoken(token, true))
2436:                       error("default value must match Name production", token);
2437:                   }
2438:               }
2439:             else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type))
2440:               {
2441:                 if (!isNmtoken(value, false))
2442:                   error("default value must match Nmtoken production", value);
2443:               }
2444:             else if ("NMTOKENS".equals(type))
2445:               {
2446:                 StringTokenizer st = new StringTokenizer(value);
2447:                 while (st.hasMoreTokens())
2448:                   {
2449:                     String token = st.nextToken();
2450:                     if (!isNmtoken(token, false))
2451:                       error("default value must match Nmtoken production",
2452:                             token);
2453:                   }
2454:               }
2455:           }
2456:       }
2457:     // Register attribute def
2458:     AttributeDecl attribute =
2459:       new AttributeDecl(type, value, valueType, enumeration, values,
2460:                         inputStack.size() != 1);
2461:     doctype.addAttributeDecl(elementName, name, attribute);
2462:   }
2463: 
2464:   /**
2465:    * Parse the EntityDecl production.
2466:    */
2467:   private void readEntityDecl(boolean inExternalSubset)
2468:     throws IOException, XMLStreamException
2469:   {
2470:     int flags = 0;
2471:     // Check if parameter entity
2472:     boolean peFlag = false;
2473:     expandPE = false;
2474:     requireWhitespace();
2475:     if (tryRead('%'))
2476:       {
2477:         peFlag = true;
2478:         requireWhitespace();
2479:       }
2480:     expandPE = true;
2481:     // Read entity name
2482:     String name = readNmtoken(true);
2483:     if (name.indexOf(':') != -1)
2484:       error("illegal character ':' in entity name", name);
2485:     if (peFlag)
2486:       name = "%" + name;
2487:     requireWhitespace();
2488:     mark(1);
2489:     int c = readCh();
2490:     reset();
2491:     if (c == 0x22 || c == 0x27) // " | '
2492:       {
2493:         // Internal entity replacement text
2494:         String value = readLiteral(flags | LIT_DISABLE_EREF, true);
2495:         int ai = value.indexOf('&');
2496:         while (ai != -1)
2497:           {
2498:             int sci = value.indexOf(';', ai);
2499:             if (sci == -1)
2500:               error("malformed reference in entity value", value);
2501:             String ref = value.substring(ai + 1, sci);
2502:             int[] cp = UnicodeReader.toCodePointArray(ref);
2503:             if (cp.length == 0)
2504:               error("malformed reference in entity value", value);
2505:             if (cp[0] == 0x23) // #
2506:               {
2507:                 if (cp.length == 1)
2508:                   error("malformed reference in entity value", value);
2509:                 if (cp[1] == 0x78) // 'x'
2510:                   {
2511:                     if (cp.length == 2)
2512:                       error("malformed reference in entity value", value);
2513:                     for (int i = 2; i < cp.length; i++)
2514:                       {
2515:                         int x = cp[i];
2516:                         if (x < 0x30 ||
2517:                             (x > 0x39 && x < 0x41) ||
2518:                             (x > 0x46 && x < 0x61) ||
2519:                             x > 0x66)
2520:                           error("malformed character reference in entity value",
2521:                                 value);
2522:                       }
2523:                   }
2524:                 else
2525:                   {
2526:                     for (int i = 1; i < cp.length; i++)
2527:                       {
2528:                         int x = cp[i];
2529:                         if (x < 0x30 || x > 0x39)
2530:                           error("malformed character reference in entity value",
2531:                                 value);
2532:                       }
2533:                   }
2534:               }
2535:             else
2536:               {
2537:                 if (!isNameStartCharacter(cp[0], input.xml11))
2538:                   error("malformed reference in entity value", value);
2539:                 for (int i = 1; i < cp.length; i++)
2540:                   {
2541:                     if (!isNameCharacter(cp[i], input.xml11))
2542:                       error("malformed reference in entity value", value);
2543:                   }
2544:               }
2545:             ai = value.indexOf('&', sci);
2546:           }
2547:         doctype.addEntityDecl(name, value, inExternalSubset);
2548:       }
2549:     else
2550:       {
2551:         ExternalIds ids = readExternalIds(false, false);
2552:         // Check for NDATA
2553:         boolean white = tryWhitespace();
2554:         if (!peFlag && tryRead("NDATA"))
2555:           {
2556:             if (!white)
2557:               error("whitespace required before NDATA");
2558:             requireWhitespace();
2559:             ids.notationName = readNmtoken(true);
2560:           }
2561:         doctype.addEntityDecl(name, ids, inExternalSubset);
2562:       }
2563:     // finish
2564:     skipWhitespace();
2565:     require('>');
2566:   }
2567: 
2568:   /**
2569:    * Parse the NotationDecl production.
2570:    */
2571:   private void readNotationDecl(boolean inExternalSubset)
2572:     throws IOException, XMLStreamException
2573:   {
2574:     requireWhitespace();
2575:     String notationName = readNmtoken(true);
2576:     if (notationName.indexOf(':') != -1)
2577:       error("illegal character ':' in notation name", notationName);
2578:     if (validating)
2579:       {
2580:         // VC: Unique Notation Name
2581:         ExternalIds notation = doctype.getNotation(notationName);
2582:         if (notation != null)
2583:           error("duplicate notation name", notationName);
2584:       }
2585:     requireWhitespace();
2586:     ExternalIds ids = readExternalIds(true, false);
2587:     ids.notationName = notationName;
2588:     doctype.addNotationDecl(notationName, ids, inExternalSubset);
2589:     skipWhitespace();
2590:     require('>');
2591:   }
2592: 
2593:   /**
2594:    * Returns a tuple {publicId, systemId}.
2595:    */
2596:   private ExternalIds readExternalIds(boolean inNotation, boolean isSubset)
2597:     throws IOException, XMLStreamException
2598:   {
2599:     int c;
2600:     int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
2601:     ExternalIds ids = new ExternalIds();
2602: 
2603:     if (tryRead("PUBLIC"))
2604:       {
2605:         requireWhitespace();
2606:         ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false);
2607:         if (inNotation)
2608:           {
2609:             skipWhitespace();
2610:             mark(1);
2611:             c = readCh();
2612:             reset();
2613:             if (c == 0x22 || c == 0x27) // " | '
2614:               {
2615:                 String href = readLiteral(flags, false);
2616:                 ids.systemId = absolutize(input.systemId, href);
2617:               }
2618:           }
2619:         else
2620:           {
2621:             requireWhitespace();
2622:             String href = readLiteral(flags, false);
2623:             ids.systemId = absolutize(input.systemId, href);
2624:           }
2625:         // Check valid URI characters
2626:         for (int i = 0; i < ids.publicId.length(); i++)
2627:           {
2628:             char d = ids.publicId.charAt(i);
2629:             if (d >= 'a' && d <= 'z')
2630:               continue;
2631:             if (d >= 'A' && d <= 'Z')
2632:               continue;
2633:             if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1)
2634:               continue;
2635:             error("illegal PUBLIC id character",
2636:                   "U+" + Integer.toHexString(d));
2637:           }
2638:       }
2639:     else if (tryRead("SYSTEM"))
2640:       {
2641:         requireWhitespace();
2642:         String href = readLiteral(flags, false);
2643:         ids.systemId = absolutize(input.systemId, href);
2644:       }
2645:     else if (!isSubset)
2646:       {
2647:         error("missing SYSTEM or PUBLIC keyword");
2648:       }
2649:     if (ids.systemId != null && !inNotation)
2650:       {
2651:         if (ids.systemId.indexOf('#') != -1)
2652:           error("SYSTEM id has a URI fragment", ids.systemId);
2653:       }
2654:     return ids;
2655:   }
2656: 
2657:   /**
2658:    * Parse the start of an element.
2659:    * @return the state of the parser afterwards (EMPTY_ELEMENT or CONTENT)
2660:    */
2661:   private int readStartElement()
2662:     throws IOException, XMLStreamException
2663:   {
2664:     // Read element name
2665:     String elementName = readNmtoken(true);
2666:     attrs.clear();
2667:     // Push namespace context
2668:     if (namespaceAware)
2669:       {
2670:         if (elementName.charAt(0) == ':' ||
2671:             elementName.charAt(elementName.length() - 1) == ':')
2672:           error("not a QName", elementName);
2673:         namespaces.addFirst(new LinkedHashMap());
2674:       }
2675:     // Read element content
2676:     boolean white = tryWhitespace();
2677:     mark(1);
2678:     int c = readCh();
2679:     while (c != 0x2f && c != 0x3e) // '/' | '>'
2680:       {
2681:         // Read attribute
2682:         reset();
2683:         if (!white)
2684:           error("need whitespace between attributes");
2685:         readAttribute(elementName);
2686:         white = tryWhitespace();
2687:         mark(1);
2688:         c = readCh();
2689:       }
2690:     // supply defaulted attributes
2691:     if (doctype != null)
2692:       {
2693:         for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); )
2694:           {
2695:             Map.Entry entry = (Map.Entry) i.next();
2696:             String attName = (String) entry.getKey();
2697:             AttributeDecl decl = (AttributeDecl) entry.getValue();
2698:             if (validating)
2699:               {
2700:                 switch (decl.valueType)
2701:                   {
2702:                   case ATTRIBUTE_DEFAULT_REQUIRED:
2703:                     // VC: Required Attribute
2704:                     if (decl.value == null && !attributeSpecified(attName))
2705:                       error("value for " + attName + " attribute is required");
2706:                     break;
2707:                   case ATTRIBUTE_DEFAULT_FIXED:
2708:                     // VC: Fixed Attribute Default
2709:                     for (Iterator j = attrs.iterator(); j.hasNext(); )
2710:                       {
2711:                         Attribute a = (Attribute) j.next();
2712:                         if (attName.equals(a.name) &&
2713:                             !decl.value.equals(a.value))
2714:                           error("value for " + attName + " attribute must be " +
2715:                                 decl.value);
2716:                       }
2717:                     break;
2718:                   }
2719:               }
2720:             if (namespaceAware && attName.equals("xmlns"))
2721:               {
2722:                 LinkedHashMap ctx =
2723:                   (LinkedHashMap) namespaces.getFirst();
2724:                 if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2725:                   continue; // namespace was specified
2726:               }
2727:             else if (namespaceAware && attName.startsWith("xmlns:"))
2728:               {
2729:                 LinkedHashMap ctx =
2730:                   (LinkedHashMap) namespaces.getFirst();
2731:                 if (ctx.containsKey(attName.substring(6)))
2732:                   continue; // namespace was specified
2733:               }
2734:             else if (attributeSpecified(attName))
2735:               continue;
2736:             if (decl.value == null)
2737:               continue;
2738:             // VC: Standalone Document Declaration
2739:             if (validating && decl.external && xmlStandalone == Boolean.TRUE)
2740:               error("standalone must be 'no' if attributes inherit values " +
2741:                     "from externally declared markup declarations");
2742:             Attribute attr =
2743:               new Attribute(attName, decl.type, false, decl.value);
2744:             if (namespaceAware)
2745:               {
2746:                 if (!addNamespace(attr))
2747:                   attrs.add(attr);
2748:               }
2749:             else
2750:               attrs.add(attr);
2751:           }
2752:       }
2753:     if (baseAware)
2754:       {
2755:         String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base");
2756:         String base = getXMLBase();
2757:         bases.addFirst(absolutize(base, uri));
2758:       }
2759:     if (namespaceAware)
2760:       {
2761:         // check prefix bindings
2762:         int ci = elementName.indexOf(':');
2763:         if (ci != -1)
2764:           {
2765:             String prefix = elementName.substring(0, ci);
2766:             String uri = getNamespaceURI(prefix);
2767:             if (uri == null)
2768:               error("unbound element prefix", prefix);
2769:             else if (input.xml11 && "".equals(uri))
2770:               error("XML 1.1 unbound element prefix", prefix);
2771:           }
2772:         for (Iterator i = attrs.iterator(); i.hasNext(); )
2773:           {
2774:             Attribute attr = (Attribute) i.next();
2775:             if (attr.prefix != null &&
2776:                 !XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix))
2777:               {
2778:                 String uri = getNamespaceURI(attr.prefix);
2779:                 if (uri == null)
2780:                   error("unbound attribute prefix", attr.prefix);
2781:                 else if (input.xml11 && "".equals(uri))
2782:                   error("XML 1.1 unbound attribute prefix", attr.prefix);
2783:               }
2784:           }
2785:       }
2786:     if (validating && doctype != null)
2787:       {
2788:         validateStartElement(elementName);
2789:         currentContentModel = doctype.getElementModel(elementName);
2790:         if (currentContentModel == null)
2791:           error("no element declaration", elementName);
2792:         validationStack.add(new LinkedList());
2793:       }
2794:     // make element name available for read
2795:     buf.setLength(0);
2796:     buf.append(elementName);
2797:     // push element onto stack
2798:     stack.addLast(elementName);
2799:     switch (c)
2800:       {
2801:       case 0x3e: // '>'
2802:         return CONTENT;
2803:       case 0x2f: // '/'
2804:         require('>');
2805:         return EMPTY_ELEMENT;
2806:       }
2807:     return -1; // to satisfy compiler
2808:   }
2809: 
2810:   /**
2811:    * Indicates whether the specified attribute name was specified for the
2812:    * current element.
2813:    */
2814:   private boolean attributeSpecified(String attName)
2815:   {
2816:     for (Iterator j = attrs.iterator(); j.hasNext(); )
2817:       {
2818:         Attribute a = (Attribute) j.next();
2819:         if (attName.equals(a.name))
2820:           return true;
2821:       }
2822:     return false;
2823:   }
2824: 
2825:   /**
2826:    * Parse an attribute.
2827:    */
2828:   private void readAttribute(String elementName)
2829:     throws IOException, XMLStreamException
2830:   {
2831:     // Read attribute name
2832:     String attributeName = readNmtoken(true);
2833:     String type = getAttributeType(elementName, attributeName);
2834:     readEq();
2835:     // Read literal
2836:     final int flags = LIT_ATTRIBUTE |  LIT_ENTITY_REF;
2837:     String value = (type == null || "CDATA".equals(type)) ?
2838:       readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false);
2839:     // add attribute event
2840:     Attribute attr = this.new Attribute(attributeName, type, true, value);
2841:     if (namespaceAware)
2842:       {
2843:         if (attributeName.charAt(0) == ':' ||
2844:             attributeName.charAt(attributeName.length() - 1) == ':')
2845:           error("not a QName", attributeName);
2846:         else if (attributeName.equals("xmlns"))
2847:           {
2848:             LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2849:             if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2850:               error("duplicate default namespace");
2851:           }
2852:         else if (attributeName.startsWith("xmlns:"))
2853:           {
2854:             LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2855:             if (ctx.containsKey(attributeName.substring(6)))
2856:               error("duplicate namespace", attributeName.substring(6));
2857:           }
2858:         else if (attrs.contains(attr))
2859:           error("duplicate attribute", attributeName);
2860:       }
2861:     else if (attrs.contains(attr))
2862:       error("duplicate attribute", attributeName);
2863:     if (validating && doctype != null)
2864:       {
2865:         // VC: Attribute Value Type
2866:         AttributeDecl decl =
2867:           doctype.getAttributeDecl(elementName, attributeName);
2868:         if (decl == null)
2869:           error("attribute must be declared", attributeName);
2870:         if ("ENUMERATION".equals(decl.type))
2871:           {
2872:             // VC: Enumeration
2873:             if (!decl.values.contains(value))
2874:               error("value does not match enumeration " + decl.enumeration,
2875:                     value);
2876:           }
2877:         else if ("ID".equals(decl.type))
2878:           {
2879:             // VC: ID
2880:             if (!isNmtoken(value, true))
2881:               error("ID values must match the Name production");
2882:             if (ids.contains(value))
2883:               error("Duplicate ID", value);
2884:             ids.add(value);
2885:           }
2886:         else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type))
2887:           {
2888:             StringTokenizer st = new StringTokenizer(value);
2889:             while (st.hasMoreTokens())
2890:               {
2891:                 String token = st.nextToken();
2892:                 // VC: IDREF
2893:                 if (!isNmtoken(token, true))
2894:                   error("IDREF values must match the Name production");
2895:                 idrefs.add(token);
2896:               }
2897:           }
2898:         else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type))
2899:           {
2900:             StringTokenizer st = new StringTokenizer(value);
2901:             while (st.hasMoreTokens())
2902:               {
2903:                 String token = st.nextToken();
2904:                 // VC: Name Token
2905:                 if (!isNmtoken(token, false))
2906:                   error("NMTOKEN values must match the Nmtoken production");
2907:               }
2908:           }
2909:         else if ("ENTITY".equals(decl.type))
2910:           {
2911:             // VC: Entity Name
2912:             if (!isNmtoken(value, true))
2913:               error("ENTITY values must match the Name production");
2914:             Object entity = doctype.getEntity(value);
2915:             if (entity == null || !(entity instanceof ExternalIds) ||
2916:                 ((ExternalIds) entity).notationName == null)
2917:               error("ENTITY values must match the name of an unparsed " +
2918:                     "entity declared in the DTD");
2919:           }
2920:         else if ("NOTATION".equals(decl.type))
2921:           {
2922:             if (!decl.values.contains(value))
2923:               error("NOTATION values must match a declared notation name",
2924:                     value);
2925:             // VC: Notation Attributes
2926:             ExternalIds notation = doctype.getNotation(value);
2927:             if (notation == null)
2928:               error("NOTATION values must match the name of a notation " +
2929:                     "declared in the DTD", value);
2930:           }
2931:       }
2932:     if (namespaceAware)
2933:       {
2934:         if (!addNamespace(attr))
2935:           attrs.add(attr);
2936:       }
2937:     else
2938:       attrs.add(attr);
2939:   }
2940: 
2941:   /**
2942:    * Determines whether the specified attribute is a namespace declaration,
2943:    * and adds it to the current namespace context if so. Returns false if
2944:    * the attribute is an ordinary attribute.
2945:    */
2946:   private boolean addNamespace(Attribute attr)
2947:     throws XMLStreamException
2948:   {
2949:     if ("xmlns".equals(attr.name))
2950:       {
2951:         LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2952:         if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null)
2953:           error("Duplicate default namespace declaration");
2954:         if (XMLConstants.XML_NS_URI.equals(attr.value))
2955:           error("can't bind XML namespace");
2956:         ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value);
2957:         return true;
2958:       }
2959:     else if ("xmlns".equals(attr.prefix))
2960:       {
2961:         LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2962:         if (ctx.get(attr.localName) != null)
2963:           error("Duplicate namespace declaration for prefix",
2964:                 attr.localName);
2965:         if (XMLConstants.XML_NS_PREFIX.equals(attr.localName))
2966:           {
2967:             if (!XMLConstants.XML_NS_URI.equals(attr.value))
2968:               error("can't redeclare xml prefix");
2969:             else
2970:               return false; // treat as attribute
2971:           }
2972:         if (XMLConstants.XML_NS_URI.equals(attr.value))
2973:           error("can't bind non-xml prefix to XML namespace");
2974:         if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName))
2975:           error("can't redeclare xmlns prefix");
2976:         if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value))
2977:           error("can't bind non-xmlns prefix to XML Namespace namespace");
2978:         if ("".equals(attr.value) && !input.xml11)
2979:           error("illegal use of 1.1-style prefix unbinding in 1.0 document");
2980:         ctx.put(attr.localName, attr.value);
2981:         return true;
2982:       }
2983:     return false;
2984:   }
2985: 
2986:   /**
2987:    * Parse a closing tag.
2988:    */
2989:   private void readEndElement()
2990:     throws IOException, XMLStreamException
2991:   {
2992:     // pop element off stack
2993:     String expected = (String) stack.removeLast();
2994:     require(expected);
2995:     skipWhitespace();
2996:     require('>');
2997:     // Make element name available
2998:     buf.setLength(0);
2999:     buf.append(expected);
3000:     if (validating && doctype != null)
3001:       endElementValidationHook();
3002:   }
3003: 
3004:   /**
3005:    * Validate the end of an element.
3006:    * Called on an end-element or empty element if validating.
3007:    */
3008:   private void endElementValidationHook()
3009:     throws XMLStreamException
3010:   {
3011:     validateEndElement();
3012:     validationStack.removeLast();
3013:     if (stack.isEmpty())
3014:       currentContentModel = null;
3015:     else
3016:       {
3017:         String parent = (String) stack.getLast();
3018:         currentContentModel = doctype.getElementModel(parent);
3019:       }
3020:   }
3021: 
3022:   /**
3023:    * Parse a comment.
3024:    */
3025:   private void readComment(boolean inDTD)
3026:     throws IOException, XMLStreamException
3027:   {
3028:     boolean saved = expandPE;
3029:     expandPE = false;
3030:     buf.setLength(0);
3031:     readUntil(TEST_END_COMMENT);
3032:     require('>');
3033:     expandPE = saved;
3034:     if (inDTD)
3035:       doctype.addComment(buf.toString());
3036:   }
3037: 
3038:   /**
3039:    * Parse a processing instruction.
3040:    */
3041:   private void readPI(boolean inDTD)
3042:     throws IOException, XMLStreamException
3043:   {
3044:     boolean saved = expandPE;
3045:     expandPE = false;
3046:     piTarget = readNmtoken(true);
3047:     if (piTarget.indexOf(':') != -1)
3048:       error("illegal character in PI target", new Character(':'));
3049:     if ("xml".equalsIgnoreCase(piTarget))
3050:       error("illegal PI target", piTarget);
3051:     if (tryRead(TEST_END_PI))
3052:       piData = null;
3053:     else
3054:       {
3055:         if (!tryWhitespace())
3056:           error("whitespace required between PI target and data");
3057:         buf.setLength(0);
3058:         readUntil(TEST_END_PI);
3059:         piData = buf.toString();
3060:       }
3061:     expandPE = saved;
3062:     if (inDTD)
3063:       doctype.addPI(piTarget, piData);
3064:   }
3065: 
3066:   /**
3067:    * Parse an entity reference.
3068:    */
3069:   private void readReference()
3070:     throws IOException, XMLStreamException
3071:   {
3072:     buf.setLength(0);
3073:     String entityName = readNmtoken(true);
3074:     require(';');
3075:     buf.setLength(0);
3076:     buf.append(entityName);
3077:   }
3078: 
3079:   /**
3080:    * Read an CDATA section.
3081:    */
3082:   private void readCDSect()
3083:     throws IOException, XMLStreamException
3084:   {
3085:     buf.setLength(0);
3086:     readUntil(TEST_END_CDATA);
3087:   }
3088: 
3089:   /**
3090:    * Read character data.
3091:    * @return the type of text read (CHARACTERS or SPACE)
3092:    */
3093:   private int readCharData(String prefix)
3094:     throws IOException, XMLStreamException
3095:   {
3096:     boolean white = true;
3097:     buf.setLength(0);
3098:     if (prefix != null)
3099:       buf.append(prefix);
3100:     boolean done = false;
3101:     boolean entities = false;
3102:     while (!done)
3103:       {
3104:         // Block read
3105:         mark(tmpBuf.length);
3106:         int len = read(tmpBuf, 0, tmpBuf.length);
3107:         if (len == -1)
3108:           {
3109:             if (inputStack.size() > 1)
3110:               {
3111:                 popInput();
3112:                 // report end-entity
3113:                 done = true;
3114:               }
3115:             else
3116:               throw new EOFException();
3117:           }
3118:         for (int i = 0; i < len && !done; i++)
3119:           {
3120:             int c = tmpBuf[i];
3121:             switch (c)
3122:               {
3123:               case 0x20:
3124:               case 0x09:
3125:               case 0x0a:
3126:               case 0x0d:
3127:                 buf.append(Character.toChars(c));
3128:                 break; // whitespace
3129:               case 0x26: // '&'
3130:                 reset();
3131:                 read(tmpBuf, 0, i);
3132:                 // character reference?
3133:                 mark(3);
3134:                 c = readCh(); // &
3135:                 c = readCh();
3136:                 if (c == 0x23) // '#'
3137:                   {
3138:                     mark(1);
3139:                     c = readCh();
3140:                     boolean hex = (c == 0x78); // 'x'
3141:                     if (!hex)
3142:                       reset();
3143:                     char[] ch = readCharacterRef(hex ? 16 : 10);
3144:                     buf.append(ch, 0, ch.length);
3145:                     for (int j = 0; j < ch.length; j++)
3146:                       {
3147:                         switch (ch[j])
3148:                           {
3149:                           case 0x20:
3150:                           case 0x09:
3151:                           case 0x0a:
3152:                           case 0x0d:
3153:                             break; // whitespace
3154:                           default:
3155:                             white = false;
3156:                           }
3157:                       }
3158:                   }
3159:                 else
3160:                   {
3161:                     // entity reference
3162:                     reset();
3163:                     c = readCh(); // &
3164:                     String entityName = readNmtoken(true);
3165:                     require(';');
3166:                     String text =
3167:                       (String) PREDEFINED_ENTITIES.get(entityName);
3168:                     if (text != null)
3169:                       buf.append(text);
3170:                     else
3171:                       {
3172:                         pushInput("", "&" + entityName + ";", false, false);
3173:                         done = true;
3174:                         break;
3175:                       }
3176:                   }
3177:                 // continue processing
3178:                 i = -1;
3179:                 mark(tmpBuf.length);
3180:                 len = read(tmpBuf, 0, tmpBuf.length);
3181:                 if (len == -1)
3182:                   {
3183:                     if (inputStack.size() > 1)
3184:                       {
3185:                         popInput();
3186:                         done = true;
3187:                       }
3188:                     else
3189:                       throw new EOFException();
3190:                   }
3191:                 entities = true;
3192:                 break; // end of text sequence
3193:               case 0x3e: // '>'
3194:                 int l = buf.length();
3195:                 if (l > 1 &&
3196:                     buf.charAt(l - 1) == ']' &&
3197:                     buf.charAt(l - 2) == ']')
3198:                   error("Character data may not contain unescaped ']]>'");
3199:                 buf.append(Character.toChars(c));
3200:                 break;
3201:               case 0x3c: // '<'
3202:                 reset();
3203:                 // read i characters
3204:                 int count = 0, remaining = i;
3205:                 do
3206:                   {
3207:                     int r = read(tmpBuf, 0, remaining);
3208:                     count += r;
3209:                     remaining -= r;
3210:                   }
3211:                 while (count < i);
3212:                 i = len;
3213:                 if (coalescing && tryRead(TEST_CDATA))
3214:                   readUntil(TEST_END_CDATA); // read CDATA section into buf
3215:                 else
3216:                   done = true; // end of text sequence
3217:                 break;
3218:               default:
3219:                 if (input.xml11)
3220:                   {
3221:                     if (!isXML11Char(c) || isXML11RestrictedChar(c))
3222:                       error("illegal XML 1.1 character",
3223:                             "U+" + Integer.toHexString(c));
3224:                   }
3225:                 else if (!isChar(c))
3226:                   error("illegal XML character",
3227:                         "U+" + Integer.toHexString(c));
3228:                 white = false;
3229:                 buf.append(Character.toChars(c));
3230:               }
3231:           }
3232:         // if text buffer >= 2MB, return it as a chunk
3233:         // to avoid excessive memory use
3234:         if (buf.length() >= 2097152)
3235:           done = true;
3236:       }
3237:     if (entities)
3238:       normalizeCRLF(buf);
3239:     return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS;
3240:   }
3241: 
3242:   /**
3243:    * Expands the specified entity.
3244:    */
3245:   private void expandEntity(String name, boolean inAttr, boolean normalize)
3246:     throws IOException, XMLStreamException
3247:   {
3248:     if (doctype != null)
3249:       {
3250:         Object value = doctype.getEntity(name);
3251:         if (value != null)
3252:           {
3253:             if (xmlStandalone == Boolean.TRUE)
3254:               {
3255:                 // VC: Standalone Document Declaration
3256:                 if (doctype.isEntityExternal(name))
3257:                   error("reference to external entity in standalone document");
3258:                 else if (value instanceof ExternalIds)
3259:                   {
3260:                     ExternalIds ids = (ExternalIds) value;
3261:                     if (ids.notationName != null &&
3262:                         doctype.isNotationExternal(ids.notationName))
3263:                       error("reference to external notation in " +
3264:                             "standalone document");
3265:                   }
3266:               }
3267:             if (value instanceof String)
3268:               {
3269:                 String text = (String) value;
3270:                 if (inAttr && text.indexOf('<') != -1)
3271:                   error("< in attribute value");
3272:                 pushInput(name, text, !inAttr, normalize);
3273:               }
3274:             else if (inAttr)
3275:               error("reference to external entity in attribute value", name);
3276:             else
3277:               pushInput(name, (ExternalIds) value, !inAttr, normalize);
3278:             return;
3279:           }
3280:       }
3281:     error("reference to undeclared entity", name);
3282:   }
3283: 
3284:   /**
3285:    * Indicates whether the specified entity is unparsed.
3286:    */
3287:   private boolean isUnparsedEntity(String name)
3288:   {
3289:     if (doctype != null)
3290:       {
3291:         Object value = doctype.getEntity(name);
3292:         if (value != null && value instanceof ExternalIds)
3293:           return ((ExternalIds) value).notationName != null;
3294:       }
3295:     return false;
3296:   }
3297: 
3298:   /**
3299:    * Read an equals sign.
3300:    */
3301:   private void readEq()
3302:     throws IOException, XMLStreamException
3303:   {
3304:     skipWhitespace();
3305:     require('=');
3306:     skipWhitespace();
3307:   }
3308: 
3309:   /**
3310:    * Character read for reading literals.
3311:    * @param recognizePEs whether to recognize parameter-entity references
3312:    */
3313:   private int literalReadCh(boolean recognizePEs)
3314:     throws IOException, XMLStreamException
3315:   {
3316:     int c = recognizePEs ? readCh() : read();
3317:     while (c == -1)
3318:       {
3319:         if (inputStack.size() > 1)
3320:           {
3321:             inputStack.removeLast();
3322:             input = (Input) inputStack.getLast();
3323:             // Don't issue end-entity
3324:             c = recognizePEs ? readCh() : read();
3325:           }
3326:         else
3327:           throw new EOFException();
3328:       }
3329:     return c;
3330:   }
3331: 
3332:   /**
3333:    * Read a string literal.
3334:    */
3335:   private String readLiteral(int flags, boolean recognizePEs)
3336:     throws IOException, XMLStreamException
3337:   {
3338:     boolean saved = expandPE;
3339:     int delim = readCh();
3340:     if (delim != 0x27 && delim != 0x22)
3341:       error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim));
3342:     literalBuf.setLength(0);
3343:     if ((flags & LIT_DISABLE_PE) != 0)
3344:       expandPE = false;
3345:     boolean entities = false;
3346:     int inputStackSize = inputStack.size();
3347:     do
3348:       {
3349:         int c = literalReadCh(recognizePEs);
3350:         if (c == delim && inputStackSize == inputStack.size())
3351:           break;
3352:         switch (c)
3353:           {
3354:           case 0x0a:
3355:           case 0x0d:
3356:             if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
3357:               c = 0x20; // normalize to space
3358:             break;
3359:           case 0x09:
3360:             if ((flags & LIT_ATTRIBUTE) != 0)
3361:               c = 0x20; // normalize to space
3362:             break;
3363:           case 0x26: // '&'
3364:             mark(2);
3365:             c = readCh();
3366:             if (c == 0x23) // '#'
3367:               {
3368:                 if ((flags & LIT_DISABLE_CREF) != 0)
3369:                   {
3370:                     reset();
3371:                     c = 0x26; // '&'
3372:                   }
3373:                 else
3374:                   {
3375:                     mark(1);
3376:                     c = readCh();
3377:                     boolean hex = (c == 0x78); // 'x'
3378:                     if (!hex)
3379:                       reset();
3380:                     char[] ref = readCharacterRef(hex ? 16 : 10);
3381:                     for (int i = 0; i < ref.length; i++)
3382:                       literalBuf.append(ref[i]);
3383:                     entities = true;
3384:                     continue;
3385:                   }
3386:               }
3387:             else
3388:               {
3389:                 if ((flags & LIT_DISABLE_EREF) != 0)
3390:                   {
3391:                     reset();
3392:                     c = 0x26; // '&'
3393:                   }
3394:                 else
3395:                   {
3396:                     reset();
3397:                     String entityName = readNmtoken(true);
3398:                     require(';');
3399:                     String text =
3400:                       (String) PREDEFINED_ENTITIES.get(entityName);
3401:                     if (text != null)
3402:                       literalBuf.append(text);
3403:                     else
3404:                       expandEntity(entityName,
3405:                                    (flags & LIT_ATTRIBUTE) != 0,
3406:                                    true);
3407:                     entities = true;
3408:                     continue;
3409:                   }
3410:               }
3411:             break;
3412:           case 0x3c: // '<'
3413:             if ((flags & LIT_ATTRIBUTE) != 0)
3414:               error("attribute values may not contain '<'");
3415:             break;
3416:           case -1:
3417:             if (inputStack.size() > 1)
3418:               {
3419:                 popInput();
3420:                 continue;
3421:               }
3422:             throw new EOFException();
3423:           default:
3424:             if ((c < 0x0020 || c > 0xfffd) ||
3425:                 (c >= 0xd800 && c < 0xdc00) ||
3426:                 (input.xml11 && (c >= 0x007f) &&
3427:                  (c <= 0x009f) && (c != 0x0085)))
3428:               error("illegal character", "U+" + Integer.toHexString(c));
3429:           }
3430:         literalBuf.append(Character.toChars(c));
3431:       }
3432:     while (true);
3433:     expandPE = saved;
3434:     if (entities)
3435:       normalizeCRLF(literalBuf);
3436:     if ((flags & LIT_NORMALIZE) > 0)
3437:       literalBuf = normalize(literalBuf);
3438:     return literalBuf.toString();
3439:   }
3440: 
3441:   /**
3442:    * Performs attribute-value normalization of the text buffer.
3443:    * This discards leading and trailing whitespace, and replaces sequences
3444:    * of whitespace with a single space.
3445:    */
3446:   private StringBuffer normalize(StringBuffer buf)
3447:   {
3448:     StringBuffer acc = new StringBuffer();
3449:     int len = buf.length();
3450:     int avState = 0;
3451:     for (int i = 0; i < len; i++)
3452:       {
3453:         char c = buf.charAt(i);
3454:         if (c == ' ')
3455:           avState = (avState == 0) ? 0 : 1;
3456:         else
3457:           {
3458:             if (avState == 1)
3459:               acc.append(' ');
3460:             acc.append(c);
3461:             avState = 2;
3462:           }
3463:       }
3464:     return acc;
3465:   }
3466: 
3467:   /**
3468:    * Replace any CR/LF pairs in the buffer with LF.
3469:    * This may be necessary if combinations of CR or LF were declared as
3470:    * (character) entity references in the input.
3471:    */
3472:   private void normalizeCRLF(StringBuffer buf)
3473:   {
3474:     int len = buf.length() - 1;
3475:     for (int i = 0; i < len; i++)
3476:       {
3477:         char c = buf.charAt(i);
3478:         if (c == '\r' && buf.charAt(i + 1) == '\n')
3479:           {
3480:             buf.deleteCharAt(i--);
3481:             len--;
3482:           }
3483:       }
3484:   }
3485: 
3486:   /**
3487:    * Parse and expand a parameter entity reference.
3488:    */
3489:   private void expandPEReference()
3490:     throws IOException, XMLStreamException
3491:   {
3492:     String name = readNmtoken(true, new StringBuffer());
3493:     require(';');
3494:     mark(1); // ensure we don't reset to before the semicolon
3495:     if (doctype != null)
3496:       {
3497:         String entityName = "%" + name;
3498:         Object entity = doctype.getEntity(entityName);
3499:         if (entity != null)
3500:           {
3501:             if (xmlStandalone == Boolean.TRUE)
3502:               {
3503:                 if (doctype.isEntityExternal(entityName))
3504:                   error("reference to external parameter entity in " +
3505:                         "standalone document");
3506:               }
3507:             if (entity instanceof String)
3508:               {
3509:                 pushInput(name, (String) entity, false, input.normalize);
3510:                 //pushInput(name, " " + (String) entity + " ");
3511:               }
3512:             else
3513:               {
3514:                 //pushInput("", " ");
3515:                 pushInput(name, (ExternalIds) entity, false, input.normalize);
3516:                 //pushInput("", " ");
3517:               }
3518:           }
3519:         else
3520:           error("reference to undeclared parameter entity", name);
3521:       }
3522:     else
3523:       error("reference to parameter entity without doctype", name);
3524:   }
3525: 
3526:   /**
3527:    * Parse the digits in a character reference.
3528:    * @param base the base of the digits (10 or 16)
3529:    */
3530:   private char[] readCharacterRef(int base)
3531:     throws IOException, XMLStreamException
3532:   {
3533:     CPStringBuilder b = new CPStringBuilder();
3534:     for (int c = readCh(); c != 0x3b && c != -1; c = readCh())
3535:       b.append(Character.toChars(c));
3536:     try
3537:       {
3538:         int ord = Integer.parseInt(b.toString(), base);
3539:         if (input.xml11)
3540:           {
3541:             if (!isXML11Char(ord))
3542:               error("illegal XML 1.1 character reference " +
3543:                     "U+" + Integer.toHexString(ord));
3544:           }
3545:         else
3546:           {
3547:             if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d))
3548:                 || (ord >= 0xd800 && ord <= 0xdfff)
3549:                 || ord == 0xfffe || ord == 0xffff
3550:                 || ord > 0x0010ffff)
3551:               error("illegal XML character reference " +
3552:                     "U+" + Integer.toHexString(ord));
3553:           }
3554:         return Character.toChars(ord);
3555:       }
3556:     catch (NumberFormatException e)
3557:       {
3558:         error("illegal characters in character reference", b.toString());
3559:         return null;
3560:       }
3561:   }
3562: 
3563:   /**
3564:    * Parses an NMTOKEN or Name production.
3565:    * @param isName if a Name, otherwise an NMTOKEN
3566:    */
3567:   private String readNmtoken(boolean isName)
3568:     throws IOException, XMLStreamException
3569:   {
3570:     return readNmtoken(isName, nmtokenBuf);
3571:   }
3572: 
3573:   /**
3574:    * Parses an NMTOKEN or Name production using the specified buffer.
3575:    * @param isName if a Name, otherwise an NMTOKEN
3576:    * @param buf the character buffer to use
3577:    */
3578:   private String readNmtoken(boolean isName, StringBuffer buf)
3579:     throws IOException, XMLStreamException
3580:   {
3581:     buf.setLength(0);
3582:     int c = readCh();
3583:     if (isName)
3584:       {
3585:         if (!isNameStartCharacter(c, input.xml11))
3586:           error("not a name start character",
3587:                 "U+" + Integer.toHexString(c));
3588:       }
3589:     else
3590:       {
3591:         if (!isNameCharacter(c, input.xml11))
3592:           error("not a name character",
3593:                 "U+" + Integer.toHexString(c));
3594:       }
3595:     buf.append(Character.toChars(c));
3596:     do
3597:       {
3598:         mark(1);
3599:         c = readCh();
3600:         switch (c)
3601:           {
3602:           case 0x25: // '%'
3603:           case 0x3c: // '<'
3604:           case 0x3e: // '>'
3605:           case 0x26: // '&'
3606:           case 0x2c: // ','
3607:           case 0x7c: // '|'
3608:           case 0x2a: // '*'
3609:           case 0x2b: // '+'
3610:           case 0x3f: // '?'
3611:           case 0x29: // ')'
3612:           case 0x3d: // '='
3613:           case 0x27: // '\''
3614:           case 0x22: // '"'
3615:           case 0x5b: // '['
3616:           case 0x20: // ' '
3617:           case 0x09: // '\t'
3618:           case 0x0a: // '\n'
3619:           case 0x0d: // '\r'
3620:           case 0x3b: // ';'
3621:           case 0x2f: // '/'
3622:           case -1:
3623:             reset();
3624:             return intern(buf.toString());
3625:           default:
3626:             if (!isNameCharacter(c, input.xml11))
3627:               error("not a name character",
3628:                     "U+" + Integer.toHexString(c));
3629:             else
3630:               buf.append(Character.toChars(c));
3631:           }
3632:       }
3633:     while (true);
3634:   }
3635: 
3636:   /**
3637:    * Indicates whether the specified Unicode character is an XML 1.1 Char.
3638:    */
3639:   public static boolean isXML11Char(int c)
3640:   {
3641:     return ((c >= 0x0001 && c <= 0xD7FF) ||
3642:             (c >= 0xE000 && c < 0xFFFE) ||
3643:             (c >= 0x10000 && c <= 0x10FFFF));
3644:   }
3645: 
3646:   /**
3647:    * Indicates whether the specified Unicode character is an XML 1.1
3648:    * RestrictedChar.
3649:    */
3650:   public static boolean isXML11RestrictedChar(int c)
3651:   {
3652:     return ((c >= 0x0001 && c <= 0x0008) ||
3653:             (c >= 0x000B && c <= 0x000C) ||
3654:             (c >= 0x000E && c <= 0x001F) ||
3655:             (c >= 0x007F && c <= 0x0084) ||
3656:             (c >= 0x0086 && c <= 0x009F));
3657:   }
3658: 
3659:   /**
3660:    * Indicates whether the specified text matches the Name or Nmtoken
3661:    * production.
3662:    */
3663:   private boolean isNmtoken(String text, boolean isName)
3664:   {
3665:     try
3666:       {
3667:         int[] cp = UnicodeReader.toCodePointArray(text);
3668:         if (cp.length == 0)
3669:           return false;
3670:         if (isName)
3671:           {
3672:             if (!isNameStartCharacter(cp[0], input.xml11))
3673:               return false;
3674:           }
3675:         else
3676:           {
3677:             if (!isNameCharacter(cp[0], input.xml11))
3678:               return false;
3679:           }
3680:         for (int i = 1; i < cp.length; i++)
3681:           {
3682:             if (!isNameCharacter(cp[i], input.xml11))
3683:               return false;
3684:           }
3685:         return true;
3686:       }
3687:     catch (IOException e)
3688:       {
3689:         return false;
3690:       }
3691:   }
3692: 
3693:   /**
3694:    * Indicates whether the specified Unicode character is a Name start
3695:    * character.
3696:    */
3697:   public static boolean isNameStartCharacter(int c, boolean xml11)
3698:   {
3699:     if (xml11)
3700:       return ((c >= 0x0041 && c <= 0x005a) ||
3701:               (c >= 0x0061 && c <= 0x007a) ||
3702:               c == 0x3a |
3703:               c == 0x5f |
3704:               (c >= 0xC0 && c <= 0xD6) ||
3705:               (c >= 0xD8 && c <= 0xF6) ||
3706:               (c >= 0xF8 && c <= 0x2FF) ||
3707:               (c >= 0x370 && c <= 0x37D) ||
3708:               (c >= 0x37F && c <= 0x1FFF) ||
3709:               (c >= 0x200C && c <= 0x200D) ||
3710:               (c >= 0x2070 && c <= 0x218F) ||
3711:               (c >= 0x2C00 && c <= 0x2FEF) ||
3712:               (c >= 0x3001 && c <= 0xD7FF) ||
3713:               (c >= 0xF900 && c <= 0xFDCF) ||
3714:               (c >= 0xFDF0 && c <= 0xFFFD) ||
3715:               (c >= 0x10000 && c <= 0xEFFFF));
3716:     else
3717:       return (c == 0x5f || c == 0x3a || isLetter(c));
3718:   }
3719: 
3720:   /**
3721:    * Indicates whether the specified Unicode character is a Name non-initial
3722:    * character.
3723:    */
3724:   public static boolean isNameCharacter(int c, boolean xml11)
3725:   {
3726:     if (xml11)
3727:       return ((c >= 0x0041 && c <= 0x005a) ||
3728:               (c >= 0x0061 && c <= 0x007a) ||
3729:               (c >= 0x0030 && c <= 0x0039) ||
3730:               c == 0x3a |
3731:               c == 0x5f |
3732:               c == 0x2d |
3733:               c == 0x2e |
3734:               c == 0xB7 |
3735:               (c >= 0xC0 && c <= 0xD6) ||
3736:               (c >= 0xD8 && c <= 0xF6) ||
3737:               (c >= 0xF8 && c <= 0x2FF) ||
3738:               (c >= 0x300 && c <= 0x37D) ||
3739:               (c >= 0x37F && c <= 0x1FFF) ||
3740:               (c >= 0x200C && c <= 0x200D) ||
3741:               (c >= 0x203F && c <= 0x2040) ||
3742:               (c >= 0x2070 && c <= 0x218F) ||
3743:               (c >= 0x2C00 && c <= 0x2FEF) ||
3744:               (c >= 0x3001 && c <= 0xD7FF) ||
3745:               (c >= 0xF900 && c <= 0xFDCF) ||
3746:               (c >= 0xFDF0 && c <= 0xFFFD) ||
3747:               (c >= 0x10000 && c <= 0xEFFFF));
3748:     else
3749:       return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a ||
3750:               isLetter(c) || isDigit(c) ||
3751:               isCombiningChar(c) || isExtender(c));
3752:   }
3753: 
3754:   /**
3755:    * Indicates whether the specified Unicode character matches the Letter
3756:    * production.
3757:    */
3758:   public static boolean isLetter(int c)
3759:   {
3760:     if ((c >= 0x0041 && c <= 0x005A) ||
3761:         (c >= 0x0061 && c <= 0x007A) ||
3762:         (c >= 0x00C0 && c <= 0x00D6) ||
3763:         (c >= 0x00D8 && c <= 0x00F6) ||
3764:         (c >= 0x00F8 && c <= 0x00FF) ||
3765:         (c >= 0x0100 && c <= 0x0131) ||
3766:         (c >= 0x0134 && c <= 0x013E) ||
3767:         (c >= 0x0141 && c <= 0x0148) ||
3768:         (c >= 0x014A && c <= 0x017E) ||
3769:         (c >= 0x0180 && c <= 0x01C3) ||
3770:         (c >= 0x01CD && c <= 0x01F0) ||
3771:         (c >= 0x01F4 && c <= 0x01F5) ||
3772:         (c >= 0x01FA && c <= 0x0217) ||
3773:         (c >= 0x0250 && c <= 0x02A8) ||
3774:         (c >= 0x02BB && c <= 0x02C1) ||
3775:         c == 0x0386 ||
3776:         (c >= 0x0388 && c <= 0x038A) ||
3777:         c == 0x038C ||
3778:         (c >= 0x038E && c <= 0x03A1) ||
3779:         (c >= 0x03A3 && c <= 0x03CE) ||
3780:         (c >= 0x03D0 && c <= 0x03D6) ||
3781:         c == 0x03DA ||
3782:       c == 0x03DC ||
3783:         c == 0x03DE ||
3784:         c == 0x03E0 ||
3785:         (c >= 0x03E2 && c <= 0x03F3) ||
3786:         (c >= 0x0401 && c <= 0x040C) ||
3787:         (c >= 0x040E && c <= 0x044F) ||
3788:         (c >= 0x0451 && c <= 0x045C) ||
3789:         (c >= 0x045E && c <= 0x0481) ||
3790:         (c >= 0x0490 && c <= 0x04C4) ||
3791:         (c >= 0x04C7 && c <= 0x04C8) ||
3792:         (c >= 0x04CB && c <= 0x04CC) ||
3793:         (c >= 0x04D0 && c <= 0x04EB) ||
3794:         (c >= 0x04EE && c <= 0x04F5) ||
3795:         (c >= 0x04F8 && c <= 0x04F9) ||
3796:         (c >= 0x0531 && c <= 0x0556) ||
3797:         c == 0x0559 ||
3798:         (c >= 0x0561 && c <= 0x0586) ||
3799:         (c >= 0x05D0 && c <= 0x05EA) ||
3800:         (c >= 0x05F0 && c <= 0x05F2) ||
3801:         (c >= 0x0621 && c <= 0x063A) ||
3802:         (c >= 0x0641 && c <= 0x064A) ||
3803:         (c >= 0x0671 && c <= 0x06B7) ||
3804:         (c >= 0x06BA && c <= 0x06BE) ||
3805:         (c >= 0x06C0 && c <= 0x06CE) ||
3806:         (c >= 0x06D0 && c <= 0x06D3) ||
3807:         c == 0x06D5 ||
3808:         (c >= 0x06E5 && c <= 0x06E6) ||
3809:         (c >= 0x0905 && c <= 0x0939) ||
3810:         c == 0x093D ||
3811:         (c >= 0x0958 && c <= 0x0961) ||
3812:         (c >= 0x0985 && c <= 0x098C) ||
3813:         (c >= 0x098F && c <= 0x0990) ||
3814:         (c >= 0x0993 && c <= 0x09A8) ||
3815:         (c >= 0x09AA && c <= 0x09B0) ||
3816:         c == 0x09B2 ||
3817:         (c >= 0x09B6 && c <= 0x09B9) ||
3818:         (c >= 0x09DC && c <= 0x09DD) ||
3819:         (c >= 0x09DF && c <= 0x09E1) ||
3820:         (c >= 0x09F0 && c <= 0x09F1) ||
3821:         (c >= 0x0A05 && c <= 0x0A0A) ||
3822:         (c >= 0x0A0F && c <= 0x0A10) ||
3823:         (c >= 0x0A13 && c <= 0x0A28) ||
3824:         (c >= 0x0A2A && c <= 0x0A30) ||
3825:         (c >= 0x0A32 && c <= 0x0A33) ||
3826:         (c >= 0x0A35 && c <= 0x0A36) ||
3827:         (c >= 0x0A38 && c <= 0x0A39) ||
3828:         (c >= 0x0A59 && c <= 0x0A5C) ||
3829:         c == 0x0A5E ||
3830:         (c >= 0x0A72 && c <= 0x0A74) ||
3831:         (c >= 0x0A85 && c <= 0x0A8B) ||
3832:         c == 0x0A8D ||
3833:         (c >= 0x0A8F && c <= 0x0A91) ||
3834:         (c >= 0x0A93 && c <= 0x0AA8) ||
3835:         (c >= 0x0AAA && c <= 0x0AB0) ||
3836:         (c >= 0x0AB2 && c <= 0x0AB3) ||
3837:         (c >= 0x0AB5 && c <= 0x0AB9) ||
3838:         c == 0x0ABD ||
3839:         c == 0x0AE0 ||
3840:         (c >= 0x0B05 && c <= 0x0B0C) ||
3841:         (c >= 0x0B0F && c <= 0x0B10) ||
3842:         (c >= 0x0B13 && c <= 0x0B28) ||
3843:         (c >= 0x0B2A && c <= 0x0B30) ||
3844:         (c >= 0x0B32 && c <= 0x0B33) ||
3845:         (c >= 0x0B36 && c <= 0x0B39) ||
3846:         c == 0x0B3D ||
3847:         (c >= 0x0B5C && c <= 0x0B5D) ||
3848:         (c >= 0x0B5F && c <= 0x0B61) ||
3849:         (c >= 0x0B85 && c <= 0x0B8A) ||
3850:         (c >= 0x0B8E && c <= 0x0B90) ||
3851:         (c >= 0x0B92 && c <= 0x0B95) ||
3852:         (c >= 0x0B99 && c <= 0x0B9A) ||
3853:         c == 0x0B9C ||
3854:         (c >= 0x0B9E && c <= 0x0B9F) ||
3855:         (c >= 0x0BA3 && c <= 0x0BA4) ||
3856:         (c >= 0x0BA8 && c <= 0x0BAA) ||
3857:         (c >= 0x0BAE && c <= 0x0BB5) ||
3858:         (c >= 0x0BB7 && c <= 0x0BB9) ||
3859:         (c >= 0x0C05 && c <= 0x0C0C) ||
3860:         (c >= 0x0C0E && c <= 0x0C10) ||
3861:         (c >= 0x0C12 && c <= 0x0C28) ||
3862:         (c >= 0x0C2A && c <= 0x0C33) ||
3863:         (c >= 0x0C35 && c <= 0x0C39) ||
3864:         (c >= 0x0C60 && c <= 0x0C61) ||
3865:         (c >= 0x0C85 && c <= 0x0C8C) ||
3866:         (c >= 0x0C8E && c <= 0x0C90) ||
3867:         (c >= 0x0C92 && c <= 0x0CA8) ||
3868:         (c >= 0x0CAA && c <= 0x0CB3) ||
3869:         (c >= 0x0CB5 && c <= 0x0CB9) ||
3870:         c == 0x0CDE ||
3871:         (c >= 0x0CE0 && c <= 0x0CE1) ||
3872:         (c >= 0x0D05 && c <= 0x0D0C) ||
3873:         (c >= 0x0D0E && c <= 0x0D10) ||
3874:         (c >= 0x0D12 && c <= 0x0D28) ||
3875:         (c >= 0x0D2A && c <= 0x0D39) ||
3876:         (c >= 0x0D60 && c <= 0x0D61) ||
3877:         (c >= 0x0E01 && c <= 0x0E2E) ||
3878:         c == 0x0E30 ||
3879:         (c >= 0x0E32 && c <= 0x0E33) ||
3880:         (c >= 0x0E40 && c <= 0x0E45) ||
3881:         (c >= 0x0E81 && c <= 0x0E82) ||
3882:         c == 0x0E84 ||
3883:         (c >= 0x0E87 && c <= 0x0E88) ||
3884:         c == 0x0E8A ||
3885:         c == 0x0E8D ||
3886:         (c >= 0x0E94 && c <= 0x0E97) ||
3887:         (c >= 0x0E99 && c <= 0x0E9F) ||
3888:         (c >= 0x0EA1 && c <= 0x0EA3) ||
3889:         c == 0x0EA5 ||
3890:         c == 0x0EA7 ||
3891:         (c >= 0x0EAA && c <= 0x0EAB) ||
3892:         (c >= 0x0EAD && c <= 0x0EAE) ||
3893:         c == 0x0EB0 ||
3894:         (c >= 0x0EB2 && c <= 0x0EB3) ||
3895:         c == 0x0EBD ||
3896:         (c >= 0x0EC0 && c <= 0x0EC4) ||
3897:         (c >= 0x0F40 && c <= 0x0F47) ||
3898:         (c >= 0x0F49 && c <= 0x0F69) ||
3899:         (c >= 0x10A0 && c <= 0x10C5) ||
3900:         (c >= 0x10D0 && c <= 0x10F6) ||
3901:         c == 0x1100 ||
3902:         (c >= 0x1102 && c <= 0x1103) ||
3903:         (c >= 0x1105 && c <= 0x1107) ||
3904:         c == 0x1109 ||
3905:         (c >= 0x110B && c <= 0x110C) ||
3906:         (c >= 0x110E && c <= 0x1112) ||
3907:         c == 0x113C ||
3908:         c == 0x113E ||
3909:         c == 0x1140 ||
3910:         c == 0x114C ||
3911:         c == 0x114E ||
3912:         c == 0x1150 ||
3913:         (c >= 0x1154 && c <= 0x1155) ||
3914:         c == 0x1159 ||
3915:         (c >= 0x115F && c <= 0x1161) ||
3916:         c == 0x1163 ||
3917:         c == 0x1165 ||
3918:         c == 0x1167 ||
3919:         c == 0x1169 ||
3920:         (c >= 0x116D && c <= 0x116E) ||
3921:         (c >= 0x1172 && c <= 0x1173) ||
3922:         c == 0x1175 ||
3923:         c == 0x119E ||
3924:         c == 0x11A8 ||
3925:         c == 0x11AB ||
3926:         (c >= 0x11AE && c <= 0x11AF) ||
3927:         (c >= 0x11B7 && c <= 0x11B8) ||
3928:         c == 0x11BA ||
3929:         (c >= 0x11BC && c <= 0x11C2) ||
3930:         c == 0x11EB ||
3931:         c == 0x11F0 ||
3932:         c == 0x11F9 ||
3933:         (c >= 0x1E00 && c <= 0x1E9B) ||
3934:         (c >= 0x1EA0 && c <= 0x1EF9) ||
3935:         (c >= 0x1F00 && c <= 0x1F15) ||
3936:         (c >= 0x1F18 && c <= 0x1F1D) ||
3937:         (c >= 0x1F20 && c <= 0x1F45) ||
3938:         (c >= 0x1F48 && c <= 0x1F4D) ||
3939:         (c >= 0x1F50 && c <= 0x1F57) ||
3940:         c == 0x1F59 ||
3941:         c == 0x1F5B ||
3942:         c == 0x1F5D ||
3943:         (c >= 0x1F5F && c <= 0x1F7D) ||
3944:         (c >= 0x1F80 && c <= 0x1FB4) ||
3945:         (c >= 0x1FB6 && c <= 0x1FBC) ||
3946:         c == 0x1FBE ||
3947:         (c >= 0x1FC2 && c <= 0x1FC4) ||
3948:         (c >= 0x1FC6 && c <= 0x1FCC) ||
3949:         (c >= 0x1FD0 && c <= 0x1FD3) ||
3950:         (c >= 0x1FD6 && c <= 0x1FDB) ||
3951:         (c >= 0x1FE0 && c <= 0x1FEC) ||
3952:         (c >= 0x1FF2 && c <= 0x1FF4) ||
3953:         (c >= 0x1FF6 && c <= 0x1FFC) ||
3954:         c == 0x2126 ||
3955:         (c >= 0x212A && c <= 0x212B) ||
3956:         c == 0x212E ||
3957:         (c >= 0x2180 && c <= 0x2182) ||
3958:         (c >= 0x3041 && c <= 0x3094) ||
3959:         (c >= 0x30A1 && c <= 0x30FA) ||
3960:         (c >= 0x3105 && c <= 0x312C) ||
3961:         (c >= 0xAC00 && c <= 0xD7A3))
3962:         return true; // BaseChar
3963:     if ((c >= 0x4e00 && c <= 0x9fa5) ||
3964:         c == 0x3007 ||
3965:         (c >= 0x3021 && c <= 0x3029))
3966:       return true; // Ideographic
3967:     return false;
3968:   }
3969: 
3970:   /**
3971:    * Indicates whether the specified Unicode character matches the Digit
3972:    * production.
3973:    */
3974:   public static boolean isDigit(int c)
3975:   {
3976:     return ((c >= 0x0030 && c <= 0x0039) ||
3977:             (c >= 0x0660 && c <= 0x0669) ||
3978:             (c >= 0x06F0 && c <= 0x06F9) ||
3979:             (c >= 0x0966 && c <= 0x096F) ||
3980:             (c >= 0x09E6 && c <= 0x09EF) ||
3981:             (c >= 0x0A66 && c <= 0x0A6F) ||
3982:             (c >= 0x0AE6 && c <= 0x0AEF) ||
3983:             (c >= 0x0B66 && c <= 0x0B6F) ||
3984:             (c >= 0x0BE7 && c <= 0x0BEF) ||
3985:             (c >= 0x0C66 && c <= 0x0C6F) ||
3986:             (c >= 0x0CE6 && c <= 0x0CEF) ||
3987:             (c >= 0x0D66 && c <= 0x0D6F) ||
3988:             (c >= 0x0E50 && c <= 0x0E59) ||
3989:             (c >= 0x0ED0 && c <= 0x0ED9) ||
3990:             (c >= 0x0F20 && c <= 0x0F29));
3991:   }
3992: 
3993:   /**
3994:    * Indicates whether the specified Unicode character matches the
3995:    * CombiningChar production.
3996:    */
3997:   public static boolean isCombiningChar(int c)
3998:   {
3999:     return ((c >= 0x0300 && c <= 0x0345) ||
4000:             (c >= 0x0360 && c <= 0x0361) ||
4001:             (c >= 0x0483 && c <= 0x0486) ||
4002:             (c >= 0x0591 && c <= 0x05A1) ||
4003:             (c >= 0x05A3 && c <= 0x05B9) ||
4004:             (c >= 0x05BB && c <= 0x05BD) ||
4005:             c == 0x05BF ||
4006:             (c >= 0x05C1 && c <= 0x05C2) ||
4007:             c == 0x05C4 ||
4008:             (c >= 0x064B && c <= 0x0652) ||
4009:             c == 0x0670 ||
4010:             (c >= 0x06D6 && c <= 0x06DC) ||
4011:             (c >= 0x06DD && c <= 0x06DF) ||
4012:             (c >= 0x06E0 && c <= 0x06E4) ||
4013:             (c >= 0x06E7 && c <= 0x06E8) ||
4014:             (c >= 0x06EA && c <= 0x06ED) ||
4015:             (c >= 0x0901 && c <= 0x0903) ||
4016:             c == 0x093C ||
4017:             (c >= 0x093E && c <= 0x094C) ||
4018:             c == 0x094D ||
4019:             (c >= 0x0951 && c <= 0x0954) ||
4020:             (c >= 0x0962 && c <= 0x0963) ||
4021:             (c >= 0x0981 && c <= 0x0983) ||
4022:             c == 0x09BC ||
4023:             c == 0x09BE ||
4024:             c == 0x09BF ||
4025:             (c >= 0x09C0 && c <= 0x09C4) ||
4026:             (c >= 0x09C7 && c <= 0x09C8) ||
4027:             (c >= 0x09CB && c <= 0x09CD) ||
4028:             c == 0x09D7 ||
4029:             (c >= 0x09E2 && c <= 0x09E3) ||
4030:             c == 0x0A02 ||
4031:             c == 0x0A3C ||
4032:             c == 0x0A3E ||
4033:             c == 0x0A3F ||
4034:             (c >= 0x0A40 && c <= 0x0A42) ||
4035:             (c >= 0x0A47 && c <= 0x0A48) ||
4036:             (c >= 0x0A4B && c <= 0x0A4D) ||
4037:             (c >= 0x0A70 && c <= 0x0A71) ||
4038:             (c >= 0x0A81 && c <= 0x0A83) ||
4039:             c == 0x0ABC ||
4040:             (c >= 0x0ABE && c <= 0x0AC5) ||
4041:             (c >= 0x0AC7 && c <= 0x0AC9) ||
4042:             (c >= 0x0ACB && c <= 0x0ACD) ||
4043:             (c >= 0x0B01 && c <= 0x0B03) ||
4044:             c == 0x0B3C ||
4045:             (c >= 0x0B3E && c <= 0x0B43) ||
4046:             (c >= 0x0B47 && c <= 0x0B48) ||
4047:             (c >= 0x0B4B && c <= 0x0B4D) ||
4048:             (c >= 0x0B56 && c <= 0x0B57) ||
4049:             (c >= 0x0B82 && c <= 0x0B83) ||
4050:             (c >= 0x0BBE && c <= 0x0BC2) ||
4051:             (c >= 0x0BC6 && c <= 0x0BC8) ||
4052:             (c >= 0x0BCA && c <= 0x0BCD) ||
4053:             c == 0x0BD7 ||
4054:             (c >= 0x0C01 && c <= 0x0C03) ||
4055:             (c >= 0x0C3E && c <= 0x0C44) ||
4056:             (c >= 0x0C46 && c <= 0x0C48) ||
4057:             (c >= 0x0C4A && c <= 0x0C4D) ||
4058:             (c >= 0x0C55 && c <= 0x0C56) ||
4059:             (c >= 0x0C82 && c <= 0x0C83) ||
4060:             (c >= 0x0CBE && c <= 0x0CC4) ||
4061:             (c >= 0x0CC6 && c <= 0x0CC8) ||
4062:             (c >= 0x0CCA && c <= 0x0CCD) ||
4063:             (c >= 0x0CD5 && c <= 0x0CD6) ||
4064:             (c >= 0x0D02 && c <= 0x0D03) ||
4065:             (c >= 0x0D3E && c <= 0x0D43) ||
4066:             (c >= 0x0D46 && c <= 0x0D48) ||
4067:             (c >= 0x0D4A && c <= 0x0D4D) ||
4068:             c == 0x0D57 ||
4069:             c == 0x0E31 ||
4070:             (c >= 0x0E34 && c <= 0x0E3A) ||
4071:             (c >= 0x0E47 && c <= 0x0E4E) ||
4072:             c == 0x0EB1 ||
4073:             (c >= 0x0EB4 && c <= 0x0EB9) ||
4074:             (c >= 0x0EBB && c <= 0x0EBC) ||
4075:             (c >= 0x0EC8 && c <= 0x0ECD) ||
4076:             (c >= 0x0F18 && c <= 0x0F19) ||
4077:             c == 0x0F35 ||
4078:             c == 0x0F37 ||
4079:             c == 0x0F39 ||
4080:             c == 0x0F3E ||
4081:             c == 0x0F3F ||
4082:             (c >= 0x0F71 && c <= 0x0F84) ||
4083:             (c >= 0x0F86 && c <= 0x0F8B) ||
4084:             (c >= 0x0F90 && c <= 0x0F95) ||
4085:             c == 0x0F97 ||
4086:             (c >= 0x0F99 && c <= 0x0FAD) ||
4087:             (c >= 0x0FB1 && c <= 0x0FB7) ||
4088:             c == 0x0FB9 ||
4089:             (c >= 0x20D0 && c <= 0x20DC) ||
4090:             c == 0x20E1 ||
4091:             (c >= 0x302A && c <= 0x302F) ||
4092:             c == 0x3099 ||
4093:             c == 0x309A);
4094:   }
4095: 
4096:   /**
4097:    * Indicates whether the specified Unicode character matches the Extender
4098:    * production.
4099:    */
4100:   public static boolean isExtender(int c)
4101:   {
4102:     return (c == 0x00B7 ||
4103:             c == 0x02D0 ||
4104:             c == 0x02D1 ||
4105:             c == 0x0387 ||
4106:             c == 0x0640 ||
4107:             c == 0x0E46 ||
4108:             c == 0x0EC6 ||
4109:             c == 0x3005 ||
4110:             (c >= 0x3031 && c <= 0x3035) ||
4111:             (c >= 0x309D && c <= 0x309E) ||
4112:             (c >= 0x30FC && c <= 0x30FE));
4113:   }
4114: 
4115:   /**
4116:    * Indicates whether the specified Unicode character matches the Char
4117:    * production.
4118:    */
4119:   public static boolean isChar(int c)
4120:   {
4121:     return (c >= 0x20 && c < 0xd800) ||
4122:       (c >= 0xe00 && c < 0xfffe) ||
4123:       (c >= 0x10000 && c < 0x110000) ||
4124:       c == 0xa || c == 0x9 || c == 0xd;
4125:   }
4126: 
4127:   /**
4128:    * Interns the specified text or not, depending on the value of
4129:    * stringInterning.
4130:    */
4131:   private String intern(String text)
4132:   {
4133:     return stringInterning ? text.intern() : text;
4134:   }
4135: 
4136:   /**
4137:    * Report a parsing error.
4138:    */
4139:   private void error(String message)
4140:     throws XMLStreamException
4141:   {
4142:     error(message, null);
4143:   }
4144: 
4145:   /**
4146:    * Report a parsing error.
4147:    */
4148:   private void error(String message, Object info)
4149:     throws XMLStreamException
4150:   {
4151:     if (info != null)
4152:       {
4153:         if (info instanceof String)
4154:           message += ": \"" + ((String) info) + "\"";
4155:         else if (info instanceof Character)
4156:           message += ": '" + ((Character) info) + "'";
4157:       }
4158:     throw new XMLStreamException(message);
4159:   }
4160: 
4161:   /**
4162:    * Perform validation of a start-element event.
4163:    */
4164:   private void validateStartElement(String elementName)
4165:     throws XMLStreamException
4166:   {
4167:     if (currentContentModel == null)
4168:       {
4169:         // root element
4170:         // VC: Root Element Type
4171:         if (!elementName.equals(doctype.rootName))
4172:           error("root element name must match name in DTD");
4173:         return;
4174:       }
4175:     // VC: Element Valid
4176:     switch (currentContentModel.type)
4177:       {
4178:       case ContentModel.EMPTY:
4179:         error("child element found in empty element", elementName);
4180:         break;
4181:       case ContentModel.ELEMENT:
4182:         LinkedList ctx = (LinkedList) validationStack.getLast();
4183:         ctx.add(elementName);
4184:         break;
4185:       case ContentModel.MIXED:
4186:         MixedContentModel mm = (MixedContentModel) currentContentModel;
4187:         if (!mm.containsName(elementName))
4188:           error("illegal element for content model", elementName);
4189:         break;
4190:       }
4191:   }
4192: 
4193:   /**
4194:    * Perform validation of an end-element event.
4195:    */
4196:   private void validateEndElement()
4197:     throws XMLStreamException
4198:   {
4199:     if (currentContentModel == null)
4200:       {
4201:         // root element
4202:         // VC: IDREF
4203:         if (!idrefs.containsAll(ids))
4204:           error("IDREF values must match the value of some ID attribute");
4205:         return;
4206:       }
4207:     // VC: Element Valid
4208:     switch (currentContentModel.type)
4209:       {
4210:       case ContentModel.ELEMENT:
4211:         LinkedList ctx = (LinkedList) validationStack.getLast();
4212:         ElementContentModel ecm = (ElementContentModel) currentContentModel;
4213:         validateElementContent(ecm, ctx);
4214:         break;
4215:       }
4216:   }
4217: 
4218:   /**
4219:    * Perform validation of character data.
4220:    */
4221:   private void validatePCData(String text)
4222:     throws XMLStreamException
4223:   {
4224:     // VC: Element Valid
4225:     switch (currentContentModel.type)
4226:       {
4227:       case ContentModel.EMPTY:
4228:         error("character data found in empty element", text);
4229:         break;
4230:       case ContentModel.ELEMENT:
4231:         boolean white = true;
4232:         int len = text.length();
4233:         for (int i = 0; i < len; i++)
4234:           {
4235:             char c = text.charAt(i);
4236:             if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
4237:               {
4238:                 white = false;
4239:                 break;
4240:               }
4241:           }
4242:         if (!white)
4243:           error("character data found in element with element content", text);
4244:         else if (xmlStandalone == Boolean.TRUE && currentContentModel.external)
4245:           // VC: Standalone Document Declaration
4246:           error("whitespace in element content of externally declared " +
4247:                 "element in standalone document");
4248:         break;
4249:       }
4250:   }
4251: 
4252:   /**
4253:    * Validates the specified validation context (list of child elements)
4254:    * against the element content model for the current element.
4255:    */
4256:   private void validateElementContent(ElementContentModel model,
4257:                                       LinkedList children)
4258:     throws XMLStreamException
4259:   {
4260:     // Use regular expression
4261:     CPStringBuilder buf = new CPStringBuilder();
4262:     for (Iterator i = children.iterator(); i.hasNext(); )
4263:       {
4264:         buf.append((String) i.next());
4265:         buf.append(' ');
4266:       }
4267:     String c = buf.toString();
4268:     String regex = createRegularExpression(model);
4269:     if (!c.matches(regex))
4270:       error("element content "+model.text+" does not match expression "+regex, c);
4271:   }
4272: 
4273:   /**
4274:    * Creates the regular expression used to validate an element content
4275:    * model.
4276:    */
4277:   private String createRegularExpression(ElementContentModel model)
4278:   {
4279:     if (model.regex == null)
4280:       {
4281:         CPStringBuilder buf = new CPStringBuilder();
4282:         buf.append('(');
4283:         for (Iterator i = model.contentParticles.iterator(); i.hasNext(); )
4284:           {
4285:             ContentParticle cp = (ContentParticle) i.next();
4286:             if (cp.content instanceof String)
4287:               {
4288:                 buf.append('(');
4289:                 buf.append((String) cp.content);
4290:                 buf.append(' ');
4291:                 buf.append(')');
4292:                 if (cp.max == -1)
4293:                   {
4294:                     if (cp.min == 0)
4295:                       buf.append('*');
4296:                     else
4297:                       buf.append('+');
4298:                   }
4299:                 else if (cp.min == 0)
4300:                   buf.append('?');
4301:               }
4302:             else
4303:               {
4304:                 ElementContentModel ecm = (ElementContentModel) cp.content;
4305:                 buf.append(createRegularExpression(ecm));
4306:               }
4307:             if (model.or && i.hasNext())
4308:               buf.append('|');
4309:           }
4310:         buf.append(')');
4311:         if (model.max == -1)
4312:           {
4313:             if (model.min == 0)
4314:               buf.append('*');
4315:             else
4316:               buf.append('+');
4317:           }
4318:         else if (model.min == 0)
4319:           buf.append('?');
4320:         model.regex = buf.toString();
4321:       }
4322:     return model.regex;
4323:   }
4324: 
4325:   /**
4326:    * Performs validation of a document type declaration event.
4327:    */
4328:   void validateDoctype()
4329:     throws XMLStreamException
4330:   {
4331:     for (Iterator i = doctype.entityIterator(); i.hasNext(); )
4332:       {
4333:         Map.Entry entry = (Map.Entry) i.next();
4334:         Object entity = entry.getValue();
4335:         if (entity instanceof ExternalIds)
4336:           {
4337:             ExternalIds ids = (ExternalIds) entity;
4338:             if (ids.notationName != null)
4339:               {
4340:                 // VC: Notation Declared
4341:                 ExternalIds notation = doctype.getNotation(ids.notationName);
4342:                 if (notation == null)
4343:                   error("Notation name must match the declared name of a " +
4344:                         "notation", ids.notationName);
4345:               }
4346:           }
4347:       }
4348:   }
4349: 
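  /**
   * Simple test harness for reading XML files.
   * Syntax: XMLParser [-n] [-v] [-x] &lt;file&gt; [&lt;file2&gt; [...]]
   * Leading arguments starting with "-" are options: "-n" enables
   * namespace aware mode, "-v" enables validation and "-x" enables
   * XInclude processing. Every remaining argument is the filename of an
   * XML file to read.
   */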
4355:   public static void main(String[] args)
4356:     throws Exception
4357:   {
4358:     boolean validating = false;
4359:     boolean namespaceAware = false;
4360:     boolean xIncludeAware = false;
4361:     int pos = 0;
4362:     while (pos < args.length && args[pos].startsWith("-"))
4363:       {
4364:         if ("-x".equals(args[pos]))
4365:           xIncludeAware = true;
4366:         else if ("-v".equals(args[pos]))
4367:           validating = true;
4368:         else if ("-n".equals(args[pos]))
4369:           namespaceAware = true;
4370:         pos++;
4371:       }
4372:     if (pos >= args.length)
4373:       {
4374:         System.out.println("Syntax: XMLParser [-n] [-v] [-x] <file> [<file2> [...]]");
4375:         System.out.println("\t-n: use namespace aware mode");
4376:         System.out.println("\t-v: use validating parser");
4377:         System.out.println("\t-x: use XInclude aware mode");
4378:         System.exit(2);
4379:       }
4380:     while (pos < args.length)
4381:       {
4382:         XMLParser p = new XMLParser(new java.io.FileInputStream(args[pos]),
4383:                                     absolutize(null, args[pos]),
4384:                                     validating, // validating
4385:                                     namespaceAware, // namespaceAware
4386:                                     true, // coalescing,
4387:                                     true, // replaceERefs
4388:                                     true, // externalEntities
4389:                                     true, // supportDTD
4390:                                     true, // baseAware
4391:                                     true, // stringInterning
4392:                                     true, // extendedEventTypes
4393:                                     null,
4394:                                     null);
4395:         XMLStreamReader reader = p;
4396:         if (xIncludeAware)
4397:           reader = new XIncludeFilter(p, args[pos], true, true, true);
4398:         try
4399:           {
4400:             int event;
4401:             //do
4402:             while (reader.hasNext())
4403:               {
4404:                 event = reader.next();
4405:                 Location loc = reader.getLocation();
4406:                 System.out.print(loc.getLineNumber() + ":" +
4407:                                  loc.getColumnNumber() + " ");
4408:                 switch (event)
4409:                   {
4410:                   case XMLStreamConstants.START_DOCUMENT:
4411:                     System.out.println("START_DOCUMENT version=" +
4412:                                        reader.getVersion() +
4413:                                        " encoding=" +
4414:                                        reader.getEncoding());
4415:                     break;
4416:                   case XMLStreamConstants.END_DOCUMENT:
4417:                     System.out.println("END_DOCUMENT");
4418:                     break;
4419:                   case XMLStreamConstants.START_ELEMENT:
4420:                     System.out.println("START_ELEMENT " +
4421:                                        reader.getName());
4422:                     int l = reader.getNamespaceCount();
4423:                     for (int i = 0; i < l; i++)
4424:                       System.out.println("\tnamespace " +
4425:                                          reader.getNamespacePrefix(i) + "='" +
4426:                                          reader.getNamespaceURI(i)+"'");
4427:                     l = reader.getAttributeCount();
4428:                     for (int i = 0; i < l; i++)
4429:                       System.out.println("\tattribute " +
4430:                                          reader.getAttributeName(i) + "='" +
4431:                                          reader.getAttributeValue(i) + "'");
4432:                     break;
4433:                   case XMLStreamConstants.END_ELEMENT:
4434:                     System.out.println("END_ELEMENT " + reader.getName());
4435:                     break;
4436:                   case XMLStreamConstants.CHARACTERS:
4437:                     System.out.println("CHARACTERS '" +
4438:                                        encodeText(reader.getText()) + "'");
4439:                     break;
4440:                   case XMLStreamConstants.CDATA:
4441:                     System.out.println("CDATA '" +
4442:                                        encodeText(reader.getText()) + "'");
4443:                     break;
4444:                   case XMLStreamConstants.SPACE:
4445:                     System.out.println("SPACE '" +
4446:                                        encodeText(reader.getText()) + "'");
4447:                     break;
4448:                   case XMLStreamConstants.DTD:
4449:                     System.out.println("DTD " + reader.getText());
4450:                     break;
4451:                   case XMLStreamConstants.ENTITY_REFERENCE:
4452:                     System.out.println("ENTITY_REFERENCE " + reader.getText());
4453:                     break;
4454:                   case XMLStreamConstants.COMMENT:
4455:                     System.out.println("COMMENT '" +
4456:                                        encodeText(reader.getText()) + "'");
4457:                     break;
4458:                   case XMLStreamConstants.PROCESSING_INSTRUCTION:
4459:                     System.out.println("PROCESSING_INSTRUCTION " +
4460:                                        reader.getPITarget() + " " +
4461:                                        reader.getPIData());
4462:                     break;
4463:                   case START_ENTITY:
4464:                     System.out.println("START_ENTITY " + reader.getText());
4465:                     break;
4466:                   case END_ENTITY:
4467:                     System.out.println("END_ENTITY " + reader.getText());
4468:                     break;
4469:                   default:
4470:                     System.out.println("Unknown event: " + event);
4471:                   }
4472:               }
4473:           }
4474:         catch (XMLStreamException e)
4475:           {
4476:             Location l = reader.getLocation();
4477:             System.out.println("At line "+l.getLineNumber()+
4478:                                ", column "+l.getColumnNumber()+
4479:                                " of "+l.getSystemId());
4480:             throw e;
4481:           }
4482:         pos++;
4483:       }
4484:   }
4485: 
4486:   /**
4487:    * Escapes control characters in the specified text. For debugging.
4488:    */
4489:   private static String encodeText(String text)
4490:   {
4491:     CPStringBuilder b = new CPStringBuilder();
4492:     int len = text.length();
4493:     for (int i = 0; i < len; i++)
4494:       {
4495:         char c = text.charAt(i);
4496:         switch (c)
4497:           {
4498:           case '\t':
4499:             b.append("\\t");
4500:             break;
4501:           case '\n':
4502:             b.append("\\n");
4503:             break;
4504:           case '\r':
4505:             b.append("\\r");
4506:             break;
4507:           default:
4508:             b.append(c);
4509:           }
4510:       }
4511:     return b.toString();
4512:   }
4513: 
4514:   /**
4515:    * An attribute instance.
4516:    */
4517:   class Attribute
4518:   {
4519: 
4520:     /**
4521:      * Attribute name.
4522:      */
4523:     final String name;
4524: 
4525:     /**
4526:      * Attribute type as declared in the DTD, or CDATA otherwise.
4527:      */
4528:     final String type;
4529: 
4530:     /**
4531:      * Whether the attribute was specified or defaulted.
4532:      */
4533:     final boolean specified;
4534: 
4535:     /**
4536:      * The attribute value.
4537:      */
4538:     final String value;
4539: 
4540:     /**
4541:      * The namespace prefix.
4542:      */
4543:     final String prefix;
4544: 
4545:     /**
4546:      * The namespace local-name.
4547:      */
4548:     final String localName;
4549: 
4550:     Attribute(String name, String type, boolean specified, String value)
4551:     {
4552:       this.name = name;
4553:       this.type = type;
4554:       this.specified = specified;
4555:       this.value = value;
4556:       int ci = name.indexOf(':');
4557:       if (ci == -1)
4558:         {
4559:           prefix = null;
4560:           localName = intern(name);
4561:         }
4562:       else
4563:         {
4564:           prefix = intern(name.substring(0, ci));
4565:           localName = intern(name.substring(ci + 1));
4566:         }
4567:     }
4568: 
4569:     public boolean equals(Object other)
4570:     {
4571:       if (other instanceof Attribute)
4572:         {
4573:           Attribute a = (Attribute) other;
4574:           if (namespaceAware)
4575:             {
4576:               if (!a.localName.equals(localName))
4577:                 return false;
4578:               String auri = getNamespaceURI(a.prefix);
4579:               String uri = getNamespaceURI(prefix);
4580:               if (uri == null && (auri == null ||
4581:                                   (input.xml11 && "".equals(auri))))
4582:                return true;
4583:               if (uri != null)
4584:                 {
4585:                   if ("".equals(uri) && input.xml11 && "".equals(auri))
4586:                     return true;
4587:                   return uri.equals(auri);
4588:                 }
4589:               return false;
4590:             }
4591:           else
4592:             return a.name.equals(name);
4593:         }
4594:       return false;
4595:     }
4596: 
4597:     public String toString()
4598:     {
4599:       CPStringBuilder buf = new CPStringBuilder(getClass().getName());
4600:       buf.append('[');
4601:       buf.append("name=");
4602:       buf.append(name);
4603:       if (value != null)
4604:         {
4605:           buf.append(",value=");
4606:           buf.append(value);
4607:         }
4608:       if (type != null)
4609:         {
4610:           buf.append(",type=");
4611:           buf.append(type);
4612:         }
4613:       if (specified)
4614:         buf.append(",specified");
4615:       buf.append(']');
4616:       return buf.toString();
4617:     }
4618: 
4619:   }
4620: 
4621:   /**
4622:    * Representation of a DTD.
4623:    */
4624:   class Doctype
4625:   {
4626: 
4627:     /**
4628:      * Name of the root element.
4629:      */
4630:     final String rootName;
4631: 
4632:     /**
4633:      * Public ID, if any, of external subset.
4634:      */
4635:     final String publicId;
4636: 
4637:     /**
4638:      * System ID (URL), if any, of external subset.
4639:      */
4640:     final String systemId;
4641: 
4642:     /**
4643:      * Map of element names to content models.
4644:      */
4645:     private final LinkedHashMap elements = new LinkedHashMap();
4646: 
4647:     /**
4648:      * Map of element names to maps of attribute declarations.
4649:      */
4650:     private final LinkedHashMap attlists = new LinkedHashMap();
4651: 
4652:     /**
4653:      * Map of entity names to entities (String or ExternalIds).
4654:      */
4655:     private final LinkedHashMap entities = new LinkedHashMap();
4656: 
4657:     /**
4658:      * Map of notation names to ExternalIds.
4659:      */
4660:     private final LinkedHashMap notations = new LinkedHashMap();
4661: 
4662:     /**
4663:      * Map of anonymous keys to comments.
4664:      */
4665:     private final LinkedHashMap comments = new LinkedHashMap();
4666: 
4667:     /**
4668:      * Map of anonymous keys to processing instructions (String[2]
4669:      * containing {target, data}).
4670:      */
4671:     private final LinkedHashMap pis = new LinkedHashMap();
4672: 
4673:     /**
4674:      * List of keys to all markup entries in the DTD.
4675:      */
4676:     private final LinkedList entries = new LinkedList();
4677: 
4678:     /**
4679:      * Set of the entities defined in the external subset.
4680:      */
4681:     private final HashSet externalEntities = new HashSet();
4682: 
4683:     /**
4684:      * Set of the notations defined in the external subset.
4685:      */
4686:     private final HashSet externalNotations = new HashSet();
4687: 
4688:     /**
4689:      * Counter for making anonymous keys.
4690:      */
4691:     private int anon = 1;
4692: 
4693:     /**
4694:      * Constructor.
4695:      */
4696:     Doctype(String rootName, String publicId, String systemId)
4697:     {
4698:       this.rootName = rootName;
4699:       this.publicId = publicId;
4700:       this.systemId = systemId;
4701:     }
4702: 
4703:     /**
4704:      * Adds an element declaration.
4705:      * @param name the element name
4706:      * @param text the content model text
4707:      * @param model the parsed content model
4708:      */
4709:     void addElementDecl(String name, String text, ContentModel model)
4710:     {
4711:       if (elements.containsKey(name))
4712:         return;
4713:       model.text = text;
4714:       model.external = (inputStack.size() != 1);
4715:       elements.put(name, model);
4716:       entries.add("E" + name);
4717:     }
4718: 
4719:     /**
4720:      * Adds an attribute declaration.
4721:      * @param ename the element name
4722:      * @param aname the attribute name
4723:      * @param decl the attribute declaration details
4724:      */
4725:     void addAttributeDecl(String ename, String aname, AttributeDecl decl)
4726:     {
4727:       LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4728:       if (attlist == null)
4729:         {
4730:           attlist = new LinkedHashMap();
4731:           attlists.put(ename, attlist);
4732:         }
4733:       else if (attlist.containsKey(aname))
4734:         return;
4735:       attlist.put(aname, decl);
4736:       String key = "A" + ename;
4737:       if (!entries.contains(key))
4738:         entries.add(key);
4739:     }
4740: 
4741:     /**
4742:      * Adds an entity declaration.
4743:      * @param name the entity name
4744:      * @param text the entity replacement text
4745:      * @param inExternalSubset if we are in the exernal subset
4746:      */
4747:     void addEntityDecl(String name, String text, boolean inExternalSubset)
4748:     {
4749:       if (entities.containsKey(name))
4750:         return;
4751:       entities.put(name, text);
4752:       entries.add("e" + name);
4753:       if (inExternalSubset)
4754:         externalEntities.add(name);
4755:     }
4756: 
4757:     /**
4758:      * Adds an entity declaration.
4759:      * @param name the entity name
4760:      * @param ids the external IDs
4761:      * @param inExternalSubset if we are in the exernal subset
4762:      */
4763:     void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset)
4764:     {
4765:       if (entities.containsKey(name))
4766:         return;
4767:       entities.put(name, ids);
4768:       entries.add("e" + name);
4769:       if (inExternalSubset)
4770:         externalEntities.add(name);
4771:     }
4772: 
4773:     /**
4774:      * Adds a notation declaration.
4775:      * @param name the notation name
4776:      * @param ids the external IDs
4777:      * @param inExternalSubset if we are in the exernal subset
4778:      */
4779:     void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset)
4780:     {
4781:       if (notations.containsKey(name))
4782:         return;
4783:       notations.put(name, ids);
4784:       entries.add("n" + name);
4785:       if (inExternalSubset)
4786:         externalNotations.add(name);
4787:     }
4788: 
4789:     /**
4790:      * Adds a comment.
4791:      */
4792:     void addComment(String text)
4793:     {
4794:       String key = Integer.toString(anon++);
4795:       comments.put(key, text);
4796:       entries.add("c" + key);
4797:     }
4798: 
4799:     /**
4800:      * Adds a processing instruction.
4801:      */
4802:     void addPI(String target, String data)
4803:     {
4804:       String key = Integer.toString(anon++);
4805:       pis.put(key, new String[] {target, data});
4806:       entries.add("p" + key);
4807:     }
4808: 
4809:     /**
4810:      * Returns the content model for the specified element.
4811:      * @param name the element name
4812:      */
4813:     ContentModel getElementModel(String name)
4814:     {
4815:       return (ContentModel) elements.get(name);
4816:     }
4817: 
4818:     /**
4819:      * Returns the attribute definition for the given attribute
4820:      * @param ename the element name
4821:      * @param aname the attribute name
4822:      */
4823:     AttributeDecl getAttributeDecl(String ename, String aname)
4824:     {
4825:       LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4826:       return (attlist == null) ? null : (AttributeDecl) attlist.get(aname);
4827:     }
4828: 
4829:     /**
4830:      * Indicates whether the specified attribute was declared in the DTD.
4831:      * @param ename the element name
4832:      * @param aname the attribute name
4833:      */
4834:     boolean isAttributeDeclared(String ename, String aname)
4835:     {
4836:       LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4837:       return (attlist == null) ? false : attlist.containsKey(aname);
4838:     }
4839: 
4840:     /**
4841:      * Returns an iterator over the entries in the attribute list for the
4842:      * given element.
4843:      * @param ename the element name
4844:      */
4845:     Iterator attlistIterator(String ename)
4846:     {
4847:       LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4848:       return (attlist == null) ? Collections.EMPTY_LIST.iterator() :
4849:         attlist.entrySet().iterator();
4850:     }
4851: 
4852:     /**
4853:      * Returns the entity (String or ExternalIds) for the given entity name.
4854:      */
4855:     Object getEntity(String name)
4856:     {
4857:       return entities.get(name);
4858:     }
4859: 
4860:     /**
4861:      * Indicates whether the specified entity was declared in the external
4862:      * subset.
4863:      */
4864:     boolean isEntityExternal(String name)
4865:     {
4866:       return externalEntities.contains(name);
4867:     }
4868: 
4869:     /**
4870:      * Returns an iterator over the entity map entries.
4871:      */
4872:     Iterator entityIterator()
4873:     {
4874:       return entities.entrySet().iterator();
4875:     }
4876: 
4877:     /**
4878:      * Returns the notation IDs for the given notation name.
4879:      */
4880:     ExternalIds getNotation(String name)
4881:     {
4882:       return (ExternalIds) notations.get(name);
4883:     }
4884: 
4885:     /**
4886:      * Indicates whether the specified notation was declared in the external
4887:      * subset.
4888:      */
4889:     boolean isNotationExternal(String name)
4890:     {
4891:       return externalNotations.contains(name);
4892:     }
4893: 
4894:     /**
4895:      * Returns the comment associated with the specified (anonymous) key.
4896:      */
4897:     String getComment(String key)
4898:     {
4899:       return (String) comments.get(key);
4900:     }
4901: 
4902:     /**
4903:      * Returns the processing instruction associated with the specified
4904:      * (anonymous) key.
4905:      */
4906:     String[] getPI(String key)
4907:     {
4908:       return (String[]) pis.get(key);
4909:     }
4910: 
4911:     /**
4912:      * Returns an iterator over the keys of the markup entries in this DTD,
4913:      * in the order declared.
4914:      */
4915:     Iterator entryIterator()
4916:     {
4917:       return entries.iterator();
4918:     }
4919: 
4920:   }
4921: 
/**
 * Plain holder combining an ExternalID (public and system identifiers)
 * with an optional NDataDecl notation name. All fields start out null.
 */
class ExternalIds
{

  /** The public ID. */
  String publicId;

  /** The system ID. */
  String systemId;

  /** The notation name declared with the NDATA keyword. */
  String notationName;

}
4943: 
/**
 * Common base of the element content models.
 * Carries the model kind (one of the constants below), a pair of
 * occurrence bounds that both default to 1, the content model text and
 * a flag telling whether the declaration was external.
 */
abstract class ContentModel
{
  static final int EMPTY = 0;
  static final int ANY = 1;
  static final int ELEMENT = 2;
  static final int MIXED = 3;

  final int type;
  int min = 1;
  int max = 1;
  String text;
  boolean external;

  /**
   * @param type the model kind: EMPTY, ANY, ELEMENT or MIXED
   */
  ContentModel(int type)
  {
    this.type = type;
  }

}
4968: 
4969:   /**
4970:    * The EMPTY content model.
4971:    */
4972:   class EmptyContentModel
4973:     extends ContentModel
4974:   {
4975: 
4976:     EmptyContentModel()
4977:     {
4978:       super(ContentModel.EMPTY);
4979:       min = 0;
4980:       max = 0;
4981:     }
4982: 
4983:   }
4984: 
4985:   /**
4986:    * The ANY content model.
4987:    */
4988:   class AnyContentModel
4989:     extends ContentModel
4990:   {
4991: 
4992:     AnyContentModel()
4993:     {
4994:       super(ContentModel.ANY);
4995:       min = 0;
4996:       max = -1;
4997:     }
4998: 
4999:   }
5000: 
5001:   /**
5002:    * An element content model.
5003:    */
5004:   class ElementContentModel
5005:     extends ContentModel
5006:   {
5007: 
5008:     LinkedList contentParticles;
5009:     boolean or;
5010:     String regex; // regular expression cache
5011: 
5012:     ElementContentModel()
5013:     {
5014:       super(ContentModel.ELEMENT);
5015:       contentParticles = new LinkedList();
5016:     }
5017: 
5018:     void addContentParticle(ContentParticle cp)
5019:     {
5020:       contentParticles.add(cp);
5021:     }
5022: 
5023:   }
5024: 
/**
 * One particle of an element content model, with occurrence bounds that
 * both default to 1.
 */
class ContentParticle
{

  /** Name (String) or ElementContentModel. */
  Object content;
  int min = 1;
  int max = 1;

}
5033: 
5034:   /**
5035:    * A mixed content model.
5036:    */
5037:   class MixedContentModel
5038:     extends ContentModel
5039:   {
5040: 
5041:     private HashSet names;
5042: 
5043:     MixedContentModel()
5044:     {
5045:       super(ContentModel.MIXED);
5046:       names = new HashSet();
5047:     }
5048: 
5049:     void addName(String name)
5050:     {
5051:       names.add(name);
5052:     }
5053: 
5054:     boolean containsName(String name)
5055:     {
5056:       return names.contains(name);
5057:     }
5058: 
5059:   }
5060: 
/**
 * An attribute definition. Immutable: every field is assigned once by
 * the constructor.
 */
class AttributeDecl
{

  /** The attribute type (CDATA, ID, etc). */
  final String type;

  /** The default value. */
  final String value;

  /** The value type (#FIXED, #IMPLIED, etc). */
  final int valueType;

  /** The enumeration text. */
  final String enumeration;

  /** The enumeration tokens. */
  final HashSet values;

  /** Whether this attribute declaration occurred in the external subset. */
  final boolean external;

  /**
   * @param type the attribute type
   * @param value the default value
   * @param valueType the value type
   * @param enumeration the enumeration text
   * @param values the enumeration tokens
   * @param external whether the declaration occurred in the external
   * subset
   */
  AttributeDecl(String type, String value,
                int valueType, String enumeration,
                HashSet values, boolean external)
  {
    this.external = external;
    this.values = values;
    this.enumeration = enumeration;
    this.valueType = valueType;
    this.value = value;
    this.type = type;
  }

}
5110: 
5111:   /**
5112:    * An XML input source.
5113:    */
5114:   static class Input
5115:     implements Location
5116:   {
5117: 
5118:     int line = 1, markLine;
5119:     int column, markColumn;
5120:     int offset, markOffset;
5121:     final String publicId, systemId, name;
5122:     final boolean report; // report start- and end-entity
5123:     final boolean normalize; // normalize CR, etc to LF
5124: 
5125:     InputStream in;
5126:     Reader reader;
5127:     UnicodeReader unicodeReader;
5128:     boolean initialized;
5129:     boolean encodingDetected;
5130:     String inputEncoding;
5131:     boolean xml11;
5132: 
5133:     Input(InputStream in, Reader reader, String publicId, String systemId,
5134:           String name, String inputEncoding, boolean report,
5135:           boolean normalize)
5136:     {
5137:       if (inputEncoding == null)
5138:         inputEncoding = "UTF-8";
5139:       this.inputEncoding = inputEncoding;
5140:       this.publicId = publicId;
5141:       this.systemId = systemId;
5142:       this.name = name;
5143:       this.report = report;
5144:       this.normalize = normalize;
5145:       if (in != null)
5146:         {
5147:           if (reader != null)
5148:             throw new IllegalStateException("both byte and char streams "+
5149:                                             "specified");
5150:           if (normalize)
5151:             in = new CRLFInputStream(in);
5152:           in = new BufferedInputStream(in);
5153:           this.in = in;
5154:         }
5155:       else
5156:         {
5157:           this.reader = normalize ? new CRLFReader(reader) : reader;
5158:           unicodeReader = new UnicodeReader(this.reader);
5159:         }
5160:       initialized = false;
5161:     }
5162: 
5163:     // -- Location --
5164: 
5165:     public int getCharacterOffset()
5166:     {
5167:       return offset;
5168:     }
5169: 
5170:     public int getColumnNumber()
5171:     {
5172:       return column;
5173:     }
5174: 
5175:     public int getLineNumber()
5176:     {
5177:       return line;
5178:     }
5179: 
5180:     public String getPublicId()
5181:     {
5182:       return publicId;
5183:     }
5184: 
5185:     public String getSystemId()
5186:     {
5187:       return systemId;
5188:     }
5189: 
5190:     void init()
5191:       throws IOException
5192:     {
5193:       if (initialized)
5194:         return;
5195:       if (in != null)
5196:         detectEncoding();
5197:       initialized = true;
5198:     }
5199: 
5200:     void mark(int len)
5201:       throws IOException
5202:     {
5203:       markOffset = offset;
5204:       markLine = line;
5205:       markColumn = column;
5206:       if (unicodeReader != null)
5207:         unicodeReader.mark(len);
5208:       else
5209:         in.mark(len);
5210:     }
5211: 
5212:     /**
5213:      * Character read.
5214:      */
5215:     int read()
5216:       throws IOException
5217:     {
5218:       offset++;
5219:       int ret = (unicodeReader != null) ? unicodeReader.read() : in.read();
5220:       if (normalize &&
5221:           (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))))
5222:         {
5223:           // Normalize CR etc to LF
5224:           ret = 0x0a;
5225:         }
5226:       // Locator handling
5227:       if (ret == 0x0a)
5228:         {
5229:           line++;
5230:           column = 0;
5231:         }
5232:       else
5233:         column++;
5234:       return ret;
5235:     }
5236: 
5237:     /**
5238:      * Block read.
5239:      */
5240:     int read(int[] b, int off, int len)
5241:       throws IOException
5242:     {
5243:       int ret;
5244:       if (unicodeReader != null)
5245:         {
5246:           ret = unicodeReader.read(b, off, len);
5247:         }
5248:       else
5249:         {
5250:           byte[] b2 = new byte[len];
5251:           ret = in.read(b2, 0, len);
5252:           if (ret != -1)
5253:             {
5254:               String s = new String(b2, 0, ret, inputEncoding);
5255:               int[] c = UnicodeReader.toCodePointArray(s);
5256:               ret = c.length;
5257:               System.arraycopy(c, 0, b, off, ret);
5258:             }
5259:         }
5260:       if (ret != -1)
5261:         {
5262:           // Locator handling
5263:           for (int i = 0; i < ret; i++)
5264:             {
5265:               int c = b[off + i];
5266:               if (normalize &&
5267:                   (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))))
5268:                 {
5269:                   // Normalize CR etc to LF
5270:                   c = 0x0a;
5271:                   b[off + i] = c;
5272:                 }
5273:               if (c == 0x0a)
5274:                 {
5275:                   line++;
5276:                   column = 0;
5277:                 }
5278:               else
5279:                 column++;
5280:             }
5281:         }
5282:       return ret;
5283:     }
5284: 
5285:     void reset()
5286:       throws IOException
5287:     {
5288:       if (unicodeReader != null)
5289:         unicodeReader.reset();
5290:       else
5291:         in.reset();
5292:       offset = markOffset;
5293:       line = markLine;
5294:       column = markColumn;
5295:     }
5296: 
5297:     // Detection of input encoding
5298: 
5299:     private static final int[] SIGNATURE_UCS_4_1234 =
5300:       new int[] { 0x00, 0x00, 0x00, 0x3c };
5301:     private static final int[] SIGNATURE_UCS_4_4321 =
5302:       new int[] { 0x3c, 0x00, 0x00, 0x00 };
5303:     private static final int[] SIGNATURE_UCS_4_2143 =
5304:       new int[] { 0x00, 0x00, 0x3c, 0x00 };
5305:     private static final int[] SIGNATURE_UCS_4_3412 =
5306:       new int[] { 0x00, 0x3c, 0x00, 0x00 };
5307:     private static final int[] SIGNATURE_UCS_2_12 =
5308:       new int[] { 0xfe, 0xff };
5309:     private static final int[] SIGNATURE_UCS_2_21 =
5310:       new int[] { 0xff, 0xfe };
5311:     private static final int[] SIGNATURE_UCS_2_12_NOBOM =
5312:       new int[] { 0x00, 0x3c, 0x00, 0x3f };
5313:     private static final int[] SIGNATURE_UCS_2_21_NOBOM =
5314:       new int[] { 0x3c, 0x00, 0x3f, 0x00 };
5315:     private static final int[] SIGNATURE_UTF_8 =
5316:       new int[] { 0x3c, 0x3f, 0x78, 0x6d };
5317:     private static final int[] SIGNATURE_UTF_8_BOM =
5318:       new int[] { 0xef, 0xbb, 0xbf };
5319: 
5320:     /**
5321:      * Detect the input encoding.
5322:      */
5323:     private void detectEncoding()
5324:       throws IOException
5325:     {
5326:       int[] signature = new int[4];
5327:       in.mark(4);
5328:       for (int i = 0; i < 4; i++)
5329:         signature[i] = in.read();
5330:       in.reset();
5331: 
5332:       // 4-byte encodings
5333:       if (equals(SIGNATURE_UCS_4_1234, signature))
5334:         {
5335:           in.read();
5336:           in.read();
5337:           in.read();
5338:           in.read();
5339:           setInputEncoding("UTF-32BE");
5340:           encodingDetected = true;
5341:         }
5342:       else if (equals(SIGNATURE_UCS_4_4321, signature))
5343:         {
5344:           in.read();
5345:           in.read();
5346:           in.read();
5347:           in.read();
5348:           setInputEncoding("UTF-32LE");
5349:           encodingDetected = true;
5350:         }
5351:       else if (equals(SIGNATURE_UCS_4_2143, signature) ||
5352:                equals(SIGNATURE_UCS_4_3412, signature))
5353:         throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
5354: 
5355:       // 2-byte encodings
5356:       else if (equals(SIGNATURE_UCS_2_12, signature))
5357:         {
5358:           in.read();
5359:           in.read();
5360:           setInputEncoding("UTF-16BE");
5361:           encodingDetected = true;
5362:         }
5363:       else if (equals(SIGNATURE_UCS_2_21, signature))
5364:         {
5365:           in.read();
5366:           in.read();
5367:           setInputEncoding("UTF-16LE");
5368:           encodingDetected = true;
5369:         }
5370:       else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
5371:         {
5372:           //setInputEncoding("UTF-16BE");
5373:           throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5374:         }
5375:       else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
5376:         {
5377:           //setInputEncoding("UTF-16LE");
5378:           throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5379:         }
5380:       // ASCII-derived encodings
5381:       else if (equals(SIGNATURE_UTF_8, signature))
5382:         {
5383:           // UTF-8 input encoding implied, TextDecl
5384:         }
5385:       else if (equals(SIGNATURE_UTF_8_BOM, signature))
5386:         {
5387:           in.read();
5388:           in.read();
5389:           in.read();
5390:           setInputEncoding("UTF-8");
5391:           encodingDetected = true;
5392:         }
5393:     }
5394: 
5395:     private static boolean equals(int[] b1, int[] b2)
5396:     {
5397:       for (int i = 0; i < b1.length; i++)
5398:         {
5399:           if (b1[i] != b2[i])
5400:             return false;
5401:         }
5402:       return true;
5403:     }
5404: 
5405:     void setInputEncoding(String encoding)
5406:       throws IOException
5407:     {
5408:       if (encoding.equals(inputEncoding))
5409:         return;
5410:       if ("UTF-16".equalsIgnoreCase(encoding) &&
5411:           inputEncoding.startsWith("UTF-16"))
5412:         return;
5413:       if (encodingDetected)
5414:         throw new UnsupportedEncodingException("document is not in its " +
5415:                                                "declared encoding " +
5416:                                                inputEncoding +
5417:                                                ": " + encoding);
5418:       inputEncoding = encoding;
5419:       finalizeEncoding();
5420:     }
5421: 
5422:     void finalizeEncoding()
5423:       throws IOException
5424:     {
5425:       if (reader != null)
5426:         return;
5427:       reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
5428:       unicodeReader = new UnicodeReader(reader);
5429:       mark(1);
5430:     }
5431: 
5432:   }
5433: 
5434: }