Source for gnu.xml.util.XMLWriter

   1: /* XMLWriter.java --
   2:    Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package gnu.xml.util;
  39: 
  40: import gnu.java.lang.CPStringBuilder;
  41: 
  42: import java.io.BufferedWriter;
  43: import java.io.CharConversionException;
  44: import java.io.IOException;
  45: import java.io.OutputStream;
  46: import java.io.OutputStreamWriter;
  47: import java.io.Writer;
  48: import java.util.Stack;
  49: 
  50: import org.xml.sax.*;
  51: import org.xml.sax.ext.*;
  52: import org.xml.sax.helpers.*;
  53: 
  54: 
  55: /**
  56:  * This class is a SAX handler which writes all its input as a well formed
  57:  * XML or XHTML document.  If driven using SAX2 events, this output may
  58:  * include a recreated document type declaration, subject to limitations
  59:  * of SAX (no internal subset exposed) or DOM (the important declarations,
  60:  * with their documentation, are discarded).
  61:  *
  62:  * <p> By default, text is generated "as-is", but some optional modes
  63:  * are supported.  Pretty-printing is supported, to make life easier
  64:  * for people reading the output.  XHTML (1.0) output can be made
  65:  * particularly pretty; all the built-in character entities are known.
  66:  * Canonical XML can also be generated, assuming the input is properly
  67:  * formed.
  68:  *
  69:  * <hr>
  70:  *
  71:  * <p> Some of the methods on this class are intended for applications to
  72:  * use directly, rather than as pure SAX2 event callbacks.  Some of those
  73:  * methods access the JavaBeans properties (used to tweak output formats,
  74:  * for example canonicalization and pretty printing).  Subclasses
  75:  * are expected to add new behaviors, not to modify current behavior, so
  76:  * many such methods are final.</p>
  77:  *
  78:  * <p> The <em>write*()</em> methods may be slightly simpler for some
  79:  * applications to use than direct callbacks.  For example, they support
  80:  * a simple policy for encoding data items as the content of a single element.
  81:  *
  82:  * <p> To reuse an XMLWriter you must provide it with a new Writer, since
  83:  * this handler closes the writer it was given as part of its endDocument()
  84:  * handling.  (XML documents have an end of input, and the way to encode
  85:  * that on a stream is to close it.) </p>
  86:  *
  87:  * <hr>
  88:  *
  89:  * <p> Note that any relative URIs in the source document, as found in
  90:  * entity and notation declarations, ought to have been fully resolved by
  91:  * the parser providing events to this handler.  This means that the
  92:  * output text should only have fully resolved URIs, which may not be
  93:  * the desired behavior in cases where later binding is desired. </p>
  94:  *
  95:  * <p> <em>Note that due to SAX2 defaults, you may need to manually
  96:  * ensure that the input events are XML-conformant with respect to namespace
  97:  * prefixes and declarations.  {@link gnu.xml.pipeline.NSFilter} is
  98:  * one solution to this problem, in the context of processing pipelines.</em>
  99:  * Something as simple as connecting this handler to a parser might not
 100:  * generate the correct output.  Another workaround is to ensure that the
 101:  * <em>namespace-prefixes</em> feature is always set to true, if you're
 102:  * hooking this directly up to some XMLReader implementation.
 103:  *
 104:  * @see gnu.xml.pipeline.TextConsumer
 105:  *
 106:  * @author David Brownell
 107:  *
 108:  * @deprecated Please use the javax.xml.stream APIs instead
 109:  */
 110: public class XMLWriter
 111:     implements ContentHandler, LexicalHandler, DTDHandler, DeclHandler
 112: {
 113:     // text prints/escapes differently depending on context
 114:     //  CTX_ENTITY ... entity literal value
 115:     //  CTX_ATTRIBUTE ... attribute literal value
 116:     //  CTX_CONTENT ... content of an element
 117:     //  CTX_UNPARSED ... CDATA, comment, PI, names, etc
 118:     //  CTX_NAME ... name or nmtoken, no escapes possible
 119:     private static final int    CTX_ENTITY = 1;
 120:     private static final int    CTX_ATTRIBUTE = 2;
 121:     private static final int    CTX_CONTENT = 3;
 122:     private static final int    CTX_UNPARSED = 4;
 123:     private static final int    CTX_NAME = 5;
 124: 
 125: // FIXME: names (element, attribute, PI, notation, etc) are not
 126: // currently written out with range checks (escapeChars).
 127: // In non-XHTML, some names can't be directly written; panic!
 128: 
 129:     private static String       sysEOL;
 130: 
 131:     static {
 132:         try {
 133:             sysEOL = System.getProperty ("line.separator", "\n");
 134: 
 135:             // don't use the system's EOL if it's illegal XML.
 136:             if (!isLineEnd (sysEOL))
 137:                 sysEOL = "\n";
 138: 
 139:         } catch (SecurityException e) {
 140:             sysEOL = "\n";
 141:         }
 142:     }
 143: 
 144:     private static boolean isLineEnd (String eol)
 145:     {
 146:         return "\n".equals (eol)
 147:                     || "\r".equals (eol)
 148:                     || "\r\n".equals (eol);
 149:     }
 150: 
 151:     private Writer              out;
 152:     private boolean             inCDATA;
 153:     private int                 elementNestLevel;
 154:     private String              eol = sysEOL;
 155: 
 156:     private short               dangerMask;
 157:     private CPStringBuilder     stringBuf;
 158:     private Locator             locator;
 159:     private ErrorHandler        errHandler;
 160: 
 161:     private boolean             expandingEntities = false;
 162:     private int                 entityNestLevel;
 163:     private boolean             xhtml;
 164:     private boolean             startedDoctype;
 165:     private String              encoding;
 166: 
 167:     private boolean             canonical;
 168:     private boolean             inDoctype;
 169:     private boolean             inEpilogue;
 170: 
 171:     // pretty printing controls
 172:     private boolean             prettyPrinting;
 173:     private int                 column;
 174:     private boolean             noWrap;
 175:     private Stack               space = new Stack ();
 176: 
 177:     // this is not a hard'n'fast rule -- longer lines are OK,
 178:     // but are to be avoided.  Here, prettyprinting is more to
 179:     // show structure "cleanly" than to be precise about it.
 180:     // better to have ragged layout than one line 24Kb long.
 181:     private static final int    lineLength = 75;
 182: 
 183: 
 184:     /**
 185:      * Constructs this handler with System.out used to write SAX events
 186:      * using the UTF-8 encoding.  Avoid using this except when you know
 187:      * it's safe to close System.out at the end of the document.
 188:      */
 189:     public XMLWriter () throws IOException
 190:         { this (System.out); }
 191: 
 192:     /**
 193:      * Constructs a handler which writes all input to the output stream
 194:      * in the UTF-8 encoding, and closes it when endDocument is called.
 195:      * (Yes it's annoying that this throws an exception -- but there's
 196:      * really no way around it, since it's barely possible a JDK may
 197:      * exist somewhere that doesn't know how to emit UTF-8.)
 198:      */
 199:     public XMLWriter (OutputStream out) throws IOException
 200:     {
 201:         this (new OutputStreamWriter (out, "UTF8"));
 202:     }
 203: 
 204:     /**
 205:      * Constructs a handler which writes all input to the writer, and then
 206:      * closes the writer when the document ends.  If an XML declaration is
 207:      * written onto the output, and this class can determine the name of
 208:      * the character encoding for this writer, that encoding name will be
 209:      * included in the XML declaration.
 210:      *
 211:      * <P> See the description of the constructor which takes an encoding
 212:      * name for imporant information about selection of encodings.
 213:      *
 214:      * @param writer XML text is written to this writer.
 215:      */
 216:     public XMLWriter (Writer writer)
 217:     {
 218:         this (writer, null);
 219:     }
 220: 
 221:     /**
 222:      * Constructs a handler which writes all input to the writer, and then
 223:      * closes the writer when the document ends.  If an XML declaration is
 224:      * written onto the output, this class will use the specified encoding
 225:      * name in that declaration.  If no encoding name is specified, no
 226:      * encoding name will be declared unless this class can otherwise
 227:      * determine the name of the character encoding for this writer.
 228:      *
 229:      * <P> At this time, only the UTF-8 ("UTF8") and UTF-16 ("Unicode")
 230:      * output encodings are fully lossless with respect to XML data.  If you
 231:      * use any other encoding you risk having your data be silently mangled
 232:      * on output, as the standard Java character encoding subsystem silently
 233:      * maps non-encodable characters to a question mark ("?") and will not
 234:      * report such errors to applications.
 235:      *
 236:      * <p> For a few other encodings the risk can be reduced. If the writer is
 237:      * a java.io.OutputStreamWriter, and uses either the ISO-8859-1 ("8859_1",
 238:      * "ISO8859_1", etc) or US-ASCII ("ASCII") encodings, content which
 239:      * can't be encoded in those encodings will be written safely.  Where
 240:      * relevant, the XHTML entity names will be used; otherwise, numeric
 241:      * character references will be emitted.
 242:      *
 243:      * <P> However, there remain a number of cases where substituting such
 244:      * entity or character references is not an option.  Such references are
 245:      * not usable within a DTD, comment, PI, or CDATA section.  Neither may
 246:      * they be used when element, attribute, entity, or notation names have
 247:      * the problematic characters.
 248:      *
 249:      * @param writer XML text is written to this writer.
 250:      * @param encoding if non-null, and an XML declaration is written,
 251:      *  this is the name that will be used for the character encoding.
 252:      */
 253:     public XMLWriter (Writer writer, String encoding)
 254:     {
 255:         setWriter (writer, encoding);
 256:     }
 257: 
 258:     private void setEncoding (String encoding)
 259:     {
 260:         if (encoding == null && out instanceof OutputStreamWriter)
 261:             encoding = ((OutputStreamWriter)out).getEncoding ();
 262: 
 263:         if (encoding != null) {
 264:             encoding = encoding.toUpperCase ();
 265: 
 266:             // Use official encoding names where we know them,
 267:             // avoiding the Java-only names.  When using common
 268:             // encodings where we can easily tell if characters
 269:             // are out of range, we'll escape out-of-range
 270:             // characters using character refs for safety.
 271: 
 272:             // I _think_ these are all the main synonyms for these!
 273:             if ("UTF8".equals (encoding)) {
 274:                 encoding = "UTF-8";
 275:             } else if ("US-ASCII".equals (encoding)
 276:                     || "ASCII".equals (encoding)) {
 277:                 dangerMask = (short) 0xff80;
 278:                 encoding = "US-ASCII";
 279:             } else if ("ISO-8859-1".equals (encoding)
 280:                     || "8859_1".equals (encoding)
 281:                     || "ISO8859_1".equals (encoding)) {
 282:                 dangerMask = (short) 0xff00;
 283:                 encoding = "ISO-8859-1";
 284:             } else if ("UNICODE".equals (encoding)
 285:                     || "UNICODE-BIG".equals (encoding)
 286:                     || "UNICODE-LITTLE".equals (encoding)) {
 287:                 encoding = "UTF-16";
 288: 
 289:                 // TODO: UTF-16BE, UTF-16LE ... no BOM; what
 290:                 // release of JDK supports those Unicode names?
 291:             }
 292: 
 293:             if (dangerMask != 0)
 294:                 stringBuf = new CPStringBuilder ();
 295:         }
 296: 
 297:         this.encoding = encoding;
 298:     }
 299: 
 300: 
 301:     /**
 302:      * Resets the handler to write a new text document.
 303:      *
 304:      * @param writer XML text is written to this writer.
 305:      * @param encoding if non-null, and an XML declaration is written,
 306:      *  this is the name that will be used for the character encoding.
 307:      *
 308:      * @exception IllegalStateException if the current
 309:      *  document hasn't yet ended (with {@link #endDocument})
 310:      */
 311:     final public void setWriter (Writer writer, String encoding)
 312:     {
 313:         if (out != null)
 314:             throw new IllegalStateException (
 315:                 "can't change stream in mid course");
 316:         out = writer;
 317:         if (out != null)
 318:             setEncoding (encoding);
 319:         if (!(out instanceof BufferedWriter))
 320:             out = new BufferedWriter (out);
 321:         space.push ("default");
 322:     }
 323: 
 324:     /**
 325:      * Assigns the line ending style to be used on output.
 326:      * @param eolString null to use the system default; else
 327:      *  "\n", "\r", or "\r\n".
 328:      */
 329:     final public void setEOL (String eolString)
 330:     {
 331:         if (eolString == null)
 332:             eol = sysEOL;
 333:         else if (!isLineEnd (eolString))
 334:             eol = eolString;
 335:         else
 336:             throw new IllegalArgumentException (eolString);
 337:     }
 338: 
 339:     /**
 340:      * Assigns the error handler to be used to present most fatal
 341:      * errors.
 342:      */
 343:     public void setErrorHandler (ErrorHandler handler)
 344:     {
 345:         errHandler = handler;
 346:     }
 347: 
 348:     /**
 349:      * Used internally and by subclasses, this encapsulates the logic
 350:      * involved in reporting fatal errors.  It uses locator information
 351:      * for good diagnostics, if available, and gives the application's
 352:      * ErrorHandler the opportunity to handle the error before throwing
 353:      * an exception.
 354:      */
 355:     protected void fatal (String message, Exception e)
 356:     throws SAXException
 357:     {
 358:         SAXParseException       x;
 359: 
 360:         if (locator == null)
 361:             x = new SAXParseException (message, null, null, -1, -1, e);
 362:         else
 363:             x = new SAXParseException (message, locator, e);
 364:         if (errHandler != null)
 365:             errHandler.fatalError (x);
 366:         throw x;
 367:     }
 368: 
 369: 
 370:     // JavaBeans properties
 371: 
 372:     /**
 373:      * Controls whether the output should attempt to follow the "transitional"
 374:      * XHTML rules so that it meets the "HTML Compatibility Guidelines"
 375:      * appendix in the XHTML specification.  A "transitional" Document Type
 376:      * Declaration (DTD) is placed near the beginning of the output document,
 377:      * instead of whatever DTD would otherwise have been placed there, and
 378:      * XHTML empty elements are printed specially.  When writing text in
 379:      * US-ASCII or ISO-8859-1 encodings, the predefined XHTML internal
 380:      * entity names are used (in preference to character references) when
 381:      * writing content characters which can't be expressed in those encodings.
 382:      *
 383:      * <p> When this option is enabled, it is the caller's responsibility
 384:      * to ensure that the input is otherwise valid as XHTML.  Things to
 385:      * be careful of in all cases, as described in the appendix referenced
 386:      * above, include:  <ul>
 387:      *
 388:      *  <li> Element and attribute names must be in lower case, both
 389:      *          in the document and in any CSS style sheet.
 390:      *  <li> All XML constructs must be valid as defined by the XHTML
 391:      *          "transitional" DTD (including all familiar constructs,
 392:      *          even deprecated ones).
 393:      *  <li> The root element must be "html".
 394:      *  <li> Elements that must be empty (such as <em>&lt;br&gt;</em>
 395:      *          must have no content.
 396:      *  <li> Use both <em>lang</em> and <em>xml:lang</em> attributes
 397:      *          when specifying language.
 398:      *  <li> Similarly, use both <em>id</em> and <em>name</em> attributes
 399:      *          when defining elements that may be referred to through
 400:      *          URI fragment identifiers ... and make sure that the
 401:      *          value is a legal NMTOKEN, since not all such HTML 4.0
 402:      *          identifiers are valid in XML.
 403:      *  <li> Be careful with character encodings; make sure you provide
 404:      *          a <em>&lt;meta http-equiv="Content-type"
 405:      *          content="text/xml;charset=..." /&gt;</em> element in
 406:      *          the HTML "head" element, naming the same encoding
 407:      *          used to create this handler.  Also, if that encoding
 408:      *          is anything other than US-ASCII, make sure that if
 409:      *          the document is given a MIME content type, it has
 410:      *          a <em>charset=...</em> attribute with that encoding.
 411:      *  </ul>
 412:      *
 413:      * <p> Additionally, some of the oldest browsers have additional
 414:      * quirks, to address with guidelines such as: <ul>
 415:      *
 416:      *  <li> Processing instructions may be rendered, so avoid them.
 417:      *          (Similarly for an XML declaration.)
 418:      *  <li> Embedded style sheets and scripts should not contain XML
 419:      *          markup delimiters:  &amp;, &lt;, and ]]&gt; are trouble.
 420:      *  <li> Attribute values should not have line breaks or multiple
 421:      *          consecutive white space characters.
 422:      *  <li> Use no more than one of the deprecated (transitional)
 423:      *          <em>&lt;isindex&gt;</em> elements.
 424:      *  <li> Some boolean attributes (such as <em>compact, checked,
 425:      *          disabled, readonly, selected,</em> and more) confuse
 426:      *          some browsers, since they only understand minimized
 427:      *          versions which are illegal in XML.
 428:      *  </ul>
 429:      *
 430:      * <p> Also, some characteristics of the resulting output may be
 431:      * a function of whether the document is later given a MIME
 432:      * content type of <em>text/html</em> rather than one indicating
 433:      * XML (<em>application/xml</em> or <em>text/xml</em>).  Worse,
  434:  * some browsers ignore MIME content types and prefer to rely on URI
 435:      * name suffixes -- so an "index.xml" could always be XML, never
 436:      * XHTML, no matter its MIME type.
 437:      */
 438:     final public void setXhtml (boolean value)
 439:     {
 440:         if (locator != null)
 441:             throw new IllegalStateException ("started parsing");
 442:         xhtml = value;
 443:         if (xhtml)
 444:             canonical = false;
 445:     }
 446: 
 447:     /**
 448:      * Returns true if the output attempts to echo the input following
 449:      * "transitional" XHTML rules and matching the "HTML Compatibility
 450:      * Guidelines" so that an HTML version 3 browser can read the output
 451:      * as HTML; returns false (the default) othewise.
 452:      */
 453:     final public boolean isXhtml ()
 454:     {
 455:         return xhtml;
 456:     }
 457: 
 458:     /**
 459:      * Controls whether the output text contains references to
 460:      * entities (the default), or instead contains the expanded
 461:      * values of those entities.
 462:      */
 463:     final public void setExpandingEntities (boolean value)
 464:     {
 465:         if (locator != null)
 466:             throw new IllegalStateException ("started parsing");
 467:         expandingEntities = value;
 468:         if (!expandingEntities)
 469:             canonical = false;
 470:     }
 471: 
 472:     /**
 473:      * Returns true if the output will have no entity references;
 474:      * returns false (the default) otherwise.
 475:      */
 476:     final public boolean isExpandingEntities ()
 477:     {
 478:         return expandingEntities;
 479:     }
 480: 
 481:     /**
 482:      * Controls pretty-printing, which by default is not enabled
 483:      * (and currently is most useful for XHTML output).
 484:      * Pretty printing enables structural indentation, sorting of attributes
 485:      * by name, line wrapping, and potentially other mechanisms for making
 486:      * output more or less readable.
 487:      *
 488:      * <p> At this writing, structural indentation and line wrapping are
 489:      * enabled when pretty printing is enabled and the <em>xml:space</em>
 490:      * attribute has the value <em>default</em> (its other legal value is
 491:      * <em>preserve</em>, as defined in the XML specification).  The three
 492:      * XHTML element types which use another value are recognized by their
 493:      * names (namespaces are ignored).
 494:      *
 495:      * <p> Also, for the record, the "pretty" aspect of printing here
 496:      * is more to provide basic structure on outputs that would otherwise
 497:      * risk being a single long line of text.  For now, expect the
 498:      * structure to be ragged ... unless you'd like to submit a patch
 499:      * to make this be more strictly formatted!
 500:      *
 501:      * @exception IllegalStateException thrown if this method is invoked
 502:      *  after output has begun.
 503:      */
 504:     final public void setPrettyPrinting (boolean value)
 505:     {
 506:         if (locator != null)
 507:             throw new IllegalStateException ("started parsing");
 508:         prettyPrinting = value;
 509:         if (prettyPrinting)
 510:             canonical = false;
 511:     }
 512: 
 513:     /**
 514:      * Returns value of flag controlling pretty printing.
 515:      */
 516:     final public boolean isPrettyPrinting ()
 517:     {
 518:         return prettyPrinting;
 519:     }
 520: 
 521: 
 522:     /**
 523:      * Sets the output style to be canonicalized.  Input events must
 524:      * meet requirements that are slightly more stringent than the
 525:      * basic well-formedness ones, and include:  <ul>
 526:      *
 527:      *  <li> Namespace prefixes must not have been changed from those
 528:      *  in the original document.  (This may only be ensured by setting
 529:      *  the SAX2 XMLReader <em>namespace-prefixes</em> feature flag;
 530:      *  by default, it is cleared.)
 531:      *
 532:      *  <li> Redundant namespace declaration attributes have been
 533:      *  removed.  (If an ancestor element defines a namespace prefix
  534:  *  and that declaration hasn't been overridden, an element must
 535:      *  not redeclare it.)
 536:      *
 537:      *  <li> If comments are not to be included in the canonical output,
 538:      *  they must first be removed from the input event stream; this
  539:  *  is <em>Canonical XML with comments</em> by default.
 540:      *
 541:      *  <li> If the input character encoding was not UCS-based, the
 542:      *  character data must have been normalized using Unicode
 543:      *  Normalization Form C.  (UTF-8 and UTF-16 are UCS-based.)
 544:      *
 545:      *  <li> Attribute values must have been normalized, as is done
 546:      *  by any conformant XML processor which processes all external
 547:      *  parameter entities.
 548:      *
 549:      *  <li> Similarly, attribute value defaulting has been performed.
 550:      *
 551:      *  </ul>
 552:      *
 553:      * <p> Note that fragments of XML documents, as specified by an XPath
 554:      * node set, may be canonicalized.  In such cases, elements may need
 555:      * some fixup (for <em>xml:*</em> attributes and application-specific
 556:      * context).
 557:      *
 558:      * @exception IllegalArgumentException if the output encoding
 559:      *  is anything other than UTF-8.
 560:      */
 561:     final public void setCanonical (boolean value)
 562:     {
 563:         if (value && !"UTF-8".equals (encoding))
 564:             throw new IllegalArgumentException ("encoding != UTF-8");
 565:         canonical = value;
 566:         if (canonical) {
 567:             prettyPrinting = xhtml = false;
 568:             expandingEntities = true;
 569:             eol = "\n";
 570:         }
 571:     }
 572: 
 573: 
 574:     /**
 575:      * Returns value of flag controlling canonical output.
 576:      */
 577:     final public boolean isCanonical ()
 578:     {
 579:         return canonical;
 580:     }
 581: 
 582: 
 583:     /**
 584:      * Flushes the output stream.  When this handler is used in long lived
 585:      * pipelines, it can be important to flush buffered state, for example
 586:      * so that it can reach the disk as part of a state checkpoint.
 587:      */
 588:     final public void flush ()
 589:     throws IOException
 590:     {
 591:         if (out != null)
 592:             out.flush ();
 593:     }
 594: 
 595: 
 596:     // convenience routines
 597: 
 598: // FIXME:  probably want a subclass that holds a lot of these...
 599: // and maybe more!
 600: 
 601:     /**
 602:      * Writes the string as if characters() had been called on the contents
 603:      * of the string.  This is particularly useful when applications act as
 604:      * producers and write data directly to event consumers.
 605:      */
 606:     final public void write (String data)
 607:     throws SAXException
 608:     {
 609:         char    buf [] = data.toCharArray ();
 610:         characters (buf, 0, buf.length);
 611:     }
 612: 
 613: 
 614:     /**
 615:      * Writes an element that has content consisting of a single string.
 616:      * @see #writeEmptyElement
 617:      * @see #startElement
 618:      */
 619:     public void writeElement (
 620:         String uri,
 621:         String localName,
 622:         String qName,
 623:         Attributes atts,
 624:         String content
 625:     ) throws SAXException
 626:     {
 627:         if (content == null || content.length () == 0) {
 628:             writeEmptyElement (uri, localName, qName, atts);
 629:             return;
 630:         }
 631:         startElement (uri, localName, qName, atts);
 632:         char chars [] = content.toCharArray ();
 633:         characters (chars, 0, chars.length);
 634:         endElement (uri, localName, qName);
 635:     }
 636: 
 637: 
 638:     /**
 639:      * Writes an element that has content consisting of a single integer,
 640:      * encoded as a decimal string.
 641:      * @see #writeEmptyElement
 642:      * @see #startElement
 643:      */
 644:     public void writeElement (
 645:         String uri,
 646:         String localName,
 647:         String qName,
 648:         Attributes atts,
 649:         int content
 650:     ) throws SAXException
 651:     {
 652:         writeElement (uri, localName, qName, atts, Integer.toString (content));
 653:     }
 654: 
 655: 
 656:     // SAX1 ContentHandler
 657:     /** <b>SAX1</b>:  provides parser status information */
 658:     final public void setDocumentLocator (Locator l)
 659:     {
 660:         locator = l;
 661:     }
 662: 
 663: 
 664:     // URL for dtd that validates against all normal HTML constructs
 665:     private static final String xhtmlFullDTD =
 666:     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
 667: 
 668: 
 669:     /**
 670:      * <b>SAX1</b>:  indicates the beginning of a document parse.
 671:      * If you're writing (well formed) fragments of XML, neither
 672:      * this nor endDocument should be called.
 673:      */
 674:     // NOT final
 675:     public void startDocument ()
 676:     throws SAXException
 677:     {
 678:         try {
 679:             if (out == null)
 680:                 throw new IllegalStateException (
 681:                     "null Writer given to XMLWriter");
 682: 
 683:             // Not all parsers provide the locator we want; this also
 684:             // flags whether events are being sent to this object yet.
 685:             // We could only have this one call if we only printed whole
 686:             // documents ... but we also print fragments, so most of the
 687:             // callbacks here replicate this test.
 688: 
 689:             if (locator == null)
 690:                 locator = new LocatorImpl ();
 691: 
 692:             // Unless the data is in US-ASCII or we're canonicalizing, write
 693:             // the XML declaration if we know the encoding.  US-ASCII won't
 694:             // normally get mangled by web server confusion about the
 695:             // character encodings used.  Plus, it's an easy way to
 696:             // ensure we can write ASCII that's unlikely to confuse
 697:             // elderly HTML parsers.
 698: 
 699:             if (!canonical
 700:                     && dangerMask != (short) 0xff80
 701:                     && encoding != null) {
 702:                 rawWrite ("<?xml version='1.0'");
 703:                 rawWrite (" encoding='" + encoding + "'");
 704:                 rawWrite ("?>");
 705:                 newline ();
 706:             }
 707: 
 708:             if (xhtml) {
 709: 
 710:                 rawWrite ("<!DOCTYPE html PUBLIC");
 711:                 newline ();
 712:                 rawWrite ("  '-//W3C//DTD XHTML 1.0 Transitional//EN'");
 713:                 newline ();
 714:                 rawWrite ("  '");
 715:                     // NOTE:  URL (above) matches the REC
 716:                 rawWrite (xhtmlFullDTD);
 717:                 rawWrite ("'>");
 718:                 newline ();
 719:                 newline ();
 720: 
 721:                 // fake the rest of the handler into ignoring
 722:                 // everything until the root element, so any
 723:                 // XHTML DTD comments, PIs, etc are ignored
 724:                 startedDoctype = true;
 725:             }
 726: 
 727:             entityNestLevel = 0;
 728: 
 729:         } catch (IOException e) {
 730:             fatal ("can't write", e);
 731:         }
 732:     }
 733: 
 734:     /**
 735:      * <b>SAX1</b>:  indicates the completion of a parse.
 736:      * Note that all complete SAX event streams make this call, even
 737:      * if an error is reported during a parse.
 738:      */
 739:     // NOT final
 740:     public void endDocument ()
 741:     throws SAXException
 742:     {
 743:         try {
 744:             if (!canonical) {
 745:                 newline ();
 746:                 newline ();
 747:             }
 748:             out.close ();
 749:             out = null;
 750:             locator = null;
 751:         } catch (IOException e) {
 752:             fatal ("can't write", e);
 753:         }
 754:     }
 755: 
 756:     // XHTML elements declared as EMPTY print differently
 757:     final private static boolean isEmptyElementTag (String tag)
 758:     {
 759:         switch (tag.charAt (0)) {
 760:           case 'a':     return "area".equals (tag);
 761:           case 'b':     return "base".equals (tag)
 762:                             || "basefont".equals (tag)
 763:                             || "br".equals (tag);
 764:           case 'c':     return "col".equals (tag);
 765:           case 'f':     return "frame".equals (tag);
 766:           case 'h':     return "hr".equals (tag);
 767:           case 'i':     return "img".equals (tag)
 768:                             || "input".equals (tag)
 769:                             || "isindex".equals (tag);
 770:           case 'l':     return "link".equals (tag);
 771:           case 'm':     return "meta".equals (tag);
 772:           case 'p':     return "param".equals (tag);
 773:         }
 774:         return false;
 775:     }
 776: 
 777:     private static boolean indentBefore (String tag)
 778:     {
 779:         // basically indent before block content
 780:         // and within structure like tables, lists
 781:         switch (tag.charAt (0)) {
 782:           case 'a':     return "applet".equals (tag);
 783:           case 'b':     return "body".equals (tag)
 784:                             || "blockquote".equals (tag);
 785:           case 'c':     return "center".equals (tag);
 786:           case 'f':     return "frame".equals (tag)
 787:                             || "frameset".equals (tag);
 788:           case 'h':     return "head".equals (tag);
 789:           case 'm':     return "meta".equals (tag);
 790:           case 'o':     return "object".equals (tag);
 791:           case 'p':     return "param".equals (tag)
 792:                             || "pre".equals (tag);
 793:           case 's':     return "style".equals (tag);
 794:           case 't':     return "title".equals (tag)
 795:                             || "td".equals (tag)
 796:                             || "th".equals (tag);
 797:         }
 798:         // ... but not inline elements like "em", "b", "font"
 799:         return false;
 800:     }
 801: 
 802:     private static boolean spaceBefore (String tag)
 803:     {
 804:         // blank line AND INDENT before certain structural content
 805:         switch (tag.charAt (0)) {
 806:           case 'h':     return "h1".equals (tag)
 807:                             || "h2".equals (tag)
 808:                             || "h3".equals (tag)
 809:                             || "h4".equals (tag)
 810:                             || "h5".equals (tag)
 811:                             || "h6".equals (tag)
 812:                             || "hr".equals (tag);
 813:           case 'l':     return "li".equals (tag);
 814:           case 'o':     return "ol".equals (tag);
 815:           case 'p':     return "p".equals (tag);
 816:           case 't':     return "table".equals (tag)
 817:                             || "tr".equals (tag);
 818:           case 'u':     return "ul".equals (tag);
 819:         }
 820:         return false;
 821:     }
 822: 
 823:     // XHTML DTDs say these three have xml:space="preserve"
 824:     private static boolean spacePreserve (String tag)
 825:     {
 826:         return "pre".equals (tag)
 827:                 || "style".equals (tag)
 828:                 || "script".equals (tag);
 829:     }
 830: 
 831:     /**
 832:      * <b>SAX2</b>:  ignored.
 833:      */
 834:     final public void startPrefixMapping (String prefix, String uri)
 835:         {}
 836: 
 837:     /**
 838:      * <b>SAX2</b>:  ignored.
 839:      */
 840:     final public void endPrefixMapping (String prefix)
 841:         {}
 842: 
 843:     private void writeStartTag (
 844:         String name,
 845:         Attributes atts,
 846:         boolean isEmpty
 847:     ) throws SAXException, IOException
 848:     {
 849:         rawWrite ('<');
 850:         rawWrite (name);
 851: 
 852:         // write out attributes ... sorting is particularly useful
 853:         // with output that's been heavily defaulted.
 854:         if (atts != null && atts.getLength () != 0) {
 855: 
 856:             // Set up to write, with optional sorting
 857:             int         indices [] = new int [atts.getLength ()];
 858: 
 859:             for (int i= 0; i < indices.length; i++)
 860:                 indices [i] = i;
 861: 
 862:             // optionally sort
 863: 
 864: // FIXME:  canon xml demands xmlns nodes go first,
 865: // and sorting by URI first (empty first) then localname
 866: // it should maybe use a different sort
 867: 
 868:             if (canonical || prettyPrinting) {
 869: 
 870:                 // insertion sort by attribute name
 871:                 for (int i = 1; i < indices.length; i++) {
 872:                     int n = indices [i], j;
 873:                     String      s = atts.getQName (n);
 874: 
 875:                     for (j = i - 1; j >= 0; j--) {
 876:                         if (s.compareTo (atts.getQName (indices [j]))
 877:                                 >= 0)
 878:                             break;
 879:                         indices [j + 1] = indices [j];
 880:                     }
 881:                     indices [j + 1] = n;
 882:                 }
 883:             }
 884: 
 885:             // write, sorted or no
 886:             for (int i= 0; i < indices.length; i++) {
 887:                 String  s = atts.getQName (indices [i]);
 888: 
 889:                     if (s == null || "".equals (s))
 890:                         throw new IllegalArgumentException ("no XML name");
 891:                 rawWrite (" ");
 892:                 rawWrite (s);
 893:                 rawWrite ("=");
 894:                 writeQuotedValue (atts.getValue (indices [i]),
 895:                     CTX_ATTRIBUTE);
 896:             }
 897:         }
 898:         if (isEmpty)
 899:             rawWrite (" /");
 900:         rawWrite ('>');
 901:     }
 902: 
 903:     /**
 904:      * <b>SAX2</b>:  indicates the start of an element.
 905:      * When XHTML is in use, avoid attribute values with
 906:      * line breaks or multiple whitespace characters, since
 907:      * not all user agents handle them correctly.
 908:      */
 909:     final public void startElement (
 910:         String uri,
 911:         String localName,
 912:         String qName,
 913:         Attributes atts
 914:     ) throws SAXException
 915:     {
 916:         startedDoctype = false;
 917: 
 918:         if (locator == null)
 919:             locator = new LocatorImpl ();
 920: 
 921:         if (qName == null || "".equals (qName))
 922:             throw new IllegalArgumentException ("no XML name");
 923: 
 924:         try {
 925:             if (entityNestLevel != 0)
 926:                 return;
 927:             if (prettyPrinting) {
 928:                 String whitespace = null;
 929: 
 930:                 if (xhtml && spacePreserve (qName))
 931:                     whitespace = "preserve";
 932:                 else if (atts != null)
 933:                     whitespace = atts.getValue ("xml:space");
 934:                 if (whitespace == null)
 935:                     whitespace = (String) space.peek ();
 936:                 space.push (whitespace);
 937: 
 938:                 if ("default".equals (whitespace)) {
 939:                     if (xhtml) {
 940:                         if (spaceBefore (qName)) {
 941:                             newline ();
 942:                             doIndent ();
 943:                         } else if (indentBefore (qName))
 944:                             doIndent ();
 945:                         // else it's inlined, modulo line length
 946:                         // FIXME: incrementing element nest level
 947:                         // for inlined elements causes ugliness
 948:                     } else
 949:                         doIndent ();
 950:                 }
 951:             }
 952:             elementNestLevel++;
 953:             writeStartTag (qName, atts, xhtml && isEmptyElementTag (qName));
 954: 
 955:             if (xhtml) {
 956: // FIXME: if this is an XHTML "pre" element, turn
 957: // off automatic wrapping.
 958:             }
 959: 
 960:         } catch (IOException e) {
 961:             fatal ("can't write", e);
 962:         }
 963:     }
 964: 
 965:     /**
 966:      * Writes an empty element.
 967:      * @see #startElement
 968:      */
 969:     public void writeEmptyElement (
 970:         String uri,
 971:         String localName,
 972:         String qName,
 973:         Attributes atts
 974:     ) throws SAXException
 975:     {
 976:         if (canonical) {
 977:             startElement (uri, localName, qName, atts);
 978:             endElement (uri, localName, qName);
 979:         } else {
 980:             try {
 981:                 writeStartTag (qName, atts, true);
 982:             } catch (IOException e) {
 983:                 fatal ("can't write", e);
 984:             }
 985:         }
 986:     }
 987: 
 988: 
 989:     /** <b>SAX2</b>:  indicates the end of an element */
 990:     final public void endElement (String uri, String localName, String qName)
 991:     throws SAXException
 992:     {
 993:         if (qName == null || "".equals (qName))
 994:             throw new IllegalArgumentException ("no XML name");
 995: 
 996:         try {
 997:             elementNestLevel--;
 998:             if (entityNestLevel != 0)
 999:                 return;
1000:             if (xhtml && isEmptyElementTag (qName))
1001:                 return;
1002:             rawWrite ("</");
1003:             rawWrite (qName);
1004:             rawWrite ('>');
1005: 
1006:             if (prettyPrinting) {
1007:                 if (!space.empty ())
1008:                     space.pop ();
1009:                 else
1010:                     fatal ("stack discipline", null);
1011:             }
1012:             if (elementNestLevel == 0)
1013:                 inEpilogue = true;
1014: 
1015:         } catch (IOException e) {
1016:             fatal ("can't write", e);
1017:         }
1018:     }
1019: 
1020:     /** <b>SAX1</b>:  reports content characters */
1021:     final public void characters (char ch [], int start, int length)
1022:     throws SAXException
1023:     {
1024:         if (locator == null)
1025:             locator = new LocatorImpl ();
1026: 
1027:         try {
1028:             if (entityNestLevel != 0)
1029:                 return;
1030:             if (inCDATA) {
1031:                 escapeChars (ch, start, length, CTX_UNPARSED);
1032:             } else {
1033:                 escapeChars (ch, start, length, CTX_CONTENT);
1034:             }
1035:         } catch (IOException e) {
1036:             fatal ("can't write", e);
1037:         }
1038:     }
1039: 
1040:     /** <b>SAX1</b>:  reports ignorable whitespace */
1041:     final public void ignorableWhitespace (char ch [], int start, int length)
1042:     throws SAXException
1043:     {
1044:         if (locator == null)
1045:             locator = new LocatorImpl ();
1046: 
1047:         try {
1048:             if (entityNestLevel != 0)
1049:                 return;
1050:             // don't forget to map NL to CRLF, CR, etc
1051:             escapeChars (ch, start, length, CTX_CONTENT);
1052:         } catch (IOException e) {
1053:             fatal ("can't write", e);
1054:         }
1055:     }
1056: 
1057:     /**
1058:      * <b>SAX1</b>:  reports a PI.
1059:      * This doesn't check for illegal target names, such as "xml" or "XML",
1060:      * or namespace-incompatible ones like "big:dog"; the caller is
1061:      * responsible for ensuring those names are legal.
1062:      */
1063:     final public void processingInstruction (String target, String data)
1064:     throws SAXException
1065:     {
1066:         if (locator == null)
1067:             locator = new LocatorImpl ();
1068: 
1069:         // don't print internal subset for XHTML
1070:         if (xhtml && startedDoctype)
1071:             return;
1072: 
1073:         // ancient HTML browsers might render these ... their loss.
1074:         // to prevent:  "if (xhtml) return;".
1075: 
1076:         try {
1077:             if (entityNestLevel != 0)
1078:                 return;
1079:             if (canonical && inEpilogue)
1080:                 newline ();
1081:             rawWrite ("<?");
1082:             rawWrite (target);
1083:             rawWrite (' ');
1084:             escapeChars (data.toCharArray (), -1, -1, CTX_UNPARSED);
1085:             rawWrite ("?>");
1086:             if (elementNestLevel == 0 && !(canonical && inEpilogue))
1087:                 newline ();
1088:         } catch (IOException e) {
1089:             fatal ("can't write", e);
1090:         }
1091:     }
1092: 
1093:     /** <b>SAX1</b>: indicates a non-expanded entity reference */
1094:     public void skippedEntity (String name)
1095:     throws SAXException
1096:     {
1097:         try {
1098:             rawWrite ("&");
1099:             rawWrite (name);
1100:             rawWrite (";");
1101:         } catch (IOException e) {
1102:             fatal ("can't write", e);
1103:         }
1104:     }
1105: 
1106:     // SAX2 LexicalHandler
1107: 
1108:     /** <b>SAX2</b>:  called before parsing CDATA characters */
1109:     final public void startCDATA ()
1110:     throws SAXException
1111:     {
1112:         if (locator == null)
1113:             locator = new LocatorImpl ();
1114: 
1115:         if (canonical)
1116:             return;
1117: 
1118:         try {
1119:             inCDATA = true;
1120:             if (entityNestLevel == 0)
1121:                 rawWrite ("<![CDATA[");
1122:         } catch (IOException e) {
1123:             fatal ("can't write", e);
1124:         }
1125:     }
1126: 
1127:     /** <b>SAX2</b>:  called after parsing CDATA characters */
1128:     final public void endCDATA ()
1129:     throws SAXException
1130:     {
1131:         if (canonical)
1132:             return;
1133: 
1134:         try {
1135:             inCDATA = false;
1136:             if (entityNestLevel == 0)
1137:                 rawWrite ("]]>");
1138:         } catch (IOException e) {
1139:             fatal ("can't write", e);
1140:         }
1141:     }
1142: 
1143:     /**
1144:      * <b>SAX2</b>:  called when the doctype is partially parsed
1145:      * Note that this, like other doctype related calls, is ignored
1146:      * when XHTML is in use.
1147:      */
1148:     final public void startDTD (String name, String publicId, String systemId)
1149:     throws SAXException
1150:     {
1151:         if (locator == null)
1152:             locator = new LocatorImpl ();
1153:         if (xhtml)
1154:             return;
1155:         try {
1156:             inDoctype = startedDoctype = true;
1157:             if (canonical)
1158:                 return;
1159:             rawWrite ("<!DOCTYPE ");
1160:             rawWrite (name);
1161:             rawWrite (' ');
1162: 
1163:             if (!expandingEntities) {
1164:                 if (publicId != null)
1165:                     rawWrite ("PUBLIC '" + publicId + "' '" + systemId + "' ");
1166:                 else if (systemId != null)
1167:                     rawWrite ("SYSTEM '" + systemId + "' ");
1168:             }
1169: 
1170:             rawWrite ('[');
1171:             newline ();
1172:         } catch (IOException e) {
1173:             fatal ("can't write", e);
1174:         }
1175:     }
1176: 
1177:     /** <b>SAX2</b>:  called after the doctype is parsed */
1178:     final public void endDTD ()
1179:     throws SAXException
1180:     {
1181:         inDoctype = false;
1182:         if (canonical || xhtml)
1183:             return;
1184:         try {
1185:             rawWrite ("]>");
1186:             newline ();
1187:         } catch (IOException e) {
1188:             fatal ("can't write", e);
1189:         }
1190:     }
1191: 
1192:     /**
1193:      * <b>SAX2</b>:  called before parsing a general entity in content
1194:      */
1195:     final public void startEntity (String name)
1196:     throws SAXException
1197:     {
1198:         try {
1199:             boolean     writeEOL = true;
1200: 
1201:             // Predefined XHTML entities (for characters) will get
1202:             // mapped back later.
1203:             if (xhtml || expandingEntities)
1204:                 return;
1205: 
1206:             entityNestLevel++;
1207:             if (name.equals ("[dtd]"))
1208:                 return;
1209:             if (entityNestLevel != 1)
1210:                 return;
1211:             if (!name.startsWith ("%")) {
1212:                 writeEOL = false;
1213:                 rawWrite ('&');
1214:             }
1215:             rawWrite (name);
1216:             rawWrite (';');
1217:             if (writeEOL)
1218:                 newline ();
1219:         } catch (IOException e) {
1220:             fatal ("can't write", e);
1221:         }
1222:     }
1223: 
1224:     /**
1225:      * <b>SAX2</b>:  called after parsing a general entity in content
1226:      */
1227:     final public void endEntity (String name)
1228:     throws SAXException
1229:     {
1230:         if (xhtml || expandingEntities)
1231:             return;
1232:         entityNestLevel--;
1233:     }
1234: 
1235:     /**
1236:      * <b>SAX2</b>:  called when comments are parsed.
1237:      * When XHTML is used, the old HTML tradition of using comments
1238:      * to for inline CSS, or for JavaScript code is  discouraged.
1239:      * This is because XML processors are encouraged to discard, on
1240:      * the grounds that comments are for users (and perhaps text
1241:      * editors) not programs.  Instead, use external scripts
1242:      */
1243:     final public void comment (char ch [], int start, int length)
1244:     throws SAXException
1245:     {
1246:         if (locator == null)
1247:             locator = new LocatorImpl ();
1248: 
1249:         // don't print internal subset for XHTML
1250:         if (xhtml && startedDoctype)
1251:             return;
1252:         // don't print comment in doctype for canon xml
1253:         if (canonical && inDoctype)
1254:             return;
1255: 
1256:         try {
1257:             boolean indent;
1258: 
1259:             if (prettyPrinting && space.empty ())
1260:                 fatal ("stack discipline", null);
1261:             indent = prettyPrinting && "default".equals (space.peek ());
1262:             if (entityNestLevel != 0)
1263:                 return;
1264:             if (indent)
1265:                 doIndent ();
1266:             if (canonical && inEpilogue)
1267:                 newline ();
1268:             rawWrite ("<!--");
1269:             escapeChars (ch, start, length, CTX_UNPARSED);
1270:             rawWrite ("-->");
1271:             if (indent)
1272:                 doIndent ();
1273:             if (elementNestLevel == 0 && !(canonical && inEpilogue))
1274:                 newline ();
1275:         } catch (IOException e) {
1276:             fatal ("can't write", e);
1277:         }
1278:     }
1279: 
1280:     // SAX1 DTDHandler
1281: 
1282:     /** <b>SAX1</b>:  called on notation declarations */
1283:     final public void notationDecl (String name,
1284:         String publicId, String systemId)
1285:     throws SAXException
1286:     {
1287:         if (xhtml)
1288:             return;
1289:         try {
1290:             // At this time, only SAX2 callbacks start these.
1291:             if (!startedDoctype)
1292:                 return;
1293: 
1294:             if (entityNestLevel != 0)
1295:                 return;
1296:             rawWrite ("<!NOTATION " + name + " ");
1297:             if (publicId != null)
1298:                 rawWrite ("PUBLIC \"" + publicId + '"');
1299:             else
1300:                 rawWrite ("SYSTEM ");
1301:             if (systemId != null)
1302:                 rawWrite ('"' + systemId + '"');
1303:             rawWrite (">");
1304:             newline ();
1305:         } catch (IOException e) {
1306:             fatal ("can't write", e);
1307:         }
1308:     }
1309: 
1310:     /** <b>SAX1</b>:  called on unparsed entity declarations */
1311:     final public void unparsedEntityDecl (String name,
1312:         String publicId, String systemId,
1313:         String notationName)
1314:     throws SAXException
1315:     {
1316:         if (xhtml)
1317:             return;
1318:         try {
1319:             // At this time, only SAX2 callbacks start these.
1320:             if (!startedDoctype)  {
1321:                 // FIXME: write to temporary buffer, and make the start
1322:                 // of the root element write these declarations.
1323:                 return;
1324:             }
1325: 
1326:             if (entityNestLevel != 0)
1327:                 return;
1328:             rawWrite ("<!ENTITY " + name + " ");
1329:             if (publicId != null)
1330:                 rawWrite ("PUBLIC \"" + publicId + '"');
1331:             else
1332:                 rawWrite ("SYSTEM ");
1333:             rawWrite ('"' + systemId + '"');
1334:             rawWrite (" NDATA " + notationName + ">");
1335:             newline ();
1336:         } catch (IOException e) {
1337:             fatal ("can't write", e);
1338:         }
1339:     }
1340: 
1341:     // SAX2 DeclHandler
1342: 
1343:     /** <b>SAX2</b>:  called on attribute declarations */
1344:     final public void attributeDecl (String eName, String aName,
1345:             String type, String mode, String value)
1346:     throws SAXException
1347:     {
1348:         if (xhtml)
1349:             return;
1350:         try {
1351:             // At this time, only SAX2 callbacks start these.
1352:             if (!startedDoctype)
1353:                 return;
1354:             if (entityNestLevel != 0)
1355:                 return;
1356:             rawWrite ("<!ATTLIST " + eName + ' ' + aName + ' ');
1357:             rawWrite (type);
1358:             rawWrite (' ');
1359:             if (mode != null)
1360:                 rawWrite (mode + ' ');
1361:             if (value != null)
1362:                 writeQuotedValue (value, CTX_ATTRIBUTE);
1363:             rawWrite ('>');
1364:             newline ();
1365:         } catch (IOException e) {
1366:             fatal ("can't write", e);
1367:         }
1368:     }
1369: 
1370:     /** <b>SAX2</b>:  called on element declarations */
1371:     final public void elementDecl (String name, String model)
1372:     throws SAXException
1373:     {
1374:         if (xhtml)
1375:             return;
1376:         try {
1377:             // At this time, only SAX2 callbacks start these.
1378:             if (!startedDoctype)
1379:                 return;
1380:             if (entityNestLevel != 0)
1381:                 return;
1382:             rawWrite ("<!ELEMENT " + name + ' ' + model + '>');
1383:             newline ();
1384:         } catch (IOException e) {
1385:             fatal ("can't write", e);
1386:         }
1387:     }
1388: 
1389:     /** <b>SAX2</b>:  called on external entity declarations */
1390:     final public void externalEntityDecl (
1391:         String name,
1392:         String publicId,
1393:         String systemId)
1394:     throws SAXException
1395:     {
1396:         if (xhtml)
1397:             return;
1398:         try {
1399:             // At this time, only SAX2 callbacks start these.
1400:             if (!startedDoctype)
1401:                 return;
1402:             if (entityNestLevel != 0)
1403:                 return;
1404:             rawWrite ("<!ENTITY ");
1405:             if (name.startsWith ("%")) {
1406:                 rawWrite ("% ");
1407:                 rawWrite (name.substring (1));
1408:             } else
1409:                 rawWrite (name);
1410:             if (publicId != null)
1411:                 rawWrite (" PUBLIC \"" + publicId + '"');
1412:             else
1413:                 rawWrite (" SYSTEM ");
1414:             rawWrite ('"' + systemId + "\">");
1415:             newline ();
1416:         } catch (IOException e) {
1417:             fatal ("can't write", e);
1418:         }
1419:     }
1420: 
1421:     /** <b>SAX2</b>:  called on internal entity declarations */
1422:     final public void internalEntityDecl (String name, String value)
1423:     throws SAXException
1424:     {
1425:         if (xhtml)
1426:             return;
1427:         try {
1428:             // At this time, only SAX2 callbacks start these.
1429:             if (!startedDoctype)
1430:                 return;
1431:             if (entityNestLevel != 0)
1432:                 return;
1433:             rawWrite ("<!ENTITY ");
1434:             if (name.startsWith ("%")) {
1435:                 rawWrite ("% ");
1436:                 rawWrite (name.substring (1));
1437:             } else
1438:                 rawWrite (name);
1439:             rawWrite (' ');
1440:             writeQuotedValue (value, CTX_ENTITY);
1441:             rawWrite ('>');
1442:             newline ();
1443:         } catch (IOException e) {
1444:             fatal ("can't write", e);
1445:         }
1446:     }
1447: 
1448:     private void writeQuotedValue (String value, int code)
1449:     throws SAXException, IOException
1450:     {
1451:         char    buf [] = value.toCharArray ();
1452:         int     off = 0, len = buf.length;
1453: 
1454:         // we can't add line breaks to attribute/entity/... values
1455:         noWrap = true;
1456:         rawWrite ('"');
1457:         escapeChars (buf, off, len, code);
1458:         rawWrite ('"');
1459:         noWrap = false;
1460:     }
1461: 
1462:     // From "HTMLlat1x.ent" ... names of entities for ISO-8859-1
1463:     // (Latin/1) characters, all codes:  160-255 (0xA0-0xFF).
1464:     // Codes 128-159 have no assigned values.
1465:     private static final String HTMLlat1x [] = {
1466:         // 160
1467:         "nbsp", "iexcl", "cent", "pound", "curren",
1468:         "yen", "brvbar", "sect", "uml", "copy",
1469: 
1470:         // 170
1471:         "ordf", "laquo", "not", "shy", "reg",
1472:         "macr", "deg", "plusmn", "sup2", "sup3",
1473: 
1474:         // 180
1475:         "acute", "micro", "para", "middot", "cedil",
1476:         "sup1", "ordm", "raquo", "frac14", "frac12",
1477: 
1478:         // 190
1479:         "frac34", "iquest", "Agrave", "Aacute", "Acirc",
1480:         "Atilde", "Auml", "Aring", "AElig", "Ccedil",
1481: 
1482:         // 200
1483:         "Egrave", "Eacute", "Ecirc", "Euml", "Igrave",
1484:         "Iacute", "Icirc", "Iuml", "ETH", "Ntilde",
1485: 
1486:         // 210
1487:         "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",
1488:         "times", "Oslash", "Ugrave", "Uacute", "Ucirc",
1489: 
1490:         // 220
1491:         "Uuml", "Yacute", "THORN", "szlig", "agrave",
1492:         "aacute", "acirc", "atilde", "auml", "aring",
1493: 
1494:         // 230
1495:         "aelig", "ccedil", "egrave", "eacute", "ecirc",
1496:         "euml", "igrave", "iacute", "icirc", "iuml",
1497: 
1498:         // 240
1499:         "eth", "ntilde", "ograve", "oacute", "ocirc",
1500:         "otilde", "ouml", "divide", "oslash", "ugrave",
1501: 
1502:         // 250
1503:         "uacute", "ucirc", "uuml", "yacute", "thorn",
1504:         "yuml"
1505:     };
1506: 
1507:     // From "HTMLsymbolx.ent" ... some of the symbols that
1508:     // we can conveniently handle.  Entities for the Greek.
1509:     // alphabet (upper and lower cases) are compact.
1510:     private static final String HTMLsymbolx_GR [] = {
1511:         // 913
1512:         "Alpha", "Beta", "Gamma", "Delta", "Epsilon",
1513:         "Zeta", "Eta", "Theta", "Iota", "Kappa",
1514: 
1515:         // 923
1516:         "Lambda", "Mu", "Nu", "Xi", "Omicron",
1517:         "Pi", "Rho", null, "Sigma", "Tau",
1518: 
1519:         // 933
1520:         "Upsilon", "Phi", "Chi", "Psi", "Omega"
1521:     };
1522: 
1523:     private static final String HTMLsymbolx_gr [] = {
1524:         // 945
1525:         "alpha", "beta", "gamma", "delta", "epsilon",
1526:         "zeta", "eta", "theta", "iota", "kappa",
1527: 
1528:         // 955
1529:         "lambda", "mu", "nu", "xi", "omicron",
1530:         "pi", "rho", "sigmaf", "sigma", "tau",
1531: 
1532:         // 965
1533:         "upsilon", "phi", "chi", "psi", "omega"
1534:     };
1535: 
1536: 
1537:     // General routine to write text and substitute predefined
1538:     // entities (XML, and a special case for XHTML) as needed.
1539:     private void escapeChars (char buf [], int off, int len, int code)
1540:     throws SAXException, IOException
1541:     {
1542:         int     first = 0;
1543: 
1544:         if (off < 0) {
1545:             off = 0;
1546:             len = buf.length;
1547:         }
1548:         for (int i = 0; i < len; i++) {
1549:             String      esc;
1550:             char        c = buf [off + i];
1551: 
1552:             switch (c) {
1553:               // Note that CTX_ATTRIBUTE isn't explicitly tested here;
1554:               // all syntax delimiters are escaped in CTX_ATTRIBUTE,
1555:               // otherwise it's similar to CTX_CONTENT
1556: 
1557:               // ampersand flags entity references; entity replacement
1558:               // text has unexpanded references, other text doesn't.
1559:               case '&':
1560:                 if (code == CTX_ENTITY || code == CTX_UNPARSED)
1561:                     continue;
1562:                 esc = "amp";
1563:                 break;
1564: 
1565:               // attributes and text may NOT have literal '<', but
1566:               // entities may have markup constructs
1567:               case '<':
1568:                 if (code == CTX_ENTITY || code == CTX_UNPARSED)
1569:                     continue;
1570:                 esc = "lt";
1571:                 break;
1572: 
1573:               // as above re markup constructs; but otherwise
1574:               // except when canonicalizing, this is for consistency
1575:               case '>':
1576:                 if (code == CTX_ENTITY || code == CTX_UNPARSED)
1577:                     continue;
1578:                 esc = "gt";
1579:                 break;
1580:               case '\'':
1581:                 if (code == CTX_CONTENT || code == CTX_UNPARSED)
1582:                     continue;
1583:                 if (canonical)
1584:                     continue;
1585:                 esc = "apos";
1586:                 break;
1587: 
1588:               // needed when printing quoted attribute/entity values
1589:               case '"':
1590:                 if (code == CTX_CONTENT || code == CTX_UNPARSED)
1591:                     continue;
1592:                 esc = "quot";
1593:                 break;
1594: 
1595:               // make line ends work per host OS convention
1596:               case '\n':
1597:                 esc = eol;
1598:                 break;
1599: 
1600:               //
1601:               // No other characters NEED special treatment ... except
1602:               // for encoding-specific issues, like whether the character
1603:               // can really be represented in that encoding.
1604:               //
1605:               default:
1606:                 //
1607:                 // There are characters we can never write safely; getting
1608:                 // them is an error.
1609:                 //
1610:                 //   (a) They're never legal in XML ... detected by range
1611:                 //      checks, and (eventually) by remerging surrogate
1612:                 //      pairs on output.  (Easy error for apps to prevent.)
1613:                 //
1614:                 //   (b) This encoding can't represent them, and we
1615:                 //      can't make reference substitution (e.g. inside
1616:                 //      CDATA sections, names, PI data, etc).  (Hard for
1617:                 //      apps to prevent, except by using UTF-8 or UTF-16
1618:                 //      as their output encoding.)
1619:                 //
1620:                 // We know a very little bit about what characters
1621:                 // the US-ASCII and ISO-8859-1 encodings support.  For
1622:                 // other encodings we can't detect the second type of
1623:                 // error at all.  (Never an issue for UTF-8 or UTF-16.)
1624:                 //
1625: 
1626: // FIXME:  CR in CDATA is an error; in text, turn to a char ref
1627: 
1628: // FIXME:  CR/LF/TAB in attributes should become char refs
1629: 
1630:                 if ((c > 0xfffd)
1631:                         || ((c < 0x0020) && !((c == 0x0009)
1632:                                 || (c == 0x000A) || (c == 0x000D)))
1633:                         || (((c & dangerMask) != 0)
1634:                             && (code == CTX_UNPARSED))) {
1635: 
1636:                     // if case (b) in CDATA, we might end the section,
1637:                     // write a reference, then restart ... possible
1638:                     // in one DOM L3 draft.
1639: 
1640:                     throw new CharConversionException (
1641:                             "Illegal or non-writable character: U+"
1642:                             + Integer.toHexString (c));
1643:                 }
1644: 
1645:                 //
1646:                 // If the output encoding represents the character
1647:                 // directly, let it do so!  Else we'll escape it.
1648:                 //
1649:                 if ((c & dangerMask) == 0)
1650:                     continue;
1651:                 esc = null;
1652: 
1653:                 // Avoid numeric refs where symbolic ones exist, as
1654:                 // symbolic ones make more sense to humans reading!
1655:                 if (xhtml) {
1656:                     // all the HTMLlat1x.ent entities
1657:                     // (all the "ISO-8859-1" characters)
1658:                     if (c >= 160 && c <= 255)
1659:                         esc = HTMLlat1x [c - 160];
1660: 
1661:                     // not quite half the HTMLsymbolx.ent entities
1662:                     else if (c >= 913 && c <= 937)
1663:                         esc = HTMLsymbolx_GR [c - 913];
1664:                     else if (c >= 945 && c <= 969)
1665:                         esc = HTMLsymbolx_gr [c - 945];
1666: 
1667:                     else switch (c) {
1668:                         // all of the HTMLspecialx.ent entities
1669:                         case  338: esc = "OElig";       break;
1670:                         case  339: esc = "oelig";       break;
1671:                         case  352: esc = "Scaron";      break;
1672:                         case  353: esc = "scaron";      break;
1673:                         case  376: esc = "Yuml";        break;
1674:                         case  710: esc = "circ";        break;
1675:                         case  732: esc = "tilde";       break;
1676:                         case 8194: esc = "ensp";        break;
1677:                         case 8195: esc = "emsp";        break;
1678:                         case 8201: esc = "thinsp";      break;
1679:                         case 8204: esc = "zwnj";        break;
1680:                         case 8205: esc = "zwj";         break;
1681:                         case 8206: esc = "lrm";         break;
1682:                         case 8207: esc = "rlm";         break;
1683:                         case 8211: esc = "ndash";       break;
1684:                         case 8212: esc = "mdash";       break;
1685:                         case 8216: esc = "lsquo";       break;
1686:                         case 8217: esc = "rsquo";       break;
1687:                         case 8218: esc = "sbquo";       break;
1688:                         case 8220: esc = "ldquo";       break;
1689:                         case 8221: esc = "rdquo";       break;
1690:                         case 8222: esc = "bdquo";       break;
1691:                         case 8224: esc = "dagger";      break;
1692:                         case 8225: esc = "Dagger";      break;
1693:                         case 8240: esc = "permil";      break;
1694:                         case 8249: esc = "lsaquo";      break;
1695:                         case 8250: esc = "rsaquo";      break;
1696:                         case 8364: esc = "euro";        break;
1697: 
1698:                         // the other HTMLsymbox.ent entities
1699:                         case  402: esc = "fnof";        break;
1700:                         case  977: esc = "thetasym";    break;
1701:                         case  978: esc = "upsih";       break;
1702:                         case  982: esc = "piv";         break;
1703:                         case 8226: esc = "bull";        break;
1704:                         case 8230: esc = "hellip";      break;
1705:                         case 8242: esc = "prime";       break;
1706:                         case 8243: esc = "Prime";       break;
1707:                         case 8254: esc = "oline";       break;
1708:                         case 8260: esc = "frasl";       break;
1709:                         case 8472: esc = "weierp";      break;
1710:                         case 8465: esc = "image";       break;
1711:                         case 8476: esc = "real";        break;
1712:                         case 8482: esc = "trade";       break;
1713:                         case 8501: esc = "alefsym";     break;
1714:                         case 8592: esc = "larr";        break;
1715:                         case 8593: esc = "uarr";        break;
1716:                         case 8594: esc = "rarr";        break;
1717:                         case 8595: esc = "darr";        break;
1718:                         case 8596: esc = "harr";        break;
1719:                         case 8629: esc = "crarr";       break;
1720:                         case 8656: esc = "lArr";        break;
1721:                         case 8657: esc = "uArr";        break;
1722:                         case 8658: esc = "rArr";        break;
1723:                         case 8659: esc = "dArr";        break;
1724:                         case 8660: esc = "hArr";        break;
1725:                         case 8704: esc = "forall";      break;
1726:                         case 8706: esc = "part";        break;
1727:                         case 8707: esc = "exist";       break;
1728:                         case 8709: esc = "empty";       break;
1729:                         case 8711: esc = "nabla";       break;
1730:                         case 8712: esc = "isin";        break;
1731:                         case 8713: esc = "notin";       break;
1732:                         case 8715: esc = "ni";          break;
1733:                         case 8719: esc = "prod";        break;
1734:                         case 8721: esc = "sum";         break;
1735:                         case 8722: esc = "minus";       break;
1736:                         case 8727: esc = "lowast";      break;
1737:                         case 8730: esc = "radic";       break;
1738:                         case 8733: esc = "prop";        break;
1739:                         case 8734: esc = "infin";       break;
1740:                         case 8736: esc = "ang";         break;
1741:                         case 8743: esc = "and";         break;
1742:                         case 8744: esc = "or";          break;
1743:                         case 8745: esc = "cap";         break;
1744:                         case 8746: esc = "cup";         break;
1745:                         case 8747: esc = "int";         break;
1746:                         case 8756: esc = "there4";      break;
1747:                         case 8764: esc = "sim";         break;
1748:                         case 8773: esc = "cong";        break;
1749:                         case 8776: esc = "asymp";       break;
1750:                         case 8800: esc = "ne";          break;
1751:                         case 8801: esc = "equiv";       break;
1752:                         case 8804: esc = "le";          break;
1753:                         case 8805: esc = "ge";          break;
1754:                         case 8834: esc = "sub";         break;
1755:                         case 8835: esc = "sup";         break;
1756:                         case 8836: esc = "nsub";        break;
1757:                         case 8838: esc = "sube";        break;
1758:                         case 8839: esc = "supe";        break;
1759:                         case 8853: esc = "oplus";       break;
1760:                         case 8855: esc = "otimes";      break;
1761:                         case 8869: esc = "perp";        break;
1762:                         case 8901: esc = "sdot";        break;
1763:                         case 8968: esc = "lceil";       break;
1764:                         case 8969: esc = "rceil";       break;
1765:                         case 8970: esc = "lfloor";      break;
1766:                         case 8971: esc = "rfloor";      break;
1767:                         case 9001: esc = "lang";        break;
1768:                         case 9002: esc = "rang";        break;
1769:                         case 9674: esc = "loz";         break;
1770:                         case 9824: esc = "spades";      break;
1771:                         case 9827: esc = "clubs";       break;
1772:                         case 9829: esc = "hearts";      break;
1773:                         case 9830: esc = "diams";       break;
1774:                     }
1775:                 }
1776: 
1777:                 // else escape with numeric char refs
1778:                 if (esc == null) {
1779:                     stringBuf.setLength (0);
1780:                     stringBuf.append ("#x");
1781:                     stringBuf.append (Integer.toHexString (c).toUpperCase ());
1782:                     esc = stringBuf.toString ();
1783: 
1784:                     // FIXME:  We don't write surrogate pairs correctly.
1785:                     // They should work as one ref per character, since
1786:                     // each pair is one character.  For reading back into
1787:                     // Unicode, it matters beginning in Unicode 3.1 ...
1788:                 }
1789:                 break;
1790:             }
1791:             if (i != first)
1792:                 rawWrite (buf, off + first, i - first);
1793:             first = i + 1;
1794:             if (esc == eol)
1795:                 newline ();
1796:             else {
1797:                 rawWrite ('&');
1798:                 rawWrite (esc);
1799:                 rawWrite (';');
1800:             }
1801:         }
1802:         if (first < len)
1803:             rawWrite (buf, off + first, len - first);
1804:     }
1805: 
1806: 
1807: 
1808:     private void newline ()
1809:     throws SAXException, IOException
1810:     {
1811:         out.write (eol);
1812:         column = 0;
1813:     }
1814: 
1815:     private void doIndent ()
1816:     throws SAXException, IOException
1817:     {
1818:         int     space = elementNestLevel * 2;
1819: 
1820:         newline ();
1821:         column = space;
1822:         // track tabs only at line starts
1823:         while (space > 8) {
1824:             out.write ("\t");
1825:             space -= 8;
1826:         }
1827:         while (space > 0) {
1828:             out.write ("  ");
1829:             space -= 2;
1830:         }
1831:     }
1832: 
1833:     private void rawWrite (char c)
1834:     throws IOException
1835:     {
1836:         out.write (c);
1837:         column++;
1838:     }
1839: 
1840:     private void rawWrite (String s)
1841:     throws SAXException, IOException
1842:     {
1843:         if (prettyPrinting && "default".equals (space.peek ())) {
1844:             char data [] = s.toCharArray ();
1845:             rawWrite (data, 0, data.length);
1846:         } else {
1847:             out.write (s);
1848:             column += s.length ();
1849:         }
1850:     }
1851: 
1852:     // NOTE:  if xhtml, the REC gives some rules about whitespace
1853:     // which we could follow ... notably, many places where conformant
1854:     // agents "must" consolidate/normalize whitespace.  Line ends can
1855:     // be removed there, etc.  This may not be the right place to do
1856:     // such mappings though.
1857: 
1858:     // Line buffering may help clarify algorithms and improve results.
1859: 
1860:     // It's likely xml:space needs more attention.
1861: 
1862:     private void rawWrite (char buf [], int offset, int length)
1863:     throws SAXException, IOException
1864:     {
1865:         boolean         wrap;
1866: 
1867:         if (prettyPrinting && space.empty ())
1868:             fatal ("stack discipline", null);
1869: 
1870:         wrap = prettyPrinting && "default".equals (space.peek ());
1871:         if (!wrap) {
1872:             out.write (buf, offset, length);
1873:             column += length;
1874:             return;
1875:         }
1876: 
1877:         // we're pretty printing and want to fill lines out only
1878:         // to the desired line length.
1879:         while (length > 0) {
1880:             int         target = lineLength - column;
1881:             boolean     wrote = false;
1882: 
1883:             // Do we even have a problem?
1884:             if (target > length || noWrap) {
1885:                 out.write (buf, offset, length);
1886:                 column += length;
1887:                 return;
1888:             }
1889: 
1890:             // break the line at a space character, trying to fill
1891:             // as much of the line as possible.
1892:             char        c;
1893: 
1894:             for (int i = target - 1; i >= 0; i--) {
1895:                 if ((c = buf [offset + i]) == ' ' || c == '\t') {
1896:                     i++;
1897:                     out.write (buf, offset, i);
1898:                     doIndent ();
1899:                     offset += i;
1900:                     length -= i;
1901:                     wrote = true;
1902:                     break;
1903:                 }
1904:             }
1905:             if (wrote)
1906:                 continue;
1907: 
1908:             // no space character permitting break before target
1909:             // line length is filled.  So, take the next one.
1910:             if (target < 0)
1911:                 target = 0;
1912:             for (int i = target; i < length; i++)
1913:                 if ((c = buf [offset + i]) == ' ' || c == '\t') {
1914:                     i++;
1915:                     out.write (buf, offset, i);
1916:                     doIndent ();
1917:                     offset += i;
1918:                     length -= i;
1919:                     wrote = true;
1920:                     break;
1921:                 }
1922:             if (wrote)
1923:                 continue;
1924: 
1925:             // no such luck.
1926:             out.write (buf, offset, length);
1927:             column += length;
1928:             break;
1929:         }
1930:     }
1931: }
Overview Package Class Use Source Tree Index Deprecated About
		Frames \| No Frames