Source for gnu.xml.util.DoParse

   1: /* DoParse.java --
   2:    Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package gnu.xml.util;
  39: 
  40: import gnu.java.lang.CPStringBuilder;
  41: 
  42: import java.io.IOException;
  43: 
  44: import org.xml.sax.ErrorHandler;
  45: import org.xml.sax.InputSource;
  46: import org.xml.sax.SAXException;
  47: import org.xml.sax.SAXParseException;
  48: import org.xml.sax.XMLReader;
  49: import org.xml.sax.helpers.XMLReaderFactory;
  50: 
  51: import gnu.xml.pipeline.EventConsumer;
  52: import gnu.xml.pipeline.EventFilter;
  53: import gnu.xml.pipeline.NSFilter;
  54: import gnu.xml.pipeline.PipelineFactory;
  55: import gnu.xml.pipeline.TeeConsumer;
  56: import gnu.xml.pipeline.ValidationConsumer;
  57: import gnu.xml.pipeline.WellFormednessFilter;
  58: 
  59: /**
  60:  * This class provides a driver which may be invoked from the command line
  61:  * to process a document using a SAX2 parser and a specified XML processing
  62:  * pipeline.
  63:  * This facilitates some common types of command line tools, such as parsing an
  64:  * XML document in order to test it for well formedness or validity.
  65:  *
  66:  * <p>The SAX2 XMLReaderFactory should return a SAX2 XML parser which
  67:  * supports both of the standardized extension handlers (for declaration
  68:  * and lexical events).  That parser will be used to produce events.
  69:  *
  70:  * <p>The first parameter to the command gives the name of the document that
  71:  * will be given to that processor.  If it is a file name, it is converted
  72:  * to a URL first.
  73:  *
  74:  * <p>The second parameter describes a simple processing pipeline, and will
  75:  * be used as input to {@link gnu.xml.pipeline.PipelineFactory}
  76:  * methods which identify the processing to be done.  Examples of such a
  77:  * pipeline include <pre>
  78:  *
  79:  *    nsfix | validate                <em>to validate the input document </em>
  80:  *    nsfix | write ( stdout )        <em>to echo the file as XML text</em>
  81:  *    dom | nsfix | write ( stdout )  <em>parse into DOM, print the result</em>
  82:  * </pre>
  83:  *
  84:  * <p> Relatively complex pipelines can be described on the command line, but
  85:  * not all interesting ones will require as little configuration as can be done
  86:  * in that way.  Put filters like "nsfix", perhaps followed by "validate",
  87:  * at the front of the pipeline so they can be optimized out if a parser
  88:  * supports those modes natively.
  89:  *
  90:  * <p> If the parsing is aborted for any reason, the JVM will exit with a
  91:  * failure code.  If a validating parse was done then both validation and
  92:  * well formedness errors will cause a failure.  A non-validating parse
  93:  * will report failure on well formedness errors.
  94:  *
  95:  * @see gnu.xml.pipeline.PipelineFactory
  96:  *
  97:  * @author David Brownell
  98:  */
  99: final public class DoParse
 100: {
 101:     private DoParse () { /* no instances allowed */ }
 102: 
 103:     // first reported nonrecoverable error
 104:     private static SAXParseException    fatal;
 105: 
 106:     // error categories
 107:     private static int                  errorCount;
 108:     private static int                  fatalCount;
 109: 
 110:     /**
 111:      * Command line invoker for this class; pass a filename or URL
 112:      * as the first argument, and a pipeline description as the second.
 113:      * Make sure to use filters to condition the input to stages that
 114:      * require it; an <em>nsfix</em> filter will be a common requirement,
 115:      * to restore syntax that SAX2 parsers delete by default.  Some
 116:      * conditioning filters may be eliminated by setting parser options.
 117:      * (For example, "nsfix" can set the "namespace-prefixes" feature to
 118:      * a non-default value of "true".  In the same way, "validate" can set
 119:      * the "validation" feature to "true".)
 120:      */
 121:     public static void main (String argv [])
 122:     throws IOException
 123:     {
 124:         int             exitStatus = 1;
 125: 
 126:         if (argv.length != 2) {
 127:             System.err.println ("Usage: DoParse [filename|URL] pipeline-spec");
 128:             System.err.println ("Example pipeline specs:");
 129:             System.err.println ("  'nsfix | validate'");
 130:             System.err.println (
 131:                 "       ... restore namespace syntax, validate");
 132:             System.err.println ("  'nsfix | write ( stdout )'");
 133:             System.err.println (
 134:                 "       ... restore namespace syntax, write to stdout as XML"
 135:                 );
 136:             System.exit (1);
 137:         }
 138: 
 139:         try {
 140:             //
 141:             // Get input source for specified document (or try ;-)
 142:             //
 143:             argv [0] = Resolver.getURL (argv [0]);
 144:             InputSource input = new InputSource (argv [0]);
 145: 
 146:             //
 147:             // Get the producer, using the system default parser (which
 148:             // can be overridden for this particular invocation).
 149:             //
 150:             // And the pipeline, using commandline options.
 151:             //
 152:             XMLReader           producer;
 153:             EventConsumer       consumer;
 154: 
 155:             producer = XMLReaderFactory.createXMLReader ();
 156: 
 157:             //
 158:             // XXX pipeline factory now has a pre-tokenized input
 159:             // method, use it ... that way at least some params
 160:             // can be written using quotes (have spaces, ...)
 161:             //
 162:             consumer = PipelineFactory.createPipeline (argv [1]);
 163: 
 164:             //
 165:             // XXX want commandline option for tweaking error handler.
 166:             // Want to be able to present warnings.
 167:             //
 168:             producer.setErrorHandler (new MyErrorHandler ());
 169: 
 170:             // XXX need facility enabling resolving to local DTDs
 171: 
 172:             //
 173:             // Parse.  The pipeline may get optimized a bit, so we
 174:             // can't always fail cleanly for validation without taking
 175:             // a look at the filter stages.
 176:             //
 177:             EventFilter.bind (producer, consumer);
 178:             producer.parse (input);
 179: 
 180:             try {
 181:                 if (producer.getFeature (
 182:                         "http://org.xml/sax/features/validation"))
 183:                     exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0;
 184:                 else if (fatalCount == 0)
 185:                     exitStatus = 0;
 186:             } catch (SAXException e) {
 187:                 if (hasValidator (consumer))
 188:                     exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0;
 189:                 else if (fatalCount == 0)
 190:                     exitStatus = 0;
 191:             }
 192: 
 193:         } catch (java.net.MalformedURLException e) {
 194:             System.err.println ("** Malformed URL: " + e.getMessage ());
 195:             System.err.println ("Is '" + argv [0] + "' a non-existent file?");
 196:             e.printStackTrace ();
 197:                 // e.g. FNF
 198: 
 199:         } catch (SAXParseException e) {
 200:             if (e != fatal) {
 201:                 System.err.print (printParseException ("Parsing Aborted", e));
 202:                 e.printStackTrace ();
 203:                 if (e.getException () != null) {
 204:                     System.err.println ("++ Wrapped exception:");
 205:                     e.getException ().printStackTrace ();
 206:                 }
 207:             }
 208: 
 209:         } catch (SAXException e) {
 210:             Exception   x = e;
 211:             if (e.getException () != null)
 212:                 x = e.getException ();
 213:             x.printStackTrace ();
 214: 
 215:         } catch (Throwable t) {
 216:             t.printStackTrace ();
 217:         }
 218: 
 219:         System.exit (exitStatus);
 220:     }
 221: 
 222:     // returns true if saw a validator (before end or unrecognized node)
 223:     // false otherwise
 224:     private static boolean hasValidator (EventConsumer e)
 225:     {
 226:         if (e == null)
 227:             return false;
 228:         if (e instanceof ValidationConsumer)
 229:             return true;
 230:         if (e instanceof TeeConsumer) {
 231:             TeeConsumer t = (TeeConsumer) e;
 232:             return hasValidator (t.getFirst ())
 233:                 || hasValidator (t.getRest ());
 234:         }
 235:         if (e instanceof WellFormednessFilter
 236:                 || e instanceof NSFilter
 237:                 )
 238:             return hasValidator (((EventFilter)e).getNext ());
 239: 
 240:         // else ... gee, we can't know.  Assume not.
 241: 
 242:         return false;
 243:     }
 244: 
 245:     static class MyErrorHandler implements ErrorHandler
 246:     {
 247:         // dump validation errors, but continue
 248:         public void error (SAXParseException e)
 249:         throws SAXParseException
 250:         {
 251:             errorCount++;
 252:             System.err.print (printParseException ("Error", e));
 253:         }
 254: 
 255:         public void warning (SAXParseException e)
 256:         throws SAXParseException
 257:         {
 258:             // System.err.print (printParseException ("Warning", e));
 259:         }
 260: 
 261:         // try to continue fatal errors, in case a parser reports more
 262:         public void fatalError (SAXParseException e)
 263:         throws SAXParseException
 264:         {
 265:             fatalCount++;
 266:             if (fatal == null)
 267:                 fatal = e;
 268:             System.err.print (printParseException ("Nonrecoverable Error", e));
 269:         }
 270:     }
 271: 
 272:     static private String printParseException (
 273:         String                  label,
 274:         SAXParseException       e
 275:     ) {
 276:         CPStringBuilder buf = new CPStringBuilder ();
 277:         int             temp;
 278: 
 279:         buf.append ("** ");
 280:         buf.append (label);
 281:         buf.append (": ");
 282:         buf.append (e.getMessage ());
 283:         buf.append ('\n');
 284:         if (e.getSystemId () != null) {
 285:             buf.append ("   URI:  ");
 286:             buf.append (e.getSystemId ());
 287:             buf.append ('\n');
 288:         }
 289:         if ((temp = e.getLineNumber ()) != -1) {
 290:             buf.append ("   line: ");
 291:             buf.append (temp);
 292:             buf.append ('\n');
 293:         }
 294:         if ((temp = e.getColumnNumber ()) != -1) {
 295:             buf.append ("   char: ");
 296:             buf.append (temp);
 297:             buf.append ('\n');
 298:         }
 299: 
 300:         return buf.toString ();
 301:     }
 302: }