Frames | No Frames |
1: /* DoParse.java -- 2: Copyright (C) 1999,2000,2001 Free Software Foundation, Inc. 3: 4: This file is part of GNU Classpath. 5: 6: GNU Classpath is free software; you can redistribute it and/or modify 7: it under the terms of the GNU General Public License as published by 8: the Free Software Foundation; either version 2, or (at your option) 9: any later version. 10: 11: GNU Classpath is distributed in the hope that it will be useful, but 12: WITHOUT ANY WARRANTY; without even the implied warranty of 13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14: General Public License for more details. 15: 16: You should have received a copy of the GNU General Public License 17: along with GNU Classpath; see the file COPYING. If not, write to the 18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19: 02110-1301 USA. 20: 21: Linking this library statically or dynamically with other modules is 22: making a combined work based on this library. Thus, the terms and 23: conditions of the GNU General Public License cover the whole 24: combination. 25: 26: As a special exception, the copyright holders of this library give you 27: permission to link this library with independent modules to produce an 28: executable, regardless of the license terms of these independent 29: modules, and to copy and distribute the resulting executable under 30: terms of your choice, provided that you also meet, for each linked 31: independent module, the terms and conditions of the license of that 32: module. An independent module is a module which is not derived from 33: or based on this library. If you modify this library, you may extend 34: this exception to your version of the library, but you are not 35: obligated to do so. If you do not wish to do so, delete this 36: exception statement from your version. */ 37: 38: package gnu.xml.util; 39: 40: import gnu.java.lang.CPStringBuilder; 41: 42: import java.io.IOException; 43: 44: import org.xml.sax.ErrorHandler; 45: import org.xml.sax.InputSource; 46: import org.xml.sax.SAXException; 47: import org.xml.sax.SAXParseException; 48: import org.xml.sax.XMLReader; 49: import org.xml.sax.helpers.XMLReaderFactory; 50: 51: import gnu.xml.pipeline.EventConsumer; 52: import gnu.xml.pipeline.EventFilter; 53: import gnu.xml.pipeline.NSFilter; 54: import gnu.xml.pipeline.PipelineFactory; 55: import gnu.xml.pipeline.TeeConsumer; 56: import gnu.xml.pipeline.ValidationConsumer; 57: import gnu.xml.pipeline.WellFormednessFilter; 58: 59: /** 60: * This class provides a driver which may be invoked from the command line 61: * to process a document using a SAX2 parser and a specified XML processing 62: * pipeline. 63: * This facilitates some common types of command line tools, such as parsing an 64: * XML document in order test it for well formedness or validity. 65: * 66: * <p>The SAX2 XMLReaderFactory should return a SAX2 XML parser which 67: * supports both of the standardized extension handlers (for declaration 68: * and lexical events). That parser will be used to produce events. 69: * 70: * <p>The first parameter to the command gives the name of the document that 71: * will be given to that processor. If it is a file name, it is converted 72: * to a URL first. 73: * 74: * <p>The second parameter describes a simple processing pipeline, and will 75: * be used as input to {@link gnu.xml.pipeline.PipelineFactory} 76: * methods which identify the processing to be done. Examples of such a 77: * pipeline include <pre> 78: * 79: * nsfix | validate <em>to validate the input document </em> 80: * nsfix | write ( stdout ) <em>to echo the file as XML text</em> 81: * dom | nsfix | write ( stdout ) <em>parse into DOM, print the result</em> 82: * </pre> 83: * 84: * <p> Relatively complex pipelines can be described on the command line, but 85: * not all interesting ones will require as little configuration as can be done 86: * in that way. Put filters like "nsfix", perhaps followed by "validate", 87: * at the front of the pipeline so they can be optimized out if a parser 88: * supports those modes natively. 89: * 90: * <p> If the parsing is aborted for any reason, the JVM will exit with a 91: * failure code. If a validating parse was done then both validation and 92: * well formedness errors will cause a failure. A non-validating parse 93: * will report failure on well formedness errors. 94: * 95: * @see gnu.xml.pipeline.PipelineFactory 96: * 97: * @author David Brownell 98: */ 99: final public class DoParse 100: { 101: private DoParse () { /* no instances allowed */ } 102: 103: // first reported nonrecoverable error 104: private static SAXParseException fatal; 105: 106: // error categories 107: private static int errorCount; 108: private static int fatalCount; 109: 110: /** 111: * Command line invoker for this class; pass a filename or URL 112: * as the first argument, and a pipeline description as the second. 113: * Make sure to use filters to condition the input to stages that 114: * require it; an <em>nsfix</em> filter will be a common requirement, 115: * to restore syntax that SAX2 parsers delete by default. Some 116: * conditioning filters may be eliminated by setting parser options. 117: * (For example, "nsfix" can set the "namespace-prefixes" feature to 118: * a non-default value of "true". In the same way, "validate" can set 119: * the "validation" feature to "true".) 120: */ 121: public static void main (String argv []) 122: throws IOException 123: { 124: int exitStatus = 1; 125: 126: if (argv.length != 2) { 127: System.err.println ("Usage: DoParse [filename|URL] pipeline-spec"); 128: System.err.println ("Example pipeline specs:"); 129: System.err.println (" 'nsfix | validate'"); 130: System.err.println ( 131: " ... restore namespace syntax, validate"); 132: System.err.println (" 'nsfix | write ( stdout )'"); 133: System.err.println ( 134: " ... restore namespace syntax, write to stdout as XML" 135: ); 136: System.exit (1); 137: } 138: 139: try { 140: // 141: // Get input source for specified document (or try ;-) 142: // 143: argv [0] = Resolver.getURL (argv [0]); 144: InputSource input = new InputSource (argv [0]); 145: 146: // 147: // Get the producer, using the system default parser (which 148: // can be overridden for this particular invocation). 149: // 150: // And the pipeline, using commandline options. 151: // 152: XMLReader producer; 153: EventConsumer consumer; 154: 155: producer = XMLReaderFactory.createXMLReader (); 156: 157: // 158: // XXX pipeline factory now has a pre-tokenized input 159: // method, use it ... that way at least some params 160: // can be written using quotes (have spaces, ...) 161: // 162: consumer = PipelineFactory.createPipeline (argv [1]); 163: 164: // 165: // XXX want commandline option for tweaking error handler. 166: // Want to be able to present warnings. 167: // 168: producer.setErrorHandler (new MyErrorHandler ()); 169: 170: // XXX need facility enabling resolving to local DTDs 171: 172: // 173: // Parse. The pipeline may get optimized a bit, so we 174: // can't always fail cleanly for validation without taking 175: // a look at the filter stages. 176: // 177: EventFilter.bind (producer, consumer); 178: producer.parse (input); 179: 180: try { 181: if (producer.getFeature ( 182: "http://org.xml/sax/features/validation")) 183: exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0; 184: else if (fatalCount == 0) 185: exitStatus = 0; 186: } catch (SAXException e) { 187: if (hasValidator (consumer)) 188: exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0; 189: else if (fatalCount == 0) 190: exitStatus = 0; 191: } 192: 193: } catch (java.net.MalformedURLException e) { 194: System.err.println ("** Malformed URL: " + e.getMessage ()); 195: System.err.println ("Is '" + argv [0] + "' a non-existent file?"); 196: e.printStackTrace (); 197: // e.g. FNF 198: 199: } catch (SAXParseException e) { 200: if (e != fatal) { 201: System.err.print (printParseException ("Parsing Aborted", e)); 202: e.printStackTrace (); 203: if (e.getException () != null) { 204: System.err.println ("++ Wrapped exception:"); 205: e.getException ().printStackTrace (); 206: } 207: } 208: 209: } catch (SAXException e) { 210: Exception x = e; 211: if (e.getException () != null) 212: x = e.getException (); 213: x.printStackTrace (); 214: 215: } catch (Throwable t) { 216: t.printStackTrace (); 217: } 218: 219: System.exit (exitStatus); 220: } 221: 222: // returns true if saw a validator (before end or unrecognized node) 223: // false otherwise 224: private static boolean hasValidator (EventConsumer e) 225: { 226: if (e == null) 227: return false; 228: if (e instanceof ValidationConsumer) 229: return true; 230: if (e instanceof TeeConsumer) { 231: TeeConsumer t = (TeeConsumer) e; 232: return hasValidator (t.getFirst ()) 233: || hasValidator (t.getRest ()); 234: } 235: if (e instanceof WellFormednessFilter 236: || e instanceof NSFilter 237: ) 238: return hasValidator (((EventFilter)e).getNext ()); 239: 240: // else ... gee, we can't know. Assume not. 241: 242: return false; 243: } 244: 245: static class MyErrorHandler implements ErrorHandler 246: { 247: // dump validation errors, but continue 248: public void error (SAXParseException e) 249: throws SAXParseException 250: { 251: errorCount++; 252: System.err.print (printParseException ("Error", e)); 253: } 254: 255: public void warning (SAXParseException e) 256: throws SAXParseException 257: { 258: // System.err.print (printParseException ("Warning", e)); 259: } 260: 261: // try to continue fatal errors, in case a parser reports more 262: public void fatalError (SAXParseException e) 263: throws SAXParseException 264: { 265: fatalCount++; 266: if (fatal == null) 267: fatal = e; 268: System.err.print (printParseException ("Nonrecoverable Error", e)); 269: } 270: } 271: 272: static private String printParseException ( 273: String label, 274: SAXParseException e 275: ) { 276: CPStringBuilder buf = new CPStringBuilder (); 277: int temp; 278: 279: buf.append ("** "); 280: buf.append (label); 281: buf.append (": "); 282: buf.append (e.getMessage ()); 283: buf.append ('\n'); 284: if (e.getSystemId () != null) { 285: buf.append (" URI: "); 286: buf.append (e.getSystemId ()); 287: buf.append ('\n'); 288: } 289: if ((temp = e.getLineNumber ()) != -1) { 290: buf.append (" line: "); 291: buf.append (temp); 292: buf.append ('\n'); 293: } 294: if ((temp = e.getColumnNumber ()) != -1) { 295: buf.append (" char: "); 296: buf.append (temp); 297: buf.append ('\n'); 298: } 299: 300: return buf.toString (); 301: } 302: }