Source for gnu.xml.util.Resolver

   1: /* Resolver.java --
   2:    Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package gnu.xml.util;
  39: 
  40: import java.io.File;
  41: import java.io.IOException;
  42: import java.util.Dictionary;
  43: 
  44: import org.xml.sax.EntityResolver;
  45: import org.xml.sax.InputSource;
  46: import org.xml.sax.SAXException;
  47: 
  48: /**
  49:  * Utility implementation of a SAX resolver, which can be used to improve
  50:  * network utilization of SAX based XML components.  It does this by
  51:  * supporting local caches of external entities.
  52:  * SAX parsers <em>should</em> use such local caches when possible.
  53:  *
  54:  * @see XCat
  55:  */
  56: public class Resolver implements EntityResolver, Cloneable
  57: {
  58:     /**
  59:      * Updates a dictionary used to map PUBLIC identifiers to file names,
  60:      * so that it uses the mappings in a specified directory.
  61:      *
  62:      * @param mappings Array of string pairs, where the first member
  63:      *  of each pair is a PUBLIC identifier and the second is the
  64:      *  name of a file, relative to the specified directory.
  65:      * @param directory File holding the specified files.
  66:      */
  67:     public static void addDirectoryMapping (
  68:         Dictionary      table,
  69:         String          mappings [][],
  70:         File            directory
  71:     ) throws IOException
  72:     {
  73:         for (int i = 0; i < mappings.length; i++) {
  74:             File        file = new File (directory, mappings [i][1]);
  75:             String      temp;
  76: 
  77:             if (!file.exists ())        // ?? log a warning ??
  78:                 continue;
  79: 
  80:             temp = fileToURL (file);
  81:             table.put (mappings [i][0], temp);
  82:         }
  83:     }
  84: 
  85:         // FIXME: these *URL routines don't quite belong here, except
  86:         // that they're all in the same spirit of making it easy to
  87:         // use local filesystem URIs with XML parsers.
  88: 
  89:     /**
  90:      * Provides the URL for a named file, without relying on the JDK 1.2
  91:      * {@link java.io.File#toURL File.toURL}() utility method.
  92:      *
  93:      * @param filename the file name to convert.  Relative file names
  94:      *  are resolved the way the JVM resolves them (current to the
  95:      *  process-global current working directory).
  96:      *
  97:      * @exception IOException if the file does not exist
  98:      */
  99:     public static String fileNameToURL (String filename)
 100:     throws IOException
 101:     {
 102:         return fileToURL (new File (filename));
 103:     }
 104: 
 105:     /**
 106:      * Provides the URL for a file, without relying on the JDK 1.2
 107:      * {@link java.io.File#toURL File.toURL}() utility method.
 108:      *
 109:      * @param f the file to convert.  Relative file names
 110:      *  are resolved the way the JVM resolves them (current to the
 111:      *  process-global current working directory).
 112:      *
 113:      * @exception IOException if the file does not exist
 114:      */
 115:     public static String fileToURL (File f)
 116:     throws IOException
 117:     {
 118:         String  temp;
 119: 
 120:         // NOTE:  the javax.xml.parsers.DocumentBuilder and
 121:         // javax.xml.transform.stream.StreamSource versions
 122:         // of this don't have this test.  Some JVM versions
 123:         // don't report this error sanely through URL code.
 124:         if (!f.exists ())
 125:             throw new IOException ("no such file: " + f.getName ());
 126: 
 127:             // FIXME: getAbsolutePath() seems buggy; I'm seeing components
 128:             // like "/foo/../" which are clearly not "absolute"
 129:             // and should have been resolved with the filesystem.
 130: 
 131:             // Substituting "/" would be wrong, "foo" may have been
 132:             // symlinked ... the URL code will make that change
 133:             // later, so that things can get _really_ broken!
 134: 
 135:         temp = f.getAbsolutePath ();
 136: 
 137:         if (File.separatorChar != '/')
 138:             temp = temp.replace (File.separatorChar, '/');
 139:         if (!temp.startsWith ("/"))
 140:             temp = "/" + temp;
 141:         if (!temp.endsWith ("/") && f.isDirectory ())
 142:             temp = temp + "/";
 143:         return "file:" + temp;
 144:     }
 145: 
 146: 
 147:     /**
 148:      * Returns a URL string.  Note that if a malformed URL is provided, or
 149:      * the parameter names a nonexistent file, the resulting URL may be
 150:      * malformed.
 151:      *
 152:      * @param fileOrURL If this is the name of a file which exists,
 153:      *  then its URL is returned.  Otherwise the argument is returned.
 154:      */
 155:     public static String getURL (String fileOrURL)
 156:     {
 157:         try {
 158:             return fileNameToURL (fileOrURL);
 159:         } catch (Exception e) {
 160:             return fileOrURL;
 161:         }
 162:     }
 163: 
 164: 
 165: 
 166:     // note:  cloneable, this is just copied; unguarded against mods
 167:     private Dictionary          pubidMapping;
 168: 
 169:     /**
 170:      * Constructs a resolver which understands how to map PUBLIC identifiers
 171:      * to other URIs, typically for local copies of standard DTD components.
 172:      *
 173:      * @param dict maps PUBLIC identifiers to URIs.  This is not
 174:      *  copied; subsequent modifications will be reported through the
 175:      *  resolution operations.
 176:      */
 177:     public Resolver (Dictionary dict)
 178:         { pubidMapping = dict; }
 179: 
 180: 
 181:     // FIXME: want notion of a "system default" resolver, presumably
 182:     // loaded with all sorts of useful stuff.  At the same time need
 183:     // a notion of resolver chaining (failure --> next) so that subsystems
 184:     // can set up things that won't interfere with other ones.
 185: 
 186:     /**
 187:      * This parses most MIME content type strings that have <em>charset=...</em>
 188:      * encoding declarations to and returns the specified encoding.  This
 189:      * conforms to RFC 3023, and is useful when constructing InputSource
 190:      * objects from URLConnection objects or other objects using MIME
 191:      * content typing.
 192:      *
 193:      * @param contentType the MIME content type that will be parsed; must
 194:      *  not be null.
 195:      * @return the appropriate encoding, or null if the content type is
 196:      *  not text and there's no <code>charset=...</code> attribute
 197:      */
 198:     static public String getEncoding (String contentType)
 199:     {
 200:         // currently a dumb parsing algorithm that works "mostly" and handles
 201:         //      ..anything...charset=ABC
 202:         //      ..anything...charset=ABC;otherAttr=DEF
 203:         //      ..anything...charset=ABC (comment);otherAttr=DEF
 204:         //      ..anything...charset= "ABC" (comment);otherAttr=DEF
 205: 
 206:         int     temp;
 207:         String  encoding;
 208:         String  defValue = null;
 209: 
 210:         if (contentType.startsWith ("text/"))
 211:             defValue = contentType.startsWith ("text/html")
 212:                     ? "ISO-8859-1" : "US-ASCII";
 213: 
 214:         // Assumes 'charset' is only an attribute name, not part
 215:         // of a value, comment, or other attribute name
 216:         // ALSO assumes no escaped values like "\;" or "\)"
 217:         if ((temp = contentType.indexOf ("charset")) != -1) {
 218:             // strip out everything up to '=' ...
 219:             temp = contentType.indexOf ('=', temp);
 220:             if (temp == -1)
 221:                 return defValue;
 222:             encoding = contentType.substring (temp + 1);
 223:             // ... and any subsequent attributes
 224:             if ((temp = encoding.indexOf (';')) != -1)
 225:                 encoding = encoding.substring (0, temp);
 226:             // ... and any comments after value
 227:             if ((temp = encoding.indexOf ('(')) != -1)
 228:                 encoding = encoding.substring (0, temp);
 229:             // ... then whitespace, and any (double) quotes
 230:             encoding = encoding.trim ();
 231:             if (encoding.charAt (0) == '"')
 232:                 encoding = encoding.substring (1, encoding.length () - 1);
 233:         } else
 234:             encoding = defValue;
 235:         return encoding;
 236:     }
 237: 
 238: 
 239:     /**
 240:      * Uses a local dictionary of public identifiers to resolve URIs,
 241:      * normally with the goal of minimizing network traffic or latencies.
 242:      */
 243:     public InputSource resolveEntity (String publicId, String systemId)
 244:     throws IOException, SAXException
 245:     {
 246:         InputSource     retval = null;
 247:         String          uri;
 248: 
 249:         if (publicId != null
 250:                 && ((uri = (String) pubidMapping.get (publicId)) != null)) {
 251:             retval = new InputSource (uri);
 252:             retval.setPublicId (publicId);
 253:         }
 254: 
 255:         // Could do URN resolution here
 256: 
 257:         // URL resolution always done by parser
 258: 
 259:         // FIXME: chain to "next" resolver
 260: 
 261:         return retval;
 262:     }
 263: }