Source for gnu.gcj.convert.BytesToUnicode

   1: /* Copyright (C) 1999, 2000, 2001, 2005  Free Software Foundation
   2: 
   3:    This file is part of libgcj.
   4: 
   5: This software is copyrighted work licensed under the terms of the
   6: Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
   7: details.  */
   8: 
   9: package gnu.gcj.convert;
  10: 
  11: import java.nio.charset.Charset;
  12: 
  13: public abstract class BytesToUnicode extends IOConverter
  14: {
  15:   /** Buffer to read bytes from.
  16:    * The characters inbuffer[inpos] ... inbuffer[inlength-1] are available. */
  17:   public byte[] inbuffer;
  18:   /** Starting index in buffer to read bytes from. */
  19:   public int inpos;
  20:   /** End of valid bytes in buffer. */
  21:   public int inlength;
  22: 
  23:   // The name of the default encoding.
  24:   static String defaultEncoding;
  25: 
  26:   /* These keep a small cache of decoders for reuse.  The array holds
  27:      the actual decoders.  The currCachePos is the next value we are
  28:      going to replace in the cache.  We don't just throw the data away
  29:      if the cache is full, because if the cache filled up with stuff
  30:      we don't need then the cache would be worthless.  We instead
  31:      circulate through the cache the implement kind of an LRU
  32:      algorithm. */
  33:   private static final int CACHE_SIZE = 4;  // A power of 2 for speed
  34:   private static BytesToUnicode[] decoderCache
  35:     = new BytesToUnicode[CACHE_SIZE];
  36:   private static int currCachePos = 0;
  37: 
  38:   public abstract String getName();
  39: 
  40:   public static BytesToUnicode getDefaultDecoder()
  41:   {
  42:     try
  43:       {
  44:     synchronized (BytesToUnicode.class)
  45:       {
  46:         if (defaultEncoding == null)
  47:           {
  48:         String encoding
  49:           = canonicalize (System.getProperty("file.encoding",
  50:                              "8859_1"));
  51:         String className = "gnu.gcj.convert.Input_" + encoding;
  52:         try
  53:           {
  54:             Class defaultDecodingClass = Class.forName(className);
  55:             defaultEncoding = encoding;
  56:           }
  57:         catch (ClassNotFoundException ex)
  58:           {
  59:             throw new NoClassDefFoundError("missing default encoding "
  60:                            + encoding + " (class "
  61:                            + className
  62:                            + " not found)");
  63:           }
  64:           }
  65:       }
  66:     return getDecoder (defaultEncoding);
  67:       }
  68:     catch (Throwable ex)
  69:       {
  70:     return new Input_8859_1();
  71:       }
  72:   }
  73: 
  74:   /** Get a byte-stream->char-stream converter given an encoding name. */
  75:   public static BytesToUnicode getDecoder (String encoding)
  76:     throws java.io.UnsupportedEncodingException
  77:   {
  78:     /* First hunt in our cache to see if we have a decoder that is
  79:        already allocated. */
  80:     String canonicalEncoding = canonicalize(encoding);
  81:     synchronized (BytesToUnicode.class)
  82:       {
  83:     int i;
  84:     for (i = 0; i < decoderCache.length; ++i)
  85:       {
  86:         if (decoderCache[i] != null
  87:         && canonicalEncoding.equals(decoderCache[i].getName ()))
  88:           {
  89:         BytesToUnicode rv = decoderCache[i];
  90:         decoderCache[i] = null;
  91:         return rv;
  92:         }
  93:       }
  94:       }
  95: 
  96:     // It's not in the cache, so now we have to do real work.
  97:     String className = "gnu.gcj.convert.Input_" + canonicalEncoding;
  98:     Class decodingClass;
  99:     try 
 100:       { 
 101:     decodingClass = Class.forName(className); 
 102:     return (BytesToUnicode) decodingClass.newInstance();
 103:       } 
 104:     catch (Throwable ex) 
 105:       { 
 106:     try
 107:       {
 108:         // We pass the original name to iconv and let it handle
 109:         // its own aliasing.  Note that we intentionally prefer
 110:         // iconv over nio.
 111:         return new Input_iconv (encoding);
 112:       }
 113:     catch (Throwable _)
 114:       {
 115:         // Ignore, and try the next method.
 116:       }
 117:     try
 118:       {
 119:         return new BytesToCharsetAdaptor(Charset.forName(encoding));
 120:       }
 121:     catch (Throwable _)
 122:       {
 123:         throw new java.io.UnsupportedEncodingException(encoding
 124:                                + " (" + ex + ')');
 125:       }
 126:       }
 127:   }
 128: 
 129:   /** Make input bytes available to the conversion.
 130:    * @param buffer source of input bytes
 131:    * @param pos index of first available byte
 132:    * @param length one more than index of last available byte
 133:    */
 134:   public final void setInput(byte[] buffer, int pos, int length)
 135:   {
 136:     inbuffer = buffer;
 137:     inpos = pos;
 138:     inlength = length;
 139:   }
 140: 
 141:   /** Convert bytes to chars.
 142:    * Input bytes are taken from this.inbuffer.  The available input
 143:    * bytes start at inbuffer[inpos], and end at inbuffer[inlength-1].
 144:    * @param outbuffer buffer for the converted character
 145:    * @param outpos position in buffer to start putting converted characters
 146:    * @param count the maximum number of characters to convert
 147:    * @return number of chars placed in outbuffer.
 148:    * Also, this.inpos is incremented by the number of bytes consumed.
 149:    *
 150:    * (Note the asymmetry in that the input upper bound is inbuffer[inlength-1],
 151:    * while the output upper bound is outbuffer[outpos+count-1].  The
 152:    * justification is that inlength is like the count field of a
 153:    * BufferedInputStream, while the count parameter is like the
 154:    * length parameter of a read request.)  The count parameter is
 155:    * also defined to be <= outbuffer.length - outpos (per the specification
 156:    * of the length parameter for a read request).
 157:    */
 158:   public abstract int read (char[] outbuffer, int outpos, int count);
 159: 
 160:   /** Indicate that the converter is resuable.
 161:    * This class keeps track of converters on a per-encoding basis.
 162:    * When done with an encoder you may call this method to indicate
 163:    * that it can be reused later.
 164:    */
 165:   public void done ()
 166:   {
 167:     synchronized (BytesToUnicode.class)
 168:       {
 169:     this.inbuffer = null;
 170:     this.inpos = 0;
 171:     this.inlength = 0;
 172: 
 173:     decoderCache[currCachePos] = this;
 174:     currCachePos = (currCachePos + 1) % CACHE_SIZE;
 175:       }
 176:   }
 177: }