Source for gnu.gcj.convert.UnicodeToBytes

   1: /* Copyright (C) 1999, 2000, 2001, 2003, 2005, 2006  Free Software Foundation
   2: 
   3:    This file is part of libgcj.
   4: 
   5: This software is copyrighted work licensed under the terms of the
   6: Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
   7: details.  */
   8: 
   9: package gnu.gcj.convert; 
  10: 
  11: import java.nio.charset.Charset;
  12: 
  13: public abstract class UnicodeToBytes extends IOConverter
  14: {
  15:   /** Buffer to emit bytes to.
  16:    * The locations buf[count] ... buf[buf.length-1] are available. */
  17:   public byte[] buf;
  18:   public int count;
  19: 
  20:   // The name of the default encoding.
  21:   static String defaultEncoding;
  22: 
  23:   /* These keep a small cache of encoders for reuse.  The array holds
  24:      the actual encoders.  The currCachePos is the next value we are
  25:      going to replace in the cache.  We don't just throw the data away
  26:      if the cache is full, because if the cache filled up with stuff we
  27:      don't need then the cache would be worthless.  We instead
  28:      circulate through the cache the implement kind of an LRU
  29:      algorithm. */
  30:   private static final int CACHE_SIZE = 4;  // A power of 2 for speed
  31:   private static UnicodeToBytes[] encoderCache
  32:     = new UnicodeToBytes[CACHE_SIZE];
  33:   private static int currCachePos = 0;
  34: 
  35:   public abstract String getName();
  36: 
  37:   public static UnicodeToBytes getDefaultEncoder()
  38:   {
  39:     try
  40:       {
  41:     synchronized (UnicodeToBytes.class)
  42:       {
  43:         if (defaultEncoding == null)
  44:           {
  45:         String encoding
  46:           = canonicalize (System.getProperty("file.encoding",
  47:                              "8859_1"));
  48:         String className = "gnu.gcj.convert.Output_" + encoding;
  49:         try
  50:           {
  51:             Class defaultEncodingClass = Class.forName(className);
  52:             defaultEncoding = encoding;
  53:           }
  54:         catch (ClassNotFoundException ex)
  55:           {
  56:             throw new NoClassDefFoundError("missing default encoding "
  57:                            + encoding + " (class "
  58:                            + className
  59:                            + " not found)");
  60:           }
  61:           }
  62:       }
  63: 
  64:     return getEncoder (defaultEncoding);
  65:       }
  66:     catch (Throwable ex)
  67:       {
  68:     return new Output_8859_1();
  69:       }
  70:   }
  71: 
  72:   /** Get a char-stream->byte-stream converter given an encoding name. */
  73:   public static UnicodeToBytes getEncoder (String encoding)
  74:     throws java.io.UnsupportedEncodingException
  75:   {
  76:     /* First hunt in our cache to see if we have a encoder that is
  77:        already allocated. */
  78:     String canonicalEncoding = canonicalize(encoding);
  79:     synchronized (UnicodeToBytes.class)
  80:       {
  81:     int i;
  82:     for (i = 0; i < encoderCache.length; ++i)
  83:       {
  84:         if (encoderCache[i] != null
  85:         && canonicalEncoding.equals(encoderCache[i].getName ()))
  86:           {
  87:         UnicodeToBytes rv = encoderCache[i];
  88:         encoderCache[i] = null;
  89:         return rv;
  90:         }
  91:       }
  92:       }
  93: 
  94:     String className = "gnu.gcj.convert.Output_" + canonicalEncoding;
  95:     Class encodingClass;
  96:     try 
  97:       { 
  98:     encodingClass = Class.forName(className); 
  99:     return (UnicodeToBytes) encodingClass.newInstance();
 100:       } 
 101:     catch (Throwable ex) 
 102:       { 
 103:     try
 104:           {
 105:         // We pass the original name to iconv and let it handle
 106:         // its own aliasing.  Note that we intentionally prefer
 107:         // iconv over nio.
 108:         return new Output_iconv (encoding);
 109:           }
 110:         catch (Throwable _)
 111:           {
 112:             // Ignore, and try the next method.
 113:           }
 114:     try
 115:       {
 116:             // Try using finding java.nio.charset.Charset and using
 117:             // the adaptor.  Use the original name as Charsets have
 118:             // their own canonical names.
 119:             return new CharsetToBytesAdaptor(Charset.forName(encoding));
 120:       }
 121:     catch (Throwable _)
 122:       {
 123:         // Put the original exception in the throwable.
 124:         throw new java.io.UnsupportedEncodingException(encoding + " ("
 125:                                + ex + ')');
 126:       }
 127:       }
 128:   }
 129: 
 130:   public final void setOutput(byte[] buffer, int count)
 131:   {
 132:     this.buf = buffer;
 133:     this.count = count;
 134:   }
 135: 
 136:   /** Convert chars to bytes.
 137:     * Converted bytes are written to buf, starting at count.
 138:     * @param inbuffer source of characters to convert
 139:     * @param inpos index of initial character in inbuffer to convert
 140:     * @param inlength number of characters to convert
 141:     * @return number of chars converted
 142:     * Also, this.count is increment by the number of bytes converted.
 143:     */
 144:   public abstract int write (char[] inbuffer, int inpos, int inlength);
 145: 
 146:   /** Convert chars to bytes.
 147:     * Converted bytes are written to buf, starting at count.
 148:     * @param str source of characters to convert
 149:     * @param inpos index of initial character in str to convert
 150:     * @param inlength number of characters to convert
 151:     * @param work if non-null, a buffer than can be used
 152:     * @return number of chars converted
 153:     * Also, this.count is increment by the number of bytes converted.
 154:     */
 155:   public int write (String str, int inpos, int inlength, char[] work)
 156:   {
 157:     if (work == null)
 158:       work = new char[inlength];
 159:     int srcEnd = inpos + (inlength > work.length ? work.length : inlength);
 160:     str.getChars(inpos, srcEnd, work, 0);
 161:     return write(work, 0, srcEnd - inpos);
 162:   }
 163: 
 164:   /**
 165:    * Returns true when the converter has consumed some bytes that are
 166:    * not yet converted to characters because further continuation
 167:    * bytes are needed.  Defaults to false, should be overridden by
 168:    * decoders that internally store some bytes.
 169:    */
 170:   public boolean havePendingBytes()
 171:   {
 172:     return false;
 173:   }
 174: 
 175:   /**
 176:    * Users should call this method when the input is coming to an
 177:    * end.  This signals that the next write (which might be
 178:    * zero-length) ought to flush any internal state.
 179:    */
 180:   public void setFinished()
 181:   {
 182:   }
 183: 
 184:   /** Indicate that the converter is resuable.
 185:    * This class keeps track of converters on a per-encoding basis.
 186:    * When done with an encoder you may call this method to indicate
 187:    * that it can be reused later.
 188:    */
 189:   public void done ()
 190:   {
 191:     synchronized (UnicodeToBytes.class)
 192:       {
 193:     this.buf = null;
 194:     this.count = 0;
 195: 
 196:     encoderCache[currCachePos] = this;
 197:     currCachePos = (currCachePos + 1) % CACHE_SIZE;
 198:       }
 199:   }
 200: }