Source for java.net.URLDecoder

   1: /* URLDecoder.java -- Class to decode URL's from encoded form.
   2:    Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: package java.net;
  39: 
  40: import gnu.java.lang.CPStringBuilder;
  41: 
  42: import java.io.UnsupportedEncodingException;
  43: 
  44: 
  45: /**
  46:  * This utility class contains static methods that converts a
  47:  * string encoded in the x-www-form-urlencoded format to the original
  48:  * text.  The x-www-form-urlencoded format replaces certain disallowed
  49:  * characters with encoded equivalents.  All upper case and lower case
  50:  * letters in the US alphabet remain as is, the space character (' ')
  51:  * is replaced with '+' sign, and all other characters are converted to a
  52:  * "%XX" format where XX is the hexadecimal representation of that character
  53:  * in a given character encoding (default is "UTF-8").
  54:  * <p>
  55:  * This method is very useful for decoding strings sent to CGI scripts
  56:  *
  57:  * Written using on-line Java Platform 1.2/1.4 API Specification.
  58:  * Status:  Believed complete and correct.
  59:  *
  60:  * @since 1.2
  61:  *
  62:  * @author Warren Levy (warrenl@cygnus.com)
  63:  * @author Aaron M. Renn (arenn@urbanophile.com) (documentation comments)
  64:  * @author Mark Wielaard (mark@klomp.org)
  65:  */
  66: public class URLDecoder
  67: {
  68:   /**
  69:    * Public contructor. Note that this class has only static methods.
  70:    */
  71:   public URLDecoder()
  72:   {
  73:   }
  74: 
  75:   /**
  76:    * This method translates the passed in string from x-www-form-urlencoded
  77:    * format using the default encoding "UTF-8" to decode the hex encoded
  78:    * unsafe characters.
  79:    *
  80:    * @param s the String to convert
  81:    *
  82:    * @return the converted String
  83:    *
  84:    * @deprecated
  85:    */
  86:   public static String decode(String s)
  87:   {
  88:     try
  89:       {
  90:         return decode(s, "UTF-8");
  91:       }
  92:     catch (UnsupportedEncodingException uee)
  93:       {
  94:         // Should never happen since UTF-8 encoding should always be supported
  95:         return s;
  96:       }
  97:   }
  98: 
  99:   /**
 100:    * This method translates the passed in string from x-www-form-urlencoded
 101:    * format using the given character encoding to decode the hex encoded
 102:    * unsafe characters.
 103:    *
 104:    * This implementation will decode the string even if it contains
 105:    * unsafe characters (characters that should have been encoded) or if the
 106:    * two characters following a % do not represent a hex encoded byte.
 107:    * In those cases the unsafe character or the % character will be added
 108:    * verbatim to the decoded result.
 109:    *
 110:    * @param s the String to convert
 111:    * @param encoding the character encoding to use the decode the hex encoded
 112:    *        unsafe characters
 113:    *
 114:    * @return the converted String
 115:    *
 116:    * @exception UnsupportedEncodingException If the named encoding is not
 117:    * supported
 118:    *
 119:    * @since 1.4
 120:    */
 121:   public static String decode(String s, String encoding)
 122:     throws UnsupportedEncodingException
 123:   {
 124:     // First convert all '+' characters to spaces.
 125:     String str = s.replace('+', ' ');
 126: 
 127:     // Then go through the whole string looking for byte encoded characters
 128:     int i;
 129:     int start = 0;
 130:     byte[] bytes = null;
 131:     int length = str.length();
 132:     CPStringBuilder result = new CPStringBuilder(length);
 133:     while ((i = str.indexOf('%', start)) >= 0)
 134:       {
 135:         // Add all non-encoded characters to the result buffer
 136:         result.append(str.substring(start, i));
 137:         start = i;
 138: 
 139:         // Get all consecutive encoded bytes
 140:         while ((i + 2 < length) && (str.charAt(i) == '%'))
 141:           i += 3;
 142: 
 143:         // Decode all these bytes
 144:         if ((bytes == null) || (bytes.length < ((i - start) / 3)))
 145:           bytes = new byte[((i - start) / 3)];
 146: 
 147:         int index = 0;
 148:         try
 149:           {
 150:             while (start < i)
 151:               {
 152:                 String sub = str.substring(start + 1, start + 3);
 153:                 bytes[index] = (byte) Integer.parseInt(sub, 16);
 154:                 index++;
 155:                 start += 3;
 156:               }
 157:           }
 158:         catch (NumberFormatException nfe)
 159:           {
 160:             // One of the hex encoded strings was bad
 161:           }
 162: 
 163:         // Add the bytes as characters according to the given encoding
 164:         result.append(new String(bytes, 0, index, encoding));
 165: 
 166:         // Make sure we skip to just after a % sign
 167:         // There might not have been enough encoded characters after the %
 168:         // or the hex chars were not actually hex chars (NumberFormatException)
 169:         if (start < length && s.charAt(start) == '%')
 170:           {
 171:             result.append('%');
 172:             start++;
 173:           }
 174:       }
 175: 
 176:     // Add any characters left
 177:     if (start < str.length())
 178:       result.append(str.substring(start));
 179: 
 180:     return result.toString();
 181:   }
 182: } // class URLDecoder