1:
37:
38: package ;
39:
40: import ;
41:
42: import ;
43: import ;
44: import ;
45: import ;
46: import ;
47: import ;
48: import ;
49: import ;
50: import ;
51: import ;
52: import ;
53: import ;
54: import ;
55: import ;
56: import ;
57: import ;
58: import ;
59: import ;
60: import ;
61: import ;
62:
63:
68: public class StreamSerializer
69: {
70:
71: static final int SPACE = 0x20;
72: static final int BANG = 0x21;
73: static final int APOS = 0x27;
74: static final int SLASH = 0x2f;
75: static final int BRA = 0x3c;
76: static final int KET = 0x3e;
77: static final int EQ = 0x3d;
78:
79:
/**
 * HTML boolean attributes. Maps a lower-case element name to the set of
 * lower-case attribute names that are boolean on that element. Elements
 * listed together in one table row share a single set instance.
 */
static final Map HTML_BOOLEAN_ATTRIBUTES = new HashMap();
static
{
  // Each row is { element names, boolean attribute names }.
  final String[][][] table =
    {
      { { "area" }, { "nohref" } },
      { { "img" }, { "ismap" } },
      { { "object" }, { "declare" } },
      { { "hr" }, { "noshade" } },
      { { "dl", "ol", "ul", "dir", "menu" }, { "compact" } },
      { { "input" }, { "checked", "disabled", "readonly", "ismap" } },
      { { "select" }, { "multiple", "disabled" } },
      { { "optgroup" }, { "disabled" } },
      { { "option" }, { "selected", "disabled" } },
      { { "textarea" }, { "disabled", "readonly" } },
      { { "button" }, { "disabled" } },
      { { "th", "td" }, { "nowrap" } },
      { { "frame" }, { "noresize" } },
      { { "script" }, { "defer" } }
    };
  for (int row = 0; row < table.length; row++)
    {
      final String[] elementNames = table[row][0];
      final String[] attributeNames = table[row][1];
      HashSet attributes = new HashSet();
      for (int a = 0; a < attributeNames.length; a++)
        attributes.add(attributeNames[a]);
      for (int e = 0; e < elementNames.length; e++)
        HTML_BOOLEAN_ATTRIBUTES.put(elementNames[e], attributes);
    }
}
154:
155:
156: static final HashSet HTML_URIS = new HashSet();
157: static {
158: HTML_URIS.add("http://www.w3.org/1999/xhtml");
159: }
160:
161: protected final String encoding;
162: final Charset charset;
163: final CharsetEncoder encoder;
164: final int mode;
165: final LinkedList namespaces;
166: protected String eol;
167: Collection cdataSectionElements = Collections.EMPTY_SET;
168:
169: protected boolean discardDefaultContent;
170: protected boolean xmlDeclaration = true;
171:
172:
173: private boolean htmlEncoded;
174:
/**
 * Creates a serializer for the XML output method using the default
 * encoding and the platform line separator.
 */
public StreamSerializer()
{
  this(Stylesheet.OUTPUT_XML, null, null);
}

/**
 * Creates a serializer for the XML output method.
 *
 * @param encoding the output encoding name, or null for the default
 */
public StreamSerializer(String encoding)
{
  this(Stylesheet.OUTPUT_XML, encoding, null);
}

/**
 * Creates a serializer.
 *
 * @param mode the output method (one of the <code>Stylesheet.OUTPUT_*</code>
 * constants)
 * @param encoding the output encoding name; when null, ISO-8859-1 is used
 * for the HTML output method and UTF-8 otherwise
 * @param eol the line terminator; when null, the value of the
 * <code>line.separator</code> system property is used
 */
public StreamSerializer(int mode, String encoding, String eol)
{
  String encodingName = encoding;
  if (encodingName == null)
    {
      if (mode == Stylesheet.OUTPUT_HTML)
        encodingName = "ISO-8859-1";
      else
        encodingName = "UTF-8";
    }
  this.mode = mode;
  this.encoding = encodingName.intern();
  this.charset = Charset.forName(this.encoding);
  this.encoder = this.charset.newEncoder();
  if (eol == null)
    this.eol = System.getProperty("line.separator");
  else
    this.eol = eol;
  this.namespaces = new LinkedList();
}
196:
197: void setCdataSectionElements(Collection c)
198: {
199: cdataSectionElements = c;
200: }
201:
202: public void serialize(final Node node, final OutputStream out)
203: throws IOException
204: {
205: serialize(node, out, false);
206: }
207:
208: void serialize(Node node, final OutputStream out,
209: boolean convertToCdata)
210: throws IOException
211: {
212: while (node != null)
213: {
214: Node next = node.getNextSibling();
215: doSerialize(node, out, convertToCdata);
216: node = next;
217: }
218: }
219:
220: private void doSerialize(final Node node, final OutputStream out,
221: boolean convertToCdata)
222: throws IOException
223: {
224: if (out == null)
225: throw new NullPointerException("no output stream");
226: htmlEncoded = false;
227: String value, prefix;
228: Node children;
229: String uri = node.getNamespaceURI();
230: short nt = node.getNodeType();
231: if (convertToCdata && nt == Node.TEXT_NODE)
232: nt = Node.CDATA_SECTION_NODE;
233: switch (nt)
234: {
235: case Node.ATTRIBUTE_NODE:
236: prefix = node.getPrefix();
237: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri) ||
238: XMLConstants.XMLNS_ATTRIBUTE.equals(prefix) ||
239: (prefix != null && prefix.startsWith("xmlns:")))
240: {
241: String nsuri = node.getNodeValue();
242: if (isDefined(nsuri, prefix))
243: break;
244: String name = node.getLocalName();
245: if (name == null)
246: {
247:
248: name = node.getNodeName();
249: int ci = name.indexOf(':');
250: if (ci != -1)
251: name = name.substring(ci + 1);
252: }
253: define(nsuri, name);
254: }
255: else if (uri != null && !isDefined(uri, prefix))
256: {
257: prefix = define(uri, prefix);
258: String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix;
259: out.write(SPACE);
260: out.write(encodeText(nsname));
261: out.write(EQ);
262: String nsvalue = "\"" + encode(uri, true, true) + "\"";
263: out.write(nsvalue.getBytes(encoding));
264: }
265: out.write(SPACE);
266: String a_nodeName = node.getNodeName();
267: out.write(encodeText(a_nodeName));
268: String a_nodeValue = node.getNodeValue();
269: if (mode == Stylesheet.OUTPUT_HTML &&
270: a_nodeName.equals(a_nodeValue) &&
271: isHTMLBoolean((Attr) node, a_nodeName))
272: break;
273: out.write(EQ);
274: value = "\"" + encode(a_nodeValue, true, true) + "\"";
275: out.write(encodeText(value));
276: break;
277: case Node.ELEMENT_NODE:
278: pushNamespaceContext();
279: value = node.getNodeName();
280: out.write(BRA);
281: out.write(encodeText(value));
282: prefix = node.getPrefix();
283: if (uri != null && !isDefined(uri, prefix))
284: {
285: prefix = define(uri, prefix);
286: String nsname = (prefix == null) ? "xmlns" : "xmlns:" + prefix;
287: out.write(SPACE);
288: out.write(encodeText(nsname));
289: out.write(EQ);
290: String nsvalue = "\"" + encode(uri, true, true) + "\"";
291: out.write(encodeText(nsvalue));
292: }
293: NamedNodeMap attrs = node.getAttributes();
294: if (attrs != null)
295: {
296: int len = attrs.getLength();
297: for (int i = 0; i < len; i++)
298: {
299: Attr attr = (Attr) attrs.item(i);
300: if (discardDefaultContent && !attr.getSpecified())
301: {
302:
303: }
304: else
305: serialize(attr, out, false);
306: }
307: }
308: convertToCdata = cdataSectionElements.contains(value);
309: children = node.getFirstChild();
310: if (children == null)
311: {
312: out.write(SLASH);
313: out.write(KET);
314: }
315: else
316: {
317: out.write(KET);
318: serialize(children, out, convertToCdata);
319: out.write(BRA);
320: out.write(SLASH);
321: out.write(encodeText(value));
322: out.write(KET);
323: }
324: popNamespaceContext();
325: break;
326: case Node.TEXT_NODE:
327: value = node.getNodeValue();
328: if (!"yes".equals(node.getUserData("disable-output-escaping")) &&
329: mode != Stylesheet.OUTPUT_TEXT)
330: value = encode(value, false, false);
331: out.write(encodeText(value));
332: break;
333: case Node.CDATA_SECTION_NODE:
334: value = node.getNodeValue();
335:
336:
337: int bbk = value.indexOf("]]>");
338: while (bbk != -1)
339: {
340: String head = value.substring(0, bbk + 2);
341: out.write(encodeText("<![CDATA[" + head + "]]>"));
342: value = value.substring(bbk + 2);
343: bbk = value.indexOf("]]>");
344: }
345:
346: out.write(encodeText("<![CDATA[" + value + "]]>"));
347: break;
348: case Node.COMMENT_NODE:
349: value = "<!--" + node.getNodeValue() + "-->";
350: out.write(encodeText(value));
351: Node cp = node.getParentNode();
352: if (cp != null && cp.getNodeType() == Node.DOCUMENT_NODE)
353: out.write(encodeText(eol));
354: break;
355: case Node.DOCUMENT_NODE:
356: case Node.DOCUMENT_FRAGMENT_NODE:
357: if (mode == Stylesheet.OUTPUT_XML)
358: {
359: if ("UTF-16".equalsIgnoreCase(encoding))
360: {
361: out.write(0xfe);
362: out.write(0xff);
363: }
364: if (!"yes".equals(node.getUserData("omit-xml-declaration")) &&
365: xmlDeclaration)
366: {
367: Document doc = (node instanceof Document) ?
368: (Document) node : null;
369: String version = (doc != null) ? doc.getXmlVersion() : null;
370: if (version == null)
371: version = (String) node.getUserData("version");
372: if (version == null)
373: version = "1.0";
374: out.write(BRA);
375: out.write(0x3f);
376: out.write("xml version=\"".getBytes("US-ASCII"));
377: out.write(version.getBytes("US-ASCII"));
378: out.write(0x22);
379: if (!("UTF-8".equalsIgnoreCase(encoding)))
380: {
381: out.write(" encoding=\"".getBytes("US-ASCII"));
382: out.write(encoding.getBytes("US-ASCII"));
383: out.write(0x22);
384: }
385: if ((doc != null && doc.getXmlStandalone()) ||
386: "yes".equals(node.getUserData("standalone")))
387: out.write(" standalone=\"yes\"".getBytes("US-ASCII"));
388: out.write(0x3f);
389: out.write(KET);
390: out.write(encodeText(eol));
391: }
392:
393:
394: }
395: else if (mode == Stylesheet.OUTPUT_HTML)
396: {
397:
398: String mediaType = (String) node.getUserData("media-type");
399: if (mediaType == null)
400: mediaType = "text/html";
401: String contentType = mediaType + "; charset=" +
402: ((encoding.indexOf(' ') != -1) ?
403: "\"" + encoding + "\"" :
404: encoding);
405: Document doc = (node instanceof Document) ? (Document) node :
406: node.getOwnerDocument();
407: Node html = null;
408: for (Node ctx = node.getFirstChild(); ctx != null;
409: ctx = ctx.getNextSibling())
410: {
411: if (ctx.getNodeType() == Node.ELEMENT_NODE &&
412: isHTMLElement(ctx, "html"))
413: {
414: html = ctx;
415: break;
416: }
417: }
418: if (html != null)
419: {
420: Node head = null;
421: for (Node ctx = html.getFirstChild(); ctx != null;
422: ctx = ctx.getNextSibling())
423: {
424: if (isHTMLElement(ctx, "head"))
425: {
426: head = ctx;
427: break;
428: }
429: }
430: if (head != null)
431: {
432: Node meta = null;
433: Node metaContent = null;
434: for (Node ctx = head.getFirstChild(); ctx != null;
435: ctx = ctx.getNextSibling())
436: {
437: if (isHTMLElement(ctx, "meta"))
438: {
439: NamedNodeMap metaAttrs = ctx.getAttributes();
440: int len = metaAttrs.getLength();
441: String httpEquiv = null;
442: Node content = null;
443: for (int i = 0; i < len; i++)
444: {
445: Node attr = metaAttrs.item(i);
446: String attrName = attr.getNodeName();
447: if ("http-equiv".equalsIgnoreCase(attrName))
448: httpEquiv = attr.getNodeValue();
449: else if ("content".equalsIgnoreCase(attrName))
450: content = attr;
451: }
452: if ("Content-Type".equalsIgnoreCase(httpEquiv))
453: {
454: meta = ctx;
455: metaContent = content;
456: break;
457: }
458: }
459: }
460: if (meta == null)
461: {
462: meta = doc.createElement("meta");
463:
464: Node first = head.getFirstChild();
465: if (first == null)
466: head.appendChild(meta);
467: else
468: head.insertBefore(meta, first);
469: Node metaHttpEquiv = doc.createAttribute("http-equiv");
470: meta.getAttributes().setNamedItem(metaHttpEquiv);
471: metaHttpEquiv.setNodeValue("Content-Type");
472: }
473: if (metaContent == null)
474: {
475: metaContent = doc.createAttribute("content");
476: meta.getAttributes().setNamedItem(metaContent);
477: }
478: metaContent.setNodeValue(contentType);
479: htmlEncoded = true;
480: }
481: }
482: }
483: children = node.getFirstChild();
484: if (children != null)
485: serialize(children, out, convertToCdata);
486: break;
487: case Node.DOCUMENT_TYPE_NODE:
488: DocumentType doctype = (DocumentType) node;
489: out.write(BRA);
490: out.write(BANG);
491: out.write(encodeText("DOCTYPE "));
492: value = doctype.getNodeName();
493: out.write(encodeText(value));
494: String publicId = doctype.getPublicId();
495: if (publicId != null)
496: {
497: out.write(encodeText(" PUBLIC "));
498: out.write(APOS);
499: out.write(encodeText(publicId));
500: out.write(APOS);
501: }
502: String systemId = doctype.getSystemId();
503: if (systemId != null)
504: {
505: out.write(encodeText(" SYSTEM "));
506: out.write(APOS);
507: out.write(encodeText(systemId));
508: out.write(APOS);
509: }
510: String internalSubset = doctype.getInternalSubset();
511: if (internalSubset != null)
512: {
513: out.write(encodeText(internalSubset));
514: }
515: out.write(KET);
516: out.write(eol.getBytes(encoding));
517: break;
518: case Node.ENTITY_REFERENCE_NODE:
519: value = "&" + node.getNodeValue() + ";";
520: out.write(encodeText(value));
521: break;
522: case Node.PROCESSING_INSTRUCTION_NODE:
523: value = "<?" + node.getNodeName() + " " + node.getNodeValue() + "?>";
524: out.write(encodeText(value));
525: Node pp = node.getParentNode();
526: if (pp != null && pp.getNodeType() == Node.DOCUMENT_NODE)
527: {
528: out.write(encodeText(eol));
529: }
530: break;
531: default:
532: System.err.println("Unhandled node type: "+nt);
533: }
534: }
535:
536: boolean isHTMLElement(Node node, String name)
537: {
538: if (node.getNodeType() != Node.ELEMENT_NODE)
539: return false;
540: String localName = node.getLocalName();
541: if (localName == null)
542: localName = node.getNodeName();
543: if (!name.equalsIgnoreCase(localName))
544: return false;
545: String uri = node.getNamespaceURI();
546: return (uri == null || HTML_URIS.contains(uri));
547: }
548:
549: boolean isDefined(String uri, String prefix)
550: {
551: if (XMLConstants.XML_NS_URI.equals(uri))
552: return "xml".equals(prefix);
553: if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(uri))
554: return "xmlns".equals(prefix);
555: if (prefix == null)
556: prefix = "";
557: for (Iterator i = namespaces.iterator(); i.hasNext(); )
558: {
559: Map ctx = (Map) i.next();
560: String val = (String) ctx.get(uri);
561: if (val != null && val.equals(prefix))
562: return true;
563: }
564: return false;
565: }
566:
567: void pushNamespaceContext()
568: {
569: namespaces.addFirst(new HashMap());
570: }
571:
572: String define(String uri, String prefix)
573: {
574: if (namespaces.isEmpty())
575: return prefix;
576: HashMap ctx = (HashMap) namespaces.getFirst();
577: while (ctx.containsValue(prefix))
578: {
579:
580: prefix = prefix + "_";
581: }
582: ctx.put(uri, prefix);
583: return prefix;
584: }
585:
586: void popNamespaceContext()
587: {
588: namespaces.removeFirst();
589: }
590:
591: final byte[] encodeText(String text)
592: throws IOException
593: {
594: encoder.reset();
595: boolean htmlNeedingEncoding =
596: (mode == Stylesheet.OUTPUT_HTML && !htmlEncoded);
597: if (!encoder.canEncode(text) || htmlNeedingEncoding)
598: {
599:
600: CPStringBuilder buf = new CPStringBuilder();
601: int len = text.length();
602: for (int i = 0; i < len; i++)
603: {
604: char c = text.charAt(i);
605: if (!encoder.canEncode(c))
606: {
607:
608: String hex = Integer.toHexString((int) c);
609: buf.append("&#x");
610: buf.append(hex);
611: buf.append(';');
612: }
613: else if (htmlNeedingEncoding)
614: {
615: String entityName = getHTMLCharacterEntity(c);
616: if (entityName != null)
617: {
618: buf.append('&');
619: buf.append(entityName);
620: buf.append(';');
621: }
622: else
623: buf.append(c);
624: }
625: else
626: buf.append(c);
627: }
628: text = buf.toString();
629: }
630: ByteBuffer encoded = encoder.encode(CharBuffer.wrap(text));
631: int len = encoded.limit() - encoded.position();
632: if (encoded.hasArray())
633: {
634: byte[] ret = encoded.array();
635: if (ret.length > len)
636: {
637:
638: byte[] ret2 = new byte[len];
639: System.arraycopy(ret, 0, ret2, 0, len);
640: ret = ret2;
641: }
642: return ret;
643: }
644: encoded.flip();
645: byte[] ret = new byte[len];
646: encoded.get(ret, 0, len);
647: return ret;
648: }
649:
650: String encode(String text, boolean encodeCtl, boolean inAttr)
651: {
652: int len = text.length();
653: CPStringBuilder buf = null;
654: for (int i = 0; i < len; i++)
655: {
656: char c = text.charAt(i);
657: if (c == '<')
658: {
659: if (buf == null)
660: buf = new CPStringBuilder(text.substring(0, i));
661: buf.append("<");
662: }
663: else if (c == '>')
664: {
665: if (buf == null)
666: buf = new CPStringBuilder(text.substring(0, i));
667: buf.append(">");
668: }
669: else if (c == '&')
670: {
671: if (mode == Stylesheet.OUTPUT_HTML && (i + 1) < len &&
672: text.charAt(i + 1) == '{')
673: {
674: if (buf != null)
675: buf.append(c);
676: }
677: else
678: {
679: if (buf == null)
680: buf = new CPStringBuilder(text.substring(0, i));
681: buf.append("&");
682: }
683: }
684: else if (c == '\'' && inAttr)
685: {
686: if (buf == null)
687: buf = new CPStringBuilder(text.substring(0, i));
688: if (mode == Stylesheet.OUTPUT_HTML)
689:
690: buf.append("'");
691: else
692: buf.append("'");
693: }
694: else if (c == '"' && inAttr)
695: {
696: if (buf == null)
697: buf = new CPStringBuilder(text.substring(0, i));
698: buf.append(""");
699: }
700: else if (encodeCtl)
701: {
702: if (c < 0x20)
703: {
704: if (buf == null)
705: buf = new CPStringBuilder(text.substring(0, i));
706: buf.append('&');
707: buf.append('#');
708: buf.append((int) c);
709: buf.append(';');
710: }
711: else if (buf != null)
712: buf.append(c);
713: }
714: else if (buf != null)
715: buf.append(c);
716: }
717: return (buf == null) ? text : buf.toString();
718: }
719:
720: String toString(Node node)
721: {
722: ByteArrayOutputStream out = new ByteArrayOutputStream();
723: try
724: {
725: serialize(node, out);
726: return new String(out.toByteArray(), encoding);
727: }
728: catch (IOException e)
729: {
730: throw new RuntimeException(e.getMessage());
731: }
732: }
733:
734: boolean isHTMLBoolean(Attr attr, String attrName)
735: {
736: attrName = attrName.toLowerCase();
737: Node element = attr.getOwnerElement();
738: String elementName = element.getLocalName();
739: if (elementName == null)
740: {
741: elementName = element.getNodeName();
742: }
743: elementName = elementName.toLowerCase();
744: Collection attributes =
745: (Collection) HTML_BOOLEAN_ATTRIBUTES.get(elementName);
746: return (attributes != null && attributes.contains(attrName));
747: }
748:
749: static String getHTMLCharacterEntity(char c)
750: {
751:
752: switch (c)
753: {
754: case 160: return "nbsp";
755: case 161: return "iexcl";
756: case 162: return "cent";
757: case 163: return "pound";
758: case 164: return "curren";
759: case 165: return "yen";
760: case 166: return "brvbar";
761: case 167: return "sect";
762: case 168: return "uml";
763: case 169: return "copy";
764: case 170: return "ordf";
765: case 171: return "laquo";
766: case 172: return "not";
767: case 173: return "shy";
768: case 174: return "reg";
769: case 175: return "macr";
770: case 176: return "deg";
771: case 177: return "plusmn";
772: case 178: return "sup2";
773: case 179: return "sup3";
774: case 180: return "acute";
775: case 181: return "micro";
776: case 182: return "para";
777: case 183: return "middot";
778: case 184: return "cedil";
779: case 185: return "sup1";
780: case 186: return "ordm";
781: case 187: return "raquo";
782: case 188: return "frac14";
783: case 189: return "frac12";
784: case 190: return "frac34";
785: case 191: return "iquest";
786: case 192: return "Agrave";
787: case 193: return "Aacute";
788: case 194: return "Acirc";
789: case 195: return "Atilde";
790: case 196: return "Auml";
791: case 197: return "Aring";
792: case 198: return "AElig";
793: case 199: return "Ccedil";
794: case 200: return "Egrave";
795: case 201: return "Eacute";
796: case 202: return "Ecirc";
797: case 203: return "Euml";
798: case 204: return "Igrave";
799: case 205: return "Iacute";
800: case 206: return "Icirc";
801: case 207: return "Iuml";
802: case 208: return "ETH";
803: case 209: return "Ntilde";
804: case 210: return "Ograve";
805: case 211: return "Oacute";
806: case 212: return "Ocirc";
807: case 213: return "Otilde";
808: case 214: return "Ouml";
809: case 215: return "times";
810: case 216: return "Oslash";
811: case 217: return "Ugrave";
812: case 218: return "Uacute";
813: case 219: return "Ucirc";
814: case 220: return "Uuml";
815: case 221: return "Yacute";
816: case 222: return "THORN";
817: case 223: return "szlig";
818: case 224: return "agrave";
819: case 225: return "aacute";
820: case 226: return "acirc";
821: case 227: return "atilde";
822: case 228: return "auml";
823: case 229: return "aring";
824: case 230: return "aelig";
825: case 231: return "ccedil";
826: case 232: return "egrave";
827: case 233: return "eacute";
828: case 234: return "ecirc";
829: case 235: return "euml";
830: case 236: return "igrave";
831: case 237: return "iacute";
832: case 238: return "icirc";
833: case 239: return "iuml";
834: case 240: return "eth";
835: case 241: return "ntilde";
836: case 242: return "ograve";
837: case 243: return "oacute";
838: case 244: return "ocirc";
839: case 245: return "otilde";
840: case 246: return "ouml";
841: case 247: return "divide";
842: case 248: return "oslash";
843: case 249: return "ugrave";
844: case 250: return "uacute";
845: case 251: return "ucirc";
846: case 252: return "uuml";
847: case 253: return "yacute";
848: case 254: return "thorn";
849: case 255: return "yuml";
850: default: return null;
851: }
852: }
853:
854: }