Package epydoc :: Package markup :: Module javadoc
[hide private]
[frames] | no frames]

Source Code for Module epydoc.markup.javadoc

  1  # 
  2  # javadoc.py: javadoc docstring parsing 
  3  # Edward Loper 
  4  # 
  5  # Created [07/03/03 12:37 PM] 
  6  # $Id: javadoc.py 1574 2007-03-07 02:55:14Z dvarrazzo $ 
  7  # 
  8   
  9  """ 
 10  Epydoc parser for U{Javadoc<http://java.sun.com/j2se/javadoc/>} 
 11  docstrings.  Javadoc is an HTML-based markup language that was 
 12  developed for documenting Java APIs with inline comments.  It consists 
 13  of raw HTML, augmented by Javadoc tags.  There are two types of 
 14  Javadoc tag: 
 15   
 16    - X{Javadoc block tags} correspond to Epydoc fields.  They are 
 17      marked by starting a line with a string of the form \"C{@M{tag} 
 18      [M{arg}]}\", where C{M{tag}} indicates the type of block, and 
 19      C{M{arg}} is an optional argument.  (For fields that take 
 20      arguments, Javadoc assumes that the single word immediately 
 21      following the tag is an argument; multi-word arguments cannot be 
 22      used with javadoc.)   
 23     
 24    - X{inline Javadoc tags} are used for inline markup.  In particular, 
 25      epydoc uses them for crossreference links between documentation. 
 26      Inline tags may appear anywhere in the text, and have the form 
 27      \"C{{@M{tag} M{[args...]}}}\", where C{M{tag}} indicates the 
 28      type of inline markup, and C{M{args}} are optional arguments. 
 29   
 30  Epydoc supports all Javadoc tags, I{except}: 
 31    - C{{@docRoot}}, which gives the (relative) URL of the generated 
 32      documentation's root. 
 33    - C{{@inheritDoc}}, which copies the documentation of the nearest 
 34      overridden object.  This can be used to combine the documentation 
 35      of the overridden object with the documentation of the 
 36      overriding object. 
 37    - C{@serial}, C{@serialField}, and C{@serialData} which describe the 
 38      serialization (pickling) of an object. 
 39    - C{{@value}}, which copies the value of a constant. 
 40   
 41  @warning: Epydoc only supports HTML output for Javadoc docstrings. 
 42  """ 
 43  __docformat__ = 'epytext en' 
 44   
 45  # Imports 
 46  import re 
 47  from xml.dom.minidom import * 
 48  from epydoc.markup import * 
 49   
def parse_docstring(docstring, errors, **options):
    """
    Build the C{ParsedDocstring} representation of a docstring that
    is written in Javadoc markup.

    @param docstring: The Javadoc-formatted docstring to parse.
    @type docstring: C{string}
    @param errors: A list that receives any errors generated during
        parsing.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options.  No options are currently
        defined; anything passed here is ignored.
    @rtype: L{ParsedDocstring}
    """
    # All Javadoc handling lives in ParsedJavadocDocstring; `options`
    # is deliberately not forwarded.
    parsed = ParsedJavadocDocstring(docstring, errors)
    return parsed
64
class ParsedJavadocDocstring(ParsedDocstring):
    """
    An encoded version of a Javadoc docstring.  Since Javadoc is a
    fairly simple markup language, we don't do any processing in
    advance; instead, we wait to split fields or resolve
    crossreference links until we need to.

    @group Field Splitting: split_fields, _ARG_FIELDS, _FIELD_RE
    @cvar _ARG_FIELDS: A list of the fields that take arguments.
        Since Javadoc doesn't mark arguments in any special way, we
        must consult this list to decide whether the first word of a
        field is an argument or not.
    @cvar _FIELD_RE: A regular expression used to search for Javadoc
        block tags.

    @group HTML Output: to_html, _LINK_SPLIT_RE, _LINK_RE
    @cvar _LINK_SPLIT_RE: A regular expression used to search for
        Javadoc inline tags.
    @cvar _LINK_RE: A regular expression used to process Javadoc
        inline tags.

    @cvar _SUMMARY_RE: A regular expression that matches the text up
        to and including the first period that is followed by
        whitespace or by the end of the string.
    @ivar _docstring: The raw Javadoc docstring text.
    """
    def __init__(self, docstring, errors=None):
        """
        Create a new C{ParsedJavadocDocstring}.

        @param docstring: The docstring that should be used to
            construct this C{ParsedJavadocDocstring}.
        @type docstring: C{string}
        @param errors: A list where any errors generated during
            parsing will be stored.  If no list is given, then
            all errors are ignored.
        @type errors: C{list} of L{ParseError}
        """
        self._docstring = docstring
        # A throwaway list lets the link check run unconditionally
        # when the caller is not interested in the errors.
        if errors is None: errors = []
        self._check_links(errors)

    #////////////////////////////////////////////////////////////
    # Field Splitting
    #////////////////////////////////////////////////////////////

    _ARG_FIELDS = ('group variable var type cvariable cvar ivariable '+
                   'ivar param '+
                   'parameter arg argument raise raises exception '+
                   'except deffield newfield keyword kwarg kwparam').split()
    # A block tag is "@word" at the start of a line (after optional
    # whitespace), followed by whitespace or by the end of the
    # docstring.  The terminator must be the alternation (?:\s|$):
    # inside a character class, "$" is a literal dollar sign, so
    # "[\s$]" would miss a tag that ends the docstring and would
    # accept "@tag$".
    _FIELD_RE = re.compile(r'(^\s*\@\w+(?:\s|$))', re.MULTILINE)

    # Inherit docs from ParsedDocstring.
    def split_fields(self, errors=None):
        # NOTE: `errors` is accepted but not used by this method.

        # Split the docstring into an alternating list of text and
        # field tags.  Because _FIELD_RE has exactly one capturing
        # group, the result is [descr, tag, body, tag, body, ...].
        pieces = self._FIELD_RE.split(self._docstring)

        # Walk the (tag, body) pairs that follow the description.
        fields = []
        for (raw_tag, raw_body) in zip(pieces[1::2], pieces[2::2]):
            # Drop surrounding whitespace and the leading "@".
            tag = raw_tag.strip()[1:]

            # Get the field argument (if appropriate).  Padding with
            # empty strings guarantees two values even when the
            # argument and/or the body is missing.
            if tag in self._ARG_FIELDS:
                subpieces = raw_body.strip().split(None, 1) + ['', '']
                (arg, body) = subpieces[:2]
            else:
                (arg, body) = (None, raw_body)

            # Special processing for @see fields, since Epydoc
            # allows unrestricted text in them, but Javadoc just
            # uses them for xref links.
            if tag == 'see':
                body = self._normalize_see_body(body)

            # Construct the field.
            fields.append(Field(tag, arg, ParsedJavadocDocstring(body)))

        # The first piece is the description; report None when it is
        # empty or contains only whitespace.
        if pieces[0].strip():
            return (ParsedJavadocDocstring(pieces[0]), fields)
        return (None, fields)

    def _normalize_see_body(self, body):
        """
        Rewrite the body of a C{@see} field so that a bare reference
        becomes an inline C{{@link ...}} tag.

        The decision is made on the body with surrounding whitespace
        removed, since the raw body normally ends with the newline
        that precedes the next tag.

        @param body: The raw text following the C{@see} tag.
        @type body: C{string}
        @return: The body with matching surrounding quotes removed,
            if it is a quoted string; the unchanged body, if it is
            empty, starts with C{<}, or has an unmatched opening
            quote; otherwise the stripped body wrapped in
            C{{@link ...}}.
        @rtype: C{string}
        """
        text = body.strip()
        if not text:
            return body
        if text[0] in '"\'':
            # Quoted string: plain text, shown without the quotes.
            if text[-1] == text[0]:
                return text[1:-1]
            return body
        if text[0] == '<':
            # Starts with markup (e.g. an HTML anchor): leave as is.
            return body
        return '{@link %s}' % text

    #////////////////////////////////////////////////////////////
    # HTML Output.
    #////////////////////////////////////////////////////////////

    _LINK_SPLIT_RE = re.compile(r'({@link(?:plain)?\s[^}]+})')
    _LINK_RE = re.compile(r'{@link(?:plain)?\s+' + r'([\w#.]+)' +
                          r'(?:\([^\)]*\))?' + r'(\s+.*)?' + r'}')

    # Inherit docs from ParsedDocstring.
    def to_html(self, docstring_linker, **options):
        # Split the docstring into an alternating list of HTML and
        # links (odd pieces are links).
        pieces = self._LINK_SPLIT_RE.split(self._docstring)

        # This function is used to translate {@link ...}s to HTML.
        translate_xref = docstring_linker.translate_identifier_xref

        # Collect the output fragments and join them once at the end.
        # HTML pieces (even) are copied through; link pieces (odd)
        # are handed to docstring_linker.
        html = []
        for (i, piece) in enumerate(pieces):
            if i % 2 == 0:
                html.append(piece)
                continue

            # Decompose the link into pieces.
            m = self._LINK_RE.match(piece)
            if m is None: continue # Error flagged by _check_links
            (target, name) = m.groups()

            # Normalize the target name: Javadoc's "#member" and
            # "Class#member" become dotted names.  An argument list
            # such as "(int)" is matched by a non-capturing group in
            # _LINK_RE, so it is never part of `target`.
            if target[0] == '#': target = target[1:]
            target = target.replace('#', '.')

            # Provide a name, if it wasn't specified.  The label
            # group can match whitespace alone (e.g. "{@link Foo }"),
            # which must also count as "not specified".
            name = (name or '').strip() or target

            # Use docstring_linker to convert the name to html.
            html.append(translate_xref(target, name))
        return ''.join(html)

    # NOTE(review): this listing skips source lines 199-211 here.
    # _check_links, which __init__ calls, is presumably defined in
    # that gap -- confirm against the complete file.

    #////////////////////////////////////////////////////////////
    # Plaintext Output.
    #////////////////////////////////////////////////////////////

    # Inherit docs from ParsedDocstring.  Since we don't define
    # to_latex, this is used when generating latex output.
    def to_plaintext(self, docstring_linker, **options):
        # The raw docstring is returned unchanged; docstring_linker
        # and options are not used.
        return self._docstring

    _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')

    # Jeff's hack to get summary working
    def summary(self):
        # Returns a pair (summary, other): the summary as a
        # ParsedJavadocDocstring, and a boolean telling whether any
        # text remains beyond the summary.

        # Drop tags: discard every row whose first non-blank
        # character is "@".
        rows = [row for row in self._docstring.split('\n')
                if not row.lstrip().startswith('@')]
        doc = '\n'.join(rows)

        # Preferred summary: everything up to the first period that
        # is followed by whitespace or the end of the text.
        m = self._SUMMARY_RE.match(doc)
        if m:
            other = doc[m.end():]
            return (ParsedJavadocDocstring(m.group(1)),
                    bool(other.strip()))

        # No such period: fall back to the first line, with an
        # ellipsis when more lines follow.
        parts = doc.strip('\n').split('\n', 1)
        if len(parts) == 1:
            return (ParsedJavadocDocstring(parts[0]), False)
        return (ParsedJavadocDocstring(parts[0] + '...'), True)

#     def concatenate(self, other):
#         if not isinstance(other, ParsedJavadocDocstring):
#             raise ValueError, 'Could not concatenate docstrings'
#         return ParsedJavadocDocstring(self._docstring+other._docstring)