Package epydoc :: Package markup :: Module epytext
[hide private]
[frames] | [no frames]

Source Code for Module epydoc.markup.epytext

   1  # 
   2  # epytext.py: epydoc formatted docstring parsing 
   3  # Edward Loper 
   4  # 
   5  # Created [04/10/01 12:00 AM] 
   6  # $Id: epytext.py 1652 2007-09-26 04:45:34Z edloper $ 
   7  # 
   8   
   9  """ 
  10  Parser for epytext strings.  Epytext is a lightweight markup whose 
  11  primary intended application is Python documentation strings.  This 
  12  parser converts Epytext strings to a simple DOM-like representation 
  13  (encoded as a tree of L{Element} objects and strings).  Epytext 
  14  strings can contain the following X{structural blocks}: 
  15   
  16      - X{epytext}: The top-level element of the DOM tree. 
  17      - X{para}: A paragraph of text.  Paragraphs contain no newlines,  
  18        and all spaces are soft. 
  19      - X{section}: A section or subsection. 
  20      - X{field}: A tagged field.  These fields provide information 
  21        about specific aspects of a Python object, such as the 
  22        description of a function's parameter, or the author of a 
  23        module. 
  24      - X{literalblock}: A block of literal text.  This text should be 
  25        displayed as it would be displayed in plaintext.  The 
  26        parser removes the appropriate amount of leading whitespace  
  27        from each line in the literal block. 
  28      - X{doctestblock}: A block containing sample python code, 
  29        formatted according to the specifications of the C{doctest} 
  30        module. 
  31      - X{ulist}: An unordered list. 
  32      - X{olist}: An ordered list. 
  33      - X{li}: A list item.  This tag is used both for unordered list 
  34        items and for ordered list items. 
  35   
  36  Additionally, the following X{inline regions} may be used within 
  37  C{para} blocks: 
  38       
  39      - X{code}:   Source code and identifiers. 
  40      - X{math}:   Mathematical expressions. 
  41      - X{index}:  A term which should be included in an index, if one 
  42                   is generated. 
  43      - X{italic}: Italicized text. 
  44      - X{bold}:   Bold-faced text. 
  45      - X{uri}:    A Universal Resource Indicator (URI) or Universal 
  46                   Resource Locator (URL) 
  47      - X{link}:   A Python identifier which should be hyperlinked to 
  48                   the named object's documentation, when possible. 
  49   
   50  The returned DOM tree will conform to the following Document Type 
  51  Description:: 
  52   
  53     <!ENTITY % colorized '(code | math | index | italic | 
  54                            bold | uri | link | symbol)*'> 
  55   
  56     <!ELEMENT epytext ((para | literalblock | doctestblock | 
  57                        section | ulist | olist)*, fieldlist?)> 
  58   
  59     <!ELEMENT para (#PCDATA | %colorized;)*> 
  60   
  61     <!ELEMENT section (para | listblock | doctestblock | 
  62                        section | ulist | olist)+> 
  63   
  64     <!ELEMENT fieldlist (field+)> 
   65     <!ELEMENT field (tag, arg?, (para | listblock | doctestblock | 
   66                                  ulist | olist)+)> 
  67     <!ELEMENT tag (#PCDATA)> 
  68     <!ELEMENT arg (#PCDATA)> 
  69      
  70     <!ELEMENT literalblock (#PCDATA | %colorized;)*> 
  71     <!ELEMENT doctestblock (#PCDATA)> 
  72   
  73     <!ELEMENT ulist (li+)> 
  74     <!ELEMENT olist (li+)> 
  75     <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+> 
  76     <!ATTLIST li bullet NMTOKEN #IMPLIED> 
  77     <!ATTLIST olist start NMTOKEN #IMPLIED> 
  78   
  79     <!ELEMENT uri     (name, target)> 
  80     <!ELEMENT link    (name, target)> 
  81     <!ELEMENT name    (#PCDATA | %colorized;)*> 
  82     <!ELEMENT target  (#PCDATA)> 
  83      
  84     <!ELEMENT code    (#PCDATA | %colorized;)*> 
  85     <!ELEMENT math    (#PCDATA | %colorized;)*> 
  86     <!ELEMENT italic  (#PCDATA | %colorized;)*> 
  87     <!ELEMENT bold    (#PCDATA | %colorized;)*> 
  88     <!ELEMENT indexed (#PCDATA | %colorized;)> 
  89     <!ATTLIST code style CDATA #IMPLIED> 
  90   
  91     <!ELEMENT symbol (#PCDATA)> 
  92   
   93  @var SYMBOLS: A list of the escape symbols that are supported 
   94        by epydoc.  Currently the following symbols are supported: 
  95  <<<SYMBOLS>>> 
  96  """ 
  97  # Note: the symbol list is appended to the docstring automatically, 
  98  # below. 
  99   
 100  __docformat__ = 'epytext en' 
 101   
 102  # Code organization.. 
 103  #   1. parse() 
 104  #   2. tokenize() 
 105  #   3. colorize() 
 106  #   4. helpers 
 107  #   5. testing 
 108   
 109  import re, string, types, sys, os.path 
 110  from epydoc.markup import * 
 111  from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex 
 112  from epydoc.markup.doctest import doctest_to_html, doctest_to_latex 
 113   
 114  ################################################## 
 115  ## DOM-Like Encoding 
 116  ################################################## 
 117   
class Element:
    """
    A minimal DOM-like node used to represent parsed epytext
    documents.  A document is a tree whose interior nodes are
    C{Element} objects and whose leaves are plain C{string}s.  Every
    node carries a I{tag} naming its type, plus zero or more
    I{attributes} (a mapping from string names to string values).
    """
    def __init__(self, tag, *children, **attribs):
        self.tag = tag
        """A string tag indicating the type of this element.
        @type: C{string}"""

        self.children = list(children)
        """A list of the children of this element.
        @type: C{list} of (C{string} or C{Element})"""

        self.attribs = attribs
        """A dictionary mapping attribute names to attribute values
        for this element.
        @type: C{dict} from C{string} to C{string}"""

    def __str__(self):
        """
        Return an XML-style rendering of this element.
        @bug: Doesn't escape '<' or '&' or '>'.
        """
        attr_text = ''
        for name, value in self.attribs.items():
            attr_text += ' %s=%r' % (name, value)
        body = ''.join([str(kid) for kid in self.children])
        return '<%s%s>%s</%s>' % (self.tag, attr_text, body, self.tag)

    def __repr__(self):
        parts = [', %r' % kid for kid in self.children]
        parts += [', %s=%r' % pair for pair in self.attribs.items()]
        return 'Element(%s%s)' % (self.tag, ''.join(parts))
##################################################
## Constants
##################################################

# The possible heading underline characters, listed in order of
# heading depth (index 0 = top-level heading).
_HEADING_CHARS = "=-~"

# Escape codes (E{...}).  These should be needed very rarely: they
# produce the literal brace characters that otherwise delimit markup.
_ESCAPES = {'lb':'{', 'rb': '}'}

# Symbols.  These can be generated via S{...} escapes.
SYMBOLS = [
    # Arrows
    '<-', '->', '^', 'v',

    # Greek letters
    'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
    'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
    'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',
    'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
    'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',
    'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',
    'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',
    'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',

    # HTML character entities
    'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr',
    'lArr', 'rArr', 'uArr', 'dArr', 'hArr',
    'copy', 'times', 'forall', 'exist', 'part',
    'empty', 'isin', 'notin', 'ni', 'prod', 'sum',
    'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup',
    'int', 'there4', 'sim', 'cong', 'asymp', 'ne',
    'equiv', 'le', 'ge', 'sub', 'sup', 'nsub',
    'sube', 'supe', 'oplus', 'otimes', 'perp',

    # Alternate (long) names
    'infinity', 'integral', 'product',
    '>=', '<=',
    ]
# Convert to a dictionary, for quick membership testing (values are
# unused; only the keys matter).
_SYMBOLS = {}
for symbol in SYMBOLS: _SYMBOLS[symbol] = 1

# Add the symbol list to the module docstring, replacing the
# <<<SYMBOLS>>> placeholder left in it above.
symblist = '    '
symblist += ';\n    '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol)
                            for symbol in SYMBOLS])
__doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist)
del symbol, symblist

# Tags for colorizing text: maps the single-letter markup tag (as in
# B{...}) to the DOM element name it produces.
_COLORIZING_TAGS = {
    'C': 'code',
    'M': 'math',
    'X': 'indexed',
    'I': 'italic',
    'B': 'bold',
    'U': 'uri',
    'L': 'link',       # A Python identifier that should be linked to
    'E': 'escape',     # escapes characters or creates symbols
    'S': 'symbol',
    'G': 'graph',
    }

# Which tags can use "link syntax" (e.g., U{Python<www.python.org>})?
_LINK_COLORIZING_TAGS = ['link', 'uri']

##################################################
## Structuring (Top Level)
##################################################
def parse(str, errors = None):
    """
    Return a DOM tree encoding the contents of an epytext string.  Any
    errors generated during parsing will be stored in C{errors}.

    @param str: The epytext string to parse.
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @return: a DOM tree encoding the contents of an epytext string.
    @rtype: C{Element}
    @raise ParseError: If C{errors} is C{None} and an error is
        encountered while parsing.
    """
    # Initialize errors list.  (Use 'is None', not '== None': identity
    # comparison is the correct idiom for None per PEP 8.)
    if errors is None:
        errors = []
        raise_on_error = 1
    else:
        raise_on_error = 0

    # Preprocess the string: normalize CRLF line endings to bare LF,
    # and expand tabs.  (str.expandtabs() replaces the removed
    # Python-2-only string.expandtabs(); behavior is identical.)
    str = re.sub('\015\012', '\012', str)
    str = str.expandtabs()

    # Tokenize the input string.
    tokens = _tokenize(str, errors)

    # Have we encountered a field yet?
    encountered_field = 0

    # Create an document to hold the epytext.
    doc = Element('epytext')

    # Maintain two parallel stacks: one contains DOM elements, and
    # gives the ancestors of the current block.  The other contains
    # indentation values, and gives the indentation of the
    # corresponding DOM elements.  An indentation of "None" reflects
    # an unknown indentation.  However, the indentation must be
    # greater than, or greater than or equal to, the indentation of
    # the prior element (depending on what type of DOM element it
    # corresponds to).  No 2 consecutive indent_stack values will
    # ever be "None."  Use initial dummy elements in the stack, so we
    # don't have to worry about bounds checking.
    stack = [None, doc]
    indent_stack = [-1, None]

    for token in tokens:
        # Pop any completed blocks off the stack.
        _pop_completed_blocks(token, stack, indent_stack)

        # If Token has type PARA, colorize and add the new paragraph
        if token.tag == Token.PARA:
            _add_para(doc, token, stack, indent_stack, errors)

        # If Token has type HEADING, add the new section
        elif token.tag == Token.HEADING:
            _add_section(doc, token, stack, indent_stack, errors)

        # If Token has type LBLOCK, add the new literal block
        elif token.tag == Token.LBLOCK:
            stack[-1].children.append(token.to_dom(doc))

        # If Token has type DTBLOCK, add the new doctest block
        elif token.tag == Token.DTBLOCK:
            stack[-1].children.append(token.to_dom(doc))

        # If Token has type BULLET, add the new list/list item/field
        elif token.tag == Token.BULLET:
            _add_list(doc, token, stack, indent_stack, errors)
        else:
            assert 0, 'Unknown token type: '+token.tag

        # Check if the DOM element we just added was a field..
        if stack[-1].tag == 'field':
            encountered_field = 1
        elif encountered_field == 1:
            if len(stack) <= 3:
                estr = ("Fields must be the final elements in an "+
                        "epytext string.")
                errors.append(StructuringError(estr, token.startline))

    # Graphs use inline markup (G{...}) but are really block-level
    # elements; so "raise" any graphs we generated.  This is a bit of
    # a hack, but the alternative is to define a new markup for
    # block-level elements, which I'd rather not do.  (See sourceforge
    # bug #1673017.)
    for child in doc.children:
        _raise_graphs(child, doc)

    # If there was a fatal error, then signal it!  (Note: errors[0]
    # is raised even if it is not itself the fatal one; preserved
    # from the original behavior.)
    if any(e.is_fatal() for e in errors):
        if raise_on_error:
            raise errors[0]
        else:
            return None

    # Return the top-level epytext DOM element.
    return doc
336
def _raise_graphs(tree, parent):
    """
    Hoist C{graph} elements up to block level.  Graphs are written
    with inline markup (C{G{...}}) but must become block-level
    elements, so any non-block node that directly contains a graph is
    split in two around the graph, and the three pieces replace the
    node in C{parent.children}.  (See sourceforge bug #1673017,
    referenced in L{parse}.)
    """
    # Recurse to children first, so nested graphs have already been
    # raised into `tree`'s direct children before we inspect them.
    have_graph_child = False
    for elt in tree.children:
        if isinstance(elt, Element):
            _raise_graphs(elt, tree)
            if elt.tag == 'graph': have_graph_child = True

    # Block-level containers may legitimately hold a graph; only
    # split non-block nodes (e.g. paragraphs).
    block = ('section', 'fieldlist', 'field', 'ulist', 'olist', 'li')
    if have_graph_child and tree.tag not in block:
        child_index = 0
        for elt in tree.children:
            if isinstance(elt, Element) and elt.tag == 'graph':
                # We found a graph: splice it into the parent,
                # replacing `tree` with (left-half, graph, right-half).
                parent_index = parent.children.index(tree)
                left = tree.children[:child_index]
                right = tree.children[child_index+1:]
                parent.children[parent_index:parent_index+1] = [
                    Element(tree.tag, *left, **tree.attribs),
                    elt,
                    Element(tree.tag, *right, **tree.attribs)]
                # NOTE(review): after this splice `tree` is no longer
                # in parent.children, so a *second* graph in the same
                # node would make index(tree) raise ValueError —
                # confirm whether multiple G{...} per paragraph can
                # reach this point.
                child_index = 0
                parent_index += 2
            else:
                child_index += 1
362
def _pop_completed_blocks(token, stack, indent_stack):
    """
    Pop any completed blocks off the stack.  This includes any
    blocks that we have dedented past, as well as any list item
    blocks that we've dedented to.  The top element on the stack
    should only be a list if we're about to start a new list
    item (i.e., if the next token is a bullet).
    """
    indent = token.indent
    # An unknown indentation (None) cannot close anything; skip.
    if indent != None:
        # Never pop the two dummy sentinel entries at the bottom.
        while (len(stack) > 2):
            pop = 0

            # Dedent past a block.  When the top indent is still
            # unknown, compare against the enclosing block's indent.
            if indent_stack[-1]!=None and indent<indent_stack[-1]: pop=1
            elif indent_stack[-1]==None and indent<indent_stack[-2]: pop=1

            # Dedent to a list item, if it is followed by another list
            # item with the same indentation.
            elif (token.tag == 'bullet' and indent==indent_stack[-2] and
                  stack[-1].tag in ('li', 'field')): pop=1

            # End of a list (no more list items available).  A bullet
            # ending in ':' is a field tag, which also ends any list.
            elif (stack[-1].tag in ('ulist', 'olist') and
                  (token.tag != 'bullet' or token.contents[-1] == ':')):
                pop=1

            # Pop the block, if it's complete.  Otherwise, we're done.
            if pop == 0: return
            stack.pop()
            indent_stack.pop()
394
def _add_para(doc, para_token, stack, indent_stack, errors):
    """
    Colorize the given paragraph token and append the resulting
    C{para} element to the block at the top of the stack.

    @param doc: The top-level C{epytext} DOM element (passed through
        to C{_colorize}).
    @param para_token: The C{Token.PARA} token to add.
    @param stack: The stack of currently-open DOM elements.
    @param indent_stack: Indentations of the elements in C{stack}.
    @param errors: A list where any L{StructuringError}s generated
        here will be stored.
    """
    # An unknown block indentation (None) is resolved by the first
    # paragraph seen inside the block.  (Changed '== None' to
    # 'is None': identity comparison is the correct idiom per PEP 8.)
    if indent_stack[-1] is None:
        indent_stack[-1] = para_token.indent
    if para_token.indent == indent_stack[-1]:
        # Colorize the paragraph and add it.
        para = _colorize(doc, para_token, errors)
        if para_token.inline:
            para.attribs['inline'] = True
        stack[-1].children.append(para)
    else:
        estr = "Improper paragraph indentation."
        errors.append(StructuringError(estr, para_token.startline))
410
def _add_section(doc, heading_token, stack, indent_stack, errors):
    """Add a new section to the DOM tree, with the given heading."""
    # Resolve an unknown indentation from this heading; otherwise the
    # heading must sit at exactly the enclosing block's indentation.
    if indent_stack[-1] == None:
        indent_stack[-1] = heading_token.indent
    elif indent_stack[-1] != heading_token.indent:
        estr = "Improper heading indentation."
        errors.append(StructuringError(estr, heading_token.startline))

    # Check for errors: everything above the sentinels must itself be
    # a section (headings can't appear inside lists, fields, etc.).
    for tok in stack[2:]:
        if tok.tag != "section":
            estr = "Headings must occur at the top level."
            errors.append(StructuringError(estr, heading_token.startline))
            break
    # A heading can nest at most one level deeper than the current
    # section depth; a deeper underline char means a skipped level.
    if (heading_token.level+2) > len(stack):
        estr = "Wrong underline character for heading."
        errors.append(StructuringError(estr, heading_token.startline))

    # Pop the appropriate number of headings so we're at the
    # correct level.  (The +2 accounts for the two dummy stack
    # entries below the first real element.)
    stack[heading_token.level+2:] = []
    indent_stack[heading_token.level+2:] = []

    # Colorize the heading
    head = _colorize(doc, heading_token, errors, 'heading')

    # Add the section's and heading's DOM elements.  The new section
    # becomes the open block; its indentation is unknown until its
    # first paragraph arrives.
    sec = Element("section")
    stack[-1].children.append(sec)
    stack.append(sec)
    sec.children.append(head)
    indent_stack.append(None)
443
def _add_list(doc, bullet_token, stack, indent_stack, errors):
    """
    Add a new list item or field to the DOM tree, with the given
    bullet or field tag.  When necessary, create the associated
    list.

    @param doc: The top-level C{epytext} DOM element.
    @param bullet_token: The C{Token.BULLET} token to add.  Its final
        character determines the list type: C{'-'} = ulist,
        C{'.'} = olist, C{':'} = fieldlist.
    @param stack: The stack of currently-open DOM elements.
    @param indent_stack: Indentations of the elements in C{stack}.
    @param errors: A list where any L{StructuringError}s generated
        here will be stored.
    """
    # Determine what type of bullet it is.
    if bullet_token.contents[-1] == '-':
        list_type = 'ulist'
    elif bullet_token.contents[-1] == '.':
        list_type = 'olist'
    elif bullet_token.contents[-1] == ':':
        list_type = 'fieldlist'
    else:
        raise AssertionError('Bad Bullet: %r' % bullet_token.contents)

    # Is this a new list?
    newlist = 0
    if stack[-1].tag != list_type:
        newlist = 1
    elif list_type == 'olist' and stack[-1].tag == 'olist':
        # An ordered-list bullet like "1.2.3." splits into components
        # ['1','2','3'].  Continue the current list only if the prefix
        # matches and the final number increments by exactly one.
        old_listitem = stack[-1].children[-1]
        old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1]
        new_bullet = bullet_token.contents.split('.')[:-1]
        if (new_bullet[:-1] != old_bullet[:-1] or
            int(new_bullet[-1]) != int(old_bullet[-1])+1):
            newlist = 1

    # Create the new list.
    if newlist:
        # Fix: compare strings with '==', not 'is' -- the identity
        # test only worked by virtue of CPython string interning.
        if stack[-1].tag == 'fieldlist':
            # The new list item is not a field list item (since this
            # is a new list); but it's indented the same as the field
            # list.  This either means that they forgot to indent the
            # list, or they are trying to put something after the
            # field list.  The first one seems more likely, so we'll
            # just warn about that (to avoid confusion).
            estr = "Lists must be indented."
            errors.append(StructuringError(estr, bullet_token.startline))
        if stack[-1].tag in ('ulist', 'olist', 'fieldlist'):
            stack.pop()
            indent_stack.pop()

        if (list_type != 'fieldlist' and indent_stack[-1] is not None and
            bullet_token.indent == indent_stack[-1]):
            # Ignore this error if there's text on the same line as
            # the comment-opening quote -- epydoc can't reliably
            # determine the indentation for that line.
            if bullet_token.startline != 1 or bullet_token.indent != 0:
                estr = "Lists must be indented."
                errors.append(StructuringError(estr, bullet_token.startline))

        if list_type == 'fieldlist':
            # Fieldlist should be at the top-level.
            for tok in stack[2:]:
                if tok.tag != "section":
                    estr = "Fields must be at the top level."
                    errors.append(
                        StructuringError(estr, bullet_token.startline))
                    break
            stack[2:] = []
            indent_stack[2:] = []

        # Add the new list.
        lst = Element(list_type)
        stack[-1].children.append(lst)
        stack.append(lst)
        indent_stack.append(bullet_token.indent)
        if list_type == 'olist':
            start = bullet_token.contents.split('.')[:-1]
            # Fix: 'start' is a *list* of bullet components, so the
            # old test "start != '1'" was always true, and the start
            # attribute was recorded even for lists beginning at 1.
            # Compare the final component instead, so "start" is only
            # set when the list does not begin at 1.
            if start[-1] != '1':
                lst.attribs["start"] = start[-1]

    # Fields are treated somewhat specially: A "fieldlist"
    # node is created to make the parsing simpler, but fields
    # are adjoined directly into the "epytext" node, not into
    # the "fieldlist" node.
    if list_type == 'fieldlist':
        li = Element("field")
        token_words = bullet_token.contents[1:-1].split(None, 1)
        tag_elt = Element("tag")
        tag_elt.children.append(token_words[0])
        li.children.append(tag_elt)

        if len(token_words) > 1:
            arg_elt = Element("arg")
            arg_elt.children.append(token_words[1])
            li.children.append(arg_elt)
    else:
        li = Element("li")
        if list_type == 'olist':
            li.attribs["bullet"] = bullet_token.contents

    # Add the bullet.  The item's indentation is unknown until its
    # first paragraph arrives.
    stack[-1].children.append(li)
    stack.append(li)
    indent_stack.append(None)
541 542 ################################################## 543 ## Tokenization 544 ################################################## 545
class Token:
    """
    An intermediate data structure used while building the DOM tree
    for a formatted docstring.  There are five kinds of C{Token}:
    paragraphs, literal blocks, doctest blocks, headings, and bullets.

    The text of each C{Token} is stored, normalized, in C{contents}:
    paragraphs are collapsed to a single line with newlines and
    indentation replaced by single spaces; literal and doctest blocks
    have the appropriate amount of leading whitespace removed from
    each line.  The C{indent} value (number of leading spaces) is
    used by the structuring procedure to assemble hierarchical
    blocks.

    @type tag: C{string}
    @ivar tag: This C{Token}'s type; one of C{Token.PARA},
        C{Token.LBLOCK}, C{Token.DTBLOCK}, C{Token.HEADING}, or
        C{Token.BULLET}.
    @type startline: C{int}
    @ivar startline: The line on which this C{Token} begins; used
        only for issuing errors.
    @type contents: C{string}
    @ivar contents: The normalized text contained in this C{Token}.
    @type indent: C{int} or C{None}
    @ivar indent: The indentation level of this C{Token} (in number
        of leading spaces).  C{None} indicates an unknown
        indentation, as for list items and fields that begin with
        one-line paragraphs.
    @type level: C{int} or C{None}
    @ivar level: The heading level (0, 1, or 2) if this C{Token} is a
        heading; C{None} otherwise.
    @type inline: C{bool}
    @ivar inline: True if this element is inline-level (like an HTML
        C{<span>}); False if it is block-level (like C{<div>}).

    @type PARA: C{string}
    @cvar PARA: The C{tag} value for paragraph C{Token}s.
    @type LBLOCK: C{string}
    @cvar LBLOCK: The C{tag} value for literal C{Token}s.
    @type DTBLOCK: C{string}
    @cvar DTBLOCK: The C{tag} value for doctest C{Token}s.
    @type HEADING: C{string}
    @cvar HEADING: The C{tag} value for heading C{Token}s.
    @type BULLET: C{string}
    @cvar BULLET: The C{tag} value for bullet C{Token}s; also used
        for field tag C{Token}s, since fields behave syntactically
        like list items.
    """
    # The possible token types.
    PARA = "para"
    LBLOCK = "literalblock"
    DTBLOCK = "doctestblock"
    HEADING = "heading"
    BULLET = "bullet"

    def __init__(self, tag, startline, contents, indent, level=None,
                 inline=False):
        """
        Create a new C{Token}.

        @param tag: The type of the new C{Token}.
        @type tag: C{string}
        @param startline: The line on which the new C{Token} begins.
        @type startline: C{int}
        @param contents: The normalized contents of the new C{Token}.
        @type contents: C{string}
        @param indent: The indentation of the new C{Token} (in number
            of leading spaces); C{None} if unknown.
        @type indent: C{int} or C{None}
        @param level: The heading level, if this is a heading.
        @type level: C{int} or C{None}
        @param inline: Is this C{Token} inline, like a C{<span>}?
        @type inline: C{bool}
        """
        self.tag = tag
        self.startline = startline
        self.contents = contents
        self.indent = indent
        self.level = level
        self.inline = inline

    def __repr__(self):
        """
        @rtype: C{string}
        @return: the formal representation of this C{Token}, of the
            form::
                <Token: para at line 12>
        """
        return '<Token: {0} at line {1}>'.format(self.tag, self.startline)

    def to_dom(self, doc):
        """
        @return: a DOM representation of this C{Token}: an L{Element}
            whose tag is this token's tag and whose single child is
            this token's contents.
        @rtype: L{Element}
        """
        return Element(self.tag, self.contents)
663 664 # Construct regular expressions for recognizing bullets. These are 665 # global so they don't have to be reconstructed each time we tokenize 666 # a docstring. 667 _ULIST_BULLET = '[-]( +|$)' 668 _OLIST_BULLET = '(\d+[.])+( +|$)' 669 _FIELD_BULLET = '@\w+( [^{}:\n]+)?:' 670 _BULLET_RE = re.compile(_ULIST_BULLET + '|' + 671 _OLIST_BULLET + '|' + 672 _FIELD_BULLET) 673 _LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET) 674 _FIELD_BULLET_RE = re.compile(_FIELD_BULLET) 675 del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET 676
def _tokenize_doctest(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the doctest block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the doctest block.  Any errors
    generated while tokenizing the doctest block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        doctest block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the doctest block.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then errors will
        generate exceptions.
    @return: The line number of the first line following the doctest
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    # If they dedent past block_indent, keep track of the minimum
    # indentation.  This is used when removing leading indentation
    # from the lines of the doctest block.
    min_indent = block_indent

    linenum = start + 1
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # A blank line ends doctest block.  (indent == len(line)
        # means the line is empty or all whitespace.)
        if indent == len(line): break

        # A Dedent past block_indent is an error; record it but keep
        # the line in the block, using the dedented indent for
        # stripping below.
        if indent < block_indent:
            min_indent = min(min_indent, indent)
            estr = 'Improper doctest block indentation.'
            errors.append(TokenizationError(estr, linenum))

        # Go on to the next line.
        linenum += 1

    # Add the token, and return the linenum after the token ends.
    contents = [line[min_indent:] for line in lines[start:linenum]]
    contents = '\n'.join(contents)
    tokens.append(Token(Token.DTBLOCK, start, contents, block_indent))
    return linenum
731
def _tokenize_literal(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the literal block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the literal block.  Any errors
    generated while tokenizing the literal block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        literal block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the literal block.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the literal
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # A Dedent to block_indent ends the literal block.
        # (Ignore blank lines, though)
        if len(line) != indent and indent <= block_indent:
            break

        # Go on to the next line.
        linenum += 1

    # Add the token, and return the linenum after the token ends.
    # Strip block_indent+1 leading columns from every line (literal
    # content must be indented past the introducing paragraph), then
    # trim leading/trailing blank lines.
    contents = [line[block_indent+1:] for line in lines[start:linenum]]
    contents = '\n'.join(contents)
    contents = re.sub('(\A[ \n]*\n)|(\n[ \n]*\Z)', '', contents)
    tokens.append(Token(Token.LBLOCK, start, contents, block_indent))
    return linenum
778
def _tokenize_listart(lines, start, bullet_indent, tokens, errors):
    """
    Construct L{Token}s for the bullet and the first paragraph of the
    list item (or field) starting at C{lines[start]}, and append them
    to C{tokens}.  C{bullet_indent} should be the indentation of the
    list item.  Any errors generated while tokenizing will be
    appended to C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        list item to be tokenized.
    @param bullet_indent: The indentation of C{lines[start]}.  This is
        the indentation of the list item.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the list
        item's first paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type bullet_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    para_indent = None
    # A trailing '::' on the bullet line introduces a literal block,
    # so the paragraph ends immediately after that line.
    doublecolon = lines[start].rstrip()[-2:] == '::'

    # Get the contents of the bullet.
    para_start = _BULLET_RE.match(lines[start], bullet_indent).end()
    bcontents = lines[start][bullet_indent:para_start].strip()

    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # "::" markers end paragraphs.
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # A blank line ends the token
        if indent == len(line): break

        # Dedenting past bullet_indent ends the list item.
        if indent < bullet_indent: break

        # A line beginning with a bullet ends the token.
        if _BULLET_RE.match(line, indent): break

        # If this is the second line, set the paragraph indentation, or
        # end the token, as appropriate.
        if para_indent == None: para_indent = indent

        # A change in indentation ends the token
        if indent != para_indent: break

        # Go on to the next line.
        linenum += 1

    # Add the bullet token.  Its indentation is known; the paragraph
    # indentation may still be None for one-line items.
    tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent,
                        inline=True))

    # Add the paragraph token: the remainder of the bullet line plus
    # any continuation lines, collapsed to a single space-joined line.
    pcontents = ([lines[start][para_start:].strip()] +
                 [line.strip() for line in lines[start+1:linenum]])
    pcontents = ' '.join(pcontents).strip()
    if pcontents:
        tokens.append(Token(Token.PARA, start, pcontents, para_indent,
                            inline=True))

    # Return the linenum after the paragraph token ends.
    return linenum
855
def _tokenize_para(lines, start, para_indent, tokens, errors):
    """
    Construct a L{Token} containing the paragraph starting at
    C{lines[start]}, and append it to C{tokens}.  C{para_indent}
    should be the indentation of the paragraph.  Any errors
    generated while tokenizing the paragraph will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        paragraph to be tokenized.
    @param para_indent: The indentation of C{lines[start]}.  This is
        the indentation of the paragraph.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
        will be appended to this list.
    @return: The line number of the first line following the
        paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type para_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    doublecolon = 0
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # "::" markers end paragraphs.
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # Blank lines end paragraphs
        if indent == len(line): break

        # Indentation changes end paragraphs
        if indent != para_indent: break

        # List bullets end paragraphs
        if _BULLET_RE.match(line, indent): break

        # Check for mal-formatted field items ('@' that didn't match
        # the field-bullet regex above).
        if line[indent] == '@':
            estr = "Possible mal-formatted field item."
            errors.append(TokenizationError(estr, linenum, is_fatal=0))

        # Go on to the next line.
        linenum += 1

    contents = [line.strip() for line in lines[start:linenum]]

    # Does this token look like a heading?  A heading is one line of
    # text followed by an underline line made of a single heading
    # character, with roughly matching lengths (within 5 chars).
    if ((len(contents) < 2) or
        (contents[1][0] not in _HEADING_CHARS) or
        (abs(len(contents[0])-len(contents[1])) > 5)):
        looks_like_heading = 0
    else:
        looks_like_heading = 1
        # The underline must consist of a single repeated character.
        for char in contents[1]:
            if char != contents[1][0]:
                looks_like_heading = 0
                break

    if looks_like_heading:
        if len(contents[0]) != len(contents[1]):
            # Close but not exact: warn (non-fatal) and fall through
            # to treating the text as an ordinary paragraph.
            estr = ("Possible heading typo: the number of "+
                    "underline characters must match the "+
                    "number of heading characters.")
            errors.append(TokenizationError(estr, start, is_fatal=0))
        else:
            # The underline character determines the heading depth.
            level = _HEADING_CHARS.index(contents[1][0])
            tokens.append(Token(Token.HEADING, start,
                                contents[0], para_indent, level))
            return start+2

    # Add the paragraph token, and return the linenum after it ends.
    contents = ' '.join(contents)
    tokens.append(Token(Token.PARA, start, contents, para_indent))
    return linenum
940
def _tokenize(str, errors):
    """
    Split a given formatted docstring into an ordered list of
    C{Token}s, according to the epytext markup rules.

    @param str: The epytext string
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then errors will
        generate exceptions.
    @type errors: C{list} of L{ParseError}
    @return: a list of the C{Token}s that make up the given string.
    @rtype: C{list} of L{Token}
    """
    tokens = []
    lines = str.split('\n')

    # Walk over the lines, dispatching on the kind of block that
    # begins at the current position.  Each _tokenize_* helper
    # consumes its whole block and returns the line number just
    # past it.
    lineno = 0
    n_lines = len(lines)
    while lineno < n_lines:
        line = lines[lineno]
        indent = len(line) - len(line.lstrip())

        if indent == len(line):
            # Whitespace-only line: skip it.
            lineno += 1
            continue

        if line[indent:indent+4] == '>>> ':
            # A ">>> " prompt opens a doctest block.
            lineno = _tokenize_doctest(lines, lineno, indent,
                                       tokens, errors)
        elif _BULLET_RE.match(line, indent):
            # A bullet opens a list item.  The LI token may carry its
            # own indentation, which then becomes the reference indent
            # for any literal block introduced right after it.
            lineno = _tokenize_listart(lines, lineno, indent,
                                       tokens, errors)
            if tokens[-1].indent is not None:
                indent = tokens[-1].indent
        else:
            # Warn about what looks like a mis-indented field tag.
            if line[indent] == '@':
                errors.append(TokenizationError(
                    "Possible mal-formatted field item.",
                    lineno, is_fatal=0))

            # Anything else is either a paragraph or a heading.
            lineno = _tokenize_para(lines, lineno, indent, tokens, errors)

        # A paragraph ending in '::' introduces a literal block: keep
        # a single ':' on the paragraph and consume the block.
        last = tokens[-1]
        if last.tag == Token.PARA and last.contents[-2:] == '::':
            last.contents = last.contents[:-1]
            lineno = _tokenize_literal(lines, lineno, indent, tokens, errors)

    return tokens
##################################################
## Inline markup ("colorizing")
##################################################

# Assorted regular expressions used for colorizing.
# _BRACE_RE locates the next '{' or '}' while _colorize scans a
# paragraph.  _TARGET_RE splits a link region "text <target>" into its
# text and target halves, dropping an optional "URI:"/"URL:" prefix on
# the target.
_BRACE_RE = re.compile('{|}')
_TARGET_RE = re.compile('^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')
def _colorize(doc, token, errors, tagName='para'):
    """
    Given a string containing the contents of a paragraph, produce a
    DOM C{Element} encoding that paragraph.  Colorized regions are
    represented using DOM C{Element}s, and text is represented using
    DOM C{Text}s.

    @param doc: The enclosing document; passed through to
        L{_colorize_graph} and C{_colorize_link} for the regions that
        need it.
    @param token: The token whose contents are being colorized; also
        used to position any L{ColorizingError}s.

    @param errors: A list of errors.  Any newly generated errors will
        be appended to this list.
    @type errors: C{list} of C{string}

    @param tagName: The element tag for the DOM C{Element} that should
        be generated.
    @type tagName: C{string}

    @return: a DOM C{Element} encoding the given paragraph.
    @returntype: C{Element}
    """
    str = token.contents
    linenum = 0

    # Maintain a stack of DOM elements, containing the ancestors of
    # the text currently being analyzed.  New elements are pushed when
    # "{" is encountered, and old elements are popped when "}" is
    # encountered.
    stack = [Element(tagName)]

    # This is just used to make error-reporting friendlier.  It's a
    # stack parallel to "stack" containing the index of each element's
    # open brace.
    openbrace_stack = [0]

    # Process the string, scanning for '{' and '}'s.  start is the
    # index of the first unprocessed character.  Each time through the
    # loop, we process the text from the first unprocessed character
    # to the next open or close brace.
    start = 0
    while 1:
        match = _BRACE_RE.search(str, start)
        if match == None: break
        end = match.start()

        # Open braces start new colorizing elements.  When preceded
        # by a capital letter, they specify a colored region, as
        # defined by the _COLORIZING_TAGS dictionary.  Otherwise,
        # use a special "literal braces" element (with tag "litbrace"),
        # and convert them to literal braces once we find the matching
        # close-brace.
        if match.group() == '{':
            if (end>0) and 'A' <= str[end-1] <= 'Z':
                # Tagged region: the capital letter before the brace
                # is the tag, so it is excluded from the flushed text.
                if (end-1) > start:
                    stack[-1].children.append(str[start:end-1])
                if str[end-1] not in _COLORIZING_TAGS:
                    estr = "Unknown inline markup tag."
                    errors.append(ColorizingError(estr, token, end-1))
                    stack.append(Element('unknown'))
                else:
                    tag = _COLORIZING_TAGS[str[end-1]]
                    stack.append(Element(tag))
            else:
                # Bare brace: flush pending text and open a litbrace.
                if end > start:
                    stack[-1].children.append(str[start:end])
                stack.append(Element('litbrace'))
            openbrace_stack.append(end)
            # Attach the new element to its parent immediately; some
            # close-brace handlers below replace it in-place via
            # stack[-2].children[-1].
            stack[-2].children.append(stack[-1])

        # Close braces end colorizing elements.
        elif match.group() == '}':
            # Check for (and ignore) unbalanced braces.
            if len(stack) <= 1:
                estr = "Unbalanced '}'."
                errors.append(ColorizingError(estr, token, end))
                start = end + 1
                continue

            # Add any remaining text.
            if end > start:
                stack[-1].children.append(str[start:end])

            # Special handling for symbols: an S{...} region must
            # contain a single text child naming a known symbol; it is
            # replaced by a 'symbol' element wrapping that name.
            if stack[-1].tag == 'symbol':
                if (len(stack[-1].children) != 1 or
                    not isinstance(stack[-1].children[0], basestring)):
                    estr = "Invalid symbol code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    symb = stack[-1].children[0]
                    if symb in _SYMBOLS:
                        # It's a symbol
                        stack[-2].children[-1] = Element('symbol', symb)
                    else:
                        estr = "Invalid symbol code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for escape elements: E{...} is replaced
            # by its expansion from _ESCAPES, or -- for a
            # single-character code like E{.} -- by the character
            # itself.
            if stack[-1].tag == 'escape':
                if (len(stack[-1].children) != 1 or
                    not isinstance(stack[-1].children[0], basestring)):
                    estr = "Invalid escape code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    escp = stack[-1].children[0]
                    if escp in _ESCAPES:
                        # It's an escape from _ESCAPES
                        stack[-2].children[-1] = _ESCAPES[escp]
                    elif len(escp) == 1:
                        # It's a single-character escape (eg E{.})
                        stack[-2].children[-1] = escp
                    else:
                        estr = "Invalid escape code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for literal braces elements: splice the
            # region's children back into the parent between literal
            # '{' and '}' text nodes.
            if stack[-1].tag == 'litbrace':
                stack[-2].children[-1:] = ['{'] + stack[-1].children + ['}']

            # Special handling for graphs:
            if stack[-1].tag == 'graph':
                _colorize_graph(doc, stack[-1], token, end, errors)

            # Special handling for link-type elements:
            if stack[-1].tag in _LINK_COLORIZING_TAGS:
                _colorize_link(doc, stack[-1], token, end, errors)

            # Pop the completed element.
            openbrace_stack.pop()
            stack.pop()

        start = end+1

    # Add any final text.
    if start < len(str):
        stack[-1].children.append(str[start:])

    # Any elements left open are unbalanced '{'s; report the position
    # of the innermost one.
    if len(stack) != 1:
        estr = "Unbalanced '{'."
        errors.append(ColorizingError(estr, token, openbrace_stack[-1]))

    return stack[0]
# The graph types that may appear as the first word of a G{...}
# inline graph region (see _colorize_graph).
GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph']
def _colorize_graph(doc, graph, token, end, errors):
    """
    Parse the contents of a G{...} region into a graph type plus an
    optional argument list, and store them as the children of
    C{graph} (type first, then one child per argument).  On a bad
    specification, record a L{ColorizingError} and store the children
    C{['none', '']} instead.

    Eg::
        G{classtree}
        G{classtree x, y, z}
        G{importgraph}

    @param doc: Unused here; kept for symmetry with the other
        colorizing helpers called from L{_colorize}.
    @param token: The token being colorized (for error reporting).
    @param end: The character index of the region's close brace (for
        error reporting).
    """
    bad_graph_spec = False

    children = graph.children[:]
    graph.children = []

    if len(children) != 1 or not isinstance(children[0], basestring):
        bad_graph_spec = "Bad graph specification"
    else:
        pieces = children[0].split(None, 1)
        graphtype = pieces[0].replace(':','').strip().lower()
        if graphtype in GRAPH_TYPES:
            if len(pieces) == 2:
                # The argument list must consist entirely of dotted
                # names separated by commas and/or whitespace.  The
                # pattern is anchored with '$': without the anchor,
                # the '*'-repeated group matched the empty prefix of
                # *any* string, so the "Bad graph arg list" error was
                # unreachable and malformed argument lists were
                # silently accepted.
                if re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*$', pieces[1]):
                    args = pieces[1].replace(',', ' ').replace(':','').split()
                else:
                    bad_graph_spec = "Bad graph arg list"
            else:
                args = []
        else:
            bad_graph_spec = ("Bad graph type %s -- use one of %s" %
                              (pieces[0], ', '.join(GRAPH_TYPES)))

    if bad_graph_spec:
        errors.append(ColorizingError(bad_graph_spec, token, end))
        graph.children.append('none')
        graph.children.append('')
        return

    graph.children.append(graphtype)
    for arg in args:
        graph.children.append(arg)
1186 1234 1235 ################################################## 1236 ## Formatters 1237 ################################################## 1238
def to_epytext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string.
    This is the inverse operation from L{parse}.  I.e., assuming there
    are no errors, the following is true:
        - C{parse(to_epytext(tree)) == tree}

    The inverse is true, except that whitespace, line wrapping, and
    character escaping may be done differently.
        - C{to_epytext(parse(str)) == str} (approximately)

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    # Text leaves: temporarily encode literal braces as the sentinel
    # characters \x00 and \x01.  They are decoded higher up, either to
    # E{lb}/E{rb} escapes (paragraphs, headings) or to plain braces
    # (literal and doctest blocks, where braces need no escaping).
    if isinstance(tree, basestring):
        str = re.sub(r'\{', '\0', tree)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tag == 'epytext': indent -= 2
    if tree.tag == 'section': seclevel += 1
    variables = [to_epytext(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back).  Note:
    # the '\2' in this pattern is the literal \x02 sentinel emitted by
    # the 'literalblock' branch below -- not a regex backreference.
    childstr = re.sub(':(\s*)\2', '::\\1', childstr)

    if tree.tag == 'para':
        # Wrap the paragraph, then escape anything the parser would
        # mistake for structural markup at the start of a line
        # (ordered-list numbers, bullets, field tags) or for a
        # literal-block introduction ('::' at end of line).
        str = wordwrap(childstr, indent)+'\n'
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        return str
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' '+ bullet + ' ' + childstr.lstrip()
    elif tree.tag == 'heading':
        str = re.sub('\0', 'E{lb}', childstr)
        str = re.sub('\1', 'E{rb}', str)
        # Underline the heading with the character for this level.
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n'
    elif tree.tag == 'doctestblock':
        # Braces are literal inside doctest blocks.
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        # NOTE(review): the width of the leading space literal below
        # may have been collapsed by the rendering this file was
        # recovered from -- confirm against the original file.
        lines = [' '+indent*' '+line for line in str.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = [(indent+1)*' '+line for line in str.split('\n')]
        # The leading \x02 sentinel lets the parent restore the '::'
        # marker (see the re.sub near the top of this function).
        return '\2' + '\n'.join(lines) + '\n\n'
    elif tree.tag == 'field':
        # A field is a tag child, zero or more 'arg' children, then
        # the field body.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = (indent)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'olist', 'ulist', 'name'):
        # Containers whose markup is carried entirely by their
        # children.
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline colorizing regions: find the single-letter tag that
        # maps to this element type.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
        raise ValueError('Unknown DOM element %r' % tree.tag)
# Substitutions used by to_plaintext() for symbols (E{...}) whose
# name is not itself a reasonable plaintext rendering; any symbol not
# listed here is rendered as its name.
SYMBOL_TO_PLAINTEXT = {
    'crarr': '\\',
    }
def to_plaintext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext to a string representation.
    This representation is similar to the string generated by
    C{to_epytext}, but C{to_plaintext} removes inline markup, prints
    escaped characters in unescaped form, etc.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    # Text leaves pass through unchanged: plaintext output needs no
    # escaping.
    if isinstance(tree, basestring): return tree

    if tree.tag == 'section': seclevel += 1

    # Figure out the child indent level.  List items indent their
    # children past the bullet; most other containers indent by 2.
    if tree.tag == 'epytext': cindent = indent
    elif tree.tag == 'li' and tree.attribs.get('bullet'):
        cindent = indent + 1 + len(tree.attribs.get('bullet'))
    else:
        cindent = indent + 2
    variables = [to_plaintext(c, cindent, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    if tree.tag == 'para':
        return wordwrap(childstr, indent)+'\n'
    elif tree.tag == 'li':
        # We should be able to use getAttribute here; but there's no
        # convenient way to test if an element has an attribute..
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' ' + bullet + ' ' + childstr.lstrip()
    elif tree.tag == 'heading':
        # Title line underlined with the heading character for this
        # section level.
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ((indent-2)*' ' + childstr + '\n' +
                (indent-2)*' ' + uline + '\n')
    elif tree.tag == 'doctestblock':
        lines = [(indent+2)*' '+line for line in childstr.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'literalblock':
        lines = [(indent+1)*' '+line for line in childstr.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'fieldlist':
        return childstr
    elif tree.tag == 'field':
        # A field is a tag child, zero or more 'arg' children, then
        # the field body.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = (indent)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'uri':
        if len(variables) != 2: raise ValueError('Bad URI ')
        elif variables[0] == variables[1]: return '<%s>' % variables[1]
        # NOTE(review): '%r' quotes the link text; '%s' may have been
        # intended here -- confirm before changing the output format.
        else: return '%r<%s>' % (variables[0], variables[1])
    elif tree.tag == 'link':
        if len(variables) != 2: raise ValueError('Bad Link')
        return '%s' % variables[0]
    elif tree.tag in ('olist', 'ulist'):
        # [xx] always use condensed lists.
        ## Use a condensed list if each list item is 1 line long.
        #for child in variables:
        #    if child.count('\n') > 2: return childstr
        return childstr.replace('\n\n', '\n')+'\n'
    elif tree.tag == 'symbol':
        return '%s' % SYMBOL_TO_PLAINTEXT.get(childstr, childstr)
    elif tree.tag == 'graph':
        return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:]))
    else:
        # Assume that anything else can be passed through.
        return childstr
1410
def to_debug(tree, indent=4, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string,
    annotated with extra debugging information.  This function is
    similar to L{to_epytext}, but it adds explicit information about
    where different blocks begin, along the left margin.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    # NOTE(review): the exact widths of the margin-label string
    # literals below (' P>|', ' |', ' LI>|', 'DTST>', ...) may have
    # been collapsed by the rendering this file was recovered from;
    # they are meant to line the labels up in a fixed-width left
    # margin.  Confirm against the original file before relying on
    # the exact output format.
    #
    # Text leaves: encode braces as \x00/\x01 sentinels, as in
    # to_epytext.
    if isinstance(tree, basestring):
        str = re.sub(r'\{', '\0', tree)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tag == 'section': seclevel += 1
    variables = [to_debug(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back).  '\2' is
    # the literal \x02 sentinel emitted by the 'literalblock' branch.
    childstr = re.sub(':( *\n \|\n)\2', '::\\1', childstr)

    if tree.tag == 'para':
        # Same escaping as to_epytext, then prefix each line with the
        # paragraph margin label.
        str = wordwrap(childstr, indent-6, 69)+'\n'
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        lines = str.rstrip().split('\n')
        lines[0] = ' P>|' + lines[0]
        lines[1:] = [' |'+l for l in lines[1:]]
        return '\n'.join(lines)+'\n |\n'
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        return ' LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip()
    elif tree.tag in ('olist', 'ulist'):
        return 'LIST>|'+(indent-4)*' '+childstr[indent+2:]
    elif tree.tag == 'heading':
        str = re.sub('\0', 'E{lb}', childstr)
        str = re.sub('\1', 'E{rb}', str)
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ('SEC'+`seclevel`+'>|'+(indent-8)*' ' + str + '\n' +
                ' |'+(indent-8)*' ' + uline + '\n')
    elif tree.tag == 'doctestblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = [' |'+(indent-4)*' '+line for line in str.split('\n')]
        lines[0] = 'DTST>'+lines[0][5:]
        return '\n'.join(lines) + '\n |\n'
    elif tree.tag == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = [' |'+(indent-5)*' '+line for line in str.split('\n')]
        lines[0] = ' LIT>'+lines[0][5:]
        # The leading \x02 sentinel lets the parent restore '::'.
        return '\2' + '\n'.join(lines) + '\n |\n'
    elif tree.tag == 'field':
        # A field is a tag child, zero or more 'arg' children, then
        # the field body.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        tag = variables[0]
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = ' FLD>|'+(indent-6)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'olist', 'ulist', 'name'):
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline colorizing regions.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
        raise ValueError('Unknown DOM element %r' % tree.tag)
1500 1501 ################################################## 1502 ## Top-Level Wrapper function 1503 ##################################################
def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
    """
    Pretty-parse the string.  This parses the string, and catches any
    warnings or errors produced.  Any warnings and errors are
    displayed, and the resulting DOM parse structure is returned.

    @param str: The string to parse.
    @type str: C{string}
    @param show_warnings: Whether or not to display non-fatal errors
        generated by parsing C{str}.
    @type show_warnings: C{boolean}
    @param show_errors: Whether or not to display fatal errors
        generated by parsing C{str}.
    @type show_errors: C{boolean}
    @param stream: The stream that warnings and errors should be
        written to.
    @type stream: C{stream}
    @return: a DOM document encoding the contents of C{str}.
    @rtype: C{Element}
    @raise SyntaxError: If any fatal errors were encountered.
    """
    errors = []
    # Bug fix: initialize warnings before the try block.  Previously,
    # if parse() raised, 'warnings' was left unbound and the reporting
    # code below died with a NameError instead of re-raising the
    # original exception.
    warnings = []
    confused = 0
    try:
        val = parse(str, errors)
        warnings = [e for e in errors if not e.is_fatal()]
        errors = [e for e in errors if e.is_fatal()]
    except:
        # Deliberately broad: remember that parsing blew up, display
        # whatever was collected, then re-raise via the bare 'raise'
        # below.
        confused = 1

    if not show_warnings: warnings = []
    warnings.sort()
    errors.sort()
    if warnings:
        print >>stream, '='*SCRWIDTH
        print >>stream, "WARNINGS"
        print >>stream, '-'*SCRWIDTH
        for warning in warnings:
            print >>stream, warning.as_warning()
        print >>stream, '='*SCRWIDTH
    if errors and show_errors:
        if not warnings: print >>stream, '='*SCRWIDTH
        print >>stream, "ERRORS"
        print >>stream, '-'*SCRWIDTH
        for error in errors:
            print >>stream, error
        print >>stream, '='*SCRWIDTH

    if confused: raise
    elif errors: raise SyntaxError('Encountered Errors')
    else: return val
1555 1556 ################################################## 1557 ## Parse Errors 1558 ################################################## 1559
class TokenizationError(ParseError):
    """
    A L{ParseError} recorded when the tokenizer encounters a problem
    while splitting a formatted documentation string into tokens.
    """
1565
class StructuringError(ParseError):
    """
    A L{ParseError} recorded when the parser encounters a problem
    while assembling tokens into a document structure.
    """
1571
class ColorizingError(ParseError):
    """
    An error generated while colorizing a paragraph.
    """
    # Number of characters of context shown on each side of the error
    # position by descr().
    CONTEXT_RANGE = 20

    def __init__(self, descr, token, charnum, is_fatal=1):
        """
        Construct a new colorizing exception.

        @param descr: A short description of the error.
        @type descr: C{string}
        @param token: The token where the error occured
        @type token: L{Token}
        @param charnum: The character index of the position in
            C{token} where the error occured.
        @type charnum: C{int}
        """
        ParseError.__init__(self, descr, token.startline, is_fatal)
        self.token = token
        self.charnum = charnum

    def descr(self):
        """
        Return the error description followed by an excerpt of the
        token's contents, with a caret marking the error position.
        """
        window = self.CONTEXT_RANGE
        contents = self.token.contents
        pos = self.charnum
        # Text to the left of the error, abbreviated if necessary.
        if pos <= window:
            left = contents[:pos]
        else:
            left = '...' + contents[pos-window:pos]
        # Text to the right of the error, abbreviated if necessary.
        if len(contents) - pos <= window:
            right = contents[pos:]
        else:
            right = contents[pos:pos+window] + '...'
        return '%s\n\n%s%s\n%s^' % (self._descr, left, right, ' '*len(left))
1605 1606 ################################################## 1607 ## Convenience parsers 1608 ################################################## 1609
def parse_as_literal(str):
    """
    Wrap the given string, unchanged, in a single C{literalblock}
    element inside an C{epytext} document.  This is typically used as
    a fall-back when the parser fails.

    @param str: The string which should be enclosed in a literal
        block.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single literal
        block.
    @rtype: C{Element}
    """
    block = Element('literalblock', str)
    return Element('epytext', block)
1626
def parse_as_para(str):
    """
    Wrap the given string in a single C{para} element inside an
    C{epytext} document.  This can be used to wrap some forms of
    automatically generated information (such as type names) in
    paragraphs.

    @param str: The string which should be enclosed in a paragraph.
    @type str: C{string}

    @return: A DOM document containing C{str} in a single paragraph.
    @rtype: C{Element}
    """
    paragraph = Element('para', str)
    return Element('epytext', paragraph)
1642 1643 ################################################################# 1644 ## SUPPORT FOR EPYDOC 1645 ################################################################# 1646
def parse_docstring(docstring, errors, **options):
    """
    Parse the given docstring, which is formatted using epytext; and
    return a C{ParsedDocstring} representation of its contents.

    @param docstring: The docstring to parse
    @type docstring: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options.  Unknown options are ignored.
        Currently, no extra options are defined.
    @rtype: L{ParsedDocstring}
    """
    tree = parse(docstring, errors)
    return ParsedEpytextDocstring(tree, **options)
1661
class ParsedEpytextDocstring(ParsedDocstring):
    """
    A L{ParsedDocstring} backed by the DOM tree that L{parse} produces
    for an epytext docstring (see L{parse_docstring}).
    """
    # Map from epytext symbol name (the contents of an E{...} region)
    # to the HTML entity used to render it in HTML output.
    SYMBOL_TO_HTML = {
        # Symbols
        '<-': '&larr;', '->': '&rarr;', '^': '&uarr;', 'v': '&darr;',

        # Greek letters
        'alpha': '&alpha;', 'beta': '&beta;', 'gamma': '&gamma;',
        'delta': '&delta;', 'epsilon': '&epsilon;', 'zeta': '&zeta;',
        'eta': '&eta;', 'theta': '&theta;', 'iota': '&iota;',
        'kappa': '&kappa;', 'lambda': '&lambda;', 'mu': '&mu;',
        'nu': '&nu;', 'xi': '&xi;', 'omicron': '&omicron;',
        'pi': '&pi;', 'rho': '&rho;', 'sigma': '&sigma;',
        'tau': '&tau;', 'upsilon': '&upsilon;', 'phi': '&phi;',
        'chi': '&chi;', 'psi': '&psi;', 'omega': '&omega;',
        'Alpha': '&Alpha;', 'Beta': '&Beta;', 'Gamma': '&Gamma;',
        'Delta': '&Delta;', 'Epsilon': '&Epsilon;', 'Zeta': '&Zeta;',
        'Eta': '&Eta;', 'Theta': '&Theta;', 'Iota': '&Iota;',
        'Kappa': '&Kappa;', 'Lambda': '&Lambda;', 'Mu': '&Mu;',
        'Nu': '&Nu;', 'Xi': '&Xi;', 'Omicron': '&Omicron;',
        'Pi': '&Pi;', 'Rho': '&Rho;', 'Sigma': '&Sigma;',
        'Tau': '&Tau;', 'Upsilon': '&Upsilon;', 'Phi': '&Phi;',
        'Chi': '&Chi;', 'Psi': '&Psi;', 'Omega': '&Omega;',

        # HTML character entities
        'larr': '&larr;', 'rarr': '&rarr;', 'uarr': '&uarr;',
        'darr': '&darr;', 'harr': '&harr;', 'crarr': '&crarr;',
        'lArr': '&lArr;', 'rArr': '&rArr;', 'uArr': '&uArr;',
        'dArr': '&dArr;', 'hArr': '&hArr;',
        'copy': '&copy;', 'times': '&times;', 'forall': '&forall;',
        'exist': '&exist;', 'part': '&part;',
        'empty': '&empty;', 'isin': '&isin;', 'notin': '&notin;',
        'ni': '&ni;', 'prod': '&prod;', 'sum': '&sum;',
        'prop': '&prop;', 'infin': '&infin;', 'ang': '&ang;',
        'and': '&and;', 'or': '&or;', 'cap': '&cap;', 'cup': '&cup;',
        'int': '&int;', 'there4': '&there4;', 'sim': '&sim;',
        'cong': '&cong;', 'asymp': '&asymp;', 'ne': '&ne;',
        'equiv': '&equiv;', 'le': '&le;', 'ge': '&ge;',
        'sub': '&sub;', 'sup': '&sup;', 'nsub': '&nsub;',
        'sube': '&sube;', 'supe': '&supe;', 'oplus': '&oplus;',
        'otimes': '&otimes;', 'perp': '&perp;',

        # Alternate (long) names
        'infinity': '&infin;', 'integral': '&int;', 'product': '&prod;',
        '<=': '&le;', '>=': '&ge;',
        }

    # Map from epytext symbol name to the LaTeX markup used to render
    # it in LaTeX output.
    SYMBOL_TO_LATEX = {
        # Symbols
        '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)',
        '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)',

        # Greek letters (use lower case when upcase not available)
        'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)',
        'gamma': r'\(\gamma\)', 'delta': r'\(\delta\)',
        'epsilon': r'\(\epsilon\)', 'zeta': r'\(\zeta\)',
        'eta': r'\(\eta\)', 'theta': r'\(\theta\)',
        'iota': r'\(\iota\)', 'kappa': r'\(\kappa\)',
        'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)',
        'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)',
        'pi': r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)',
        'tau': r'\(\tau\)', 'upsilon': r'\(\upsilon\)',
        'phi': r'\(\phi\)', 'chi': r'\(\chi\)', 'psi': r'\(\psi\)',
        'omega': r'\(\omega\)',

        'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)',
        'Gamma': r'\(\Gamma\)', 'Delta': r'\(\Delta\)',
        'Epsilon': r'\(\epsilon\)', 'Zeta': r'\(\zeta\)',
        'Eta': r'\(\eta\)', 'Theta': r'\(\Theta\)',
        'Iota': r'\(\iota\)', 'Kappa': r'\(\kappa\)',
        'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)',
        'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)',
        'Pi': r'\(\Pi\)',
        # Bug fix: the original mapped the key 'ho' where 'Rho' was
        # clearly intended (cf. the Greek-letter set in
        # SYMBOL_TO_HTML).  'Rho' is added; 'ho' is kept so any
        # accidental reliance on it still works.
        'ho': r'\(\rho\)', 'Rho': r'\(\rho\)',
        'Sigma': r'\(\Sigma\)', 'Tau': r'\(\tau\)',
        'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)',
        'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)',
        'Omega': r'\(\Omega\)',

        # HTML character entities
        'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)',
        'uarr': r'\(\uparrow\)', 'darr': r'\(\downarrow\)',
        'harr': r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)',
        'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)',
        'uArr': r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)',
        'hArr': r'\(\Leftrightarrow\)',
        'copy': r'{\textcopyright}', 'times': r'\(\times\)',
        'forall': r'\(\forall\)', 'exist': r'\(\exists\)',
        'part': r'\(\partial\)', 'empty': r'\(\emptyset\)',
        'isin': r'\(\in\)', 'notin': r'\(\notin\)',
        'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 'sum': r'\(\sum\)',
        'prop': r'\(\propto\)', 'infin': r'\(\infty\)',
        'ang': r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)',
        'cap': r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)',
        'there4': r'\(\therefore\)', 'sim': r'\(\sim\)',
        'cong': r'\(\cong\)', 'asymp': r'\(\approx\)',
        'ne': r'\(\ne\)', 'equiv': r'\(\equiv\)',
        'le': r'\(\le\)', 'ge': r'\(\ge\)',
        'sub': r'\(\subset\)', 'sup': r'\(\supset\)',
        # NOTE(review): 'nsub' maps to \supset in the original, which
        # looks wrong (expected something like \not\subset) -- kept
        # as-is; confirm before changing.
        'nsub': r'\(\supset\)',
        'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)',
        'oplus': r'\(\oplus\)', 'otimes': r'\(\otimes\)',
        'perp': r'\(\perp\)',

        # Alternate (long) names
        'infinity': r'\(\infty\)', 'integral': r'\(\int\)',
        'product': r'\(\prod\)',
        '<=': r'\(\le\)', '>=': r'\(\ge\)',
        }
1763 - def __init__(self, dom_tree, **options):
1764 self._tree = dom_tree 1765 # Caching: 1766 self._html = self._latex = self._plaintext = None 1767 self._terms = None 1768 # inline option -- mark top-level children as inline. 1769 if options.get('inline') and self._tree is not None: 1770 for elt in self._tree.children: 1771 elt.attribs['inline'] = True
1772
1773 - def __str__(self):
1774 return str(self._tree)
1775
1776 - def to_html(self, docstring_linker, directory=None, docindex=None, 1777 context=None, **options):
1778 if self._html is not None: return self._html 1779 if self._tree is None: return '' 1780 indent = options.get('indent', 0) 1781 self._html = self._to_html(self._tree, docstring_linker, directory, 1782 docindex, context, indent) 1783 return self._html
1784
1785 - def to_latex(self, docstring_linker, **options):
1786 if self._latex is not None: return self._latex 1787 if self._tree is None: return '' 1788 indent = options.get('indent', 0) 1789 self._hyperref = options.get('hyperref', 1) 1790 self._latex = self._to_latex(self._tree, docstring_linker, indent) 1791 return self._latex
1792
1793 - def to_plaintext(self, docstring_linker, **options):
1794 # [XX] don't cache -- different options might be used!! 1795 #if self._plaintext is not None: return self._plaintext 1796 if self._tree is None: return '' 1797 if 'indent' in options: 1798 self._plaintext = to_plaintext(self._tree, 1799 indent=options['indent']) 1800 else: 1801 self._plaintext = to_plaintext(self._tree) 1802 return self._plaintext
1803
1804 - def _index_term_key(self, tree):
1805 str = to_plaintext(tree) 1806 str = re.sub(r'\s\s+', '-', str) 1807 return "index-"+re.sub("[^a-zA-Z0-9]", "_", str)
1808
    def _to_html(self, tree, linker, directory, docindex, context,
                 indent=0, seclevel=0):
        """Recursively render the DOM node C{tree} as an HTML string.

        @param tree: An C{Element}, or a plain string (a text leaf).
        @param linker: Translator for crossreference links and index terms.
        @param directory: Output directory; used only for 'graph' nodes,
            where generated images are written.
        @param docindex: API documentation index, forwarded to graph
            construction.
        @param context: The API object this docstring belongs to.
        @param indent: Current indentation level, in spaces.
        @param seclevel: Current section nesting depth; determines the
            C{<hN>} level used for headings.
        @raise ValueError: If C{tree.tag} is not a known epytext element.
        """
        # Text leaves are just escaped.
        if isinstance(tree, basestring):
            return plaintext_to_html(tree)

        # The top-level 'epytext' node compensates for the +2 its children
        # receive below; 'section' increases the heading level.
        if tree.tag == 'epytext': indent -= 2
        if tree.tag == 'section': seclevel += 1

        # Process the variables first.
        variables = [self._to_html(c, linker, directory, docindex, context,
                                   indent+2, seclevel)
                     for c in tree.children]

        # Construct the HTML string for the variables.
        childstr = ''.join(variables)

        # Perform the appropriate action for the DOM tree type.
        if tree.tag == 'para':
            # Inline paragraphs are emitted without <p>...</p> wrappers.
            return wordwrap(
                (tree.attribs.get('inline') and '%s' or '<p>%s</p>') % childstr,
                indent)
        elif tree.tag == 'code':
            style = tree.attribs.get('style')
            if style:
                return '<code class="%s">%s</code>' % (style, childstr)
            else:
                return '<code>%s</code>' % childstr
        elif tree.tag == 'uri':
            # children are [link text, target] in that order.
            return ('<a href="%s" target="_top">%s</a>' %
                    (variables[1], variables[0]))
        elif tree.tag == 'link':
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '<i>%s</i>' % childstr
        elif tree.tag == 'math':
            return '<i class="math">%s</i>' % childstr
        elif tree.tag == 'indexed':
            # Let the linker decide how to render index terms.
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
            #term_key = self._index_term_key(tree)
            #return linker.translate_indexterm(childstr, term_key)
        elif tree.tag == 'bold':
            return '<b>%s</b>' % childstr
        elif tree.tag == 'ulist':
            return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
        elif tree.tag == 'olist':
            start = tree.attribs.get('start') or ''
            return ('%s<ol start="%s">\n%s%s</ol>\n' %
                    (indent*' ', start, childstr, indent*' '))
        elif tree.tag == 'li':
            return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
        elif tree.tag == 'heading':
            # Heading level tracks section nesting depth.
            return ('%s<h%s class="heading">%s</h%s>\n' %
                    ((indent-2)*' ', seclevel, childstr, seclevel))
        elif tree.tag == 'literalblock':
            return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
        elif tree.tag == 'doctestblock':
            return doctest_to_html(tree.children[0].strip())
        elif tree.tag == 'fieldlist':
            # Fields are expected to have been split off by split_fields().
            raise AssertionError("There should not be any field lists left")
        elif tree.tag in ('epytext', 'section', 'tag', 'arg',
                          'name', 'target', 'html'):
            # Container nodes: just pass the rendered children through.
            return childstr
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_HTML.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            # Generate the graph.
            graph = self._build_graph(variables[0], variables[1:], linker,
                                      docindex, context)
            if not graph: return ''
            # Write the graph.
            image_url = '%s.gif' % graph.uid
            image_file = os.path.join(directory, image_url)
            return graph.to_html(image_file, image_url)
        else:
            raise ValueError('Unknown epytext DOM element %r' % tree.tag)
    #GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph']
    def _build_graph(self, graph_type, graph_args, linker,
                     docindex, context):
        """Construct a dot graph object for a 'graph' epytext node.

        @param graph_type: One of C{'classtree'}, C{'packagetree'},
            C{'importgraph'}, or C{'callgraph'}.
        @param graph_args: Names of the base classes / root packages /
            callables to graph; when empty, C{context} is used instead.
        @param linker: Translator for crossreference links.
        @param docindex: API documentation index used to resolve names.
        @param context: The API object this docstring belongs to.
        @return: A graph object, or C{None} if the graph could not be
            constructed (a warning is logged in that case).
        """
        # epydoc imports are deferred to call time -- presumably to avoid
        # circular imports at module load; verify before hoisting them.
        # Generate the graph
        if graph_type == 'classtree':
            from epydoc.apidoc import ClassDoc
            if graph_args:
                bases = [docindex.find(name, context)
                         for name in graph_args]
            elif isinstance(context, ClassDoc):
                bases = [context]
            else:
                log.warning("Could not construct class tree: you must "
                            "specify one or more base classes.")
                return None
            from epydoc.docwriter.dotgraph import class_tree_graph
            return class_tree_graph(bases, linker, context)
        elif graph_type == 'packagetree':
            from epydoc.apidoc import ModuleDoc
            if graph_args:
                packages = [docindex.find(name, context)
                            for name in graph_args]
            elif isinstance(context, ModuleDoc):
                packages = [context]
            else:
                log.warning("Could not construct package tree: you must "
                            "specify one or more root packages.")
                return None
            from epydoc.docwriter.dotgraph import package_tree_graph
            return package_tree_graph(packages, linker, context)
        elif graph_type == 'importgraph':
            from epydoc.apidoc import ModuleDoc
            # Import graphs always cover every module in the index.
            modules = [d for d in docindex.root if isinstance(d, ModuleDoc)]
            from epydoc.docwriter.dotgraph import import_graph
            return import_graph(modules, docindex, linker, context)

        elif graph_type == 'callgraph':
            if graph_args:
                docs = [docindex.find(name, context) for name in graph_args]
                # Drop names the index could not resolve.
                docs = [doc for doc in docs if doc is not None]
            else:
                docs = [context]
            from epydoc.docwriter.dotgraph import call_graph
            return call_graph(docs, docindex, linker, context)
        else:
            # Unknown type: warn and implicitly return None.
            log.warning("Unknown graph type %s" % graph_type)
1933 1934
    def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0):
        """Recursively render the DOM node C{tree} as a LaTeX string.

        @param tree: An C{Element}, or a plain string (a text leaf).
        @param linker: Translator for crossreference links and index terms.
        @param indent: Current indentation level, in spaces.
        @param seclevel: Current section nesting depth.
        @param breakany: Passed through to C{plaintext_to_latex} for
            text leaves.
        @raise ValueError: If a 'uri' or 'link' node does not have
            exactly two children.

        Relies on C{self._hyperref}, which is set by C{to_latex()} --
        this method assumes it is only reached via that entry point.
        """
        # Text leaves are just escaped.
        if isinstance(tree, basestring):
            return plaintext_to_latex(tree, breakany=breakany)

        if tree.tag == 'section': seclevel += 1

        # Figure out the child indent level.
        if tree.tag == 'epytext': cindent = indent
        else: cindent = indent + 2
        variables = [self._to_latex(c, linker, cindent, seclevel, breakany)
                     for c in tree.children]
        childstr = ''.join(variables)

        if tree.tag == 'para':
            return wordwrap(childstr, indent)+'\n'
        elif tree.tag == 'code':
            return '\\texttt{%s}' % childstr
        elif tree.tag == 'uri':
            if len(variables) != 2: raise ValueError('Bad URI ')
            if self._hyperref:
                # ~ and # should not be escaped in the URI.
                uri = tree.children[1].children[0]
                uri = uri.replace('{\\textasciitilde}', '~')
                uri = uri.replace('\\#', '#')
                if variables[0] == variables[1]:
                    return '\\href{%s}{\\textit{%s}}' % (uri, variables[1])
                else:
                    return ('%s\\footnote{\\href{%s}{%s}}' %
                            (variables[0], uri, variables[1]))
            else:
                # No hyperref: fall back to footnotes / italics.
                if variables[0] == variables[1]:
                    return '\\textit{%s}' % variables[1]
                else:
                    return '%s\\footnote{%s}' % (variables[0], variables[1])
        elif tree.tag == 'link':
            if len(variables) != 2: raise ValueError('Bad Link')
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'math':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'indexed':
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
        elif tree.tag == 'bold':
            return '\\textbf{%s}' % childstr
        elif tree.tag == 'li':
            return indent*' ' + '\\item ' + childstr.lstrip()
        elif tree.tag == 'heading':
            return ' '*(indent-2) + '(section) %s\n\n' % childstr
        elif tree.tag == 'doctestblock':
            return doctest_to_latex(tree.children[0].strip())
        elif tree.tag == 'literalblock':
            return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr
        elif tree.tag == 'fieldlist':
            return indent*' '+'{omitted fieldlist}\n'
        elif tree.tag == 'olist':
            return (' '*indent + '\\begin{enumerate}\n\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' +
                    childstr +
                    ' '*indent + '\\end{enumerate}\n\n')
        elif tree.tag == 'ulist':
            return (' '*indent + '\\begin{itemize}\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' +
                    childstr +
                    ' '*indent + '\\end{itemize}\n\n')
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_LATEX.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            return '(GRAPH)'
            #raise ValueError, 'graph not implemented yet for latex'
        else:
            # Assume that anything else can be passed through.
            return childstr
2010 2011 _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)') 2012
2013 - def summary(self):
2014 if self._tree is None: return self, False 2015 tree = self._tree 2016 doc = Element('epytext') 2017 2018 # Find the first paragraph. 2019 variables = tree.children 2020 while (len(variables) > 0) and (variables[0].tag != 'para'): 2021 if variables[0].tag in ('section', 'ulist', 'olist', 'li'): 2022 variables = variables[0].children 2023 else: 2024 variables = variables[1:] 2025 2026 # Special case: if the docstring contains a single literal block, 2027 # then try extracting the summary from it. 2028 if (len(variables) == 0 and len(tree.children) == 1 and 2029 tree.children[0].tag == 'literalblock'): 2030 str = re.split(r'\n\s*(\n|$).*', 2031 tree.children[0].children[0], 1)[0] 2032 variables = [Element('para')] 2033 variables[0].children.append(str) 2034 2035 # If we didn't find a paragraph, return an empty epytext. 2036 if len(variables) == 0: return ParsedEpytextDocstring(doc), False 2037 2038 # Is there anything else, excluding tags, after the first variable? 2039 long_docs = False 2040 for var in variables[1:]: 2041 if isinstance(var, Element) and var.tag == 'fieldlist': 2042 continue 2043 long_docs = True 2044 break 2045 2046 # Extract the first sentence. 2047 parachildren = variables[0].children 2048 para = Element('para', inline=True) 2049 doc.children.append(para) 2050 for parachild in parachildren: 2051 if isinstance(parachild, basestring): 2052 m = self._SUMMARY_RE.match(parachild) 2053 if m: 2054 para.children.append(m.group(1)) 2055 long_docs |= parachild is not parachildren[-1] 2056 if not long_docs: 2057 other = parachild[m.end():] 2058 if other and not other.isspace(): 2059 long_docs = True 2060 return ParsedEpytextDocstring(doc), long_docs 2061 para.children.append(parachild) 2062 2063 return ParsedEpytextDocstring(doc), long_docs
2064
    def split_fields(self, errors=None):
        """Split this docstring into its description and its fields.

        @param errors: Unused by this implementation (kept for interface
            compatibility -- presumably the abstract base's signature;
            verify against epydoc.markup).
        @return: A tuple C{(description, fields)}, where C{description}
            is a C{ParsedEpytextDocstring} without the trailing field
            list (or C{None} if nothing remains), and C{fields} is a
            list of C{Field} objects.
        """
        if self._tree is None: return (self, ())
        # Work on a shallow copy so self._tree is left untouched.
        tree = Element(self._tree.tag, *self._tree.children,
                       **self._tree.attribs)
        fields = []

        # The parser places all fields in a single trailing 'fieldlist'.
        if (tree.children and
            tree.children[-1].tag == 'fieldlist' and
            tree.children[-1].children):
            field_nodes = tree.children[-1].children
            del tree.children[-1]

            for field in field_nodes:
                # Get the tag (first child; removed once consumed).
                tag = field.children[0].children[0].lower()
                del field.children[0]

                # Get the argument.
                if field.children and field.children[0].tag == 'arg':
                    arg = field.children[0].children[0]
                    del field.children[0]
                else:
                    arg = None

                # Process the field: what remains is its body, re-rooted
                # as an 'epytext' tree.
                field.tag = 'epytext'
                fields.append(Field(tag, arg, ParsedEpytextDocstring(field)))

        # Save the remaining docstring as the description..
        if tree.children and tree.children[0].children:
            return ParsedEpytextDocstring(tree), fields
        else:
            return None, fields
2098 2099
2100 - def index_terms(self):
2101 if self._terms is None: 2102 self._terms = [] 2103 self._index_terms(self._tree, self._terms) 2104 return self._terms
2105
2106 - def _index_terms(self, tree, terms):
2107 if tree is None or isinstance(tree, basestring): 2108 return 2109 2110 if tree.tag == 'indexed': 2111 term = Element('epytext', *tree.children, **tree.attribs) 2112 terms.append(ParsedEpytextDocstring(term)) 2113 2114 # Look for index items in child nodes. 2115 for child in tree.children: 2116 self._index_terms(child, terms)
2117