1
2
3
4
5
6
7
8
9 """
10 Parser for epytext strings. Epytext is a lightweight markup whose
11 primary intended application is Python documentation strings. This
12 parser converts Epytext strings to a simple DOM-like representation
13 (encoded as a tree of L{Element} objects and strings). Epytext
14 strings can contain the following X{structural blocks}:
15
16 - X{epytext}: The top-level element of the DOM tree.
17 - X{para}: A paragraph of text. Paragraphs contain no newlines,
18 and all spaces are soft.
19 - X{section}: A section or subsection.
20 - X{field}: A tagged field. These fields provide information
21 about specific aspects of a Python object, such as the
22 description of a function's parameter, or the author of a
23 module.
24 - X{literalblock}: A block of literal text. This text should be
25 displayed as it would be displayed in plaintext. The
26 parser removes the appropriate amount of leading whitespace
27 from each line in the literal block.
28 - X{doctestblock}: A block containing sample python code,
29 formatted according to the specifications of the C{doctest}
30 module.
31 - X{ulist}: An unordered list.
32 - X{olist}: An ordered list.
33 - X{li}: A list item. This tag is used both for unordered list
34 items and for ordered list items.
35
36 Additionally, the following X{inline regions} may be used within
37 C{para} blocks:
38
39 - X{code}: Source code and identifiers.
40 - X{math}: Mathematical expressions.
41 - X{index}: A term which should be included in an index, if one
42 is generated.
43 - X{italic}: Italicized text.
44 - X{bold}: Bold-faced text.
45 - X{uri}: A Universal Resource Indicator (URI) or Universal
46 Resource Locator (URL)
47 - X{link}: A Python identifier which should be hyperlinked to
48 the named object's documentation, when possible.
49
50 The returned DOM tree will conform to the following Document Type
51 Description::
52
53 <!ENTITY % colorized '(code | math | index | italic |
54 bold | uri | link | symbol)*'>
55
56 <!ELEMENT epytext ((para | literalblock | doctestblock |
57 section | ulist | olist)*, fieldlist?)>
58
59 <!ELEMENT para (#PCDATA | %colorized;)*>
60
61 <!ELEMENT section (para | listblock | doctestblock |
62 section | ulist | olist)+>
63
64 <!ELEMENT fieldlist (field+)>
65     <!ELEMENT field (tag, arg?, (para | listblock | doctestblock |
66                                  ulist | olist)+)>
67 <!ELEMENT tag (#PCDATA)>
68 <!ELEMENT arg (#PCDATA)>
69
70 <!ELEMENT literalblock (#PCDATA | %colorized;)*>
71 <!ELEMENT doctestblock (#PCDATA)>
72
73 <!ELEMENT ulist (li+)>
74 <!ELEMENT olist (li+)>
75 <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+>
76 <!ATTLIST li bullet NMTOKEN #IMPLIED>
77 <!ATTLIST olist start NMTOKEN #IMPLIED>
78
79 <!ELEMENT uri (name, target)>
80 <!ELEMENT link (name, target)>
81 <!ELEMENT name (#PCDATA | %colorized;)*>
82 <!ELEMENT target (#PCDATA)>
83
84 <!ELEMENT code (#PCDATA | %colorized;)*>
85 <!ELEMENT math (#PCDATA | %colorized;)*>
86 <!ELEMENT italic (#PCDATA | %colorized;)*>
87 <!ELEMENT bold (#PCDATA | %colorized;)*>
88 <!ELEMENT indexed (#PCDATA | %colorized;)>
89 <!ATTLIST code style CDATA #IMPLIED>
90
91 <!ELEMENT symbol (#PCDATA)>
92
93 @var SYMBOLS: A list of the of escape symbols that are supported
94 by epydoc. Currently the following symbols are supported:
95 <<<SYMBOLS>>>
96 """
97
98
99
100 __docformat__ = 'epytext en'
101
102
103
104
105
106
107
108
109 import re, string, types, sys, os.path
110 from epydoc.markup import *
111 from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex
112 from epydoc.markup.doctest import doctest_to_html, doctest_to_latex
113
114
115
116
117
class Element:
    """
    A very simple DOM-like representation for parsed epytext
    documents.  Each epytext document is encoded as a tree whose nodes
    are L{Element} objects, and whose leaves are C{string}s.  Each
    node is marked by a I{tag} and zero or more I{attributes}.  Each
    attribute is a mapping from a string key to a string value.
    """
    def __init__(self, tag, *children, **attribs):
        self.tag = tag
        """A string tag indicating the type of this element.
        @type: C{string}"""

        self.children = list(children)
        """A list of the children of this element.
        @type: C{list} of (C{string} or C{Element})"""

        self.attribs = attribs
        """A dictionary mapping attribute names to attribute values
        for this element.
        @type: C{dict} from C{string} to C{string}"""

    def __str__(self):
        """
        Return a string representation of this element, using XML
        notation.
        @bug: Doesn't escape '<' or '&' or '>'.
        """
        attribs = ''.join([' %s=%r' % t for t in self.attribs.items()])
        return ('<%s%s>' % (self.tag, attribs) +
                ''.join([str(child) for child in self.children]) +
                '</%s>' % self.tag)

    def __repr__(self):
        attribs = ''.join([', %s=%r' % t for t in self.attribs.items()])
        args = ''.join([', %r' % c for c in self.children])
        return 'Element(%s%s%s)' % (self.tag, args, attribs)
155
156
157
158
159
160
161
162 _HEADING_CHARS = "=-~"
163
164
165 _ESCAPES = {'lb':'{', 'rb': '}'}
166
167
168 SYMBOLS = [
169
170 '<-', '->', '^', 'v',
171
172
173 'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
174 'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
175 'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',
176 'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
177 'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',
178 'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',
179 'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',
180 'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',
181
182
183 'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr',
184 'lArr', 'rArr', 'uArr', 'dArr', 'hArr',
185 'copy', 'times', 'forall', 'exist', 'part',
186 'empty', 'isin', 'notin', 'ni', 'prod', 'sum',
187 'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup',
188 'int', 'there4', 'sim', 'cong', 'asymp', 'ne',
189 'equiv', 'le', 'ge', 'sub', 'sup', 'nsub',
190 'sube', 'supe', 'oplus', 'otimes', 'perp',
191
192
193 'infinity', 'integral', 'product',
194 '>=', '<=',
195 ]
196
197 _SYMBOLS = {}
198 for symbol in SYMBOLS: _SYMBOLS[symbol] = 1
199
200
201 symblist = ' '
202 symblist += ';\n '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol)
203 for symbol in SYMBOLS])
204 __doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist)
205 del symbol, symblist
206
207
208 _COLORIZING_TAGS = {
209 'C': 'code',
210 'M': 'math',
211 'X': 'indexed',
212 'I': 'italic',
213 'B': 'bold',
214 'U': 'uri',
215 'L': 'link',
216 'E': 'escape',
217 'S': 'symbol',
218 'G': 'graph',
219 }
220
221
222 _LINK_COLORIZING_TAGS = ['link', 'uri']
223
224
225
226
227
def parse(str, errors = None):
    """
    Return a DOM tree encoding the contents of an epytext string.  Any
    errors generated during parsing will be stored in C{errors}.

    @param str: The epytext string to parse.
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then fatal errors
        will generate exceptions, and non-fatal errors will be
        ignored.
    @type errors: C{list} of L{ParseError}
    @return: a DOM tree encoding the contents of an epytext string.
    @rtype: C{Element}
    @raise ParseError: If C{errors} is C{None} and an error is
        encountered while parsing.
    """
    # Initialize errors list.
    if errors is None:
        errors = []
        raise_on_error = 1
    else:
        raise_on_error = 0

    # Preprocess the string: normalize newlines and expand tabs.
    str = re.sub('\015\012', '\012', str)
    str = str.expandtabs()

    # Tokenize the input string.
    tokens = _tokenize(str, errors)

    # Have we encountered a field yet?
    encountered_field = 0

    # Create a document to hold the epytext.
    doc = Element('epytext')

    # Maintain two parallel stacks: one contains DOM elements, and
    # gives the ancestors of the current block.  The other contains
    # indentation values, and gives the indentation of the
    # corresponding DOM elements.  An indentation of "None" reflects
    # an unknown indentation.  Use initial dummy elements in the
    # stacks, so we don't have to worry about bounds checking.
    stack = [None, doc]
    indent_stack = [-1, None]

    for token in tokens:
        # Pop any completed blocks off the stack.
        _pop_completed_blocks(token, stack, indent_stack)

        # If Token has type PARA, colorize and add the new paragraph.
        if token.tag == Token.PARA:
            _add_para(doc, token, stack, indent_stack, errors)

        # If Token has type HEADING, add the new section.
        elif token.tag == Token.HEADING:
            _add_section(doc, token, stack, indent_stack, errors)

        # If Token has type LBLOCK, add the new literal block.
        elif token.tag == Token.LBLOCK:
            stack[-1].children.append(token.to_dom(doc))

        # If Token has type DTBLOCK, add the new doctest block.
        elif token.tag == Token.DTBLOCK:
            stack[-1].children.append(token.to_dom(doc))

        # If Token has type BULLET, add the new list item or field.
        elif token.tag == Token.BULLET:
            _add_list(doc, token, stack, indent_stack, errors)
        else:
            assert 0, 'Unknown token type: '+token.tag

        # Fields must be the last top-level elements; complain if
        # anything non-field follows a field.
        if stack[-1].tag == 'field':
            encountered_field = 1
        elif encountered_field == 1:
            if len(stack) <= 3:
                estr = ("Fields must be the final elements in an "+
                        "epytext string.")
                errors.append(StructuringError(estr, token.startline))

    # Graphs are inline markup (G{...}) but behave like block-level
    # elements; hoist them out of paragraphs.
    for child in doc.children:
        _raise_graphs(child, doc)

    # If there was a fatal error, then signal it!
    if len([e for e in errors if e.is_fatal()]) > 0:
        if raise_on_error:
            raise errors[0]
        else:
            return None

    # Return the top-level epytext DOM element.
    return doc
336
def _raise_graphs(tree, parent):
    """
    Move any C{graph} elements in C{tree} up to block level:
    if a non-block element contains a graph child, split that
    element around the graph inside C{parent}'s child list.
    """
    # Recurse to children first, and note any direct graph children.
    have_graph_child = False
    for elt in tree.children:
        if isinstance(elt, Element):
            _raise_graphs(elt, tree)
            if elt.tag == 'graph': have_graph_child = True

    block = ('section', 'fieldlist', 'field', 'ulist', 'olist', 'li')
    if have_graph_child and tree.tag not in block:
        child_index = 0
        for elt in tree.children:
            if isinstance(elt, Element) and elt.tag == 'graph':
                # Replace `tree` in the parent with (left, graph, right).
                parent_index = parent.children.index(tree)
                left = tree.children[:child_index]
                right = tree.children[child_index+1:]
                parent.children[parent_index:parent_index+1] = [
                    Element(tree.tag, *left, **tree.attribs),
                    elt,
                    Element(tree.tag, *right, **tree.attribs)]
                child_index = 0
                parent_index += 2
            else:
                child_index += 1
362
364 """
365 Pop any completed blocks off the stack. This includes any
366 blocks that we have dedented past, as well as any list item
367 blocks that we've dedented to. The top element on the stack
368 should only be a list if we're about to start a new list
369 item (i.e., if the next token is a bullet).
370 """
371 indent = token.indent
372 if indent != None:
373 while (len(stack) > 2):
374 pop = 0
375
376
377 if indent_stack[-1]!=None and indent<indent_stack[-1]: pop=1
378 elif indent_stack[-1]==None and indent<indent_stack[-2]: pop=1
379
380
381
382 elif (token.tag == 'bullet' and indent==indent_stack[-2] and
383 stack[-1].tag in ('li', 'field')): pop=1
384
385
386 elif (stack[-1].tag in ('ulist', 'olist') and
387 (token.tag != 'bullet' or token.contents[-1] == ':')):
388 pop=1
389
390
391 if pop == 0: return
392 stack.pop()
393 indent_stack.pop()
394
def _add_para(doc, para_token, stack, indent_stack, errors):
    """Colorize the given paragraph, and add it to the DOM tree."""
    # If this is the first paragraph inside a list item or field,
    # its indentation defines the (previously unknown) indentation.
    if indent_stack[-1] is None:
        indent_stack[-1] = para_token.indent
    if para_token.indent == indent_stack[-1]:
        # Colorize the paragraph and add it.
        para = _colorize(doc, para_token, errors)
        if para_token.inline:
            para.attribs['inline'] = True
        stack[-1].children.append(para)
    else:
        estr = "Improper paragraph indentation."
        errors.append(StructuringError(estr, para_token.startline))
410
def _add_section(doc, heading_token, stack, indent_stack, errors):
    """Add a new section to the DOM tree, with the given heading."""
    if indent_stack[-1] is None:
        indent_stack[-1] = heading_token.indent
    elif indent_stack[-1] != heading_token.indent:
        estr = "Improper heading indentation."
        errors.append(StructuringError(estr, heading_token.startline))

    # Check for errors: headings may only appear at the top level,
    # nested inside other sections.
    for tok in stack[2:]:
        if tok.tag != "section":
            estr = "Headings must occur at the top level."
            errors.append(StructuringError(estr, heading_token.startline))
            break
    if (heading_token.level+2) > len(stack):
        estr = "Wrong underline character for heading."
        errors.append(StructuringError(estr, heading_token.startline))

    # Pop the appropriate number of headings so we're at the
    # correct level.
    stack[heading_token.level+2:] = []
    indent_stack[heading_token.level+2:] = []

    # Colorize the heading.
    head = _colorize(doc, heading_token, errors, 'heading')

    # Add the section's and heading's DOM elements.
    sec = Element("section")
    stack[-1].children.append(sec)
    stack.append(sec)
    sec.children.append(head)
    indent_stack.append(None)
443
def _add_list(doc, bullet_token, stack, indent_stack, errors):
    """
    Add a new list item or field to the DOM tree, with the given
    bullet or field tag.  When necessary, create the associated
    list.
    """
    # Determine what type of bullet it is.
    if bullet_token.contents[-1] == '-':
        list_type = 'ulist'
    elif bullet_token.contents[-1] == '.':
        list_type = 'olist'
    elif bullet_token.contents[-1] == ':':
        list_type = 'fieldlist'
    else:
        raise AssertionError('Bad Bullet: %r' % bullet_token.contents)

    # Is this a new list?
    newlist = 0
    if stack[-1].tag != list_type:
        newlist = 1
    elif list_type == 'olist' and stack[-1].tag == 'olist':
        # An ordered list is "new" unless its bullet number directly
        # continues the previous item's numbering.
        old_listitem = stack[-1].children[-1]
        old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1]
        new_bullet = bullet_token.contents.split('.')[:-1]
        if (new_bullet[:-1] != old_bullet[:-1] or
            int(new_bullet[-1]) != int(old_bullet[-1])+1):
            newlist = 1

    # Create the new list.
    if newlist:
        # BUGFIX: was `stack[-1].tag is 'fieldlist'` -- identity
        # comparison against a string literal is unreliable.
        if stack[-1].tag == 'fieldlist':
            # The new list item is not a field list item (since this
            # is a new list); but it's indented the same as the field
            # list.  This either means that they forgot to indent the
            # list, or they are trying to put something after the
            # field list.  The first one seems more likely, so we'll
            # just warn about that (to avoid confusion).
            estr = "Lists must be indented."
            errors.append(StructuringError(estr, bullet_token.startline))
        if stack[-1].tag in ('ulist', 'olist', 'fieldlist'):
            stack.pop()
            indent_stack.pop()

        if (list_type != 'fieldlist' and indent_stack[-1] is not None and
            bullet_token.indent == indent_stack[-1]):
            # The list has the same indentation as its parent; this
            # is only acceptable on the very first docstring line.
            if bullet_token.startline != 1 or bullet_token.indent != 0:
                estr = "Lists must be indented."
                errors.append(StructuringError(estr, bullet_token.startline))

        if list_type == 'fieldlist':
            # Fieldlist should be at the top-level.
            for tok in stack[2:]:
                if tok.tag != "section":
                    estr = "Fields must be at the top level."
                    errors.append(
                        StructuringError(estr, bullet_token.startline))
                    break
            stack[2:] = []
            indent_stack[2:] = []

        # Add the new list.
        lst = Element(list_type)
        stack[-1].children.append(lst)
        stack.append(lst)
        indent_stack.append(bullet_token.indent)
        if list_type == 'olist':
            start = bullet_token.contents.split('.')[:-1]
            # BUGFIX: was `start != '1'` -- comparing a list to a
            # string is always true, so every olist got a "start".
            if start != ['1']:
                lst.attribs["start"] = start[-1]

    # Fields are treated somewhat specially: a "fieldlist" node is
    # created to make parsing simpler, but the field's tag/arg are
    # split out into their own child elements.
    if list_type == 'fieldlist':
        li = Element("field")
        token_words = bullet_token.contents[1:-1].split(None, 1)
        tag_elt = Element("tag")
        tag_elt.children.append(token_words[0])
        li.children.append(tag_elt)

        if len(token_words) > 1:
            arg_elt = Element("arg")
            arg_elt.children.append(token_words[1])
            li.children.append(arg_elt)
    else:
        li = Element("li")
        if list_type == 'olist':
            li.attribs["bullet"] = bullet_token.contents

    # Add the bullet.
    stack[-1].children.append(li)
    stack.append(li)
    indent_stack.append(None)
541
542
543
544
545
class Token:
    """
    C{Token}s are an intermediate data structure used while
    constructing the structuring DOM tree for a formatted docstring.
    There are five types of C{Token}:

        - Paragraphs
        - Literal blocks
        - Doctest blocks
        - Headings
        - Bullets

    The text contained in each C{Token} is stored in the
    C{contents} variable.  The string in this variable has been
    normalized.  For paragraphs, this means that it has been converted
    into a single line of text, with newline/indentation replaced by
    single spaces.  For literal blocks and doctest blocks, this means
    that the appropriate amount of leading whitespace has been removed
    from each line.

    Each C{Token} has an indentation level associated with it,
    stored in the C{indent} variable.  This indentation level is used
    by the structuring procedure to assemble hierarchical blocks.

    @type tag: C{string}
    @ivar tag: This C{Token}'s type.  Possible values are C{Token.PARA}
        (paragraph), C{Token.LBLOCK} (literal block), C{Token.DTBLOCK}
        (doctest block), C{Token.HEADING}, and C{Token.BULLET}.

    @type startline: C{int}
    @ivar startline: The line on which this C{Token} begins.  This
        line number is only used for issuing errors.

    @type contents: C{string}
    @ivar contents: The normalized text contained in this C{Token}.

    @type indent: C{int} or C{None}
    @ivar indent: The indentation level of this C{Token} (in
        number of leading spaces).  A value of C{None} indicates an
        unknown indentation; this is used for list items and fields
        that begin with one-line paragraphs.

    @type level: C{int} or C{None}
    @ivar level: The heading-level of this C{Token} if it is a
        heading; C{None}, otherwise.  Valid heading levels are 0, 1,
        and 2.

    @type inline: C{bool}
    @ivar inline: If True, the element is an inline level element, comparable
        to an HTML C{<span>} tag.  Else, it is a block level element, comparable
        to an HTML C{<div>}.

    @type PARA: C{string}
    @cvar PARA: The C{tag} value for paragraph C{Token}s.
    @type LBLOCK: C{string}
    @cvar LBLOCK: The C{tag} value for literal C{Token}s.
    @type DTBLOCK: C{string}
    @cvar DTBLOCK: The C{tag} value for doctest C{Token}s.
    @type HEADING: C{string}
    @cvar HEADING: The C{tag} value for heading C{Token}s.
    @type BULLET: C{string}
    @cvar BULLET: The C{tag} value for bullet C{Token}s.  This C{tag}
        value is also used for field tag C{Token}s, since fields
        function syntactically the same as list items.
    """
    # The possible token types.
    PARA = "para"
    LBLOCK = "literalblock"
    DTBLOCK = "doctestblock"
    HEADING = "heading"
    BULLET = "bullet"

    def __init__(self, tag, startline, contents, indent, level=None,
                 inline=False):
        """
        Create a new C{Token}.

        @param tag: The type of the new C{Token}.
        @type tag: C{string}
        @param startline: The line on which the new C{Token} begins.
        @type startline: C{int}
        @param contents: The normalized contents of the new C{Token}.
        @type contents: C{string}
        @param indent: The indentation of the new C{Token} (in number
            of leading spaces).  A value of C{None} indicates an
            unknown indentation.
        @type indent: C{int} or C{None}
        @param level: The heading-level of this C{Token} if it is a
            heading; C{None}, otherwise.
        @type level: C{int} or C{None}
        @param inline: Is this C{Token} inline as a C{<span>}?
        @type inline: C{bool}
        """
        self.tag = tag
        self.startline = startline
        self.contents = contents
        self.indent = indent
        self.level = level
        self.inline = inline

    def __repr__(self):
        """
        @rtype: C{string}
        @return: the formal representation of this C{Token}.
            C{Token}s have formal representations of the form::
                <Token: para at line 12>
        """
        return '<Token: %s at line %s>' % (self.tag, self.startline)

    def to_dom(self, doc):
        """
        @return: a DOM representation of this C{Token}.
        @rtype: L{Element}
        """
        e = Element(self.tag)
        e.children.append(self.contents)
        return e
663
664
665
666
667 _ULIST_BULLET = '[-]( +|$)'
668 _OLIST_BULLET = '(\d+[.])+( +|$)'
669 _FIELD_BULLET = '@\w+( [^{}:\n]+)?:'
670 _BULLET_RE = re.compile(_ULIST_BULLET + '|' +
671 _OLIST_BULLET + '|' +
672 _FIELD_BULLET)
673 _LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET)
674 _FIELD_BULLET_RE = re.compile(_FIELD_BULLET)
675 del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET
676
def _tokenize_doctest(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the doctest block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the doctest block.  Any errors
    generated while tokenizing the doctest block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        doctest block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the doctest block.
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then errors will
        generate exceptions.
    @return: The line number of the first line following the doctest
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    # If they dedent past block_indent, keep track of the minimum
    # indentation; it is used when stripping leading whitespace.
    min_indent = block_indent

    linenum = start + 1
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # A blank line ends the doctest block.
        if indent == len(line): break

        # A dedent past block_indent is an error.
        if indent < block_indent:
            min_indent = min(min_indent, indent)
            estr = 'Improper doctest block indentation.'
            errors.append(TokenizationError(estr, linenum))

        # Go on to the next line.
        linenum += 1

    # Add the token, and return the linenum after the token ends.
    contents = [line[min_indent:] for line in lines[start:linenum]]
    contents = '\n'.join(contents)
    tokens.append(Token(Token.DTBLOCK, start, contents, block_indent))
    return linenum
731
def _tokenize_literal(lines, start, block_indent, tokens, errors):
    """
    Construct a L{Token} containing the literal block starting at
    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
    should be the indentation of the literal block.  Any errors
    generated while tokenizing the literal block will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        literal block to be tokenized.
    @param block_indent: The indentation of C{lines[start]}.  This is
        the indentation of the literal block.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this block will be
        appended to this list.
    @return: The line number of the first line following the literal
        block.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type block_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # A dedent to (or past) block_indent ends the literal block,
        # but blank lines do not.
        if len(line) != indent and indent <= block_indent:
            break

        # Go on to the next line.
        linenum += 1

    # Add the token, stripping the common leading indentation plus
    # one extra space, and trimming blank lines at either end.
    contents = [line[block_indent+1:] for line in lines[start:linenum]]
    contents = '\n'.join(contents)
    contents = re.sub(r'(\A[ \n]*\n)|(\n[ \n]*\Z)', '', contents)
    tokens.append(Token(Token.LBLOCK, start, contents, block_indent))
    return linenum
778
def _tokenize_listart(lines, start, bullet_indent, tokens, errors):
    """
    Construct L{Token}s for the bullet and the first paragraph of the
    list item (or field) starting at C{lines[start]}, and append them
    to C{tokens}.  C{bullet_indent} should be the indentation of the
    list item.  Any errors generated while tokenizing will be
    appended to C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        list item to be tokenized.
    @param bullet_indent: The indentation of C{lines[start]}.  This is
        the indentation of the list item.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph will be
        appended to this list.
    @return: The line number of the first line following the list
        item's first paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type bullet_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    para_indent = None
    doublecolon = lines[start].rstrip()[-2:] == '::'

    # Get the contents of the bullet.
    para_start = _BULLET_RE.match(lines[start], bullet_indent).end()
    bcontents = lines[start][bullet_indent:para_start].strip()

    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # "::" ends a paragraph.
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # A blank line ends the token.
        if indent == len(line): break

        # Dedenting past bullet_indent ends the list item.
        if indent < bullet_indent: break

        # A line beginning with a bullet ends the token.
        if _BULLET_RE.match(line, indent): break

        # The second line establishes the paragraph indentation.
        if para_indent is None: para_indent = indent

        # A change in indentation ends the token.
        if indent != para_indent: break

        # Go on to the next line.
        linenum += 1

    # Add the bullet token.
    tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent,
                        inline=True))

    # Add the paragraph token (if the item has any text).
    pcontents = ([lines[start][para_start:].strip()] +
                 [line.strip() for line in lines[start+1:linenum]])
    pcontents = ' '.join(pcontents).strip()
    if pcontents:
        tokens.append(Token(Token.PARA, start, pcontents, para_indent,
                            inline=True))

    # Return the linenum after the paragraph token ends.
    return linenum
855
def _tokenize_para(lines, start, para_indent, tokens, errors):
    """
    Construct a L{Token} containing the paragraph starting at
    C{lines[start]}, and append it to C{tokens}.  C{para_indent}
    should be the indentation of the paragraph.  Any errors
    generated while tokenizing the paragraph will be appended to
    C{errors}.

    @param lines: The list of lines to be tokenized
    @param start: The index into C{lines} of the first line of the
        paragraph to be tokenized.
    @param para_indent: The indentation of C{lines[start]}.  This is
        the indentation of the paragraph.
    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph will be
        appended to this list.
    @return: The line number of the first line following the
        paragraph.

    @type lines: C{list} of C{string}
    @type start: C{int}
    @type para_indent: C{int}
    @type tokens: C{list} of L{Token}
    @type errors: C{list} of L{ParseError}
    @rtype: C{int}
    """
    linenum = start + 1
    doublecolon = 0
    while linenum < len(lines):
        # Find the indentation of this line.
        line = lines[linenum]
        indent = len(line) - len(line.lstrip())

        # "::" ends a paragraph.
        if doublecolon: break
        if line.rstrip()[-2:] == '::': doublecolon = 1

        # Blank lines end paragraphs.
        if indent == len(line): break

        # Indentation changes end paragraphs.
        if indent != para_indent: break

        # List bullets end paragraphs.
        if _BULLET_RE.match(line, indent): break

        # Check for mal-formatted field items.
        if line[indent] == '@':
            estr = "Possible mal-formatted field item."
            errors.append(TokenizationError(estr, linenum, is_fatal=0))

        # Go on to the next line.
        linenum += 1

    contents = [line.strip() for line in lines[start:linenum]]

    # Does this token look like a two-line heading (title + underline)?
    if ((len(contents) < 2) or
        (contents[1][0] not in _HEADING_CHARS) or
        (abs(len(contents[0])-len(contents[1])) > 5)):
        looks_like_heading = 0
    else:
        looks_like_heading = 1
        for char in contents[1]:
            if char != contents[1][0]:
                looks_like_heading = 0
                break

    if looks_like_heading:
        if len(contents[0]) != len(contents[1]):
            estr = ("Possible heading typo: the number of "+
                    "underline characters must match the "+
                    "number of heading characters.")
            errors.append(TokenizationError(estr, start, is_fatal=0))
        else:
            level = _HEADING_CHARS.index(contents[1][0])
            tokens.append(Token(Token.HEADING, start,
                                contents[0], para_indent, level))
            return start+2

    # Add the paragraph token, and return the linenum after it ends.
    contents = ' '.join(contents)
    tokens.append(Token(Token.PARA, start, contents, para_indent))
    return linenum
940
def _tokenize(str, errors):
    """
    Split a given formatted docstring into an ordered list of
    C{Token}s, according to the epytext markup rules.

    @param str: The epytext string
    @type str: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.  If no list is specified, then errors will
        generate exceptions.
    @type errors: C{list} of L{ParseError}
    @return: a list of the C{Token}s that make up the given string.
    @rtype: C{list} of L{Token}
    """
    tokens = []
    lines = str.split('\n')

    # Scan through the lines, determining what type of token we're
    # dealing with, and tokenizing it, as appropriate.
    linenum = 0
    while linenum < len(lines):
        # Get the current line and its indentation.
        line = lines[linenum]
        indent = len(line)-len(line.lstrip())

        if indent == len(line):
            # Ignore blank lines.
            linenum += 1
            continue
        elif line[indent:indent+4] == '>>> ':
            # Blocks starting with ">>> " are doctest block tokens.
            linenum = _tokenize_doctest(lines, linenum, indent,
                                        tokens, errors)
        elif _BULLET_RE.match(line, indent):
            # Blocks starting with a bullet are LI start tokens.
            linenum = _tokenize_listart(lines, linenum, indent,
                                        tokens, errors)
            if tokens[-1].indent is not None:
                indent = tokens[-1].indent
        else:
            # Check for mal-formatted field items.
            if line[indent] == '@':
                estr = "Possible mal-formatted field item."
                errors.append(TokenizationError(estr, linenum, is_fatal=0))

            # Anything else is a paragraph token.
            linenum = _tokenize_para(lines, linenum, indent, tokens, errors)

        # Paragraph tokens ending in '::' initiate literal blocks.
        if (tokens[-1].tag == Token.PARA and
            tokens[-1].contents[-2:] == '::'):
            tokens[-1].contents = tokens[-1].contents[:-1]
            linenum = _tokenize_literal(lines, linenum, indent, tokens, errors)

    return tokens
996
997
998
999
1000
1001
1002
1003 _BRACE_RE = re.compile('{|}')
1004 _TARGET_RE = re.compile('^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')
1005
def _colorize(doc, token, errors, tagName='para'):
    """
    Given a string containing the contents of a paragraph, produce a
    DOM C{Element} encoding that paragraph.  Colorized regions are
    represented using DOM C{Element}s, and text is represented using
    DOM C{Text}s.

    @param errors: A list of errors.  Any newly generated errors will
        be appended to this list.
    @type errors: C{list} of C{string}

    @param tagName: The element tag for the DOM C{Element} that should
        be generated.
    @type tagName: C{string}

    @return: a DOM C{Element} encoding the given paragraph.
    @returntype: C{Element}
    """
    str = token.contents
    linenum = 0

    # Maintain a stack of DOM elements, containing the ancestors of
    # the text currently being analyzed.  New elements are pushed when
    # "{" is encountered, and old elements are popped when "}" is
    # encountered.
    stack = [Element(tagName)]

    # This is just used to make error-reporting easier.  It's a stack
    # parallel to "stack", containing the index of each element's
    # open brace.
    openbrace_stack = [0]

    # Process the string, scanning for '{' and '}'s.  start is the
    # index of the first unprocessed character.
    start = 0
    while 1:
        match = _BRACE_RE.search(str, start)
        if match is None: break
        end = match.start()

        # Open braces start new colorizing elements.  When preceded
        # by a capital letter, they specify a colored region, as
        # defined by the _COLORIZING_TAGS dictionary.  Otherwise, use
        # a special "literal braces" element (with tag "litbrace"),
        # and convert them to literal braces once we find the matching
        # close brace.
        if match.group() == '{':
            if (end>0) and 'A' <= str[end-1] <= 'Z':
                if (end-1) > start:
                    stack[-1].children.append(str[start:end-1])
                if str[end-1] not in _COLORIZING_TAGS:
                    estr = "Unknown inline markup tag."
                    errors.append(ColorizingError(estr, token, end-1))
                    stack.append(Element('unknown'))
                else:
                    tag = _COLORIZING_TAGS[str[end-1]]
                    stack.append(Element(tag))
            else:
                if end > start:
                    stack[-1].children.append(str[start:end])
                stack.append(Element('litbrace'))
            openbrace_stack.append(end)
            stack[-2].children.append(stack[-1])

        # Close braces end colorizing elements.
        elif match.group() == '}':
            # Check for (and ignore) unbalanced braces.
            if len(stack) <= 1:
                estr = "Unbalanced '}'."
                errors.append(ColorizingError(estr, token, end))
                start = end + 1
                continue

            # Add any remaining text.
            if end > start:
                stack[-1].children.append(str[start:end])

            # Special handling for symbols:
            if stack[-1].tag == 'symbol':
                if (len(stack[-1].children) != 1 or
                    not isinstance(stack[-1].children[0], basestring)):
                    estr = "Invalid symbol code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    symb = stack[-1].children[0]
                    if symb in _SYMBOLS:
                        # It's a symbol
                        stack[-2].children[-1] = Element('symbol', symb)
                    else:
                        estr = "Invalid symbol code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for escape elements:
            if stack[-1].tag == 'escape':
                if (len(stack[-1].children) != 1 or
                    not isinstance(stack[-1].children[0], basestring)):
                    estr = "Invalid escape code."
                    errors.append(ColorizingError(estr, token, end))
                else:
                    escp = stack[-1].children[0]
                    if escp in _ESCAPES:
                        # It's an escape from _ESCAPES
                        stack[-2].children[-1] = _ESCAPES[escp]
                    elif len(escp) == 1:
                        # It's a single-character escape (eg E{.})
                        stack[-2].children[-1] = escp
                    else:
                        estr = "Invalid escape code."
                        errors.append(ColorizingError(estr, token, end))

            # Special handling for literal braces elements:
            if stack[-1].tag == 'litbrace':
                stack[-2].children[-1:] = ['{'] + stack[-1].children + ['}']

            # Special handling for graphs:
            if stack[-1].tag == 'graph':
                _colorize_graph(doc, stack[-1], token, end, errors)

            # Special handling for link-type elements:
            if stack[-1].tag in _LINK_COLORIZING_TAGS:
                _colorize_link(doc, stack[-1], token, end, errors)

            # Pop the completed element.
            openbrace_stack.pop()
            stack.pop()

        start = end+1

    # Add any final text.
    if start < len(str):
        stack[-1].children.append(str[start:])

    if len(stack) != 1:
        estr = "Unbalanced '{'."
        errors.append(ColorizingError(estr, token, openbrace_stack[-1]))

    return stack[0]
1145
1146 GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph']
1147
1149 """
1150 Eg::
1151 G{classtree}
1152 G{classtree x, y, z}
1153 G{importgraph}
1154 """
1155 bad_graph_spec = False
1156
1157 children = graph.children[:]
1158 graph.children = []
1159
1160 if len(children) != 1 or not isinstance(children[0], basestring):
1161 bad_graph_spec = "Bad graph specification"
1162 else:
1163 pieces = children[0].split(None, 1)
1164 graphtype = pieces[0].replace(':','').strip().lower()
1165 if graphtype in GRAPH_TYPES:
1166 if len(pieces) == 2:
1167 if re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*', pieces[1]):
1168 args = pieces[1].replace(',', ' ').replace(':','').split()
1169 else:
1170 bad_graph_spec = "Bad graph arg list"
1171 else:
1172 args = []
1173 else:
1174 bad_graph_spec = ("Bad graph type %s -- use one of %s" %
1175 (pieces[0], ', '.join(GRAPH_TYPES)))
1176
1177 if bad_graph_spec:
1178 errors.append(ColorizingError(bad_graph_spec, token, end))
1179 graph.children.append('none')
1180 graph.children.append('')
1181 return
1182
1183 graph.children.append(graphtype)
1184 for arg in args:
1185 graph.children.append(arg)
1186
1188 variables = link.children[:]
1189
1190
1191 if len(variables)==0 or not isinstance(variables[-1], basestring):
1192 estr = "Bad %s target." % link.tag
1193 errors.append(ColorizingError(estr, token, end))
1194 return
1195
1196
1197 match2 = _TARGET_RE.match(variables[-1])
1198 if match2:
1199 (text, target) = match2.groups()
1200 variables[-1] = text
1201
1202 elif len(variables) == 1:
1203 target = variables[0]
1204 else:
1205 estr = "Bad %s target." % link.tag
1206 errors.append(ColorizingError(estr, token, end))
1207 return
1208
1209
1210 name_elt = Element('name', *variables)
1211
1212
1213
1214 target = re.sub(r'\s', '', target)
1215 if link.tag=='uri':
1216 if not re.match(r'\w+:', target):
1217 if re.match(r'\w+@(\w+)(\.\w+)*', target):
1218 target = 'mailto:' + target
1219 else:
1220 target = 'http://'+target
1221 elif link.tag=='link':
1222
1223 target = re.sub(r'\(.*\)$', '', target)
1224 if not re.match(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$', target):
1225 estr = "Bad link target."
1226 errors.append(ColorizingError(estr, token, end))
1227 return
1228
1229
1230 target_elt = Element('target', target)
1231
1232
1233 link.children = [name_elt, target_elt]
1234
1235
1236
1237
1238
def to_epytext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string.
    This is the inverse operation from L{parse}.  I.e., assuming there
    are no errors, the following is true:
        - C{parse(to_epytext(tree)) == tree}

    The inverse is true, except that whitespace, line wrapping, and
    character escaping may be done differently.
        - C{to_epytext(parse(str)) == str} (approximately)

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    # Text leaves: protect literal braces with the sentinel characters
    # '\0' (for '{') and '\1' (for '}').  The enclosing block's branch
    # below decides whether they become E{lb}/E{rb} escapes (in
    # paragraphs/headings) or plain braces (in literal blocks).
    if isinstance(tree, basestring):
        str = re.sub(r'\{', '\0', tree)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tag == 'epytext': indent -= 2
    if tree.tag == 'section': seclevel += 1
    variables = [to_epytext(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # Clean up for literal blocks: a rendered literal block starts with
    # the sentinel '\2' (see the 'literalblock' branch below); merge it
    # with a preceding paragraph's trailing ':' to regenerate the '::'
    # literal-block marker.  (Note: '\2' here is the character chr(2),
    # not a regex backreference.)
    childstr = re.sub(':(\s*)\2', '::\\1', childstr)

    if tree.tag == 'para':
        # Word-wrap, then escape anything that could be re-parsed as
        # epytext structure: leading enumerations, bullets, field
        # markers, trailing '::', and the brace sentinels.
        str = wordwrap(childstr, indent)+'\n'
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        return str
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' '+ bullet + ' ' + childstr.lstrip()
    elif tree.tag == 'heading':
        str = re.sub('\0', 'E{lb}',childstr)
        str = re.sub('\1', 'E{rb}', str)
        # Underline with the character for this section depth.
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n'
    elif tree.tag == 'doctestblock':
        # Doctest blocks are literal: restore real braces.
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['  '+indent*' '+line for line in str.split('\n')]
        return '\n'.join(lines) + '\n\n'
    elif tree.tag == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = [(indent+1)*' '+line for line in str.split('\n')]
        # Prefix with the '\2' sentinel consumed by the '::' cleanup
        # in the parent's rendering (see above).
        return '\2' + '\n'.join(lines) + '\n\n'
    elif tree.tag == 'field':
        # children = [tag, arg*, body...]; count the leading args.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        tag = variables[0]   # (unused; kept for symmetry)
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = (indent)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'olist', 'ulist', 'name'):
        # Structural containers: just concatenate the children.
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline markup: find the single-letter tag for this element.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
    raise ValueError('Unknown DOM element %r' % tree.tag)
1325
1326 SYMBOL_TO_PLAINTEXT = {
1327 'crarr': '\\',
1328 }
1329
def to_plaintext(tree, indent=0, seclevel=0):
    """
    Convert a DOM document encoding epytext to a plaintext string.
    The result is similar to the output of L{to_epytext}, except that
    inline markup is removed and escaped characters appear in their
    unescaped form.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The plaintext string corresponding to C{tree}.
    @rtype: C{string}
    """
    # Plain strings pass through untouched.
    if isinstance(tree, basestring):
        return tree

    if tree.tag == 'section':
        seclevel += 1

    # Choose the indentation for this node's children: the top-level
    # element adds none, a bulleted list item indents past its bullet
    # and trailing space, everything else indents by two.
    if tree.tag == 'epytext':
        child_indent = indent
    elif tree.tag == 'li' and tree.attribs.get('bullet'):
        child_indent = indent + 1 + len(tree.attribs.get('bullet'))
    else:
        child_indent = indent + 2
    pieces = [to_plaintext(child, child_indent, seclevel)
              for child in tree.children]
    rendered = ''.join(pieces)

    tag = tree.tag
    if tag == 'para':
        return wordwrap(rendered, indent) + '\n'
    if tag == 'li':
        # The bullet is outdented relative to the item's contents.
        bullet = tree.attribs.get('bullet') or '-'
        return indent*' ' + bullet + ' ' + rendered.lstrip()
    if tag == 'heading':
        underline = len(rendered) * _HEADING_CHARS[seclevel-1]
        return ((indent-2)*' ' + rendered + '\n' +
                (indent-2)*' ' + underline + '\n')
    if tag == 'doctestblock':
        indented = [(indent+2)*' ' + ln for ln in rendered.split('\n')]
        return '\n'.join(indented) + '\n\n'
    if tag == 'literalblock':
        indented = [(indent+1)*' ' + ln for ln in rendered.split('\n')]
        return '\n'.join(indented) + '\n\n'
    if tag == 'fieldlist':
        return rendered
    if tag == 'field':
        # children = [tag, arg*, body...]; count the leading args.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg':
            numargs += 1
        args = pieces[1:1+numargs]
        field_body = pieces[1+numargs:]
        header = indent*' ' + '@' + pieces[0]
        if args:
            header += '(' + ', '.join(args) + ')'
        return header + ':\n' + ''.join(field_body)
    if tag == 'uri':
        if len(pieces) != 2:
            raise ValueError('Bad URI ')
        if pieces[0] == pieces[1]:
            return '<%s>' % pieces[1]
        return '%r<%s>' % (pieces[0], pieces[1])
    if tag == 'link':
        if len(pieces) != 2:
            raise ValueError('Bad Link')
        return '%s' % pieces[0]
    if tag in ('olist', 'ulist'):
        # Tighten the list up: drop the blank lines between items.
        return rendered.replace('\n\n', '\n') + '\n'
    if tag == 'symbol':
        return '%s' % SYMBOL_TO_PLAINTEXT.get(rendered, rendered)
    if tag == 'graph':
        return '<<%s graph: %s>>' % (pieces[0], ', '.join(pieces[1:]))
    # Inline markup (bold, italic, code, ...) is simply dropped.
    return rendered
1410
def to_debug(tree, indent=4, seclevel=0):
    """
    Convert a DOM document encoding epytext back to an epytext string,
    annotated with extra debugging information.  This function is
    similar to L{to_epytext}, but it adds explicit information about
    where different blocks begin, along the left margin.

    @param tree: A DOM document encoding of an epytext string.
    @type tree: C{Element}
    @param indent: The indentation for the string representation of
        C{tree}.  Each line of the returned string will begin with
        C{indent} space characters.
    @type indent: C{int}
    @param seclevel: The section level that C{tree} appears at.  This
        is used to generate section headings.
    @type seclevel: C{int}
    @return: The epytext string corresponding to C{tree}.
    @rtype: C{string}
    """
    # Text leaves: protect literal braces with the '\0'/'\1' sentinels,
    # exactly as in L{to_epytext}.
    if isinstance(tree, basestring):
        str = re.sub(r'\{', '\0', tree)
        str = re.sub(r'\}', '\1', str)
        return str

    if tree.tag == 'section': seclevel += 1
    variables = [to_debug(c, indent+2, seclevel) for c in tree.children]
    childstr = ''.join(variables)

    # Clean up for literal blocks (add the double "::" back).  The '\2'
    # sentinel marks the start of a rendered literal block; merge it
    # with a preceding paragraph's trailing ':' across the margin rule.
    childstr = re.sub(':( *\n     \|\n)\2', '::\\1', childstr)

    if tree.tag == 'para':
        # Wrap the paragraph, escape characters that could be re-parsed
        # as markup, then draw the 6-column ' P>|' margin annotation.
        str = wordwrap(childstr, indent-6, 69)+'\n'
        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
        str = re.sub('\0', 'E{lb}', str)
        str = re.sub('\1', 'E{rb}', str)
        lines = str.rstrip().split('\n')
        lines[0] = '   P>|' + lines[0]
        lines[1:] = ['     |'+l for l in lines[1:]]
        return '\n'.join(lines)+'\n     |\n'
    elif tree.tag == 'li':
        bullet = tree.attribs.get('bullet') or '-'
        # childstr already carries the child's margin; strip its first
        # 6 columns and replace them with the list-item marker.
        return '  LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip()
    elif tree.tag in ('olist', 'ulist'):
        return 'LIST>|'+(indent-4)*' '+childstr[indent+2:]
    elif tree.tag == 'heading':
        str = re.sub('\0', 'E{lb}', childstr)
        str = re.sub('\1', 'E{rb}', str)
        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        # str(seclevel) replaces the deprecated backtick-repr syntax;
        # identical output for an int.
        return ('SEC'+str(seclevel)+'>|'+(indent-8)*' ' + str + '\n' +
                '     |'+(indent-8)*' ' + uline + '\n')
    elif tree.tag == 'doctestblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['     |'+(indent-4)*' '+line for line in str.split('\n')]
        lines[0] = 'DTST>'+lines[0][5:]
        return '\n'.join(lines) + '\n     |\n'
    elif tree.tag == 'literalblock':
        str = re.sub('\0', '{', childstr)
        str = re.sub('\1', '}', str)
        lines = ['     |'+(indent-5)*' '+line for line in str.split('\n')]
        lines[0] = ' LIT>'+lines[0][5:]
        # Leading '\2' sentinel: consumed by the '::' cleanup above.
        return '\2' + '\n'.join(lines) + '\n     |\n'
    elif tree.tag == 'field':
        # children = [tag, arg*, body...]; count the leading args.
        numargs = 0
        while tree.children[numargs+1].tag == 'arg': numargs += 1
        args = variables[1:1+numargs]
        body = variables[1+numargs:]
        str = ' FLD>|'+(indent-6)*' '+'@'+variables[0]
        if args: str += '(' + ', '.join(args) + ')'
        return str + ':\n' + ''.join(body)
    elif tree.tag == 'target':
        return '<%s>' % childstr
    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
                      'section', 'olist', 'ulist', 'name'):
        return childstr
    elif tree.tag == 'symbol':
        return 'E{%s}' % childstr
    elif tree.tag == 'graph':
        return 'G{%s}' % ' '.join(variables)
    else:
        # Inline markup: find the single-letter tag for this element.
        for (tag, name) in _COLORIZING_TAGS.items():
            if name == tree.tag:
                return '%s{%s}' % (tag, childstr)
    raise ValueError('Unknown DOM element %r' % tree.tag)
1500
1501
1502
1503
def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
    """
    Pretty-parse the string.  This parses the string, and catches any
    warnings or errors produced.  Any warnings and errors are
    displayed, and the resulting DOM parse structure is returned.

    @param str: The string to parse.
    @type str: C{string}
    @param show_warnings: Whether or not to display non-fatal errors
        generated by parsing C{str}.
    @type show_warnings: C{boolean}
    @param show_errors: Whether or not to display fatal errors
        generated by parsing C{str}.
    @type show_errors: C{boolean}
    @param stream: The stream that warnings and errors should be
        written to.
    @type stream: C{stream}
    @return: a DOM document encoding the contents of C{str}.
    @rtype: C{Element}
    @raise SyntaxError: If any fatal errors were encountered.
    """
    errors = []
    # Bug fix: initialize warnings before the try block.  If parse()
    # raises, the partition below never runs, and warnings would
    # otherwise be unbound when it is sorted and displayed -- masking
    # the original exception with a NameError.
    warnings = []
    confused = 0
    try:
        val = parse(str, errors)
        warnings = [e for e in errors if not e.is_fatal()]
        errors = [e for e in errors if e.is_fatal()]
    except:
        # Remember that the parser itself crashed; the exception is
        # re-raised below, after any collected errors are displayed.
        confused = 1

    if not show_warnings: warnings = []
    warnings.sort()
    errors.sort()
    if warnings:
        print >>stream, '='*SCRWIDTH
        print >>stream, "WARNINGS"
        print >>stream, '-'*SCRWIDTH
        for warning in warnings:
            print >>stream, warning.as_warning()
        print >>stream, '='*SCRWIDTH
    if errors and show_errors:
        if not warnings: print >>stream, '='*SCRWIDTH
        print >>stream, "ERRORS"
        print >>stream, '-'*SCRWIDTH
        for error in errors:
            print >>stream, error
        print >>stream, '='*SCRWIDTH

    if confused: raise
    elif errors: raise SyntaxError('Encountered Errors')
    else: return val
1555
1556
1557
1558
1559
1561 """
1562 An error generated while tokenizing a formatted documentation
1563 string.
1564 """
1565
1567 """
1568 An error generated while structuring a formatted documentation
1569 string.
1570 """
1571
1573 """
1574 An error generated while colorizing a paragraph.
1575 """
    def __init__(self, descr, token, charnum, is_fatal=1):
        """
        Construct a new colorizing exception.

        @param descr: A short description of the error.
        @type descr: C{string}
        @param token: The token where the error occured
        @type token: L{Token}
        @param charnum: The character index of the position in
            C{token} where the error occured.
        @type charnum: C{int}
        @param is_fatal: Whether this error is fatal (passed through
            to L{ParseError}).
        @type is_fatal: C{boolean}
        """
        # Let the base class record the description, line number, and
        # severity; then remember where in the token the problem was.
        ParseError.__init__(self, descr, token.startline, is_fatal)
        self.token = token
        self.charnum = charnum
1591
1592 CONTEXT_RANGE = 20
1594 RANGE = self.CONTEXT_RANGE
1595 if self.charnum <= RANGE:
1596 left = self.token.contents[0:self.charnum]
1597 else:
1598 left = '...'+self.token.contents[self.charnum-RANGE:self.charnum]
1599 if (len(self.token.contents)-self.charnum) <= RANGE:
1600 right = self.token.contents[self.charnum:]
1601 else:
1602 right = (self.token.contents[self.charnum:self.charnum+RANGE]
1603 + '...')
1604 return ('%s\n\n%s%s\n%s^' % (self._descr, left, right, ' '*len(left)))
1605
1606
1607
1608
1609
1611 """
1612 Return a DOM document matching the epytext DTD, containing a
1613 single literal block. That literal block will include the
1614 contents of the given string. This method is typically used as a
1615 fall-back when the parser fails.
1616
1617 @param str: The string which should be enclosed in a literal
1618 block.
1619 @type str: C{string}
1620
1621 @return: A DOM document containing C{str} in a single literal
1622 block.
1623 @rtype: C{Element}
1624 """
1625 return Element('epytext', Element('literalblock', str))
1626
1628 """
1629 Return a DOM document matching the epytext DTD, containing a
1630 single paragraph. That paragraph will include the contents of the
1631 given string. This can be used to wrap some forms of
1632 automatically generated information (such as type names) in
1633 paragraphs.
1634
1635 @param str: The string which should be enclosed in a paragraph.
1636 @type str: C{string}
1637
1638 @return: A DOM document containing C{str} in a single paragraph.
1639 @rtype: C{Element}
1640 """
1641 return Element('epytext', Element('para', str))
1642
1643
1644
1645
1646
1648 """
1649 Parse the given docstring, which is formatted using epytext; and
1650 return a C{ParsedDocstring} representation of its contents.
1651 @param docstring: The docstring to parse
1652 @type docstring: C{string}
1653 @param errors: A list where any errors generated during parsing
1654 will be stored.
1655 @type errors: C{list} of L{ParseError}
1656 @param options: Extra options. Unknown options are ignored.
1657 Currently, no extra options are defined.
1658 @rtype: L{ParsedDocstring}
1659 """
1660 return ParsedEpytextDocstring(parse(docstring, errors), **options)
1661
1662 -class ParsedEpytextDocstring(ParsedDocstring):
    # Mapping from epytext symbol codes (written as E{code}) to the
    # text used to render them in HTML output.  Codes with no entry
    # here are rendered as '[code]' by _to_html.
    SYMBOL_TO_HTML = {
        # Arrows
        '<-': '←', '->': '→', '^': '↑', 'v': '↓',

        # Greek letters
        'alpha': 'α', 'beta': 'β', 'gamma': 'γ',
        'delta': 'δ', 'epsilon': 'ε', 'zeta': 'ζ',
        'eta': 'η', 'theta': 'θ', 'iota': 'ι',
        'kappa': 'κ', 'lambda': 'λ', 'mu': 'μ',
        'nu': 'ν', 'xi': 'ξ', 'omicron': 'ο',
        'pi': 'π', 'rho': 'ρ', 'sigma': 'σ',
        'tau': 'τ', 'upsilon': 'υ', 'phi': 'φ',
        'chi': 'χ', 'psi': 'ψ', 'omega': 'ω',
        'Alpha': 'Α', 'Beta': 'Β', 'Gamma': 'Γ',
        'Delta': 'Δ', 'Epsilon': 'Ε', 'Zeta': 'Ζ',
        'Eta': 'Η', 'Theta': 'Θ', 'Iota': 'Ι',
        'Kappa': 'Κ', 'Lambda': 'Λ', 'Mu': 'Μ',
        'Nu': 'Ν', 'Xi': 'Ξ', 'Omicron': 'Ο',
        'Pi': 'Π', 'Rho': 'Ρ', 'Sigma': 'Σ',
        'Tau': 'Τ', 'Upsilon': 'Υ', 'Phi': 'Φ',
        'Chi': 'Χ', 'Psi': 'Ψ', 'Omega': 'Ω',

        # HTML-entity-style names (arrows, operators, relations)
        'larr': '←', 'rarr': '→', 'uarr': '↑',
        'darr': '↓', 'harr': '↔', 'crarr': '↵',
        'lArr': '⇐', 'rArr': '⇒', 'uArr': '⇑',
        'dArr': '⇓', 'hArr': '⇔',
        'copy': '©', 'times': '×', 'forall': '∀',
        'exist': '∃', 'part': '∂',
        'empty': '∅', 'isin': '∈', 'notin': '∉',
        'ni': '∋', 'prod': '∏', 'sum': '∑',
        'prop': '∝', 'infin': '∞', 'ang': '∠',
        'and': '∧', 'or': '∨', 'cap': '∩', 'cup': '∪',
        'int': '∫', 'there4': '∴', 'sim': '∼',
        'cong': '≅', 'asymp': '≈', 'ne': '≠',
        'equiv': '≡', 'le': '≤', 'ge': '≥',
        'sub': '⊂', 'sup': '⊃', 'nsub': '⊄',
        'sube': '⊆', 'supe': '⊇', 'oplus': '⊕',
        'otimes': '⊗', 'perp': '⊥',

        # Alternate (spelled-out) names for a few symbols
        'infinity': '∞', 'integral': '∫', 'product': '∏',
        '<=': '≤', '>=': '≥',
        }
1707
1708 SYMBOL_TO_LATEX = {
1709
1710 '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)',
1711 '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)',
1712
1713
1714
1715 'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma':
1716 r'\(\gamma\)', 'delta': r'\(\delta\)', 'epsilon':
1717 r'\(\epsilon\)', 'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)',
1718 'theta': r'\(\theta\)', 'iota': r'\(\iota\)', 'kappa':
1719 r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)',
1720 'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)', 'pi':
1721 r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)', 'tau':
1722 r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)',
1723 'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega':
1724 r'\(\omega\)',
1725
1726 'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma':
1727 r'\(\Gamma\)', 'Delta': r'\(\Delta\)', 'Epsilon':
1728 r'\(\epsilon\)', 'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)',
1729 'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)', 'Kappa':
1730 r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)',
1731 'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)', 'Pi':
1732 r'\(\Pi\)', 'ho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)', 'Tau':
1733 r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)',
1734 'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega':
1735 r'\(\Omega\)',
1736
1737
1738 'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)', 'uarr':
1739 r'\(\uparrow\)', 'darr': r'\(\downarrow\)', 'harr':
1740 r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)',
1741 'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)', 'uArr':
1742 r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)', 'hArr':
1743 r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}',
1744 'times': r'\(\times\)', 'forall': r'\(\forall\)', 'exist':
1745 r'\(\exists\)', 'part': r'\(\partial\)', 'empty':
1746 r'\(\emptyset\)', 'isin': r'\(\in\)', 'notin': r'\(\notin\)',
1747 'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 'sum': r'\(\sum\)',
1748 'prop': r'\(\propto\)', 'infin': r'\(\infty\)', 'ang':
1749 r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)', 'cap':
1750 r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)', 'there4':
1751 r'\(\therefore\)', 'sim': r'\(\sim\)', 'cong': r'\(\cong\)',
1752 'asymp': r'\(\approx\)', 'ne': r'\(\ne\)', 'equiv':
1753 r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)', 'sub':
1754 r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\supset\)',
1755 'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)', 'oplus':
1756 r'\(\oplus\)', 'otimes': r'\(\otimes\)', 'perp': r'\(\perp\)',
1757
1758
1759 'infinity': r'\(\infty\)', 'integral': r'\(\int\)', 'product':
1760 r'\(\prod\)', '<=': r'\(\le\)', '>=': r'\(\ge\)',
1761 }
1762
1763 - def __init__(self, dom_tree, **options):
1764 self._tree = dom_tree
1765
1766 self._html = self._latex = self._plaintext = None
1767 self._terms = None
1768
1769 if options.get('inline') and self._tree is not None:
1770 for elt in self._tree.children:
1771 elt.attribs['inline'] = True
1772
    def __str__(self):
        # Delegate to the DOM tree's own string representation.
        return str(self._tree)
1775
1776 - def to_html(self, docstring_linker, directory=None, docindex=None,
1777 context=None, **options):
1778 if self._html is not None: return self._html
1779 if self._tree is None: return ''
1780 indent = options.get('indent', 0)
1781 self._html = self._to_html(self._tree, docstring_linker, directory,
1782 docindex, context, indent)
1783 return self._html
1784
1785 - def to_latex(self, docstring_linker, **options):
1786 if self._latex is not None: return self._latex
1787 if self._tree is None: return ''
1788 indent = options.get('indent', 0)
1789 self._hyperref = options.get('hyperref', 1)
1790 self._latex = self._to_latex(self._tree, docstring_linker, indent)
1791 return self._latex
1792
1793 - def to_plaintext(self, docstring_linker, **options):
1794
1795
1796 if self._tree is None: return ''
1797 if 'indent' in options:
1798 self._plaintext = to_plaintext(self._tree,
1799 indent=options['indent'])
1800 else:
1801 self._plaintext = to_plaintext(self._tree)
1802 return self._plaintext
1803
1804 - def _index_term_key(self, tree):
1805 str = to_plaintext(tree)
1806 str = re.sub(r'\s\s+', '-', str)
1807 return "index-"+re.sub("[^a-zA-Z0-9]", "_", str)
1808
    def _to_html(self, tree, linker, directory, docindex, context,
                 indent=0, seclevel=0):
        """
        Recursive helper for L{to_html}: convert C{tree} (an
        C{Element} or a string) to an HTML string.

        @param linker: The docstring linker used to translate
            identifier crossreferences and index terms.
        @param directory: The output directory; used to decide where
            graph images are written.
        """
        # Plain text leaves: just HTML-escape them.
        if isinstance(tree, basestring):
            return plaintext_to_html(tree)

        if tree.tag == 'epytext': indent -= 2
        if tree.tag == 'section': seclevel += 1

        # Render all children first.
        variables = [self._to_html(c, linker, directory, docindex, context,
                                   indent+2, seclevel)
                     for c in tree.children]

        childstr = ''.join(variables)

        # Dispatch on the element's tag.
        if tree.tag == 'para':
            # '(cond and A or B)' is the pre-2.5 conditional idiom:
            # inline paragraphs get no <p>...</p> wrapper.
            return wordwrap(
                (tree.attribs.get('inline') and '%s' or '<p>%s</p>') % childstr,
                indent)
        elif tree.tag == 'code':
            style = tree.attribs.get('style')
            if style:
                return '<code class="%s">%s</code>' % (style, childstr)
            else:
                return '<code>%s</code>' % childstr
        elif tree.tag == 'uri':
            # variables == [name, target]
            return ('<a href="%s" target="_top">%s</a>' %
                    (variables[1], variables[0]))
        elif tree.tag == 'link':
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '<i>%s</i>' % childstr
        elif tree.tag == 'math':
            return '<i class="math">%s</i>' % childstr
        elif tree.tag == 'indexed':
            # Wrap the term in its own mini-document and let the linker
            # decide how to render the index entry.
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
        elif tree.tag == 'bold':
            return '<b>%s</b>' % childstr
        elif tree.tag == 'ulist':
            return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
        elif tree.tag == 'olist':
            start = tree.attribs.get('start') or ''
            return ('%s<ol start="%s">\n%s%s</ol>\n' %
                    (indent*' ', start, childstr, indent*' '))
        elif tree.tag == 'li':
            return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
        elif tree.tag == 'heading':
            # Heading level follows the section nesting depth.
            return ('%s<h%s class="heading">%s</h%s>\n' %
                    ((indent-2)*' ', seclevel, childstr, seclevel))
        elif tree.tag == 'literalblock':
            return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
        elif tree.tag == 'doctestblock':
            return doctest_to_html(tree.children[0].strip())
        elif tree.tag == 'fieldlist':
            # Fields should have been removed by split_fields() by now.
            raise AssertionError("There should not be any field lists left")
        elif tree.tag in ('epytext', 'section', 'tag', 'arg',
                          'name', 'target', 'html'):
            return childstr
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_HTML.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            # Build the graph, write its image into the output
            # directory, and embed it.
            graph = self._build_graph(variables[0], variables[1:], linker,
                                      docindex, context)
            if not graph: return ''
            image_url = '%s.gif' % graph.uid
            image_file = os.path.join(directory, image_url)
            return graph.to_html(image_file, image_url)
        else:
            raise ValueError('Unknown epytext DOM element %r' % tree.tag)
1886
1887
    def _build_graph(self, graph_type, graph_args, linker,
                     docindex, context):
        """
        Construct the graph requested by a C{G{...}} markup element.

        @param graph_type: One of 'classtree', 'packagetree',
            'importgraph', or 'callgraph'.
        @param graph_args: The arguments from the markup (dotted names
            looked up in C{docindex}).
        @return: A graph object, or C{None} if the graph could not be
            constructed.
        """
        # The epydoc imports are performed locally, inside each branch;
        # presumably to avoid import cycles -- TODO confirm.
        if graph_type == 'classtree':
            from epydoc.apidoc import ClassDoc
            if graph_args:
                bases = [docindex.find(name, context)
                         for name in graph_args]
            elif isinstance(context, ClassDoc):
                # No explicit bases: default to the documented class.
                bases = [context]
            else:
                log.warning("Could not construct class tree: you must "
                            "specify one or more base classes.")
                return None
            from epydoc.docwriter.dotgraph import class_tree_graph
            return class_tree_graph(bases, linker, context)
        elif graph_type == 'packagetree':
            from epydoc.apidoc import ModuleDoc
            if graph_args:
                packages = [docindex.find(name, context)
                            for name in graph_args]
            elif isinstance(context, ModuleDoc):
                # No explicit roots: default to the documented module.
                packages = [context]
            else:
                log.warning("Could not construct package tree: you must "
                            "specify one or more root packages.")
                return None
            from epydoc.docwriter.dotgraph import package_tree_graph
            return package_tree_graph(packages, linker, context)
        elif graph_type == 'importgraph':
            # The import graph always covers every documented module;
            # graph_args are ignored in this branch.
            from epydoc.apidoc import ModuleDoc
            modules = [d for d in docindex.root if isinstance(d, ModuleDoc)]
            from epydoc.docwriter.dotgraph import import_graph
            return import_graph(modules, docindex, linker, context)

        elif graph_type == 'callgraph':
            if graph_args:
                docs = [docindex.find(name, context) for name in graph_args]
                docs = [doc for doc in docs if doc is not None]
            else:
                docs = [context]
            from epydoc.docwriter.dotgraph import call_graph
            return call_graph(docs, docindex, linker, context)
        else:
            # Unknown graph type: warn and fall through (returns None).
            log.warning("Unknown graph type %s" % graph_type)
1933
1934
    def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0):
        """
        Recursive helper for L{to_latex}: convert C{tree} (an
        C{Element} or a string) to a LaTeX string.  Reads
        C{self._hyperref} (set by L{to_latex}) to decide how URIs are
        rendered.
        """
        # Plain text leaves: LaTeX-escape them.
        if isinstance(tree, basestring):
            return plaintext_to_latex(tree, breakany=breakany)

        if tree.tag == 'section': seclevel += 1

        # Choose the child indentation, then render all children.
        if tree.tag == 'epytext': cindent = indent
        else: cindent = indent + 2
        variables = [self._to_latex(c, linker, cindent, seclevel, breakany)
                     for c in tree.children]
        childstr = ''.join(variables)

        if tree.tag == 'para':
            return wordwrap(childstr, indent)+'\n'
        elif tree.tag == 'code':
            return '\\texttt{%s}' % childstr
        elif tree.tag == 'uri':
            # variables == [name, target]
            if len(variables) != 2: raise ValueError('Bad URI ')
            if self._hyperref:
                # Undo the plaintext-to-latex escaping of the URL text,
                # since \href expects the raw URL.
                uri = tree.children[1].children[0]
                uri = uri.replace('{\\textasciitilde}', '~')
                uri = uri.replace('\\#', '#')
                if variables[0] == variables[1]:
                    return '\\href{%s}{\\textit{%s}}' % (uri, variables[1])
                else:
                    return ('%s\\footnote{\\href{%s}{%s}}' %
                            (variables[0], uri, variables[1]))
            else:
                # No hyperref package: fall back to footnotes.
                if variables[0] == variables[1]:
                    return '\\textit{%s}' % variables[1]
                else:
                    return '%s\\footnote{%s}' % (variables[0], variables[1])
        elif tree.tag == 'link':
            if len(variables) != 2: raise ValueError('Bad Link')
            return linker.translate_identifier_xref(variables[1], variables[0])
        elif tree.tag == 'italic':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'math':
            return '\\textit{%s}' % childstr
        elif tree.tag == 'indexed':
            # Wrap the term and let the linker render the index entry.
            term = Element('epytext', *tree.children, **tree.attribs)
            return linker.translate_indexterm(ParsedEpytextDocstring(term))
        elif tree.tag == 'bold':
            return '\\textbf{%s}' % childstr
        elif tree.tag == 'li':
            return indent*' ' + '\\item ' + childstr.lstrip()
        elif tree.tag == 'heading':
            return ' '*(indent-2) + '(section) %s\n\n' % childstr
        elif tree.tag == 'doctestblock':
            return doctest_to_latex(tree.children[0].strip())
        elif tree.tag == 'literalblock':
            return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr
        elif tree.tag == 'fieldlist':
            return indent*' '+'{omitted fieldlist}\n'
        elif tree.tag == 'olist':
            return (' '*indent + '\\begin{enumerate}\n\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' +
                    childstr +
                    ' '*indent + '\\end{enumerate}\n\n')
        elif tree.tag == 'ulist':
            return (' '*indent + '\\begin{itemize}\n' +
                    ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' +
                    childstr +
                    ' '*indent + '\\end{itemize}\n\n')
        elif tree.tag == 'symbol':
            symbol = tree.children[0]
            return self.SYMBOL_TO_LATEX.get(symbol, '[%s]' % symbol)
        elif tree.tag == 'graph':
            # Graphs are not rendered in LaTeX output.
            return '(GRAPH)'
        else:
            # Unknown elements (e.g. 'name', 'target'): pass through.
            return childstr
2010
2011 _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')
2012
    def summary(self):
        """
        Return a tuple C{(summary, has_more)}: a short summary of this
        docstring (the first sentence of its first paragraph, as a
        C{ParsedEpytextDocstring}), and a flag telling whether the
        docstring contains further substantive content.
        """
        if self._tree is None: return self, False
        tree = self._tree
        doc = Element('epytext')

        # Find the first paragraph, descending into sections and lists.
        variables = tree.children
        while (len(variables) > 0) and (variables[0].tag != 'para'):
            if variables[0].tag in ('section', 'ulist', 'olist', 'li'):
                variables = variables[0].children
            else:
                variables = variables[1:]

        # Special case: a docstring consisting of a single literal
        # block -- use its text up to the first blank line as a
        # synthetic summary paragraph.
        if (len(variables) == 0 and len(tree.children) == 1 and
            tree.children[0].tag == 'literalblock'):
            str = re.split(r'\n\s*(\n|$).*',
                           tree.children[0].children[0], 1)[0]
            variables = [Element('para')]
            variables[0].children.append(str)

        # No paragraph at all: return an empty summary.
        if len(variables) == 0: return ParsedEpytextDocstring(doc), False

        # Is there substantive content (anything but a fieldlist)
        # after the first paragraph?
        long_docs = False
        for var in variables[1:]:
            if isinstance(var, Element) and var.tag == 'fieldlist':
                continue
            long_docs = True
            break

        # Copy the paragraph's children into the summary until the end
        # of the first sentence (per _SUMMARY_RE) is found.
        parachildren = variables[0].children
        para = Element('para', inline=True)
        doc.children.append(para)
        for parachild in parachildren:
            if isinstance(parachild, basestring):
                m = self._SUMMARY_RE.match(parachild)
                if m:
                    para.children.append(m.group(1))
                    # Anything beyond the sentence end (a later child,
                    # or trailing non-whitespace text) means there is
                    # more than the summary.
                    long_docs |= parachild is not parachildren[-1]
                    if not long_docs:
                        other = parachild[m.end():]
                        if other and not other.isspace():
                            long_docs = True
                    return ParsedEpytextDocstring(doc), long_docs
            para.children.append(parachild)

        return ParsedEpytextDocstring(doc), long_docs
2064
    def split_fields(self, errors=None):
        """
        Split this docstring into its description and its fields (the
        contents of its trailing 'fieldlist' element, if any).

        @param errors: Unused here; present for interface
            compatibility.
        @return: A tuple C{(description, fields)}, where description
            is a C{ParsedEpytextDocstring} (or C{None} if the
            docstring contained only fields) and fields is a list of
            C{Field}s.
        """
        if self._tree is None: return (self, ())
        # Work on a shallow copy, so self._tree is left untouched by
        # the destructive edits below.
        tree = Element(self._tree.tag, *self._tree.children,
                       **self._tree.attribs)
        fields = []

        # The parser collects all fields into a single fieldlist as
        # the last child of the top-level element.
        if (tree.children and
            tree.children[-1].tag == 'fieldlist' and
            tree.children[-1].children):
            field_nodes = tree.children[-1].children
            del tree.children[-1]

            for field in field_nodes:
                # The first child holds the field tag (e.g. 'param').
                tag = field.children[0].children[0].lower()
                del field.children[0]

                # An optional 'arg' child names the field's argument.
                if field.children and field.children[0].tag == 'arg':
                    arg = field.children[0].children[0]
                    del field.children[0]
                else:
                    arg = None

                # Re-root the remaining body as its own epytext doc.
                field.tag = 'epytext'
                fields.append(Field(tag, arg, ParsedEpytextDocstring(field)))

        # If nothing but fields remained, return None for the body.
        if tree.children and tree.children[0].children:
            return ParsedEpytextDocstring(tree), fields
        else:
            return None, fields
2098
2099
2100 - def index_terms(self):
2101 if self._terms is None:
2102 self._terms = []
2103 self._index_terms(self._tree, self._terms)
2104 return self._terms
2105
    def _index_terms(self, tree, terms):
        """
        Recursive helper for L{index_terms}: walk C{tree} and append a
        C{ParsedEpytextDocstring} to C{terms} for each 'indexed'
        element found.
        """
        # Strings and missing trees contain no index terms.
        if tree is None or isinstance(tree, basestring):
            return

        if tree.tag == 'indexed':
            term = Element('epytext', *tree.children, **tree.attribs)
            terms.append(ParsedEpytextDocstring(term))

        # Recurse even into an 'indexed' node, to pick up nested terms.
        for child in tree.children:
            self._index_terms(child, terms)
2117