Package epydoc :: Module docparser
[hide private]
[frames | no frames]

Source Code for Module epydoc.docparser

   1  # epydoc -- Source code parsing 
   2  # 
   3  # Copyright (C) 2005 Edward Loper 
   4  # Author: Edward Loper <edloper@loper.org> 
   5  # URL: <http://epydoc.sf.net> 
   6  # 
   7  # $Id: docparser.py 1673 2008-01-29 05:42:58Z edloper $ 
   8   
   9  """ 
  10  Extract API documentation about python objects by parsing their source 
  11  code. 
  12   
  13  The function L{parse_docs()}, which provides the main interface 
  14  of this module, reads and parses the Python source code for a 
  15  module, and uses it to create an L{APIDoc} object containing 
  16  the API documentation for the variables and values defined in 
   17  that module. 
  18   
  19  Currently, C{parse_docs()} extracts documentation from the following 
  20  source code constructions: 
  21   
  22    - module docstring 
  23    - import statements 
  24    - class definition blocks 
  25    - function definition blocks 
  26    - assignment statements 
  27      - simple assignment statements 
  28      - assignment statements with multiple C{'='}s 
  29      - assignment statements with unpacked left-hand sides 
  30      - assignment statements that wrap a function in classmethod 
  31        or staticmethod. 
  32      - assignment to special variables __path__, __all__, and 
  33        __docformat__. 
  34    - delete statements 
  35   
  36  C{parse_docs()} does not yet support the following source code 
  37  constructions: 
  38   
  39    - assignment statements that create properties 
  40   
   41  By default, C{parse_docs()} will explore the contents of top-level 
  42  C{try} and C{if} blocks.  If desired, C{parse_docs()} can also 
  43  be configured to explore the contents of C{while} and C{for} blocks. 
  44  (See the configuration constants, below.) 
  45   
  46  @todo: Make it possible to extend the functionality of C{parse_docs()}, 
  47         by replacing process_line with a dispatch table that can be 
  48         customized (similarly to C{docintrospector.register_introspector()}). 
  49  """ 
  50  __docformat__ = 'epytext en' 
  51   
  52  ###################################################################### 
  53  ## Imports 
  54  ###################################################################### 
  55   
  56  # Python source code parsing: 
  57  import token, tokenize 
  58  # Finding modules: 
  59  import imp 
  60  # File services: 
  61  import os, os.path, sys 
  62  # Unicode: 
  63  import codecs 
  64  # API documentation encoding: 
  65  from epydoc.apidoc import * 
  66  # For looking up the docs of builtins: 
  67  import __builtin__, exceptions 
  68  import epydoc.docintrospecter  
  69  # Misc utility functions: 
  70  from epydoc.util import * 
  71  # Backwards compatibility 
  72  from epydoc.compat import * 
  73   
  74  ###################################################################### 
  75  ## Doc Parser 
  76  ###################################################################### 
  77   
class ParseError(Exception):
    """
    Raised by C{docparser} when the Python source file it is
    processing turns out not to be syntactically valid.
    """

# parse_docs() consults this cache before parsing a file, and stores
# every ModuleDoc it creates here, keyed by the file's name.
_moduledoc_cache = {}
"""A cache of C{ModuleDoc}s that we've already created.
C{_moduledoc_cache} is a dictionary mapping from filenames to
C{ValueDoc} objects.
@type: C{dict}"""

#////////////////////////////////////////////////////////////
# Configuration Constants
#////////////////////////////////////////////////////////////

# The PARSE_*_BLOCKS flags below are read by
# process_control_flow_line() to decide whether the body of a control
# flow block is examined or skipped.

#{ Configuration Constants: Control Flow
PARSE_TRY_BLOCKS = True
"""Should the contents of C{try} blocks be examined?"""
PARSE_EXCEPT_BLOCKS = True
"""Should the contents of C{except} blocks be examined?"""
PARSE_FINALLY_BLOCKS = True
"""Should the contents of C{finally} blocks be examined?"""
PARSE_IF_BLOCKS = True
"""Should the contents of C{if} blocks be examined?"""
PARSE_ELSE_BLOCKS = True
"""Should the contents of C{else} and C{elif} blocks be examined?"""
PARSE_WHILE_BLOCKS = False
"""Should the contents of C{while} blocks be examined?"""
PARSE_FOR_BLOCKS = False
"""Should the contents of C{for} blocks be examined?"""

#{ Configuration Constants: Imports
IMPORT_HANDLING = 'link'
"""What should C{docparser} do when it encounters an import
statement?
  - C{'link'}: Create variabledoc objects with imported_from pointers
    to the source object.
  - C{'parse'}: Parse the imported file, to find the actual
    documentation for the imported object. (This will fall back
    to the 'link' behavior if the imported file can't be parsed,
    e.g., if it's a builtin.)
"""

IMPORT_STAR_HANDLING = 'parse'
"""When C{docparser} encounters a C{'from M{m} import *'}
statement, and is unable to parse C{M{m}} (either because
L{IMPORT_HANDLING}=C{'link'}, or because parsing failed), how
should it determine the list of identifiers expored by C{M{m}}?
  - C{'ignore'}: ignore the import statement, and don't create
    any new variables.
  - C{'parse'}: parse it to find a list of the identifiers that it
    exports. (This will fall back to the 'ignore' behavior if the
    imported file can't be parsed, e.g., if it's a builtin.)
  - C{'introspect'}: import the module and introspect it (using C{dir})
    to find a list of the identifiers that it exports. (This will
    fall back to the 'ignore' behavior if the imported file can't
    be parsed, e.g., if it's a builtin.)
"""

DEFAULT_DECORATOR_BEHAVIOR = 'transparent'
"""When C{DocParse} encounters an unknown decorator, what should
it do to the documentation of the decorated function?
  - C{'transparent'}: leave the function's documentation as-is.
  - C{'opaque'}: replace the function's documentation with an
    empty C{ValueDoc} object, reflecting the fact that we have no
    knowledge about what value the decorator returns.
"""

# The trailing comment on the next line records the alternative
# setting ('link'); the active value is 'parse'.
BASE_HANDLING = 'parse'#'link'
"""What should C{docparser} do when it encounters a base class that
was imported from another module?
  - C{'link'}: Create a valuedoc with a C{proxy_for} pointer to the
    base class.
  - C{'parse'}: Parse the file containing the base class, to find
    the actual documentation for it. (This will fall back to the
    'link' behavior if the imported file can't be parsed, e.g., if
    it's a builtin.)
"""

# The three markers below are matched against the start of comment
# tokens by process_file().

#{ Configuration Constants: Comment docstrings
COMMENT_DOCSTRING_MARKER = '#:'
"""The prefix used to mark comments that contain attribute
docstrings for variables."""

#{ Configuration Constants: Grouping
START_GROUP_MARKER = '#{'
"""The prefix used to mark a comment that starts a group. This marker
should be followed (on the same line) by the name of the group.
Following a start-group comment, all variables defined at the same
indentation level will be assigned to this group name, until the
parser reaches the end of the file, a matching end-group comment, or
another start-group comment at the same indentation level.
"""

END_GROUP_MARKER = '#}'
"""The prefix used to mark a comment that ends a group. See
L{START_GROUP_MARKER}."""

#/////////////////////////////////////////////////////////////////
#{ Module parser
#/////////////////////////////////////////////////////////////////

182 -def parse_docs(filename=None, name=None, context=None, is_script=False):
183 """ 184 Generate the API documentation for a specified object by 185 parsing Python source files, and return it as a L{ValueDoc}. 186 The object to generate documentation for may be specified 187 using the C{filename} parameter I{or} the C{name} parameter. 188 (It is an error to specify both a filename and a name; or to 189 specify neither a filename nor a name). 190 191 @param filename: The name of the file that contains the python 192 source code for a package, module, or script. If 193 C{filename} is specified, then C{parse} will return a 194 C{ModuleDoc} describing its contents. 195 @param name: The fully-qualified python dotted name of any 196 value (including packages, modules, classes, and 197 functions). C{parse_docs()} will automatically figure out 198 which module(s) it needs to parse in order to find the 199 documentation for the specified object. 200 @param context: The API documentation for the package that 201 contains C{filename}. If no context is given, then 202 C{filename} is assumed to contain a top-level module or 203 package. It is an error to specify a C{context} if the 204 C{name} argument is used. 205 @rtype: L{ValueDoc} 206 """ 207 # Always introspect __builtins__ & exceptions (e.g., in case 208 # they're used as base classes.) 209 epydoc.docintrospecter.introspect_docs(__builtin__) 210 epydoc.docintrospecter.introspect_docs(exceptions) 211 212 # If our input is a python object name, then delegate to 213 # _find(). 214 if filename is None and name is not None: 215 if context: 216 raise ValueError("context should only be specified together " 217 "with filename, not with name.") 218 name = DottedName(name) 219 val_doc = _find(name) 220 if val_doc.canonical_name is UNKNOWN: 221 val_doc.canonical_name = name 222 return val_doc 223 224 # If our input is a filename, then create a ModuleDoc for it, 225 # and use process_file() to populate its attributes. 
226 elif filename is not None and name is None: 227 # Use a python source version, if possible. 228 if not is_script: 229 try: filename = py_src_filename(filename) 230 except ValueError, e: raise ImportError('%s' % e) 231 232 # Check the cache, first. 233 if filename in _moduledoc_cache: 234 return _moduledoc_cache[filename] 235 236 log.info("Parsing %s" % filename) 237 238 # If the context wasn't provided, then check if the file is in 239 # a package directory. If so, then update basedir & name to 240 # contain the topmost package's directory and the fully 241 # qualified name for this file. (This update assume the 242 # default value of __path__ for the parent packages; if the 243 # parent packages override their __path__s, then this can 244 # cause us not to find the value.) 245 if context is None and not is_script: 246 basedir = os.path.split(filename)[0] 247 name = os.path.splitext(os.path.split(filename)[1])[0] 248 if name == '__init__': 249 basedir, name = os.path.split(basedir) 250 context = _parse_package(basedir) 251 252 # Figure out the canonical name of the module we're parsing. 253 if not is_script: 254 module_name, is_pkg = _get_module_name(filename, context) 255 else: 256 module_name = DottedName(munge_script_name(filename)) 257 is_pkg = False 258 259 # Create a new ModuleDoc for the module, & add it to the cache. 260 module_doc = ModuleDoc(canonical_name=module_name, variables={}, 261 sort_spec=[], imports=[], 262 filename=filename, package=context, 263 is_package=is_pkg, submodules=[], 264 docs_extracted_by='parser') 265 module_doc.defining_module = module_doc 266 _moduledoc_cache[filename] = module_doc 267 268 # Set the module's __path__ to its default value. 269 if is_pkg: 270 module_doc.path = [os.path.split(module_doc.filename)[0]] 271 272 # Add this module to the parent package's list of submodules. 273 if context is not None: 274 context.submodules.append(module_doc) 275 276 # Tokenize & process the contents of the module's source file. 
277 try: 278 process_file(module_doc) 279 except tokenize.TokenError, e: 280 msg, (srow, scol) = e.args 281 raise ParseError('Error during parsing: %s ' 282 '(%s, line %d, char %d)' % 283 (msg, module_doc.filename, srow, scol)) 284 except IndentationError, e: 285 raise ParseError('Error during parsing: %s (%s)' % 286 (e, module_doc.filename)) 287 288 # Handle any special variables (__path__, __docformat__, etc.) 289 handle_special_module_vars(module_doc) 290 291 # Return the completed ModuleDoc 292 return module_doc 293 else: 294 raise ValueError("Expected exactly one of the following " 295 "arguments: name, filename")
296
def _parse_package(package_dir):
    """
    Parse the C{__init__} file of C{package_dir}, after first
    parsing the C{__init__} files of all of its ancestor packages,
    and return the resulting C{ModuleDoc}.  Return C{None} if
    C{package_dir} is not a package directory.
    """
    if is_package_dir(package_dir):
        # Ancestors first, so that this package gets its parent as
        # context.
        parent_doc = _parse_package(os.path.split(package_dir)[0])
        init_file = os.path.join(package_dir, '__init__')
        return parse_docs(filename=init_file, context=parent_doc)
    return None
309 310 # Special vars: 311 # C{__docformat__}, C{__all__}, and C{__path__}.
def handle_special_module_vars(module_doc):
    """
    Interpret the special module variables C{__docformat__},
    C{__all__} and C{__path__} of C{module_doc}.  Each of these
    variables whose token tree is available is removed from
    C{module_doc.variables} after it has been looked at.
    """
    # __docformat__: store its string value on the module.
    docformat_toks = _module_var_toktree(module_doc, '__docformat__')
    if docformat_toks is not None:
        try:
            module_doc.docformat = parse_string(docformat_toks)
        except:
            # Best effort: on any failure the docformat is left alone.
            pass
        del module_doc.variables['__docformat__']

    # __all__: the listed names are public, everything else is not.
    all_toks = _module_var_toktree(module_doc, '__all__')
    if all_toks is not None:
        try:
            exported = set(parse_string_list(all_toks))
            for var_name, var_doc in module_doc.variables.items():
                listed = var_name in exported
                var_doc.is_public = listed
                # NOTE(review): var_doc comes from module_doc.variables,
                # so this isinstance test looks like it can never be
                # true -- possibly var_doc.value was meant; confirm.
                if listed and not isinstance(var_doc, ModuleDoc):
                    var_doc.is_imported = False
        except ParseError:
            # The list could not be parsed, so give precedence to
            # introspection.
            for var_name, var_doc in module_doc.variables.items():
                if not isinstance(var_doc, ModuleDoc):
                    var_doc.is_imported = UNKNOWN
        del module_doc.variables['__all__']

    # __path__: only meaningful for packages.
    if not module_doc.is_package:
        return
    path_toks = _module_var_toktree(module_doc, '__path__')
    if path_toks is None:
        return
    try:
        module_doc.path = parse_string_list(path_toks)
    except ParseError:
        pass # [xx]
    del module_doc.variables['__path__']
348
349 -def _module_var_toktree(module_doc, name):
350 var_doc = module_doc.variables.get(name) 351 if (var_doc is None or var_doc.value in (None, UNKNOWN) or 352 var_doc.value.toktree is UNKNOWN): 353 return None 354 else: 355 return var_doc.value.toktree
356 357 #//////////////////////////////////////////////////////////// 358 #{ Module Lookup 359 #//////////////////////////////////////////////////////////// 360
def _find(name, package_doc=None):
    """
    Return the API documentation for the object whose dotted name is
    C{name}.  C{package_doc}, if given, is the API documentation for
    the package that contains the named object.

    @raise ImportError: If the object cannot be found.
    """
    # Inside a package the search is restricted to the package's
    # path; its directory is the fallback when no path is recorded.
    if package_doc is None:
        search_path = None
    elif package_doc.path is UNKNOWN:
        search_path = [os.path.split(package_doc.filename)[0]]
    else:
        search_path = package_doc.path

    # The leftmost identifier must be a module or package on that
    # path; locate it and parse it.
    module_file = _get_filename(name[0], search_path)
    module_doc = parse_docs(module_file, context=package_doc)

    # A single identifier names the module itself.
    if len(name) == 1:
        return module_doc

    # Otherwise look inside the module.  Variables are tried first,
    # except for those that merely hold an imported submodule.
    remainder = name[1:]
    if not _is_submodule_import_var(module_doc, name[1]):
        try:
            return _find_in_namespace(remainder, module_doc)
        except ImportError:
            pass

    # Next, try the subpackages.
    if module_doc.is_package:
        return _find(remainder, module_doc)

    # Neither a variable nor a subpackage: give up.
    raise ImportError('Could not find value')
398
def _is_submodule_import_var(module_doc, var_name):
    """
    Tell whether C{var_name} names a variable of C{module_doc} whose
    C{imported_from} link points at a submodule of the same name,
    i.e. a variable that came into being because a package imported
    one of its own submodules.
    """
    var_doc = module_doc.variables.get(var_name)
    submodule_name = DottedName(module_doc.canonical_name, var_name)
    if var_doc is None:
        return False
    return var_doc.imported_from == submodule_name
410
def _find_in_namespace(name, namespace_doc):
    """
    Look up the dotted name C{name} relative to C{namespace_doc} and
    return the API documentation of the value it refers to.

    @raise ImportError: If the value cannot be found.
    """
    head = name[0]
    if head not in namespace_doc.variables:
        raise ImportError('Could not find value')

    # Fetch the variable, and make sure it has a known value.
    var_doc = namespace_doc.variables[head]
    if var_doc.value is UNKNOWN:
        raise ImportError('Could not find value')
    val_doc = var_doc.value

    # An imported value: follow the alias link.
    if var_doc.imported_from not in (None, UNKNOWN):
        return _find(var_doc.imported_from+name[1:])

    # A single identifier: this value is the one we were after.
    if len(name) == 1:
        return val_doc

    # More identifiers to go: descend if the value is a namespace.
    if isinstance(val_doc, NamespaceDoc):
        return _find_in_namespace(name[1:], val_doc)

    # Dead end.
    raise ImportError('Could not find value')
438
def _get_filename(identifier, path=None):
    """
    Return the name of the Python source file for the module or
    package called C{identifier}, as located by C{imp.find_module()}.

    @param identifier: The (undotted) module or package name.
    @param path: The list of directories to search, or C{None} (or
        C{UNKNOWN}) to use the default search of
        C{imp.find_module()}.
    @return: For a source module, its filename; for a compiled
        module, the corresponding C{.py} file; for a package
        directory, its C{__init__.py} (or C{__init__.pyw}) file.
    @raise ImportError: If the module cannot be found, or if no
        Python source file is available for it.
    """
    if path is UNKNOWN: path = None
    try:
        fp, filename, (s,m,typ) = imp.find_module(identifier, path)
        # Only the location is needed, not the open file.
        if fp is not None: fp.close()
    except ImportError:
        raise ImportError('No Python source file found.')

    if typ == imp.PY_SOURCE:
        return filename
    elif typ == imp.PY_COMPILED:
        # See if we can find a corresponding non-compiled version.
        # (The dot is escaped so that only a real '.py?' extension
        # is rewritten.)
        filename = re.sub(r'\.py\w$', '.py', filename)
        if not os.path.exists(filename):
            raise ImportError('No Python source file found.')
        return filename
    elif typ == imp.PKG_DIRECTORY:
        # Both candidates are built from the package directory; the
        # .pyw name must not be appended to the .py path.
        package_dir = filename
        filename = os.path.join(package_dir, '__init__.py')
        if not os.path.exists(filename):
            filename = os.path.join(package_dir, '__init__.pyw')
            if not os.path.exists(filename):
                raise ImportError('No package file found.')
        return filename
    elif typ == imp.C_BUILTIN:
        raise ImportError('No Python source file for builtin modules.')
    elif typ == imp.C_EXTENSION:
        raise ImportError('No Python source file for c extensions.')
    else:
        raise ImportError('No Python source file found.')
468 469 #///////////////////////////////////////////////////////////////// 470 #{ File tokenization loop 471 #///////////////////////////////////////////////////////////////// 472
473 -def process_file(module_doc):
474 """ 475 Read the given C{ModuleDoc}'s file, and add variables 476 corresponding to any objects defined in that file. In 477 particular, read and tokenize C{module_doc.filename}, and 478 process each logical line using L{process_line()}. 479 """ 480 # Keep track of the current line number: 481 lineno = None 482 483 # Use this list to collect the tokens on a single logical line: 484 line_toks = [] 485 486 # This list contains one APIDoc for each indentation level. 487 # The first element is the APIDoc for the module, and each 488 # subsequent element is the APIDoc for the object at that 489 # indentation level. The final element of the list is the 490 # C{APIDoc} for the entity that we're currently processing. 491 parent_docs = [module_doc] 492 493 # The APIDoc for the object that was defined by the previous 494 # line, if any; or None otherwise. This is used to update 495 # parent_docs when we encounter an indent; and to decide what 496 # object (if any) is described by a docstring. 497 prev_line_doc = module_doc 498 499 # A list of comments that occur before or on the current 500 # logical line, used to build the comment docstring. Each 501 # element is a tuple (comment_text, comment_lineno). 502 comments = [] 503 504 # A list of decorator lines that occur before the current 505 # logical line. This is used so we can process a function 506 # declaration line and its decorators all at once. 507 decorators = [] 508 509 # A list of group names, one for each indentation level. This is 510 # used to keep track groups that are defined by comment markers 511 # START_GROUP_MARKER and END_GROUP_MARKER. 512 groups = [None] 513 514 # When we encounter a comment start group marker, set this to the 515 # name of the group; but wait until we're ready to process the 516 # next line before we actually set groups[-1] to this value. 
This 517 # is necessary because at the top of a block, the tokenizer gives 518 # us comments before the INDENT token; but if we encounter a group 519 # start marker at the top of a block, then we want it to apply 520 # inside that block, not outside it. 521 start_group = None 522 523 # Check if the source file declares an encoding. 524 encoding = get_module_encoding(module_doc.filename) 525 526 # The token-eating loop: 527 try: 528 module_file = codecs.open(module_doc.filename, 'rU', encoding) 529 except LookupError: 530 log.warning("Unknown encoding %r for %s; using the default" 531 "encoding instead (iso-8859-1)" % 532 (encoding, module_doc.filename)) 533 encoding = 'iso-8859-1' 534 module_file = codecs.open(module_doc.filename, 'rU', encoding) 535 tok_iter = tokenize.generate_tokens(module_file.readline) 536 for toktype, toktext, (srow,scol), (erow,ecol), line_str in tok_iter: 537 # BOM encoding marker: ignore. 538 if (toktype == token.ERRORTOKEN and 539 (toktext == u'\ufeff' or 540 toktext.encode(encoding) == '\xef\xbb\xbf')): 541 pass 542 543 # Error token: abort 544 elif toktype == token.ERRORTOKEN: 545 raise ParseError('Error during parsing: invalid syntax ' 546 '(%s, line %d, char %d: %r)' % 547 (module_doc.filename, srow, scol, toktext)) 548 549 # Indent token: update the parent_doc stack. 550 elif toktype == token.INDENT: 551 if prev_line_doc is None: 552 parent_docs.append(parent_docs[-1]) 553 else: 554 parent_docs.append(prev_line_doc) 555 groups.append(None) 556 557 # Dedent token: update the parent_doc stack. 558 elif toktype == token.DEDENT: 559 if line_toks == []: 560 parent_docs.pop() 561 groups.pop() 562 else: 563 # This *should* only happen if the file ends on an 564 # indented line, with no final newline. 565 # (otherwise, this is the wrong thing to do.) 
566 pass 567 568 # Line-internal newline token: if we're still at the start of 569 # the logical line, and we've seen one or more comment lines, 570 # then discard them: blank lines are not allowed between a 571 # comment block and the thing it describes. 572 elif toktype == tokenize.NL: 573 if comments and not line_toks: 574 log.warning('Ignoring docstring comment block followed by ' 575 'a blank line in %r on line %r' % 576 (module_doc.filename, srow-1)) 577 comments = [] 578 579 # Comment token: add to comments if appropriate. 580 elif toktype == tokenize.COMMENT: 581 if toktext.startswith(COMMENT_DOCSTRING_MARKER): 582 comment_line = toktext[len(COMMENT_DOCSTRING_MARKER):].rstrip() 583 if comment_line.startswith(" "): 584 comment_line = comment_line[1:] 585 comments.append( [comment_line, srow]) 586 elif toktext.startswith(START_GROUP_MARKER): 587 start_group = toktext[len(START_GROUP_MARKER):].strip() 588 elif toktext.startswith(END_GROUP_MARKER): 589 for i in range(len(groups)-1, -1, -1): 590 if groups[i]: 591 groups[i] = None 592 break 593 else: 594 log.warning("Got group end marker without a corresponding " 595 "start marker in %r on line %r" % 596 (module_doc.filename, srow)) 597 598 # Normal token: Add it to line_toks. (If it's a non-unicode 599 # string literal, then we need to re-encode using the file's 600 # encoding, to get back to the original 8-bit data; and then 601 # convert that string with 8-bit data to a 7-bit ascii 602 # representation.) 603 elif toktype != token.NEWLINE and toktype != token.ENDMARKER: 604 if lineno is None: lineno = srow 605 if toktype == token.STRING: 606 str_prefixes = re.match('[^\'"]*', toktext).group() 607 if 'u' not in str_prefixes: 608 s = toktext.encode(encoding) 609 toktext = decode_with_backslashreplace(s) 610 line_toks.append( (toktype, toktext) ) 611 612 # Decorator line: add it to the decorators list. 
613 elif line_toks and line_toks[0] == (token.OP, '@'): 614 decorators.append(shallow_parse(line_toks)) 615 line_toks = [] 616 617 # End of line token, but nothing to do. 618 elif line_toks == []: 619 pass 620 621 # End of line token: parse the logical line & process it. 622 else: 623 if start_group: 624 groups[-1] = start_group 625 start_group = None 626 627 if parent_docs[-1] != 'skip_block': 628 try: 629 prev_line_doc = process_line( 630 shallow_parse(line_toks), parent_docs, prev_line_doc, 631 lineno, comments, decorators, encoding) 632 except ParseError, e: 633 raise ParseError('Error during parsing: invalid ' 634 'syntax (%s, line %d) -- %s' % 635 (module_doc.filename, lineno, e)) 636 except KeyboardInterrupt, e: raise 637 except Exception, e: 638 log.error('Internal error during parsing (%s, line ' 639 '%s):\n%s' % (module_doc.filename, lineno, e)) 640 raise 641 642 # grouping... 643 if groups[-1] and prev_line_doc not in (None, 'skip_block'): 644 if isinstance(prev_line_doc, VariableDoc): 645 # prev_line_doc's container will only be 646 # UNKNOWN if it's an instance variable that 647 # didn't have a doc-comment, but might still 648 # be followed by a docstring. Since we 649 # tokenize in order, we can't do lookahead to 650 # see if the variable will have a comment; but 651 # it should only be added to the container if 652 # it does. So we defer the grouping of that 653 # to be handled by process_docstring instead. 654 if prev_line_doc.container is not UNKNOWN: 655 add_to_group(prev_line_doc.container, 656 prev_line_doc, groups[-1]) 657 elif isinstance(parent_docs[-1], NamespaceDoc): 658 add_to_group(parent_docs[-1], prev_line_doc, 659 groups[-1]) 660 else: 661 prev_line_doc = None 662 663 # Reset line contents. 664 line_toks = [] 665 lineno = None 666 comments = [] 667 decorators = []
668
def add_to_group(container, api_doc, group_name):
    """
    Record in C{container.group_specs} that C{api_doc} belongs to
    the group called C{group_name}.  The entry is appended to the
    existing spec for that group, or to a new spec if the group has
    not been seen in this container before.
    """
    if container.group_specs is UNKNOWN:
        container.group_specs = []

    # Variables are listed under their own name, anything else under
    # the last component of its canonical name.
    if isinstance(api_doc, VariableDoc):
        member_name = api_doc.name
    else:
        if api_doc.canonical_name is UNKNOWN:
            log.debug('ouch', repr(api_doc))
        member_name = api_doc.canonical_name[-1]

    for (spec_name, members) in container.group_specs:
        if spec_name == group_name:
            members.append(member_name)
            return

    # No spec for this group yet: start one.
    container.group_specs.append( (group_name, [member_name]) )
685
def script_guard(line):
    """Detect the idiomatic trick C{if __name__ == "__main__":}"""
    if len(line) != 5:
        return False
    # Token positions paired with the text required there; the most
    # selective check ('__name__') comes first.
    required = ((1, '__name__'), (0, 'if'), (2, '=='), (4, ':'))
    for position, text in required:
        if line[position][1] != text:
            return False
    # The string literal is compared with its quote characters
    # stripped off.
    return line[3][1][1:-1] == '__main__'
694 695 #///////////////////////////////////////////////////////////////// 696 #{ Shallow parser 697 #///////////////////////////////////////////////////////////////// 698
def shallow_parse(line_toks):
    """
    Turn a flat list of tokens into a nested tree structure (called
    a X{token tree}).  The leaves of the tree are the original
    tokens, in their original order; every parenthesized, bracketed
    or braced group becomes a nested list that starts with its
    opening token and ends with its closing token.

    @raise ParseError: If the parentheses, braces, and brackets do
        not match, or are not balanced.
    """
    opener_for = {')': '(', ']': '[', '}': '{'}
    root = []
    # The lists that are currently being filled, innermost last.
    open_lists = [root]
    # The opening tokens that have not been closed yet.
    pending = []
    for tok in line_toks:
        toktype, toktext = tok
        if toktext in ('(', '[', '{'):
            pending.append(tok)
            open_lists.append([tok])
        elif toktext in ('}', ']', ')'):
            if not pending:
                raise ParseError('Unbalanced parens')
            opener = pending.pop()[1]
            if opener_for[toktext] != opener:
                raise ParseError('Mismatched parens')
            group = open_lists.pop()
            group.append(tok)
            open_lists[-1].append(group)
        else:
            open_lists[-1].append(tok)
    if len(open_lists) != 1 or len(pending) != 0:
        raise ParseError('Unbalanced parens')
    return root
731 732 #///////////////////////////////////////////////////////////////// 733 #{ Line processing 734 #///////////////////////////////////////////////////////////////// 735 # The methods process_*() are used to handle lines. 736
def process_line(line, parent_docs, prev_line_doc, lineno,
                 comments, decorators, encoding):
    """
    Pick the handler that matches the logical line C{line} (a token
    tree), call it with all of the arguments that were passed to
    this function, and return its result.

    @return: Whatever the selected handler returns; or C{None} for a
        blank line and for lines that no handler applies to.
    """
    # Blank line.
    if not line:
        return None

    args = (line, parent_docs, prev_line_doc, lineno,
            comments, decorators, encoding)
    first = line[0]

    # Compound lines are recognized before anything else.
    if (token.OP, ':') in line[:-1]:
        return process_one_line_block(*args)
    if (token.OP, ';') in line:
        return process_multi_stmt(*args)

    # Function definitions, with or without a leading decorator.
    if first == (token.NAME, 'def') or first == (token.OP, '@'):
        return process_funcdef(*args)

    # Statements that are identified by their first keyword.
    if first == (token.NAME, 'class'):
        return process_classdef(*args)
    if first == (token.NAME, 'import'):
        return process_import(*args)
    if first == (token.NAME, 'from'):
        return process_from_import(*args)
    if first == (token.NAME, 'del'):
        return process_del(*args)

    # A line consisting of a single string is a docstring.
    if len(line) == 1 and first[0] == token.STRING:
        return process_docstring(*args)

    if (token.OP, '=') in line:
        return process_assignment(*args)

    if first[0] == token.NAME and first[1] in CONTROL_FLOW_KEYWORDS:
        return process_control_flow_line(*args)

    return None
772 # [xx] do something with control structures like for/if? 773 774 #///////////////////////////////////////////////////////////////// 775 # Line handler: control flow 776 #///////////////////////////////////////////////////////////////// 777 778 CONTROL_FLOW_KEYWORDS = [ 779 #: A list of the control flow keywords. If a line begins with 780 #: one of these keywords, then it should be handled by 781 #: C{process_control_flow_line}. 782 'if', 'elif', 'else', 'while', 'for', 'try', 'except', 'finally'] 783
def process_control_flow_line(line, parent_docs, prev_line_doc,
                              lineno, comments, decorators, encoding):
    """
    Handle a line that starts with a control flow keyword, and
    decide whether the block it introduces should be examined.
    For C{for} lines (when C{PARSE_FOR_BLOCKS} is set), a variable
    is also created for the loop variable.

    @return: C{None} if the block should be processed in the context
        we are already in; or C{'skip_block'} if its contents should
        be ignored.
    """
    keyword = line[0][1]

    # A 'for' block defines its loop variable.
    if keyword == 'for' and PARSE_FOR_BLOCKS:
        target_toks = split_on(line[1:], (token.NAME, 'in'))[0]
        loopvar_name = parse_dotted_name(target_toks)
        parent = get_lhs_parent(loopvar_name, parent_docs)
        if parent is not None:
            var_doc = VariableDoc(name=loopvar_name[-1], is_alias=False,
                                  is_imported=False, is_instvar=False,
                                  docs_extracted_by='parser')
            set_variable(parent, var_doc)

    # Each keyword, together with the switch that controls it.
    switches = (('if', PARSE_IF_BLOCKS),
                ('elif', PARSE_ELSE_BLOCKS),
                ('else', PARSE_ELSE_BLOCKS),
                ('while', PARSE_WHILE_BLOCKS),
                ('for', PARSE_FOR_BLOCKS),
                ('try', PARSE_TRY_BLOCKS),
                ('except', PARSE_EXCEPT_BLOCKS),
                ('finally', PARSE_FINALLY_BLOCKS))
    for block_keyword, enabled in switches:
        if keyword == block_keyword and enabled:
            # The body of an 'if __name__ == "__main__":' guard is
            # never examined.
            if keyword == 'if' and script_guard(line):
                break
            # "None" means: process the block using the same context
            # that we were already in.
            return None

    # 'skip_block' means: ignore the contents of this block.
    return 'skip_block'
814 815 #///////////////////////////////////////////////////////////////// 816 # Line handler: imports 817 #///////////////////////////////////////////////////////////////// 818 # [xx] I could optionally add ValueDoc's for the imported 819 # variables with proxy_for set to the imported source; but 820 # I don't think I gain much of anything by doing so. 821
def process_import(line, parent_docs, prev_line_doc, lineno,
                   comments, decorators, encoding):
    """
    Handle an C{import} statement, registering one imported variable
    for every comma-separated clause.  Nothing is done unless the
    innermost parent is a namespace.

    @raise ParseError: If a clause contains more than one C{as}, or
        if C{as} is not followed by exactly one identifier.
    """
    if not isinstance(parent_docs[-1], NamespaceDoc):
        return

    for clause in split_on(line[1:], (token.OP, ',')):
        pieces = split_on(clause, (token.NAME, 'as'))

        # import a.b.c
        if len(pieces) == 1:
            _import_var(parse_dotted_name(pieces[0]), parent_docs)
            continue

        if len(pieces) != 2:
            raise ParseError('Multiple "as" tokens in import')

        # import a.b.c as d
        source_toks, alias_toks = pieces
        if len(alias_toks) != 1:
            raise ParseError('Expected identifier after "as"')
        src_name = parse_dotted_name(source_toks)
        var_name = parse_name(alias_toks[0])
        _import_var_as(src_name, var_name, parent_docs)
841
def process_from_import(line, parent_docs, prev_line_doc, lineno,
                        comments, decorators, encoding):
    """
    The line handler for from-import statements, such as:

        >>> from os.path import join, split as s

    Three forms are distinguished:
      - C{from __future__ import ...} is ignored.
      - C{from m import *} is delegated to
        L{_process_fromstar_import}.
      - Otherwise each imported name is delegated to
        L{_import_var_as}.  The source module name is parsed with
        C{parent_name} set, so that leading dots are resolved
        relative to the containing namespace (PEP 328).

    Nothing is done unless the innermost context is a
    C{NamespaceDoc}.

    @param line: The token tree for the statement; C{line[0]} (the
        C{from} keyword) is skipped.
    @param parent_docs: The stack of containing docs, innermost last.
    @raise ParseError: If the statement does not have the shape
        C{from <lhs> import <rhs>}, or if one of the imported items
        is neither C{<name>} nor C{<name> as <name>}.
    @return: C{None}
    """
    if not isinstance(parent_docs[-1], NamespaceDoc): return

    pieces = split_on(line[1:], (token.NAME, 'import'))
    if len(pieces) != 2 or not pieces[0] or not pieces[1]:
        raise ParseError("Bad from-import")
    lhs, rhs = pieces

    # The RHS might be parenthesized, as specified by PEP 328:
    # http://www.python.org/peps/pep-0328.html
    if (len(rhs) == 1 and isinstance(rhs[0], list) and
        rhs[0][0] == (token.OP, '(') and rhs[0][-1] == (token.OP, ')')):
        rhs = rhs[0][1:-1]

    # >>> from __future__ import nested_scopes
    if lhs == [(token.NAME, '__future__')]:
        return

    # >>> from sys import *
    elif rhs == [(token.OP, '*')]:
        src_name = parse_dotted_name(lhs)
        _process_fromstar_import(src_name, parent_docs)

    # >>> from os.path import join, split
    else:
        # Allow relative imports in this case, as per PEP 328
        src_name = parse_dotted_name(lhs,
                        parent_name=parent_docs[-1].canonical_name)
        parts = split_on(rhs, (token.OP, ','))
        for part in parts:
            # from m import x
            if len(part) == 1:
                var_name = parse_name(part[0])
                _import_var_as(DottedName(src_name, var_name),
                               var_name, parent_docs)

            # from m import x as y
            elif len(part) == 3 and part[1] == (token.NAME, 'as'):
                orig_name = parse_name(part[0])
                var_name = parse_name(part[2])
                _import_var_as(DottedName(src_name, orig_name),
                               var_name, parent_docs)

            else:
                # The exception used to be constructed but never
                # raised, so malformed items were silently dropped.
                raise ParseError("Bad from-import")
888
def _process_fromstar_import(src, parent_docs):
    """
    Handle a statement of the form:

        >>> from <src> import *

    If either L{IMPORT_HANDLING} or L{IMPORT_STAR_HANDLING} is
    C{'parse'}, then C{M{<src>}} is looked up with C{_find()}; when
    that yields a C{ModuleDoc}, an imported variable is created in
    C{parent_docs[-1]} for each of its public, non-dunder variables.
    The variable's value is copied over only when L{IMPORT_HANDLING}
    is C{'parse'}.

    If L{IMPORT_STAR_HANDLING} is C{'introspect'}, then C{M{<src>}}
    is imported instead, and the list of exports is taken from its
    C{__all__} (or, failing that, from the names in C{dir()} that do
    not start with an underscore).

    @param src: The dotted name of the module being imported from.
    @param parent_docs: The stack of containing docs, innermost last.
    @return: C{None}
    """
    # This is redundant: already checked by caller.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    # If src is package-local, then convert it to a global name.
    src = _global_name(src, parent_docs)

    # Record the import
    parent_docs[0].imports.append(src) # mark that it's .*??

    # [xx] add check for if we already have the source docs in our
    # cache??

    if (IMPORT_HANDLING == 'parse' or
        IMPORT_STAR_HANDLING == 'parse'): # [xx] is this ok?
        try:
            module_doc = _find(src)
        except ImportError:
            module_doc = None
        if isinstance(module_doc, ModuleDoc):
            for name, imp_var in module_doc.variables.items():
                # [xx] this is not exactly correct, but close.  It
                # does the wrong thing if a __var__ is explicitly
                # listed in __all__.
                if not imp_var.is_public:
                    continue
                if name.startswith('__') and name.endswith('__'):
                    continue
                var_doc = _add_import_var(DottedName(src, name), name,
                                          namespace)
                if IMPORT_HANDLING == 'parse':
                    var_doc.value = imp_var.value

    # Independently of the above: when configured to introspect,
    # import the module and create one variable per exported name.
    if IMPORT_STAR_HANDLING != 'introspect':
        return
    # The bare except is deliberate best-effort: whatever importing
    # the module raises, we just give up on it.
    try:
        module = __import__(str(src), {}, {}, [0])
    except:
        return # We couldn't import it.
    if module is None:
        return # We couldn't import it.

    if hasattr(module, '__all__'):
        exported = list(module.__all__)
    else:
        exported = [n for n in dir(module) if not n.startswith('_')]
    for name in exported:
        _add_import_var(DottedName(src, name), name, namespace)
945
def _import_var(name, parent_docs):
    """
    Handle a statement of the form:

        >>> import <name>

    If L{IMPORT_HANDLING} is C{'parse'} and C{_find()} can locate
    the imported value, then a single variable is created in
    C{parent_docs[-1]} for the first identifier of C{name}, and its
    value is set to C{_find(DottedName(name[0]))}.

    Otherwise, a variable with no value is added for the imported
    name.  For cases like C{'import a.b'}, more than one variable is
    created: a variable C{'a'} in C{parent_docs[-1]} containing a
    proxy C{ModuleDoc}, and a variable C{'b'} inside that proxy.

    @param name: The dotted name that appears in the import statement.
    @param parent_docs: The stack of containing docs, innermost last.
    @return: C{None}
    """
    # This is redundant: already checked by caller.
    if not isinstance(parent_docs[-1], NamespaceDoc):
        return

    # If name is package-local, then convert it to a global name;
    # src_prefix is whatever that conversion put in front of `name`.
    src = _global_name(name, parent_docs)
    src_prefix = src[:len(src)-len(name)]

    # Record the import
    parent_docs[0].imports.append(name)

    # [xx] add check for if we already have the source docs in our
    # cache??

    if IMPORT_HANDLING == 'parse':
        # Check to make sure that we can actually find the value.
        try:
            val_doc = _find(src)
        except ImportError:
            val_doc = None
        if val_doc is not None:
            # We found it; but it's not the value itself we want to
            # import, but the module containing it; so import that
            # module (=top_mod) and create a variable for it.
            first = name[0]
            top_mod = src_prefix+first
            var_doc = _add_import_var(top_mod, first, parent_docs[-1])
            var_doc.value = _find(DottedName(first))
            return

    # If we got here, then either IMPORT_HANDLING='link', or we
    # did not successfully find the value's docs by parsing; use
    # a variable with an UNKNOWN value.

    # Create any necessary intermediate proxy module values.
    # NOTE(review): the isinstance() test below looks at the entry in
    # `variables` itself rather than at its `.value`; presumably that
    # entry is a VariableDoc, in which case the proxy is re-created
    # every time -- confirm before relying on it.
    container = parent_docs[-1]
    for depth, identifier in enumerate(name[:-1]):
        partial = name[:depth+1]
        if (identifier not in container.variables or
            not isinstance(container.variables[identifier], ModuleDoc)):
            var_doc = _add_import_var(partial, identifier, container)
            var_doc.value = ModuleDoc(variables={}, sort_spec=[],
                                      proxy_for=src_prefix+partial,
                                      submodules={},
                                      docs_extracted_by='parser')
        container = container.variables[identifier].value

    # Add the variable to the container.
    _add_import_var(src, name[-1], container)
1004
def _import_var_as(src, name, parent_docs):
    """
    Handle a statement of the form:

        >>> import src as name

    If L{IMPORT_HANDLING} is C{'parse'} and C{_find()} can locate
    C{src}, then a variable named C{name} holding the found value is
    set in C{parent_docs[-1]}.

    Otherwise, a variable without a value is created, with its
    C{imported_from} attribute pointing to the imported object.

    @param src: The dotted name of the object being imported.
    @param name: The local name the object is bound to.
    @param parent_docs: The stack of containing docs, innermost last.
    @return: C{None}
    """
    # This is redundant: already checked by caller.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    # If src is package-local, then convert it to a global name.
    src = _global_name(src, parent_docs)

    # Record the import
    parent_docs[0].imports.append(src)

    val_doc = None
    if IMPORT_HANDLING == 'parse':
        # Parse the value; a failed lookup just means "not found".
        try:
            val_doc = _find(src)
        except ImportError:
            val_doc = None

    if val_doc is None:
        # Either IMPORT_HANDLING='link', or we did not successfully
        # find the value's docs by parsing; use a variable with a
        # proxy value.
        _add_import_var(src, name, namespace)
        return

    var_doc = VariableDoc(name=name, value=val_doc,
                          is_imported=True, is_alias=False,
                          imported_from=src,
                          docs_extracted_by='parser')
    set_variable(namespace, var_doc)
1042
def _add_import_var(src, name, container):
    """
    Create an imported variable named C{name}, with
    C{imported_from=src}, and store it in C{container} via
    C{set_variable()}.

    @return: The newly created C{VariableDoc}.
    """
    imported = VariableDoc(name=name,
                           is_imported=True,
                           is_alias=False,
                           imported_from=src,
                           docs_extracted_by='parser')
    set_variable(container, imported)
    return imported
1052
def _global_name(name, parent_docs):
    """
    If the given name is package-local (relative to the current
    context, as determined by C{parent_docs}), then convert it
    to a global name.

    The search starts at the package containing C{parent_docs[0]}
    (or at C{parent_docs[0]} itself if it is a package) and walks
    outwards; the first package in whose C{path}
    C{imp.find_module()} finds a module called C{name[0]} wins.

    @return: C{package.canonical_name + name} for the first matching
        package, or C{name} unchanged if no package matches.
    """
    # Get the containing package from parent_docs.
    module_doc = parent_docs[0]
    if module_doc.is_package:
        package = module_doc
    else:
        package = module_doc.package

    # Check each package (from closest to furthest) to see if it
    # contains a module named name[0]; if so, then treat `name` as
    # relative to that package.
    while package not in (None, UNKNOWN):
        try:
            fp = imp.find_module(name[0], package.path)[0]
        except ImportError:
            # No submodule found here; try the next package up.
            package = package.package
        else:
            # A submodule was found; we only wanted to know that it
            # exists, so release the file and return its name.
            if fp is not None:
                fp.close()
            return package.canonical_name + name

    # We didn't find any package containing `name`; so just return
    # `name` as-is.
    return name
1082 1083 #///////////////////////////////////////////////////////////////// 1084 # Line handler: assignment 1085 #///////////////////////////////////////////////////////////////// 1086
def process_assignment(line, parent_docs, prev_line_doc, lineno,
                       comments, decorators, encoding):
    """
    The line handler for assignment statements, such as:

        >>> x = y = compute()

    The line is split on C{'='}: the last piece is the right-hand
    side and every earlier piece is a target.  Targets are processed
    right to left; each one that is a simple dotted name gets a
    C{VariableDoc} holding the right-hand side's value, and all but
    the rightmost are marked as aliases.  If a target is a complex
    expression (e.g. tuple unpacking), value-less C{VariableDoc}s
    are created for every dotted name found by L{dotted_names_in}.

    Assignments that are neither instance variables (see
    L{lhs_is_instvar}) nor inside a C{NamespaceDoc} are ignored.

    @return: The new C{VariableDoc} when there is exactly one target
        and it is either a single identifier or an instance
        variable (for use as the next C{prev_line_doc}); otherwise
        C{None}.
    """
    # Divide the assignment statement into its pieces.
    pieces = split_on(line, (token.OP, '='))
    targets, rhs = pieces[:-1], pieces[-1]

    # Decide whether the variable is an instance variable or not.
    is_instvar = lhs_is_instvar(targets, parent_docs)

    # If it's not an instance var, and we're not in a namespace,
    # then it's just a local var -- so ignore it.
    if not is_instvar and not isinstance(parent_docs[-1], NamespaceDoc):
        return None

    # Evaluate the right hand side; for instance variables the
    # value is discarded.
    if is_instvar:
        rhs_val, is_alias = UNKNOWN, False
    else:
        rhs_val, is_alias = rhs_to_valuedoc(rhs, parent_docs)

    # Assign the right hand side value to each target, rightmost
    # first.
    targets.reverse()
    single_target = (len(targets) == 1)
    for target in targets:
        # Try treating the target as a simple dotted name.
        try:
            target_name = parse_dotted_name(target)
        except:
            target_name = None

        if target_name is None:
            # The target must be a complex expression; use
            # dotted_names_in() to decide what variables it contains,
            # and create VariableDoc's for all of them (with UNKNOWN
            # value).  Note that dotted_names_in() is given (and
            # empties) the whole `targets` list, which also ends
            # this loop.
            for dotted in dotted_names_in(targets):
                owner = get_lhs_parent(dotted, parent_docs)
                if owner is None:
                    continue
                set_variable(owner,
                             VariableDoc(name=dotted[-1],
                                         is_imported=False,
                                         is_alias=is_alias,
                                         is_instvar=is_instvar,
                                         docs_extracted_by='parser'),
                             True)
            is_alias = True
            continue

        owner = get_lhs_parent(target_name, parent_docs)
        # Nowhere to put it, or a special class variable: skip.
        if owner is None or target_name[-1] == '__slots__':
            continue

        # Create the VariableDoc.
        var_doc = VariableDoc(name=target_name[-1], value=rhs_val,
                              is_imported=False, is_alias=is_alias,
                              is_instvar=is_instvar,
                              docs_extracted_by='parser')

        # Extract a docstring from the comments, when present,
        # but only if there's a single target.
        if single_target:
            add_docstring_from_comments(var_doc, comments)

        # Assign the variable to the containing namespace,
        # *unless* the variable is an instance variable without a
        # comment docstring.  In that case, we'll only want to add
        # it if we later discover that it's followed by a variable
        # docstring; process_docstring will then take care of adding
        # it to the containing class.  (This is a little hackish,
        # but unfortunately is necessary because we won't know if
        # this assignment line is followed by a docstring until
        # later.)
        if (not is_instvar) or comments:
            set_variable(owner, var_doc, True)

        # If it's the only var, then return the VarDoc for use
        # as the new `prev_line_doc`.
        if single_target and (len(target_name) == 1 or is_instvar):
            return var_doc

        # If we have multiple targets, then all but the rightmost
        # one are considered aliases.
        is_alias = True

    return None
1172 1173
def lhs_is_instvar(lhs_pieces, parent_docs):
    """
    Decide whether an assignment's left-hand side denotes an
    instance variable, i.e. whether it has the form
    C{<self>.<name>} inside a routine that is directly contained in
    a class.

    @param lhs_pieces: The list of assignment targets (token lists).
    @param parent_docs: The stack of containing docs, innermost last.
    @return: C{True} if the innermost context is a C{RoutineDoc}
        with known positional args, there is exactly one target
        consisting of the routine's first positional arg, a C{'.'}
        and a name token, and a C{ClassDoc} is found when walking up
        C{parent_docs}; C{False} otherwise.
    """
    routine = parent_docs[-1]
    if not isinstance(routine, RoutineDoc):
        return False

    # Make sure that lhs_pieces is <self>.<name>, where <self> is
    # the name of the first arg to the containing routinedoc, and
    # <name> is a simple name.
    posargs = routine.posargs
    if posargs is UNKNOWN:
        return False
    if len(lhs_pieces) != 1 or len(posargs) == 0:
        return False
    target = lhs_pieces[0]
    if len(target) != 3:
        return False
    if target[0] != (token.NAME, posargs[0]):
        return False
    if target[1] != (token.OP, '.'):
        return False
    if target[2][0] != token.NAME:
        return False

    # Make sure we're in an instance method, and not a
    # module-level function: walking outwards, the first doc that
    # differs from the routine must be a class.
    for doc in parent_docs[::-1]:
        if isinstance(doc, ClassDoc):
            return True
        if doc != parent_docs[-1]:
            return False
    return False
1196
def rhs_to_valuedoc(rhs, parent_docs):
    """
    Build a value doc for the right-hand side of an assignment.

    Three cases are tried in order:
      1. C{rhs} is a dotted name that C{lookup_value()} can resolve:
         the existing value is returned and flagged as an alias.
      2. C{rhs} looks like C{name(...)} and its argument evaluates
         (recursively) to a C{RoutineDoc}: the result of
         L{apply_decorator} is returned.
      3. Anything else: a C{GenericValueDoc} whose C{parse_repr} is
         the pretty-printed source.

    @param rhs: The token tree of the right-hand side.
    @param parent_docs: The stack of containing docs, innermost last.
    @return: A tuple C{(value_doc, is_alias)}.
    """
    # Dotted variable:
    try:
        known = lookup_value(parse_dotted_name(rhs), parent_docs)
        if known is not None and known is not UNKNOWN:
            return known, True
    except ParseError:
        pass

    # Decorators:
    looks_like_call = (len(rhs)==2 and rhs[0][0] == token.NAME and
                       isinstance(rhs[1], list))
    if looks_like_call:
        # rhs[1] is the parenthesized argument; drop the parens.
        arg_val, _ = rhs_to_valuedoc(rhs[1][1:-1], parent_docs)
        if isinstance(arg_val, RoutineDoc):
            wrapped = apply_decorator(DottedName(rhs[0][1]), arg_val)
            wrapped.canonical_name = UNKNOWN
            wrapped.parse_repr = pp_toktree(rhs)
            return wrapped, False

    # Nothing else to do: make a val with the source as its repr.
    generic = GenericValueDoc(parse_repr=pp_toktree(rhs), toktree=rhs,
                              defining_module=parent_docs[0],
                              docs_extracted_by='parser')
    return generic, False
1221
def get_lhs_parent(lhs_name, parent_docs):
    """
    Find the doc that should contain the variable named by an
    assignment target.

    @param lhs_name: The target, as a L{DottedName}.
    @param parent_docs: The stack of containing docs, innermost last.
    @return: The nearest enclosing C{ClassDoc} if the innermost
        context is a C{RoutineDoc}; otherwise C{parent_docs[-1]} for
        a single-identifier name; otherwise whatever
        C{lookup_value()} returns for the name's container.
    @raise ValueError: If the innermost context is a C{RoutineDoc}
        but no C{ClassDoc} encloses it.
    """
    assert isinstance(lhs_name, DottedName)

    innermost = parent_docs[-1]

    # For instance vars inside an __init__ method:
    if isinstance(innermost, RoutineDoc):
        for doc in parent_docs[::-1]:
            if isinstance(doc, ClassDoc):
                return doc
        raise ValueError("%r is not a namespace or method" %
                         parent_docs[-1])

    # For local variables:
    if len(lhs_name) == 1:
        return innermost

    # For non-local variables:
    return lookup_value(lhs_name.container(), parent_docs)
1240 1241 #///////////////////////////////////////////////////////////////// 1242 # Line handler: single-line blocks 1243 #///////////////////////////////////////////////////////////////// 1244
def process_one_line_block(line, parent_docs, prev_line_doc, lineno,
                           comments, decorators, encoding):
    """
    The line handler for single-line blocks, such as:

        >>> def f(x): return x*2

    This handler calls L{process_line} twice: once for the tokens
    up to and including the first top-level colon, and once for the
    remaining tokens.  The comment docstring and the decorators are
    applied to the first part only; the second part is processed
    with the first part's result pushed onto C{parent_docs} and used
    as its C{prev_line_doc}.

    @return: Whatever L{process_line} returned for the first part.
    """
    colon = line.index((token.OP, ':'))
    header, body = line[:colon+1], line[colon+1:]

    header_doc = process_line(header, parent_docs, prev_line_doc,
                              lineno, comments, decorators, encoding)
    # The body's own result is not needed.
    process_line(body, parent_docs+[header_doc],
                 header_doc, lineno, None, [], encoding)
    return header_doc
1264 1265 #///////////////////////////////////////////////////////////////// 1266 # Line handler: semicolon-separated statements 1267 #///////////////////////////////////////////////////////////////// 1268
def process_multi_stmt(line, parent_docs, prev_line_doc, lineno,
                       comments, decorators, encoding):
    """
    The line handler for semicolon-separated statements, such as:

        >>> x=1; y=2; z=3

    This handler calls L{process_line} once for each statement.
    The comment docstring is not passed on to any of the
    sub-statements; the decorators are passed to the first
    sub-statement only, and each sub-statement's result becomes the
    C{prev_line_doc} of the next one.

    @return: C{None}
    """
    pending_decorators = decorators
    previous = prev_line_doc
    for statement in split_on(line, (token.OP, ';')):
        if not statement:
            continue
        previous = process_line(statement, parent_docs, previous,
                                lineno, None, pending_decorators,
                                encoding)
        pending_decorators = []
    return None
1288 1289 #///////////////////////////////////////////////////////////////// 1290 # Line handler: delete statements 1291 #///////////////////////////////////////////////////////////////// 1292
def process_del(line, parent_docs, prev_line_doc, lineno,
                comments, decorators, encoding):
    """
    The line handler for delete statements, such as:

        >>> del x, y.z

    This handler calls L{del_variable} for each dotted variable in
    the variable list.  The variable list may be nested.  Complex
    expressions in the variable list (such as C{x[3]}) are ignored.

    @param line: The token tree for the statement; C{line[0]} (the
        C{del} keyword) is skipped.
    @return: C{None}
    """
    # If we're not in a namespace, then ignore it.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    targets = split_on(line[1:], (token.OP, ','))
    for dotted in dotted_names_in(targets):
        del_variable(namespace, dotted)

    return None
1314 1315 #///////////////////////////////////////////////////////////////// 1316 # Line handler: docstrings 1317 #///////////////////////////////////////////////////////////////// 1318
def process_docstring(line, parent_docs, prev_line_doc, lineno,
                      comments, decorators, encoding):
    """
    The line handler for bare string literals.  If
    C{prev_line_doc} is not C{None}, then the string literal becomes
    that C{APIDoc}'s docstring.  If it already had a docstring (from
    comment docstrings), a warning is logged and the old docstring
    is replaced.

    @param encoding: The encoding used to decode the literal when it
        is a byte string.
    @return: C{prev_line_doc} if it is an instance variable that
        this call added to its class; otherwise C{None}.
    """
    if prev_line_doc is None:
        return
    docstring = parse_string(line)

    # If the docstring is a str, then convert it to unicode.
    # According to a strict reading of PEP 263, this might not be the
    # right thing to do; but it will almost always be what the
    # module's author intended.
    if isinstance(docstring, str):
        try:
            docstring = docstring.decode(encoding)
        except UnicodeDecodeError:
            # If decoding failed, then fall back on using
            # decode_with_backslashreplace, which escapes the
            # bytes that could not be decoded.
            docstring = decode_with_backslashreplace(docstring)
            log.warning("While parsing %s: docstring is not a unicode "
                        "string, but it contains non-ascii data." %
                        prev_line_doc.canonical_name)

    # If the modified APIDoc is an instance variable, and it has
    # not yet been added to its class's C{variables} list,
    # then add it now.  This is done here, rather than in the
    # process_assignment() call that created the variable, because
    # we only want to add instance variables if they have an
    # associated docstring.
    added_instvar = False
    needs_adding = (isinstance(prev_line_doc, VariableDoc) and
                    prev_line_doc.is_instvar and
                    prev_line_doc.docstring in (None, UNKNOWN))
    if needs_adding:
        # Attach it to the nearest enclosing class, if any.
        for doc in parent_docs[::-1]:
            if isinstance(doc, ClassDoc):
                set_variable(doc, prev_line_doc, True)
                added_instvar = True
                break

    if prev_line_doc.docstring not in (None, UNKNOWN):
        log.warning("%s has both a comment-docstring and a normal "
                    "(string) docstring; ignoring the comment-"
                    "docstring." % prev_line_doc.canonical_name)

    prev_line_doc.docstring = docstring
    prev_line_doc.docstring_lineno = lineno

    # If the modified APIDoc is an instance variable, and we added it
    # to the class's variables list here, then it still needs to be
    # grouped too; so return it for use as the new "prev_line_doc."
    if added_instvar:
        return prev_line_doc
1377 1378 1379 #///////////////////////////////////////////////////////////////// 1380 # Line handler: function declarations 1381 #///////////////////////////////////////////////////////////////// 1382
def process_funcdef(line, parent_docs, prev_line_doc, lineno,
                    comments, decorators, encoding):
    """
    The line handler for function declaration lines, such as:

        >>> def f(a, b=22, (c,d)):

    This handler creates and initializes a new C{VariableDoc}
    containing a C{RoutineDoc}, adds the C{VariableDoc} to the
    containing namespace, and returns the C{RoutineDoc} (or
    whatever the decorators turned it into).

    @param line: The token tree; it must consist of exactly four
        elements, the last of which is the C{':'} token.
    @param decorators: The token trees of the decorator lines that
        preceded the definition.  This list is reversed in place.
    @raise ParseError: If C{line} does not have the expected shape.
    @return: The routine's value doc, or C{None} if the innermost
        context is not a C{NamespaceDoc}.
    """
    # Check syntax.
    if len(line) != 4 or line[3] != (token.OP, ':'):
        raise ParseError("Bad function definition line")

    # If we're not in a namespace, then ignore it.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    # Get the function's name
    func_name = parse_name(line[1])
    canonical_name = DottedName(namespace.canonical_name, func_name)

    # Create the function's RoutineDoc.
    func_doc = RoutineDoc(canonical_name=canonical_name,
                          defining_module=parent_docs[0],
                          lineno=lineno, docs_extracted_by='parser')

    # Process the signature.
    init_arglist(func_doc, line[2])

    # If the preceding comment includes a docstring, then add it.
    add_docstring_from_comments(func_doc, comments)

    # Apply any decorators, innermost (i.e. last written) first.
    # decorator[0] is skipped in every token tree below.
    func_doc.decorators = [pp_toktree(deco[1:]) for deco in decorators]
    decorators.reverse()
    for decorator in decorators:
        try:
            deco_name = parse_dotted_name(decorator[1:])
        except ParseError:
            deco_name = None

        # Work out a repr for the decorated value *before* the
        # decorator replaces func_doc.
        if func_doc.canonical_name is not UNKNOWN:
            wrapped_repr = func_doc.canonical_name
        elif func_doc.parse_repr not in (None, UNKNOWN):
            # [xx] this case should be improved.. when will func_doc
            # have a known parse_repr??
            wrapped_repr = func_doc.parse_repr
        else:
            wrapped_repr = None

        if wrapped_repr is None:
            deco_repr = UNKNOWN
        else:
            deco_repr = '%s(%s)' % (pp_toktree(decorator[1:]),
                                    wrapped_repr)

        func_doc = apply_decorator(deco_name, func_doc)
        func_doc.parse_repr = deco_repr
        # [XX] The canonical name is deliberately left alone here:
        # resetting it to UNKNOWN causes the grouping code to break,
        # and presumably it should remain valid if we're just
        # applying a standard decorator.

    # Add a variable to the containing namespace.
    var_doc = VariableDoc(name=func_name, value=func_doc,
                          is_imported=False, is_alias=False,
                          docs_extracted_by='parser')
    set_variable(namespace, var_doc)

    # Return the new ValueDoc.
    return func_doc
1451
def apply_decorator(decorator_name, func_doc):
    """
    Return the value doc that results from applying the decorator
    named C{decorator_name} to C{func_doc}.

    C{staticmethod} and C{classmethod} produce a C{StaticMethodDoc}
    or C{ClassMethodDoc} built from C{func_doc}'s attributes.  Any
    other decorator is handled according to
    L{DEFAULT_DECORATOR_BEHAVIOR}: C{'transparent'} returns a copy
    of C{func_doc}, C{'opaque'} returns a fresh C{GenericValueDoc}.

    @param decorator_name: The decorator's L{DottedName}, or C{None}.
    @raise ValueError: If L{DEFAULT_DECORATOR_BEHAVIOR} has neither
        of the two supported values.
    """
    # [xx] what if func_doc is not a RoutineDoc?
    if decorator_name == DottedName('staticmethod'):
        return StaticMethodDoc(**func_doc.__dict__)
    if decorator_name == DottedName('classmethod'):
        return ClassMethodDoc(**func_doc.__dict__)

    if DEFAULT_DECORATOR_BEHAVIOR == 'transparent':
        # Make a copy, of the same class.
        return func_doc.__class__(**func_doc.__dict__)
    if DEFAULT_DECORATOR_BEHAVIOR == 'opaque':
        return GenericValueDoc(docs_extracted_by='parser')

    raise ValueError('Bad value for DEFAULT_DECORATOR_BEHAVIOR')
1464
def init_arglist(func_doc, arglist):
    """
    Fill in the signature attributes of C{func_doc} (C{posargs},
    C{posarg_defaults}, C{vararg} and C{kwarg}) from a parenthesized
    argument list.

    @param func_doc: The doc whose signature attributes are set.
    @param arglist: The token tree of the argument list, including
        the surrounding parentheses.
    @raise ParseError: If C{arglist} is not a parenthesized list, if
        C{*} or C{**} is not followed by a single name, or if a
        positional argument is malformed.
    """
    if not isinstance(arglist, list) or arglist[0] != (token.OP, '('):
        raise ParseError("Bad argument list")

    # Initialize to defaults.
    func_doc.posargs = []
    func_doc.posarg_defaults = []
    func_doc.vararg = None
    func_doc.kwarg = None

    # Divide the arglist into individual args.
    args = split_on(arglist[1:-1], (token.OP, ','))

    # Keyword argument (**kw); it can only come last.
    if args and args[-1][0] == (token.OP, '**'):
        last = args[-1]
        if len(last) != 2 or last[1][0] != token.NAME:
            raise ParseError("Expected name after ** in argument list")
        func_doc.kwarg = last[1][1]
        args.pop()

    # Vararg argument (*args); last of whatever remains.
    if args and args[-1][0] == (token.OP, '*'):
        last = args[-1]
        if len(last) != 2 or last[1][0] != token.NAME:
            raise ParseError("Expected name after * in argument list")
        func_doc.vararg = last[1][1]
        args.pop()

    # Positional arguments.
    for arg in args:
        func_doc.posargs.append(parse_funcdef_arg(arg[0]))

        # No default value.
        if len(arg) == 1:
            func_doc.posarg_defaults.append(None)
            continue

        # Anything after the name must be '=' plus an expression.
        if arg[1] != (token.OP, '=') or len(arg) == 2:
            raise ParseError("Bad argument list")

        default_repr = pp_toktree(arg[2:], 'tight')
        func_doc.posarg_defaults.append(
            GenericValueDoc(parse_repr=default_repr,
                            docs_extracted_by='parser'))
1504 1505 #///////////////////////////////////////////////////////////////// 1506 # Line handler: class declarations 1507 #///////////////////////////////////////////////////////////////// 1508
def process_classdef(line, parent_docs, prev_line_doc, lineno,
                     comments, decorators, encoding):
    """
    The line handler for class declaration lines, such as:

        >>> class Foo(Bar, Baz):

    This handler creates and initializes a new C{VariableDoc}
    containing a C{ClassDoc}, adds the C{VariableDoc} to the
    containing namespace, and returns the C{ClassDoc}.  The new
    class is also registered in the C{subclasses} list of each of
    its bases that is a C{ClassDoc}.

    @param line: The token tree; three or four elements, the last of
        which is the C{':'} token.  With four elements, C{line[2]}
        is the parenthesized base list.
    @raise ParseError: If C{line} does not have the expected shape.
    @return: The new C{ClassDoc}, or C{None} if the innermost
        context is not a C{NamespaceDoc}.
    """
    # Check syntax
    if len(line)<3 or len(line)>4 or line[-1] != (token.OP, ':'):
        raise ParseError("Bad class definition line")

    # If we're not in a namespace, then ignore it.
    parent_doc = parent_docs[-1]
    if not isinstance(parent_doc, NamespaceDoc): return

    # Get the class's name
    class_name = parse_name(line[1])
    canonical_name = DottedName(parent_doc.canonical_name, class_name)

    # Create the class's ClassDoc & VariableDoc.
    class_doc = ClassDoc(variables={}, sort_spec=[],
                         bases=[], subclasses=[],
                         canonical_name=canonical_name,
                         defining_module=parent_docs[0],
                         docs_extracted_by='parser')
    var_doc = VariableDoc(name=class_name, value=class_doc,
                          is_imported=False, is_alias=False,
                          docs_extracted_by='parser')

    # Add the bases.
    if len(line) == 4:
        if (not isinstance(line[2], list) or
            line[2][0] != (token.OP, '(')):
            raise ParseError("Expected base list")
        try:
            for base_name in parse_classdef_bases(line[2]):
                class_doc.bases.append(find_base(base_name, parent_docs))
        except ParseError:
            # sys.exc_info() is used instead of binding the
            # exception in the except clause, because it is spelled
            # the same way in every Python version.
            e = sys.exc_info()[1]
            log.warning("Unable to extract the base list for %s: %s" %
                        (canonical_name, e))
            class_doc.bases = UNKNOWN
    else:
        class_doc.bases = []

    # Register ourselves as a subclass to our bases.
    if class_doc.bases is not UNKNOWN:
        for basedoc in class_doc.bases:
            if isinstance(basedoc, ClassDoc):
                # Skip the registration only if we are already the
                # most recently listed subclass; this avoids listing
                # a subclass twice when both introspecting and
                # parsing.  (The previous form of this test also
                # skipped it whenever the list was still empty, so
                # the first subclass was never recorded.)
                # [XXX] This check only works because currently
                # parsing is always performed just after
                # introspection of the same class.  A more complete
                # fix should be independent from calling order;
                # probably the subclasses list should be replaced by
                # a ClassDoc set or a {name: ClassDoc} mapping.
                if (not basedoc.subclasses
                    or basedoc.subclasses[-1].canonical_name
                       != class_doc.canonical_name):
                    basedoc.subclasses.append(class_doc)

    # If the preceding comment includes a docstring, then add it.
    add_docstring_from_comments(class_doc, comments)

    # Add the VariableDoc to our container.
    set_variable(parent_doc, var_doc)

    return class_doc
1580
def _proxy_base(**attribs):
    """
    Create an empty C{ClassDoc} (no variables, bases or subclasses)
    to stand in for a base class.

    @param attribs: Additional keyword arguments passed on to the
        C{ClassDoc} constructor.
    """
    return ClassDoc(variables={},
                    sort_spec=[],
                    bases=[],
                    subclasses=[],
                    docs_extracted_by='parser',
                    **attribs)
1584
def find_base(name, parent_docs):
    """
    Find (or make up) the value doc for a base class.

    @param name: The base's name as written in the class statement,
        as a L{DottedName}.
    @param parent_docs: The stack of containing docs, innermost last.
    @return: The value of the variable that C{name} resolves to, if
        it has one; otherwise, for an imported base, the result of
        C{parse_docs()} when L{BASE_HANDLING} is C{'parse'} and
        parsing succeeds; otherwise a proxy C{ClassDoc} made by
        L{_proxy_base}.
    """
    assert isinstance(name, DottedName)

    # Find the variable containing the base.
    base_var = lookup_variable(name, parent_docs)
    if base_var is None:
        # If we didn't find it, then it must have been imported.
        # First, check if it looks like it's contained in any
        # known imported variable:
        if len(name) > 1:
            src = lookup_name(name[0], parent_docs)
            if (src is not None and
                src.imported_from not in (None, UNKNOWN)):
                base_src = DottedName(src.imported_from, name[1:])
                base_var = VariableDoc(name=name[-1], is_imported=True,
                                       is_alias=False,
                                       imported_from=base_src,
                                       docs_extracted_by='parser')
        # Otherwise, it must have come from an "import *" statement
        # (or from magic, such as direct manipulation of the module's
        # dictionary), so we don't know where it came from.  So
        # there's nothing left but to use an empty proxy.
        if base_var is None:
            return _proxy_base(parse_repr=str(name))

    # If the variable has a value, return that value.
    if base_var.value is not UNKNOWN:
        return base_var.value

    # Not imported from anywhere we know of: all we have is the name.
    origin = base_var.imported_from
    if origin in (None, UNKNOWN):
        return _proxy_base(parse_repr=str(name))

    # If BASE_HANDLING is 'parse', try parsing the docs for the base
    # class, with the referring module's directory temporarily put
    # at the front of sys.path.
    if BASE_HANDLING == 'parse':
        saved_path = sys.path
        try:
            dirname = os.path.dirname(parent_docs[0].filename)
            sys.path = [dirname] + sys.path
            try:
                return parse_docs(name=str(base_var.imported_from))
            except ParseError:
                log.info('Unable to parse base', base_var.imported_from)
            except ImportError:
                log.info('Unable to find base', base_var.imported_from)
        finally:
            sys.path = saved_path

    # Either BASE_HANDLING='link' or parsing the base class failed;
    # return a proxy value for the base class.
    return _proxy_base(proxy_for=base_var.imported_from)
1637 1638 #///////////////////////////////////////////////////////////////// 1639 #{ Parsing 1640 #///////////////////////////////////////////////////////////////// 1641
def dotted_names_in(elt_list):
    """
    Return a list of all simple dotted names in the given
    expression.

    Elements are taken from the end of C{elt_list}.  An element that
    consists of a single bracketed group is replaced by that group's
    comma-separated contents; any other element is parsed with
    L{parse_dotted_name}, and silently skipped if that fails.

    @param elt_list: A list of token lists.  It is consumed: on
        return it is empty.
    @return: A list of L{DottedName}s.
    """
    found = []
    while elt_list:
        elt = elt_list.pop()

        # Nested list: queue up its contents (minus the brackets).
        if len(elt) == 1 and isinstance(elt[0], list):
            inner = elt[0][1:-1]
            elt_list.extend(split_on(inner, (token.OP, ',')))
            continue

        try:
            found.append(parse_dotted_name(elt))
        except ParseError:
            pass # complex expression -- ignore
    return found
1659
def parse_name(elt, strip_parens=False):
    """
    If the given token tree element is a name token, then return
    that name as a string.  Otherwise, raise ParseError.

    @param strip_parens: If true, then if elt is a single element
        enclosed in (any number of) parentheses, the parentheses
        are removed before checking it.
    @raise ParseError: If C{elt} is not a name token.
    """
    if strip_parens:
        # Peel off one layer of "( x )" at a time.
        while (isinstance(elt, list) and len(elt) == 3 and
               elt[0] == (token.OP, '(') and
               elt[-1] == (token.OP, ')')):
            elt = elt[1]

    if isinstance(elt, list) or elt[0] != token.NAME:
        raise ParseError("Bad name")
    return elt[1]
1675
def parse_dotted_name(elt_list, strip_parens=True, parent_name=None):
    """
    Convert a list of token tree elements of the form
    C{name.name.name} into a L{DottedName}.

    @param elt_list: The token tree elements.  If the first element
        is a parenthesized group, it is unwrapped I{in place}, so
        the caller's list is modified.
    @param strip_parens: Not used by this function.
    @param parent_name: canonical name of referring module, to resolve
        relative imports.
    @type parent_name: L{DottedName}
    @raise ParseError: If C{elt_list} is empty or is not an
        alternation of names and C{'.'} tokens, or if a relative
        import leaves an empty prefix and no name.
    @bug: does not handle 'x.(y).z'
    """
    if len(elt_list) == 0:
        raise ParseError("Bad dotted name")

    # Handle ((x.y).z).  (If the contents of the parens include
    # anything other than dotted names, such as (x,y), then we'll
    # catch it below and raise a ParseError.)
    while True:
        head = elt_list[0]
        if not (isinstance(head, list) and len(head) >= 3 and
                head[0] == (token.OP, '(') and
                head[-1] == (token.OP, ')')):
            break
        elt_list[:1] = head[1:-1]

    # Convert a relative import into an absolute name.
    prefix_name = None
    if parent_name is not None and elt_list[0][-1] == '.':
        # Count the leading dots.
        dots = 1
        while len(elt_list) > dots and elt_list[dots][-1] == '.':
            dots += 1

        elt_list = elt_list[dots:]
        prefix_name = parent_name[:-dots]

        # >>> from . import foo
        if not elt_list:
            if prefix_name == []:
                raise ParseError("Attempted relative import in non-package, "
                                 "or beyond toplevel package")
            return prefix_name

    # name (. name)* always has an odd number of elements.
    if len(elt_list) % 2 != 1:
        raise ParseError("Bad dotted name")

    name = DottedName(parse_name(elt_list[0], True))
    if prefix_name is not None:
        name = prefix_name + name

    # Walk the remaining (dot, identifier) pairs.
    for dot, identifier in zip(elt_list[1::2], elt_list[2::2]):
        if dot != (token.OP, '.'):
            raise ParseError("Bad dotted name")
        name = DottedName(name, parse_name(identifier, True))
    return name
1722
def split_on(elt_list, split_tok):
    """
    Split a list of token tree elements into sublists, using
    C{split_tok} as the separator.

    @param elt_list: The elements to split.
    @param split_tok: The element that separates the sublists; it is
        not included in the result.
    @return: A list of non-empty lists.  A single trailing separator
        is tolerated and does not produce an empty final sublist.
    @raise ParseError: If a separator is found when no element has
        been collected since the previous separator (or since the
        start of the list).
    """
    groups = []
    current = []
    for item in elt_list:
        if item != split_tok:
            current.append(item)
            continue
        # We hit a separator: the group before it must be non-empty.
        if not current:
            raise ParseError("Empty element from split")
        groups.append(current)
        current = []
    # Keep the final group only if it actually contains something.
    if current:
        groups.append(current)
    return groups
1734
def parse_funcdef_arg(elt):
    """
    Convert a token tree element describing a function definition
    argument into a string identifier, or (for a parenthesized
    argument containing several names) a nested list of string
    identifiers.

    @param elt: Either a C{(toktype, toktext)} name token, or a list
        representing a parenthesized group.
    @return: The identifier string, or a (possibly nested) list of
        identifier strings.  A group holding exactly one element is
        collapsed to that element's own result.
    @raise ParseError: If C{elt} is neither a name token nor a
        parenthesized group.
    """
    if not isinstance(elt, list):
        # A bare token: it must be an identifier.
        if elt[0] == token.NAME:
            return elt[1]
        raise ParseError("Bad argument -- expected name or tuple")

    if elt[0] != (token.OP, '('):
        raise ParseError("Bad argument -- expected name or tuple")

    # '(' x ')' -- parentheses around a single element.
    if len(elt) == 3:
        return parse_funcdef_arg(elt[1])

    # '(' x ',' y ... ')' -- skip the commas, recurse on the rest.
    comma = (token.OP, ',')
    return [parse_funcdef_arg(member)
            for member in elt[1:-1]
            if member != comma]
1757
def parse_classdef_bases(elt):
    """
    Convert the parenthesized base list of a class definition into a
    list of L{DottedName}s.

    @param elt: The token tree element for the base list; it must be
        a list whose first element is the C{'('} token.
    @return: One L{DottedName} per comma-separated base.
    @raise ParseError: If C{elt} is not a parenthesized group, or if
        any base is not a dotted name.

    @bug: Does not handle either of::
        - class A( (base.in.parens) ): pass
        - class B( (lambda:calculated.base)() ): pass
    """
    if isinstance(elt, list) and elt[0] == (token.OP, '('):
        # Drop the surrounding parens, then split on commas.
        base_groups = split_on(elt[1:-1], (token.OP, ','))
        return [parse_dotted_name(group) for group in base_groups]

    raise ParseError("Bad base list")
1774 1775 # Used by: base list; 'del'; ...
def parse_dotted_name_list(elt_list):
    """
    Convert a list of token tree elements containing a
    comma-separated list of dotted names into a list of
    L{DottedName} objects.

    @param elt_list: The token tree elements to convert.
    @return: The parsed names, in order.  An empty input yields an
        empty list, and a trailing comma is accepted.
    @raise ParseError: If the elements do not form a comma-separated
        list of dotted names (including a name that ends in a
        dangling C{'.'}).
    """
    comma = (token.OP, ',')
    period = (token.OP, '.')
    names = []

    # What the next element is allowed to be:
    #   'start' -- a name that begins a new dotted name (or the end)
    #   'sep'   -- a comma, a period (or the end)
    #   'cont'  -- a name that continues the current dotted name
    expecting = 'start'
    for elt in elt_list:
        if expecting == 'sep':
            if elt == period:
                expecting = 'cont'
            elif elt == comma:
                expecting = 'start'
            else:
                raise ParseError("Expected '.' or ',' or end of list")
            continue

        # Both remaining states require an identifier token.
        if not (isinstance(elt, tuple) and elt[0] == token.NAME):
            raise ParseError("Expected a name")
        if expecting == 'start':
            names.append(DottedName(elt[1]))
        else:
            names[-1] = DottedName(names[-1], elt[1])
        expecting = 'sep'

    # The list may not end right after a period.
    if expecting == 'cont':
        raise ParseError("Expected a name")
    return names
1813
def parse_string(elt_list):
    """
    Return the value of the single string literal token contained in
    C{elt_list}.

    @param elt_list: A list that must contain exactly one element,
        whose token type is C{token.STRING}.
    @return: The value obtained by evaluating the literal's source
        text.
    @raise ParseError: If C{elt_list} does not consist of exactly one
        string token.
    """
    if len(elt_list) != 1 or elt_list[0][0] != token.STRING:
        raise ParseError("Expected a string")

    # [xx] use something safer here?  But it needs to deal with
    # any string type (eg r"foo\bar" etc).
    # NOTE(review): eval() is applied to source text taken from the
    # module being documented; consider a literal-only evaluator.
    return eval(elt_list[0][1])
1821 1822 # ['1', 'b', 'c']
def parse_string_list(elt_list):
    """
    Convert a list of token tree elements containing a
    comma-separated sequence of string literals into a list of the
    strings' values.  The sequence may optionally be wrapped in a
    single pair of parentheses or square brackets.

    @param elt_list: The token tree elements to convert.
    @return: A list containing the value of each string literal.
    @raise ParseError: If any comma-separated item is not a single
        string token, or if two commas are adjacent.
    """
    # If the whole thing is one bracketed group, look inside it.  The
    # check must be made on the first element (not on elt_list itself):
    # a bare string token is a tuple whose first item is an integer
    # token type, and indexing into that would raise a TypeError.
    if (len(elt_list) == 1 and isinstance(elt_list[0], list) and
        elt_list[0] and elt_list[0][0][1] in ('(', '[')):
        elt_list = elt_list[0][1:-1]

    return [parse_string(string_elt)
            for string_elt in split_on(elt_list, (token.OP, ','))]
1833 1834 #///////////////////////////////////////////////////////////////// 1835 #{ Variable Manipulation 1836 #///////////////////////////////////////////////////////////////// 1837
def set_variable(namespace, var_doc, preserve_docstring=False):
    """
    Add var_doc to namespace.  If namespace already contains a
    variable with the same name, then discard the old variable.  If
    C{preserve_docstring} is true, then keep the old variable's
    docstring when overwriting a variable.

    Nothing is done if C{namespace} is not a L{NamespaceDoc}.

    @param namespace: The namespace to add the variable to.
    @param var_doc: The variable to add; its C{container} must still
        be C{UNKNOWN}, and is set to C{namespace}.
    @param preserve_docstring: If true, and C{var_doc} has no
        docstring of its own, inherit the docstring (and its line
        number) of the variable being replaced.
    """
    # Only namespaces can hold variables.
    if not isinstance(namespace, NamespaceDoc):
        return

    # This happens when the class definition has not been parsed, e.g. in
    # sf bug #1693253 on ``Exception.x = y``
    if namespace.sort_spec is UNKNOWN:
        namespace.sort_spec = list(namespace.variables.keys())

    name = var_doc.name

    # If we already have a variable with this name, then remove the
    # old VariableDoc from the sort_spec list; and if we gave its
    # value a canonical name, then delete it.
    if name in namespace.variables:
        namespace.sort_spec.remove(name)
        replaced = namespace.variables[name]
        if (replaced.is_alias == False and
            replaced.value is not UNKNOWN):
            replaced.value.canonical_name = UNKNOWN

        inherit_docstring = (
            preserve_docstring and
            var_doc.docstring in (None, UNKNOWN) and
            replaced.docstring not in (None, UNKNOWN))
        if inherit_docstring:
            var_doc.docstring = replaced.docstring
            var_doc.docstring_lineno = replaced.docstring_lineno

    # Add the variable to the namespace.
    namespace.variables[name] = var_doc
    namespace.sort_spec.append(name)
    assert var_doc.container is UNKNOWN
    var_doc.container = namespace
1872
def del_variable(namespace, name):
    """
    Remove the variable with the given (possibly dotted) name from
    C{namespace}.  Nothing is done if C{namespace} is not a
    L{NamespaceDoc}, or if the first piece of C{name} is not one of
    its variables.

    @param namespace: The namespace to delete the variable from.
    @param name: A sequence of identifiers; every piece but the last
        is followed through the C{value} of the variable it names.
    """
    if not isinstance(namespace, NamespaceDoc):
        return

    head = name[0]
    if head not in namespace.variables:
        return

    if len(name) != 1:
        # Descend into the value bound to the first piece, and
        # delete the remainder of the name from there.
        del_variable(namespace.variables[head].value, name[1:])
        return

    removed = namespace.variables[head]
    namespace.sort_spec.remove(head)
    del namespace.variables[head]
    # If the variable gave its value a canonical name, withdraw it.
    if not removed.is_alias and removed.value is not UNKNOWN:
        removed.value.canonical_name = UNKNOWN
1886 1887 #///////////////////////////////////////////////////////////////// 1888 #{ Name Lookup 1889 #///////////////////////////////////////////////////////////////// 1890
def lookup_name(identifier, parent_docs):
    """
    Find and return the documentation for the variable named by
    the given identifier.

    @param identifier: The (undotted) name to look up.
    @param parent_docs: The stack of enclosing docs; its last item
        is searched first, then its first item, then the builtins.
    @raise TypeError: If C{identifier} is not a string.
    @rtype: L{VariableDoc} or C{None}
    """
    # We need to check 3 namespaces: locals, globals, and builtins.
    # Note that this is true even if we're in a version of python with
    # nested scopes, because nested scope lookup does not apply to
    # nested class definitions, and we're not worried about variables
    # in nested functions.
    if not isinstance(identifier, basestring):
        raise TypeError('identifier must be a string')

    # Locals first, then globals (aka the containing module).
    for scope in (parent_docs[-1], parent_docs[0]):
        if isinstance(scope, NamespaceDoc) and identifier in scope.variables:
            return scope.variables[identifier]

    # Builtins -- only introspected if the name wasn't found above.
    builtins = epydoc.docintrospecter.introspect_docs(__builtin__)
    if (isinstance(builtins, NamespaceDoc) and
        identifier in builtins.variables):
        return builtins.variables[identifier]

    # We didn't find it; return None.
    return None
1924
def lookup_variable(dotted_name, parent_docs):
    """
    Find and return the documentation for the variable with the
    given dotted name.

    @param dotted_name: The name of the variable.
    @type dotted_name: L{DottedName}
    @param parent_docs: The stack of enclosing docs, as used by
        L{lookup_name}.
    @return: The variable's documentation, or C{None} if it was not
        found.
    """
    assert isinstance(dotted_name, DottedName)

    # A simple identifier is handled directly by lookup_name.
    if len(dotted_name) == 1:
        return lookup_name(dotted_name[0], parent_docs)

    # Otherwise, look up the namespace containing the var first;
    # and then look for the var in that namespace.
    container = lookup_value(dotted_name[:-1], parent_docs)
    if (isinstance(container, NamespaceDoc) and
        dotted_name[-1] in container.variables):
        return container.variables[dotted_name[-1]]

    return None # var not found.
1941
def lookup_value(dotted_name, parent_docs):
    """
    Find and return the documentation for the value contained in
    the variable with the given name in the current namespace.

    @param dotted_name: The name of the variable.
    @type dotted_name: L{DottedName}
    @param parent_docs: The stack of enclosing docs, as used by
        L{lookup_name}.
    @return: The value's documentation; a L{GenericValueDoc} proxy
        if the lookup runs into a variable that was imported but
        whose value is unknown; or C{None} if the name can't be
        resolved.
    """
    assert isinstance(dotted_name, DottedName)

    # Resolve the first piece, then follow the remaining pieces
    # through successive namespaces.
    var_doc = lookup_name(dotted_name[0], parent_docs)

    for i in range(1, len(dotted_name)):
        if var_doc is None:
            return None

        value = var_doc.value
        if isinstance(value, NamespaceDoc):
            var_doc = value.variables.get(dotted_name[i])
            continue

        if (value is UNKNOWN and
            var_doc.imported_from not in (None, UNKNOWN)):
            # The variable was imported from somewhere we haven't
            # looked at; refer to the rest of the name by proxy.
            src_name = var_doc.imported_from + dotted_name[i:]
            # [xx] do I want to create a proxy here??
            return GenericValueDoc(proxy_for=src_name,
                                   parse_repr=str(dotted_name),
                                   docs_extracted_by='parser')

        # Not a namespace, and not an import: dead end.
        return None

    if var_doc is None:
        return None
    return var_doc.value
1969 1970 #///////////////////////////////////////////////////////////////// 1971 #{ Docstring Comments 1972 #///////////////////////////////////////////////////////////////// 1973
def add_docstring_from_comments(api_doc, comments):
    """
    Set the docstring of C{api_doc} from a list of comment lines.
    Nothing is done if C{api_doc} is C{None} or C{comments} is empty.

    @param api_doc: The object whose C{docstring} and
        C{docstring_lineno} attributes should be set.
    @param comments: A sequence of C{(line, lineno)} pairs.  The
        lines are joined with newlines to form the docstring, and
        the first pair's line number becomes the docstring's line
        number.
    """
    if api_doc is not None and comments:
        text_lines = [text for text, _lineno in comments]
        api_doc.docstring = '\n'.join(text_lines)
        api_doc.docstring_lineno = comments[0][1]
1978 1979 #///////////////////////////////////////////////////////////////// 1980 #{ Tree tokens 1981 #///////////////////////////////////////////////////////////////// 1982
def _join_toktree(s1, s2, spacing='normal'):
    """
    Join two pretty-printed token strings, deciding whether a space
    belongs between them.

    @param s1: The left-hand string.
    @param s2: The right-hand string.
    @param spacing: If C{'tight'}, then no space is put next to the
        characters C{+ - * / = ,}.  (This parameter is new and
        defaults to C{'normal'}; previously the body referred to an
        undefined C{spacing} name and raised a C{NameError} whenever
        the first rule below did not apply.)
    @return: C{s1} and C{s2} concatenated, separated by a single
        space unless one of the spacing rules suppresses it.
    """
    operator_chars = '+-*/=,'

    # Join them.  s1 = left side; s2 = right side.
    # No space: next to an empty string; after a lone '-' or '`';
    # before a closing bracket, '`' or ':'; before '.' or ','; after
    # an opening bracket, '.', newline or space; and before a '('
    # that doesn't follow ',' or '='.
    if (s2=='' or s1=='' or
        s1 in ('-','`') or s2 in ('}',']',')','`',':') or
        s2[0] in ('.',',') or s1[-1] in ('(','[','{','.','\n',' ') or
        (s2[0] == '(' and s1[-1] not in (',','='))):
        return '%s%s' % (s1,s2)
    # The parentheses matter: without them, 'and' binds tighter than
    # 'or', and the right-hand test would apply for every spacing.
    elif (spacing=='tight' and
          (s1[-1] in operator_chars or s2[0] in operator_chars)):
        return '%s%s' % (s1, s2)
    else:
        return '%s %s' % (s1, s2)
1995
def _pp_toktree_add_piece(spacing, pieces, piece):
    """
    Append C{piece} to the list C{pieces}, first appending a single
    space if one belongs between the last piece and the new one.

    @param spacing: If C{'tight'}, then no space is put next to the
        characters C{+ - * / = ,}.  Any other value gives normal
        spacing.
    @param pieces: The non-empty list of strings built so far; it is
        modified in place.
    @param piece: The string to add.
    """
    s1 = pieces[-1]
    s2 = piece
    operator_chars = '+-*/=,'

    # No space: next to an empty string; after a lone '-' or '`';
    # before a closing bracket, '`' or ':'; before '.' or ','; after
    # an opening bracket, '.', newline or space; and before a '('
    # that doesn't follow ',' or '='.
    if (s2=='' or s1=='' or
        s1 in ('-','`') or s2 in ('}',']',')','`',':') or
        s2[0] in ('.',',') or s1[-1] in ('(','[','{','.','\n',' ') or
        (s2[0] == '(' and s1[-1] not in (',','='))):
        pass
    # The parentheses matter: without them, 'and' binds tighter than
    # 'or', so a piece starting with an operator character lost its
    # leading space even when the spacing was not 'tight'.
    elif (spacing=='tight' and
          (s1[-1] in operator_chars or s2[0] in operator_chars)):
        pass
    else:
        pieces.append(' ')

    pieces.append(piece)
2012
def pp_toktree(elts, spacing='normal', indent=0):
    """
    Pretty-print a token tree, and return the result as a string.

    @param elts: The list of token tree elements to print.
    @param spacing: Either C{'normal'} or C{'tight'}; passed on to
        the helper that decides where spaces are inserted.
    @param indent: The initial indentation level.
    @return: The concatenation of all the generated pieces.
    """
    # Seed the list with an empty string, so the helpers always
    # have a "previous piece" to look at.
    fragments = ['']
    _pp_toktree(elts, spacing, indent, fragments)
    return ''.join(fragments)
2017
def _pp_toktree(elts, spacing, indent, pieces):
    """
    Helper for L{pp_toktree}: append the pretty-printed form of each
    element of C{elts} to C{pieces}, recursing into nested lists.

    @param elts: The list of token tree elements to print.
    @param spacing: The spacing mode, passed on unchanged to
        C{_pp_toktree_add_piece}.
    @param indent: The current indentation level.  It is adjusted
        locally when INDENT and DEDENT tokens are seen.
    @param pieces: The list of output strings; modified in place.
    """
    add_piece = _pp_toktree_add_piece

    # NOTE(review): the listing this was recovered from collapses
    # runs of whitespace, so the indentation unit may originally have
    # been wider than one space -- confirm against the real file.
    indent_unit = ' '

    for elt in elts:
        # Put a blank line before class & def statements.
        if elt == (token.NAME, 'class') or elt == (token.NAME, 'def'):
            add_piece(spacing, pieces, '\n%s' % (indent_unit*indent))

        if isinstance(elt, tuple):
            if elt[0] == token.NEWLINE:
                add_piece(spacing, pieces, ' '+elt[1])
                add_piece(spacing, pieces, '\n%s' % (indent_unit*indent))
            elif elt[0] == token.INDENT:
                add_piece(spacing, pieces, indent_unit)
                indent += 1
            elif elt[0] == token.DEDENT:
                # Take back the indentation unit added for INDENT.
                assert pieces[-1] == indent_unit
                pieces.pop()
                indent -= 1
            elif elt[0] == tokenize.COMMENT:
                add_piece(spacing, pieces, elt[1].rstrip() + '\n')
                # This call used to pass only the indentation string,
                # which raised a TypeError (add_piece takes three
                # arguments) whenever a comment token was printed.
                add_piece(spacing, pieces, indent_unit*indent)
            else:
                add_piece(spacing, pieces, elt[1])
        else:
            # A nested list: print its contents at the current level.
            _pp_toktree(elt, spacing, indent, pieces)
2044 2045 #///////////////////////////////////////////////////////////////// 2046 #{ Helper Functions 2047 #///////////////////////////////////////////////////////////////// 2048
def get_module_encoding(filename):
    """
    Return the name of the source encoding declared by the given
    module file.

    Only the first two lines of the file are examined.  If the file
    starts with a UTF-8 byte order mark, then C{'utf-8'} is returned.
    Otherwise, the first of those lines that is a comment containing
    an encoding declaration determines the result.  If no declaration
    is found, then C{'iso-8859-1'} is returned.

    @param filename: The path of the module's source file.
    @return: The encoding name, as a string.
    @see: U{PEP 263<http://www.python.org/peps/pep-0263.html>}
    """
    # Imported here so that this function does not depend on the
    # module-level imports.
    import io

    # Decode as iso-8859-1, which maps every byte to exactly one
    # character; that way the BOM bytes can be recognized without
    # knowing the real encoding.  io.open is used (rather than open
    # with mode 'rU') because it gives the same universal-newline
    # text handling on Python 2.6+ and Python 3, whereas the 'U'
    # mode flag is rejected by Python 3.11 and later.
    module_file = io.open(filename, 'r', encoding='iso-8859-1')
    try:
        lines = [module_file.readline() for _ in range(2)]
    finally:
        module_file.close()

    if lines[0].startswith(u'\xef\xbb\xbf'):
        return 'utf-8'

    for line in lines:
        # This is the pattern given by PEP 263: the declaration must
        # appear in a comment, so that text such as a string literal
        # containing "coding: x" is not mistaken for a declaration.
        m = re.search(r"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)",
                      line)
        if m:
            return str(m.group(1))

    # Fall back on Python's default encoding.
    return 'iso-8859-1' # aka 'latin-1'
2067
def _get_module_name(filename, package_doc):
    """
    Return (dotted_name, is_package)

    @param filename: The path of the module's source file.  For a
        file named C{__init__}, the module is a package, and is
        named after the directory that contains the file.
    @param package_doc: The documentation for the package that
        contains the module, or C{None} if there is none.
    @return: A tuple C{(dotted_name, is_package)}, where
        C{dotted_name} is a L{DottedName} and C{is_package} is a
        boolean.
    """
    # Strip a trailing '.py', '.pyc', '.pyw', etc.  The dot must be
    # escaped: an unescaped '.' matches any character, which would
    # truncate extension-less names ending in "py" (e.g. a script
    # called "copy" would become "c").
    name = re.sub(r'\.py\w?$', '', os.path.split(filename)[1])
    if name == '__init__':
        is_package = True
        name = os.path.split(os.path.split(filename)[0])[1]
    else:
        is_package = False

    # [XX] if the module contains a script, then `name` may not
    # necessarily be a valid identifier -- which will cause
    # DottedName to raise an exception.  Is that what I want?
    if package_doc is None:
        dotted_name = DottedName(name)
    else:
        dotted_name = DottedName(package_doc.canonical_name, name)

    # Check if the module looks like it's shadowed by a variable.
    # If so, then add a "'" to the end of its canonical name, to
    # distinguish it from the variable.
    if package_doc is not None and name in package_doc.variables:
        vardoc = package_doc.variables[name]
        if (vardoc.value not in (None, UNKNOWN) and
            vardoc.imported_from != dotted_name):
            log.warning("Module %s might be shadowed by a variable with "
                        "the same name." % dotted_name)
            dotted_name = DottedName(str(dotted_name)+"'")

    return dotted_name, is_package
2099
def flatten(lst, out=None):
    """
    @return: a flat list containing the leaves of the given nested
        list.  If C{out} is given, then the leaves are appended to
        it, and it is the list that gets returned.
    @param lst: The nested list that should be flattened.  Nested
        lists and tuples are descended into; anything else
        (including a string) is treated as a leaf.
    @param out: An optional list to append the leaves to.
    """
    leaves = [] if out is None else out

    # Walk the nesting depth-first with an explicit stack of
    # iterators, so that leaves come out in left-to-right order.
    pending = [iter(lst)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, (list, tuple)):
                # Descend; the outer iterator resumes afterwards.
                pending.append(iter(item))
                break
            leaves.append(item)
        else:
            # The innermost iterator is exhausted.
            pending.pop()
    return leaves
2113