Package epydoc :: Package markup :: Module pyval_repr
[hide private]
[frames | no frames]

Source Code for Module epydoc.markup.pyval_repr

  1  # epydoc -- Marked-up Representations for Python Values 
  2  # 
  3  # Copyright (C) 2005 Edward Loper 
  4  # Author: Edward Loper <edloper@loper.org> 
  5  # URL: <http://epydoc.sf.net> 
  6  # 
  7  # $Id: apidoc.py 1448 2007-02-11 00:05:34Z dvarrazzo $ 
  8   
  9  """ 
 10  Syntax highlighter for Python values.  Currently provides special 
 11  colorization support for: 
 12   
 13    - lists, tuples, sets, frozensets, dicts 
 14    - numbers 
 15    - strings 
 16    - compiled regexps 
 17   
 18  The highlighter also takes care of line-wrapping, and automatically 
 19  stops generating repr output as soon as it has exceeded the specified 
 20  number of lines (which should make it faster than pprint for large 
 21  values).  It does I{not} bother to do automatic cycle detection, 
 22  because maxlines is typically around 5, so it's really not worth it. 
 23   
 24  The syntax-highlighted output is encoded using a 
 25  L{ParsedEpytextDocstring}, which can then be used to generate output in 
 26  a variety of formats. 
 27  """ 
 28  __docformat__ = 'epytext en' 
 29   
 30  # Implementation note: we use exact tests for classes (list, etc) 
 31  # rather than using isinstance, because subclasses might override 
 32  # __repr__. 
 33   
 34  import types, re 
 35  import epydoc.apidoc 
 36  from epydoc.util import decode_with_backslashreplace 
 37  from epydoc.util import plaintext_to_html, plaintext_to_latex 
 38  from epydoc.compat import * 
 39  import sre_parse, sre_constants 
 40   
 41  from epydoc.markup.epytext import Element, ParsedEpytextDocstring 
 42   
def is_re_pattern(pyval):
    """
    Return true if C{pyval} is a compiled regular expression object.

    The check compares C{type(pyval)} against the type of an object
    actually produced by C{re.compile}, instead of comparing the type's
    name to the literal C{'SRE_Pattern'}.  The name of the compiled
    pattern type is an interpreter implementation detail, so a
    name-based test silently stops matching on interpreters that call
    the type something else.

    @param pyval: Any Python value.
    @return: C{True} if C{pyval}'s exact type is the compiled pattern
        type of the running interpreter; C{False} otherwise.
    """
    # An exact type test (not isinstance) keeps this consistent with the
    # rest of the module, which deliberately uses exact type tests.
    return type(pyval) is type(re.compile(''))
45
class _ColorizerState:
    """
    Mutable bookkeeping record for one run of the pyval colorizer.

    L{mark()} captures a backup point and L{restore()} rolls the state
    back to it.  Colorization methods use this pair to first attempt a
    single-line rendering (with C{linebreakok} set to false) and then
    fall back on a multi-line rendering if that attempt fails.

    The L{score} variable records how good we think the repr is.  For
    example, unhelpful values like C{<Foo instance at 0x12345>} get low
    scores; if the score is too low, the parse-derived repr is used
    instead.

    @ivar result: The list of output pieces generated so far.
    @ivar charpos: The current column on the current output line.
    @ivar lineno: The current output line number (starting at 1).
    @ivar linebreakok: Whether line breaks may currently be generated.
    @ivar score: How good this representation is.
    """
    def __init__(self):
        #: How good this representation is.
        self.score = 0
        self.linebreakok = True
        self.lineno = 1
        self.charpos = 0
        self.result = []

    def mark(self):
        """
        @return: A backup point for the current state: the length of
            C{result} followed by the values of C{charpos}, C{lineno},
            C{linebreakok} and C{score}.
        """
        result_len = len(self.result)
        return (result_len, self.charpos, self.lineno,
                self.linebreakok, self.score)

    def restore(self, mark):
        """
        Roll the state back to a backup point returned by L{mark()}.
        Any output appended to C{result} after the mark is discarded.
        """
        result_len, charpos, lineno, linebreakok, score = mark
        self.charpos = charpos
        self.lineno = lineno
        self.linebreakok = linebreakok
        self.score = score
        del self.result[result_len:]
75
class _Maxlines(Exception):
    """
    Control-flow exception: raised when PyvalColorizer exceeds the
    maximum number of allowed lines.
    """
    pass
79
class _Linebreak(Exception):
    """
    Control-flow exception: raised when PyvalColorizer generates a
    string containing a newline while the state object's C{linebreakok}
    variable is false.
    """
    pass
84
class ColorizedPyvalRepr(ParsedEpytextDocstring):
    """
    A L{ParsedEpytextDocstring} that additionally records how good, and
    how complete, a colorized value representation is.

    @ivar score: A score, evaluating how good this repr is.
    @ivar is_complete: True if this colorized repr completely describes
        the object.
    """
    def __init__(self, tree, score, is_complete):
        """
        @param tree: The markup tree, handed unchanged to
            C{ParsedEpytextDocstring.__init__}.
        @param score: Stored as L{score}.
        @param is_complete: Stored as L{is_complete}.
        """
        ParsedEpytextDocstring.__init__(self, tree)
        self.is_complete = is_complete
        self.score = score
95
def colorize_pyval(pyval, parse_repr=None, min_score=None,
                   linelen=75, maxlines=5, linebreakok=True, sort=True):
    """
    Convenience wrapper: build a L{PyvalColorizer} from C{linelen},
    C{maxlines}, C{linebreakok} and C{sort}, and return the result of
    calling its C{colorize} method with C{pyval}, C{parse_repr} and
    C{min_score}.
    """
    colorizer = PyvalColorizer(linelen, maxlines, linebreakok, sort)
    return colorizer.colorize(pyval, parse_repr, min_score)
100
class PyvalColorizer:
    """
    Syntax highlighter for Python values.

    A colorizer is configured once (line length, maximum number of
    lines, whether line breaks are allowed, whether unordered
    containers are sorted) and can then be used to colorize any number
    of values with L{colorize()}.
    """

    def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):
        """
        @param linelen: Maximum number of characters per output line;
            longer lines are line-wrapped by L{_output}.
        @param maxlines: Maximum number of output lines; exceeding it
            raises L{_Maxlines} inside L{_output}.
        @param linebreakok: Whether the output may contain line breaks.
        @param sort: Whether L{_sort} should sort the items of sets,
            frozensets and dicts.
        """
        self.linelen = linelen
        self.maxlines = maxlines
        self.linebreakok = linebreakok
        self.sort = sort

    #////////////////////////////////////////////////////////////
    # Colorization Tags & other constants
    #////////////////////////////////////////////////////////////

    GROUP_TAG = 'variable-group'     # e.g., "[" and "]"
    COMMA_TAG = 'variable-op'        # The "," that separates elements
    COLON_TAG = 'variable-op'        # The ":" in dictionaries
    CONST_TAG = None                 # None, True, False
    NUMBER_TAG = None                # ints, floats, etc
    QUOTE_TAG = 'variable-quote'     # Quotes around strings.
    STRING_TAG = 'variable-string'   # Body of string literals

    RE_CHAR_TAG = None
    RE_GROUP_TAG = 're-group'
    RE_REF_TAG = 're-ref'
    RE_OP_TAG = 're-op'
    RE_FLAGS_TAG = 're-flags'

    # Shared marker elements.  The same objects are appended to every
    # result list, so they must never be modified in place.
    ELLIPSIS = Element('code', u'...', style='variable-ellipsis')
    LINEWRAP = Element('symbol', u'crarr')
    UNKNOWN_REPR = Element('code', u'??', style='variable-unknown')

    # Matches reprs of the form "<... at 0x1234abcd>".
    GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)

    ESCAPE_UNICODE = False # should we escape non-ascii unicode chars?

    #////////////////////////////////////////////////////////////
    # Entry Point
    #////////////////////////////////////////////////////////////

    def colorize(self, pyval, parse_repr=None, min_score=None):
        """
        Colorize C{pyval}; or, if C{pyval} is C{UNKNOWN}, output
        C{parse_repr} instead; or, if neither is available, output the
        L{UNKNOWN_REPR} marker.

        If colorizing C{pyval} yields a score below C{min_score} and a
        C{parse_repr} is available, the value is discarded and
        C{parse_repr} is colorized instead.

        @param pyval: The value to colorize, or C{epydoc.apidoc.UNKNOWN}.
        @param parse_repr: A fallback representation, passed to
            L{_output} untagged; C{None} or C{UNKNOWN} if unavailable.
        @param min_score: The minimum acceptable score, or C{None} to
            accept any score.
        @return: A L{ColorizedPyvalRepr} describing the given pyval.
        """
        UNKNOWN = epydoc.apidoc.UNKNOWN
        # Create an object to keep track of the colorization.
        state = _ColorizerState()
        state.linebreakok = self.linebreakok
        # Colorize the value.  If we reach maxlines, then add on an
        # ellipsis marker and call it a day.
        try:
            if pyval is not UNKNOWN:
                self._colorize(pyval, state)
            elif parse_repr not in (None, UNKNOWN):
                self._output(parse_repr, None, state)
            else:
                state.result.append(PyvalColorizer.UNKNOWN_REPR)
            is_complete = True
        except (_Maxlines, _Linebreak):
            if self.linebreakok:
                state.result.append('\n')
                state.result.append(self.ELLIPSIS)
            else:
                # Guard against an empty result list before peeking at
                # its last element.
                if state.result and state.result[-1] is self.LINEWRAP:
                    state.result.pop()
                # Make room for the three characters of the ellipsis.
                self._trim_result(state.result, 3)
                state.result.append(self.ELLIPSIS)
            is_complete = False
        # If we didn't score high enough, then try again.
        if (pyval is not UNKNOWN and parse_repr not in (None, UNKNOWN)
            and min_score is not None and state.score < min_score):
            return self.colorize(UNKNOWN, parse_repr)
        # Put it all together.
        tree = Element('epytext', *state.result)
        return ColorizedPyvalRepr(tree, state.score, is_complete)

    def _colorize(self, pyval, state):
        """
        Colorize a single value, dispatching on its exact type, and
        add one to C{state.score}.

        Values whose type has no special support are rendered with
        C{repr()}.  If C{repr()} raises, the score is lowered by 100
        and L{UNKNOWN_REPR} is output; if the repr matches
        L{GENERIC_OBJECT_RE}, the score is lowered by 5.
        """
        pyval_type = type(pyval)
        state.score += 1

        if pyval is None or pyval is True or pyval is False:
            self._output(unicode(pyval), self.CONST_TAG, state)
        elif pyval_type in (int, float, long, types.ComplexType):
            self._output(unicode(pyval), self.NUMBER_TAG, state)
        elif pyval_type is str:
            self._colorize_str(pyval, state, '', 'string-escape')
        elif pyval_type is unicode:
            if self.ESCAPE_UNICODE:
                self._colorize_str(pyval, state, 'u', 'unicode-escape')
            else:
                self._colorize_str(pyval, state, 'u', None)
        elif pyval_type is list:
            self._multiline(self._colorize_iter, pyval, state, '[', ']')
        elif pyval_type is tuple:
            # A one-element tuple needs its trailing comma; without it
            # the output "(x)" would read as a parenthesized value.
            if len(pyval) == 1:
                closer = ',)'
            else:
                closer = ')'
            self._multiline(self._colorize_iter, pyval, state, '(', closer)
        elif pyval_type is set:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'set([', '])')
        elif pyval_type is frozenset:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'frozenset([', '])')
        elif pyval_type is dict:
            self._multiline(self._colorize_dict, self._sort(pyval.items()),
                            state, '{', '}')
        elif is_re_pattern(pyval):
            self._colorize_re(pyval, state)
        else:
            try:
                pyval_repr = repr(pyval)
                if not isinstance(pyval_repr, (str, unicode)):
                    pyval_repr = unicode(pyval_repr)
                pyval_repr_ok = True
            except KeyboardInterrupt:
                raise
            except:
                # Deliberately broad: an arbitrary __repr__ may raise
                # anything, and a failed repr is reported, not fatal.
                pyval_repr_ok = False
                state.score -= 100

            if pyval_repr_ok:
                if self.GENERIC_OBJECT_RE.match(pyval_repr):
                    state.score -= 5
                self._output(pyval_repr, None, state)
            else:
                state.result.append(self.UNKNOWN_REPR)

    def _sort(self, items):
        """
        @return: C{sorted(items)} if sorting is enabled and succeeds;
            otherwise C{items} unchanged.
        """
        if not self.sort: return items
        try: return sorted(items)
        except KeyboardInterrupt: raise
        # Deliberately broad: comparing arbitrary values may raise
        # anything, and sorting is only a best-effort nicety.
        except: return items

    def _trim_result(self, result, num_chars):
        """
        Remove C{num_chars} characters from the end of C{result},
        dropping any piece that becomes empty.  Stops early if
        C{result} runs out of pieces.

        The shared marker elements (L{ELLIPSIS}, L{LINEWRAP},
        L{UNKNOWN_REPR}) are removed whole rather than shortened:
        they are class-level objects, so slicing their text in place
        would corrupt them for every later use.
        """
        shared = (self.ELLIPSIS, self.LINEWRAP, self.UNKNOWN_REPR)
        while num_chars > 0:
            if not result: return
            last = result[-1]
            if isinstance(last, Element):
                assert len(last.children) == 1
                text = last.children[0]
                # Identity test: only the shared objects themselves
                # are treated as atomic.
                if [marker for marker in shared if marker is last]:
                    result.pop()
                    num_chars -= len(text)
                    continue
                trim = min(num_chars, len(text))
                last.children[0] = text[:-trim]
                if not last.children[0]: result.pop()
                num_chars -= trim
            else:
                trim = min(num_chars, len(last))
                result[-1] = last[:-trim]
                if not result[-1]: result.pop()
                num_chars -= trim

    #////////////////////////////////////////////////////////////
    # Object Colorization Functions
    #////////////////////////////////////////////////////////////

    def _multiline(self, func, pyval, state, *args):
        """
        Helper for container-type colorizers.  First, try calling
        C{func(pyval, state, *args)} with linebreakok set to false;
        and if that fails, then try again with it set to true.

        @raise _Linebreak: If the single-line attempt fails and line
            breaks were not allowed on entry.
        """
        linebreakok = state.linebreakok
        mark = state.mark()

        try:
            state.linebreakok = False
            func(pyval, state, *args)
            state.linebreakok = linebreakok

        except _Linebreak:
            if not linebreakok:
                raise
            # restore() also resets state.linebreakok to its value at
            # the time of the mark.
            state.restore(mark)
            func(pyval, state, *args)

    def _colorize_iter(self, pyval, state, prefix, suffix):
        """
        Output C{prefix}, the colorized elements of C{pyval} separated
        by commas, and C{suffix}.  When line breaks are allowed, each
        element after the first goes on its own line, indented to the
        column that follows C{prefix}.
        """
        self._output(prefix, self.GROUP_TAG, state)
        indent = state.charpos
        for i, elt in enumerate(pyval):
            if i>=1:
                if state.linebreakok:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n'+' '*indent, None, state)
                else:
                    self._output(', ', self.COMMA_TAG, state)
            self._colorize(elt, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_dict(self, items, state, prefix, suffix):
        """
        Output C{prefix}, the colorized C{key: value} pairs from
        C{items} separated by commas, and C{suffix}.  The layout rules
        are the same as for L{_colorize_iter}.

        @param items: An iterable of C{(key, value)} pairs.
        """
        self._output(prefix, self.GROUP_TAG, state)
        indent = state.charpos
        for i, (key, val) in enumerate(items):
            if i>=1:
                if state.linebreakok:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n'+' '*indent, None, state)
                else:
                    self._output(', ', self.COMMA_TAG, state)
            self._colorize(key, state)
            self._output(': ', self.COLON_TAG, state)
            self._colorize(val, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_str(self, pyval, state, prefix, encoding):
        """
        Output a quoted string literal for C{pyval}.

        @param prefix: Text output before the opening quote (e.g. 'u').
        @param encoding: The codec name used to escape each line before
            it is output, or a false value to output the lines as-is.
        """
        # Decide which quote to use.
        if '\n' in pyval and state.linebreakok: quote = "'''"
        else: quote = "'"
        # Divide the string into lines.
        if state.linebreakok:
            lines = pyval.split('\n')
        else:
            lines = [pyval]
        # Open quote.
        self._output(prefix+quote, self.QUOTE_TAG, state)
        # Body
        for i, line in enumerate(lines):
            if i>0: self._output('\n', None, state)
            if encoding: line = line.encode(encoding)
            self._output(line, self.STRING_TAG, state)
        # Close quote.
        self._output(quote, self.QUOTE_TAG, state)

    def _colorize_re(self, pyval, state):
        """
        Output a colorized C{re.compile(r'...')} expression for the
        compiled regexp C{pyval}, by re-parsing its pattern with
        C{sre_parse} and colorizing the resulting parse tree.
        """
        # Extract the flag & pattern from the regexp.
        pat, flags = pyval.pattern, pyval.flags
        # If the pattern is a string, decode it to unicode.
        if isinstance(pat, str):
            pat = decode_with_backslashreplace(pat)
        # Parse the regexp pattern.
        tree = sre_parse.parse(pat, flags)
        # Invert the name->number mapping, so group names can be
        # looked up by group number.
        groups = dict([(num,name) for (name,num) in
                       tree.pattern.groupdict.items()])
        # Colorize it!
        self._output("re.compile(r'", None, state)
        self._colorize_re_flags(tree.pattern.flags, state)
        self._colorize_re_tree(tree, state, True, groups)
        self._output("')", None, state)

    def _colorize_re_flags(self, flags, state):
        """
        If any flags are set, output them as an inline C{(?...)} flag
        group, using the flag letters listed in C{sre_parse.FLAGS}.
        """
        if flags:
            flags = [c for (c,n) in sorted(sre_parse.FLAGS.items())
                     if (n&flags)]
            flags = '(?%s)' % ''.join(flags)
            self._output(flags, self.RE_FLAGS_TAG, state)

    def _colorize_re_tree(self, tree, state, noparen, groups):
        """
        Output the colorized source for a C{sre_parse} parse tree.

        @param tree: A sequence of C{(op, args)} pairs.
        @param noparen: If false, and C{tree} has more than one
            element, the output is wrapped in parentheses.
        @param groups: A dictionary mapping group numbers to group
            names, for named groups.
        @raise ValueError: For a branch with a non-None first argument,
            or for an unknown category or position constant.
        """
        assert noparen in (True, False)
        if len(tree) > 1 and not noparen:
            self._output('(', self.RE_GROUP_TAG, state)
        for elt in tree:
            op = elt[0]
            args = elt[1]

            if op == sre_constants.LITERAL:
                c = unichr(args)
                # Add any appropriate escaping.
                if c in '.^$\\*+?{}[]|()\'': c = '\\'+c
                elif c == '\t': c = '\\t'
                elif c == '\r': c = '\\r'
                elif c == '\n': c = '\\n'
                elif c == '\f': c = '\\f'
                elif c == '\v': c = '\\v'
                elif ord(c) > 0xffff: c = r'\U%08x' % ord(c)
                elif ord(c) > 0xff: c = r'\u%04x' % ord(c)
                elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c)
                self._output(c, self.RE_CHAR_TAG, state)

            elif op == sre_constants.ANY:
                self._output('.', self.RE_CHAR_TAG, state)

            elif op == sre_constants.BRANCH:
                if args[0] is not None:
                    raise ValueError('Branch expected None arg but got %s'
                                     % args[0])
                for i, item in enumerate(args[1]):
                    if i > 0:
                        self._output('|', self.RE_OP_TAG, state)
                    self._colorize_re_tree(item, state, True, groups)

            elif op == sre_constants.IN:
                # A set holding just one category (e.g. \d) is output
                # without the surrounding brackets.
                if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY):
                    self._colorize_re_tree(args, state, False, groups)
                else:
                    self._output('[', self.RE_GROUP_TAG, state)
                    self._colorize_re_tree(args, state, True, groups)
                    self._output(']', self.RE_GROUP_TAG, state)

            elif op == sre_constants.CATEGORY:
                if args == sre_constants.CATEGORY_DIGIT: val = r'\d'
                elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D'
                elif args == sre_constants.CATEGORY_SPACE: val = r'\s'
                elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S'
                elif args == sre_constants.CATEGORY_WORD: val = r'\w'
                elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W'
                else: raise ValueError('Unknown category %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op == sre_constants.AT:
                if args == sre_constants.AT_BEGINNING_STRING: val = r'\A'
                elif args == sre_constants.AT_BEGINNING: val = r'^'
                elif args == sre_constants.AT_END: val = r'$'
                elif args == sre_constants.AT_BOUNDARY: val = r'\b'
                elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B'
                elif args == sre_constants.AT_END_STRING: val = r'\Z'
                else: raise ValueError('Unknown position %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
                minrpt = args[0]
                maxrpt = args[1]
                if maxrpt == sre_constants.MAXREPEAT:
                    if minrpt == 0:   val = '*'
                    elif minrpt == 1: val = '+'
                    else: val = '{%d,}' % (minrpt)
                elif minrpt == 0:
                    if maxrpt == 1: val = '?'
                    else: val = '{,%d}' % (maxrpt)
                elif minrpt == maxrpt:
                    val = '{%d}' % (maxrpt)
                else:
                    val = '{%d,%d}' % (minrpt, maxrpt)
                # MIN_REPEAT gets a trailing "?" after the repeat
                # operator.
                if op == sre_constants.MIN_REPEAT:
                    val += '?'

                self._colorize_re_tree(args[2], state, False, groups)
                self._output(val, self.RE_OP_TAG, state)

            elif op == sre_constants.SUBPATTERN:
                if args[0] is None:
                    self._output('(?:', self.RE_GROUP_TAG, state)
                elif args[0] in groups:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(groups[args[0]], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                elif isinstance(args[0], (int, long)):
                    # This is cheating:
                    self._output('(', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(args[0], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.GROUPREF:
                self._output('\\%d' % args, self.RE_REF_TAG, state)

            elif op == sre_constants.RANGE:
                # Render both endpoints through the LITERAL branch so
                # that they get the same escaping.
                self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),),
                                        state, False, groups )
                self._output('-', self.RE_OP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),),
                                        state, False, groups )

            elif op == sre_constants.NEGATE:
                self._output('^', self.RE_OP_TAG, state)

            elif op == sre_constants.ASSERT:
                if args[0] > 0:
                    self._output('(?=', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<=', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.ASSERT_NOT:
                if args[0] > 0:
                    self._output('(?!', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<!', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.NOT_LITERAL:
                self._output('[^', self.RE_GROUP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args),),
                                        state, False, groups )
                self._output(']', self.RE_GROUP_TAG, state)
            else:
                # Imported locally because none of this module's
                # visible top-level imports binds the name `log`.
                from epydoc import log
                # elt is a tuple, so it must be wrapped in a 1-tuple
                # to be formatted as a single %r argument.
                log.error("Error colorizing regexp: unknown elt %r" % (elt,))
        if len(tree) > 1 and not noparen:
            self._output(')', self.RE_GROUP_TAG, state)

    #////////////////////////////////////////////////////////////
    # Output function
    #////////////////////////////////////////////////////////////

    def _output(self, s, tag, state):
        """
        Add the string `s` to the result list, tagging its contents
        with tag `tag`.  Any lines that go beyond `self.linelen` will
        be line-wrapped.  If the total number of lines exceeds
        `self.maxlines`, then raise a `_Maxlines` exception.

        @param s: The text to add; a C{str} is first decoded with
            C{decode_with_backslashreplace}.
        @param tag: The style used to wrap each piece of text in a
            'code' element, or a false value to add the text untagged.
        @param state: The L{_ColorizerState} to update.
        @raise _Maxlines: If starting a new line would exceed
            C{self.maxlines}.
        @raise _Linebreak: If a new line is needed (because of a
            newline in C{s} or because of line-wrapping) while
            C{state.linebreakok} is false.
        """
        # Make sure the string is unicode.
        if isinstance(s, str):
            s = decode_with_backslashreplace(s)

        # Split the string into segments.  The first segment is the
        # content to add to the current line, and the remaining
        # segments are new lines.
        segments = s.split('\n')

        for i, segment in enumerate(segments):
            # If this isn't the first segment, then add a newline to
            # split it from the previous segment.
            if i > 0:
                if (state.lineno+1) > self.maxlines:
                    raise _Maxlines()
                if not state.linebreakok:
                    raise _Linebreak()
                state.result.append(u'\n')
                state.lineno += 1
                state.charpos = 0

            # If the segment fits on the current line, then just call
            # markup to tag it, and store the result.
            if state.charpos + len(segment) <= self.linelen:
                state.charpos += len(segment)
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result.append(segment)

            # If the segment doesn't fit on the current line, then
            # line-wrap it, and insert the remainder of the line into
            # the segments list that we're iterating over.  (We'll go
            # to the beginning of the next line at the start of the
            # next iteration through the loop.)
            else:
                split = self.linelen-state.charpos
                segments.insert(i+1, segment[split:])
                segment = segment[:split]
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result += [segment, self.LINEWRAP]
532