epydoc.util

1 # epydoc -- Utility functions 2 # 3 # Copyright (C) 2005 Edward Loper 4 # Author: Edward Loper <edloper@loper.org> 5 # URL: <http://epydoc.sf.net> 6 # 7 # $Id: util.py 1671 2008-01-29 02:55:49Z edloper $ 8 9 """ 10 Miscellaneous utility functions that are used by multiple modules. 11 12 @group Python source types: is_module_file, is_package_dir, is_pyname, 13 py_src_filename 14 @group Text processing: wordwrap, decode_with_backslashreplace, 15 plaintext_to_html 16 """ 17 __docformat__ = 'epytext en' 18 19 import os, os.path, re 20 21 ###################################################################### 22 ## Python Source Types 23 ###################################################################### 24 25 PY_SRC_EXTENSIONS = ['.py', '.pyw'] 26 PY_BIN_EXTENSIONS = ['.pyc', '.so', '.pyd'] 27

def is_module_file(path):
    """
    Return true if C{path} names an existing file that looks like a
    Python module: the file's base name is a valid identifier, and its
    extension is one of the extensions listed in C{PY_SRC_EXTENSIONS}
    or C{PY_BIN_EXTENSIONS}.

    @param path: The candidate file name.  Values that are not
        strings are accepted and simply yield C{False}.
    @rtype: C{bool}
    """
    # C{basestring} only exists under Python 2; fall back to C{str} so
    # that the type check does not raise NameError under Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Make sure it's a file name.
    if not isinstance(path, string_types):
        return False
    filename = os.path.basename(path)
    (basename, extension) = os.path.splitext(filename)
    # bool() keeps the result a real boolean; without it a failed
    # regexp match would leak out as None.
    return bool(os.path.isfile(path) and
                re.match(r'[a-zA-Z_]\w*$', basename) and
                extension in PY_SRC_EXTENSIONS + PY_BIN_EXTENSIONS)

37

def is_src_filename(filename):
    """
    Return true if C{filename} is a string that names an existing
    path whose extension is listed in C{PY_SRC_EXTENSIONS}.

    @param filename: The candidate file name.  Values that are not
        strings are accepted and simply yield C{False}.
    @rtype: C{bool}
    """
    # C{basestring} only exists under Python 2; fall back to C{str} so
    # that the type check does not raise NameError under Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if not isinstance(filename, string_types):
        return False
    if not os.path.exists(filename):
        return False
    return os.path.splitext(filename)[1] in PY_SRC_EXTENSIONS

42

def is_package_dir(dirname):
    """
    Return true if the given directory is a valid package directory
    (i.e., it names a directory that contains a valid __init__ file).

    The directory's own name is deliberately I{not} required to be a
    valid identifier: that constraint was removed because of
    sourceforge bug #1787028 -- in some cases (eg eggs), it's too
    strict.

    @param dirname: The candidate directory name.  Values that are
        not strings are accepted and simply yield C{False}.
    @rtype: C{bool}
    """
    # C{basestring} only exists under Python 2; fall back to C{str} so
    # that the type check does not raise NameError under Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Make sure it's a directory name.
    if not isinstance(dirname, string_types):
        return False
    if not os.path.isdir(dirname):
        return False
    dirname = os.path.abspath(dirname)

    # Any entry called "__init__.<ext>" that is itself a module file
    # makes this directory a package.
    for name in os.listdir(dirname):
        if (name.startswith('__init__.') and
                is_module_file(os.path.join(dirname, name))):
            return True
    return False

71

def is_pyname(name):
    """
    Check whether C{name} has the shape of a dotted Python name: one
    or more runs of word characters, separated by single periods.

    @param name: The string to check.
    @return: The regular expression match object if C{name} has that
        shape (a true value), or C{None} otherwise.
    """
    dotted_name = re.compile(r"\w+(\.\w+)*$")
    return dotted_name.match(name)

74

def py_src_filename(filename):
    """
    Return the name of the Python source file that corresponds to
    C{filename}.

    If C{filename} already has one of the extensions listed in
    C{PY_SRC_EXTENSIONS}, it is returned unchanged.  Otherwise its
    extension is replaced by each source extension in turn, and the
    first resulting name that is an existing file is returned.

    @param filename: The name of a source or compiled Python file.
    @rtype: C{string}
    @raise ValueError: If no corresponding source file exists.
    """
    stem, suffix = os.path.splitext(filename)

    # Already a source file: hand it back untouched.
    if suffix in PY_SRC_EXTENSIONS:
        return filename

    for src_ext in PY_SRC_EXTENSIONS:
        candidate = '%s%s' % (stem, src_ext)
        if os.path.isfile(candidate):
            return candidate

    raise ValueError('Could not find a corresponding '
                     'Python source file for %r.' % filename)

86

def munge_script_name(filename):
    """
    Build an identifier-like name for the script stored in
    C{filename}: the directory part is dropped, every non-word
    character of what remains is replaced by an underscore, and the
    result is prefixed with C{'script-'}.

    @param filename: The path of the script.
    @rtype: C{string}
    """
    leaf = os.path.basename(filename)
    return 'script-' + re.sub(r'\W', '_', leaf)

91 92 ###################################################################### 93 ## Text Processing 94 ###################################################################### 95

def decode_with_backslashreplace(s):
    r"""
    Convert the given 8-bit string into unicode, treating any
    character c such that ord(c)<128 as an ascii character, and
    converting any c such that ord(c)>=128 into a backslashed escape
    sequence.

    For example, the three ascii characters C{abc} followed by the
    two bytes 0xff and 0xe8 are converted to the ten-character text
    C{abc\xff\xe8}.

    @param s: The 8-bit string to convert (a byte string; under
        Python 2 this is the ordinary C{str} type).
    @return: A unicode string containing only ascii characters.
    """
    # s.encode('string-escape') is not appropriate here, since it
    # also adds backslashes to some ascii chars (eg \ and ').
    #
    # ``bytes`` is the same object as ``str`` under Python 2 (2.6+),
    # so this check is unchanged there; under Python 3 it selects the
    # only type that actually has a decode() method.
    assert isinstance(s, bytes)
    return (s
            .decode('latin1')
            .encode('ascii', 'backslashreplace')
            .decode('ascii'))

113

def wordwrap(str, indent=0, right=75, startindex=0, splitchars=''):
    """
    Word-wrap the given string.  Tabs are expanded, and the text is
    cut into chunks at runs of spaces, at newlines, and (optionally)
    after any of the characters in C{splitchars}.  Chunks are then
    laid out left to right; whenever the next chunk would extend past
    column C{right}, a line break is inserted before it.  Newlines
    that are already present in the string are kept.  Trailing
    whitespace is stripped from the result, which always ends with
    exactly one newline.

    @param indent: If specified, then indent each line by this number
        of spaces.
    @type indent: C{int}
    @param right: The right margin for word wrapping.
    @type right: C{int}
    @param startindex: If specified, then assume that the first line
        is already preceded by C{startindex} characters.
    @type startindex: C{int}
    @param splitchars: A list of non-whitespace characters which can
        be used to split a line.  (E.g., use '/\\' to allow path names
        to be split over multiple lines.)
    @rtype: C{str}
    """
    # Choose the tokenizer: the capturing group makes re.split keep
    # the separators in its result.
    if splitchars:
        escaped = re.escape(splitchars)
        pattern = r'( +|\n|[^ \n%s]*[%s])' % (escaped, escaped)
    else:
        pattern = r'( +|\n)'
    text = str.expandtabs()

    line_start = '\n' + ' ' * indent
    pieces = [' ' * (indent - startindex)]
    column = max(indent, startindex)

    for token in re.split(pattern, text):
        width = len(token)
        needs_break = (token == '\n' or
                       (column > 0 and column + width > right))
        if not needs_break:
            pieces.append(token)
            column += width
            continue

        # Start a new (indented) line.  Whitespace that triggered the
        # break is swallowed; anything else opens the new line.
        pieces.append(line_start)
        column = indent
        if token[:1] not in ('\n', ' '):
            pieces.append(token)
            column += width

    return ''.join(pieces).rstrip() + '\n'

157

def plaintext_to_html(s):
    """
    Escape the characters of a plaintext string that have a special
    meaning in HTML.

    @param s: The plaintext string to encode.
    @return: An HTML string that encodes the given plaintext string.
        In particular, special characters (such as C{'<'} and C{'&'})
        are escaped.
    @rtype: C{string}
    """
    # The ampersand must be handled first: otherwise the ampersands
    # introduced by the other entities would themselves be escaped.
    s = s.replace('&', '&amp;').replace('"', '&quot;')
    s = s.replace('<', '&lt;').replace('>', '&gt;')
    return s

168

def plaintext_to_latex(str, nbsp=0, breakany=0):
    """
    Encode a plaintext string so that it can be used in LaTeX source.

    @return: A LaTeX string that encodes the given plaintext string.
        In particular, special characters (such as C{'$'} and C{'_'})
        are escaped, and tabs are expanded.
    @rtype: C{string}
    @param nbsp: Replace every space with a non-breaking space
        (C{'~'}).
    @param breakany: Insert hyphenation marks, so that LaTeX can
        break the resulting string at any point.  This is useful for
        small boxes (e.g., the type box in the variable list table).
    """
    text = str

    # Tag every character with a chr(1) placeholder; the placeholders
    # become hyphenation points at the very end.
    if breakany:
        text = re.sub('(.)', '\\1\1', text)

    # Park backslashes as chr(0) so the escaping below does not touch
    # them; they become a backslash macro later.
    text = text.replace('\\', '\0')

    # Expand tabs.
    text = text.expandtabs()

    # Characters that only need a leading backslash.
    text = re.sub(r'([#$&%_\${}])', r'\\\1', text)

    # Characters that are spelled with a named macro.  These run after
    # the escaping above, so the braces they add are left alone.
    named_macros = (
        ('|', '{\\textbar}'),
        ('<', '{\\textless}'),
        ('>', '{\\textgreater}'),
        ('^', '{\\textasciicircum}'),
        ('~', '{\\textasciitilde}'),
        ('\0', r'{\textbackslash}'),
    )
    for plain, macro in named_macros:
        text = text.replace(plain, macro)

    # Replace spaces with non-breaking spaces.
    if nbsp:
        text = text.replace(' ', '~')

    # Turn the chr(1) placeholders into hyphenation points.
    if breakany:
        text = text.replace('\1', r'\-')

    return text

208

class RunSubprocessError(OSError):
    """
    An C{OSError} that additionally records the output of a command.
    Its message is the first element of the command followed by
    C{' failed'}.

    @ivar out: The C{out} value given to the constructor.
    @ivar err: The C{err} value given to the constructor.
    """

    def __init__(self, cmd, out, err):
        """
        @param cmd: The command, as a sequence whose first element
            is used in the error message.
        @param out: Stored as C{self.out}.
        @param err: Stored as C{self.err}.
        """
        message = '%s failed' % cmd[0]
        OSError.__init__(self, message)
        self.out, self.err = out, err

214

def run_subprocess(cmd, data=None):
    """
    Execute the command C{cmd} in a subprocess.

    @param cmd: The command to execute, specified as a list
        of string.  A single string is also accepted; it is split on
        whitespace (no shell-style quoting is interpreted).
    @param data: A string containing data to send to the
        subprocess.
    @return: A tuple C{(out, err)}.
    @raise OSError: If there is any problem executing the
        command, or if its exitval is not 0.
    """
    # C{basestring} only exists under Python 2; fall back to C{str} so
    # that the type check does not raise NameError under Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(cmd, string_types):
        cmd = cmd.split()

    # Under Python 2.4+, use subprocess.  Only the import itself is
    # guarded, so that an unrelated ImportError raised while running
    # the command is not mistaken for "subprocess is unavailable".
    try:
        from subprocess import Popen, PIPE
    except ImportError:
        Popen = None

    if Popen is not None:
        pipe = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        out, err = pipe.communicate(data)
        if hasattr(pipe, 'returncode'):
            failed = (pipe.returncode != 0)
        else:
            # Assume that there was an error iff anything was written
            # to the child's stderr.  (A truth test works for both
            # byte strings and text strings.)
            failed = bool(err)
        if failed:
            raise RunSubprocessError(cmd, out, err)
        return out, err

    # Under Python 2.3 or earlier, on unix, use popen2.Popen3 so we
    # can access the return value.
    import popen2
    if hasattr(popen2, 'Popen3'):
        pipe = popen2.Popen3(' '.join(cmd), True)
        to_child = pipe.tochild
        from_child = pipe.fromchild
        child_err = pipe.childerr
        if data:
            to_child.write(data)
        to_child.close()
        out = err = ''
        # NOTE(review): the popen2 documentation describes poll() as
        # returning -1 (not None) while the child is still running, so
        # this loop body may never execute -- confirm before relying
        # on it.  The reads after the loop collect the output either
        # way.
        while pipe.poll() is None:
            out += from_child.read()
            err += child_err.read()
        out += from_child.read()
        err += child_err.read()
        if pipe.wait() == 0:
            return out, err
        else:
            raise RunSubprocessError(cmd, out, err)

    # Under Python 2.3 or earlier, on non-unix, use os.popen3
    else:
        to_child, from_child, child_err = os.popen3(' '.join(cmd), 'b')
        if data:
            try:
                to_child.write(data)
            # Guard for a broken pipe error.  The exception object is
            # fetched through sys.exc_info() because neither
            # "except IOError, e" nor "except IOError as e" is valid
            # syntax on every Python version this module may be
            # compiled under.
            except IOError:
                import sys
                raise OSError(sys.exc_info()[1])
        to_child.close()
        out = from_child.read()
        err = child_err.read()
        # Assume that there was an error iff anything was written
        # to the child's stderr.
        if err == '':
            return out, err
        else:
            raise RunSubprocessError(cmd, out, err)

290

Source Code for Module epydoc.util