Working with C code
“Hello world”
Here’s a simple “hello world” C program:
#include <stdio.h>

/* Minimal sample program: writes one fixed line of text to stdout. */
int main(int argc, char *argv[])
{
    printf("Hello, python\n");
    /* There is no explicit return statement: control falls off the end of main. */
}
Here’s a Python script that locates the function during one of the compiler’s passes and prints various interesting things about it:
import gcc


# Here's a callback.  We will wire it up below:
def on_pass_execution(p, fn):
    """Print details of the function being compiled for one specific pass.

    Every pass other than '*warn_function_return' is ignored.

    p: the pass being executed; only its ``name`` attribute is examined.
    fn: for the pass of interest, an instance of gcc.Function.
    """
    # This pass is called fairly early on, per-function, after the
    # CFG has been built:
    if p.name != '*warn_function_return':
        return

    # For this pass, "fn" will be an instance of gcc.Function:
    print('fn: %r' % fn)
    print('fn.decl.name: %r' % fn.decl.name)

    # fn.decl is an instance of gcc.FunctionDecl:
    print('return type: %r' % str(fn.decl.type.type))
    print('argument types: %r'
          % [str(t) for t in fn.decl.type.argument_types])

    # Sanity-check the CFG shape this demo relies on: exactly three
    # basic blocks, the first being the entry and the second the exit.
    assert isinstance(fn.cfg, gcc.Cfg)  # None for some early passes
    assert len(fn.cfg.basic_blocks) == 3
    assert fn.cfg.basic_blocks[0] == fn.cfg.entry
    assert fn.cfg.basic_blocks[1] == fn.cfg.exit

    # The remaining block is the one whose statements get dumped:
    bb = fn.cfg.basic_blocks[2]
    for stmt_index, stmt in enumerate(bb.gimple):
        print('gimple[%i]:' % stmt_index)
        print(' str(stmt): %r' % str(stmt))
        print(' repr(stmt): %r' % repr(stmt))
        if isinstance(stmt, gcc.GimpleCall):
            print(' type(stmt.fn): %r' % type(stmt.fn))
            print(' str(stmt.fn): %r' % str(stmt.fn))
            # A separate index name keeps the statement index intact:
            for arg_index, arg in enumerate(stmt.args):
                print(' str(stmt.args[%i]): %r' % (arg_index, str(arg)))
            print(' str(stmt.lhs): %s' % str(stmt.lhs))


# Wire up our callback:
gcc.register_callback(gcc.PLUGIN_PASS_EXECUTION,
                      on_pass_execution)
We can run the script during the compile like this:
./gcc-with-python script.py test.c
Here’s the expected output:
fn: gcc.Function('main') fn.decl.name: 'main' return type: 'int' argument types: ['int', 'char * *'] gimple[0]: str(stmt): '__builtin_puts (&"Hello, python"[0]);' repr(stmt): 'gcc.GimpleCall()' type(stmt.fn): <type 'gcc.AddrExpr'> str(stmt.fn): '__builtin_puts' str(stmt.args[0]): '&"Hello, python"[0]' str(stmt.lhs): None gimple[1]: str(stmt): 'return;' repr(stmt): 'gcc.GimpleReturn()'
Notice how the call to printf has already been optimized into a call to __builtin_puts.
Spell-checking string constants within source code
This example adds a spell-checker pass to GCC: all string constants are run through the “enchant” spelling-checker:
$ ./gcc-with-python tests/examples/spelling-checker/script.py input.c
The Python code for this is:
import gcc

# Use the Python bindings to the "enchant" spellchecker:
import enchant

spellingdict = enchant.Dict("en_US")


class SpellcheckingPass(gcc.GimplePass):
    """Gimple pass that runs the words of every string constant through
    the module-level spelling dictionary."""

    def execute(self, fun):
        """Visit each gimple statement of *fun* (called per-function
        during compilation) and spell-check the tree nodes inside it."""
        for block in fun.cfg.basic_blocks:
            # Skip blocks that have no gimple statements:
            if not block.gimple:
                continue
            for statement in block.gimple:
                statement.walk_tree(self.spellcheck_node, statement.loc)

    def spellcheck_node(self, node, loc):
        """Report any word in a string-constant *node* that the dictionary
        rejects, attributing the diagnostic to *loc*."""
        # Only textual constants are of interest:
        if not isinstance(node, gcc.StringCst):
            return

        for candidate in node.constant.split():
            if spellingdict.check(candidate):
                continue

            # Warn about the spelling error (controlling the warning
            # with the -Wall command-line option):
            emitted = gcc.warning(
                loc,
                'Possibly misspelt word in string constant: %r' % candidate,
                gcc.Option('-Wall'))
            if not emitted:
                # The warning was suppressed at the command line, so
                # there is nothing to attach suggestions to.
                continue

            # Follow up with suggested respellings, if there are any:
            respellings = spellingdict.suggest(candidate)
            if respellings:
                gcc.inform(loc,
                           'Suggested respellings: %r' % ', '.join(respellings))


ps = SpellcheckingPass(name='spellchecker')
ps.register_after('cfg')
Given this sample C source file:
#include <stdio.h>

/* Sample input containing both correctly spelled and misspelled string
   constants. */
int main(int argc, char *argv[])
{
    /* The second alternative contains the misspellings "korectly" and
       "speled". */
    const char *p = argc ? "correctly spelled" : "not so korectly speled";

    /* Every word in this string is spelled correctly. */
    printf("The quick brown fox jumps over the lazy dog\n");

    /* Most words in this string are misspelled. */
    printf("Ths s n xmple f spllng mstke\n");
}
these warnings are emitted on stderr:
tests/examples/spelling-checker/input.c: In function 'main': tests/examples/spelling-checker/input.c:24:48: warning: Possibly misspelt word in string constant: 'korectly' [-Wall] tests/examples/spelling-checker/input.c:24:48: note: Suggested respellings: 'correctly' tests/examples/spelling-checker/input.c:24:48: warning: Possibly misspelt word in string constant: 'speled' [-Wall] tests/examples/spelling-checker/input.c:24:48: note: Suggested respellings: 'speed, spieled, spelled, spewed, speckled, peeled, sped' tests/examples/spelling-checker/input.c:28:11: warning: Possibly misspelt word in string constant: 'Ths' [-Wall] tests/examples/spelling-checker/input.c:28:11: note: Suggested respellings: "Th, Th's, Ohs, Thu, TVs, T's, Th s, Ts, This, Thus, The, Tho, Tbs, Thy, Goths" tests/examples/spelling-checker/input.c:28:11: warning: Possibly misspelt word in string constant: 'xmple' [-Wall] tests/examples/spelling-checker/input.c:28:11: note: Suggested respellings: 'ample' tests/examples/spelling-checker/input.c:28:11: warning: Possibly misspelt word in string constant: 'spllng' [-Wall] tests/examples/spelling-checker/input.c:28:11: note: Suggested respellings: 'spelling' tests/examples/spelling-checker/input.c:28:11: warning: Possibly misspelt word in string constant: 'mstke' [-Wall] tests/examples/spelling-checker/input.c:28:11: note: Suggested respellings: 'mistake'
Finding global variables
This example adds a pass that warns about uses of global variables:
$ ./gcc-with-python \ tests/examples/find-global-state/script.py \ -c \ tests/examples/find-global-state/input.c
The Python code for this is:
import gcc
from gccutils import get_src_for_loc

DEBUG = 0


def is_const(type_):
    """Return True if *type_* is const, or is an array of a const type.

    Always returns a bool (True or False).
    """
    if DEBUG:
        type_.debug()

    # Not every type object has a 'const' attribute, hence the default:
    if getattr(type_, 'const', False):
        return True

    # Don't bother warning about an array of const e.g.
    # const char []
    if isinstance(type_, gcc.ArrayType):
        return is_const(type_.dereference)

    return False


class StateFinder:
    """Collects the non-const variable declarations reported by
    gcc.get_variables() and records where they are referenced."""

    def __init__(self):
        # Locate all declarations of variables holding "global" state:
        self.global_decls = set()
        # (loc, decl) pairs recorded by find_state_users():
        self.state_users = set()

        for var in gcc.get_variables():
            type_ = var.decl.type

            if DEBUG:
                print('var.decl: %r' % var.decl)
                print(type_)

            # Don't bother warning about const data:
            if is_const(type_):
                continue

            self.global_decls.add(var.decl)

        if DEBUG:
            print('self.global_decls: %r' % self.global_decls)

    def find_state_users(self, node, loc):
        """walk_tree callback: record *node* if it is one of the tracked
        declarations, together with the location *loc* of the statement."""
        if isinstance(node, gcc.VarDecl) and node in self.global_decls:
            # store the state users for later replay, so that
            # we can eliminate duplicates
            # e.g. two references to "q" in "q += p"
            # and replay in source-location order:
            self.state_users.add((loc, node))

    def flush(self):
        """Emit one note per recorded use, sorted by source location."""
        for loc, node in sorted(self.state_users,
                                key=lambda pair: pair[0]):
            gcc.inform(loc,
                       'use of global state "%s %s" here'
                       % (node.type, node))


def on_pass_execution(p, fn):
    """When the '*free_lang_data' pass runs, scan every function in the
    callgraph for uses of global state and report them.

    p: the pass being executed; only its ``name`` attribute is examined.
    fn: unused here.
    """
    if p.name != '*free_lang_data':
        return

    sf = StateFinder()

    # Locate uses of such variables:
    for node in gcc.get_callgraph_nodes():
        fun = node.decl.function
        if not fun:
            continue
        cfg = fun.cfg
        if not cfg:
            continue
        for bb in cfg.basic_blocks:
            stmts = bb.gimple
            if not stmts:
                continue
            for stmt in stmts:
                stmt.walk_tree(sf.find_state_users, stmt.loc)

    # Flush the data that was found:
    sf.flush()


gcc.register_callback(gcc.PLUGIN_PASS_EXECUTION,
                      on_pass_execution)
Given this sample C source file:
#include <stdio.h>

/* File-scope variable; none of the functions below reference it. */
static int a_global;

/* File-scope variable of an anonymous struct type; read by test6(). */
struct {
  int f;
} bar;

/* Declared here but defined elsewhere; read by test4(). */
extern int foo;

int test(int j)
{
  /* A local variable, which should *not* be reported: */
  int i;
  i = j * 4;
  return i + 1;
}

int test2(int p)
{
  /* Function-local static: both read and written below. */
  static int q = 0;
  q += p;
  return p * q;
}

int test3(int k)
{
  /* We should *not* report about __FUNCTION__ here: */
  printf("%s:%i:%s\n", __FILE__, __LINE__, __FUNCTION__);
}

int test4()
{
  return foo;
}

int test6()
{
  return bar.f;
}

struct banana {
  int f;
};

/* const-qualified file-scope variable; read by test7(). */
const struct banana a_banana;

int test7()
{
  return a_banana.f;
}
these notes are emitted on stderr:
tests/examples/find-global-state/input.c:41:nn: note: use of global state "int q" here tests/examples/find-global-state/input.c:41:nn: note: use of global state "int q" here tests/examples/find-global-state/input.c:42:nn: note: use of global state "int q" here tests/examples/find-global-state/input.c:53:nn: note: use of global state "int foo" here tests/examples/find-global-state/input.c:58:nn: note: use of global state "struct { int f; } bar" here