""" Example for dynamic programming with Tag Tables... originated from a posting to comp.lang.python by Tim Peters: [Tim] > [Marc-Andre] > I can stick in any matching function I want, so I might even > let re.match() do some of the work. That should get me pretty close > to their semantics -- ok, I can't do it all the way: Sure you can: just let re.match() do *all* the work! Presto, tables are as powerful as re. > e.g. I currently don't have registers so back-references to already > matched groups will probably not work without reanalysing them. So you have trouble recognizing e.g. the language of the form ... where "tag" can be any (say) arbitrary alphanumeric string? this clause is in that language , this clause isn't , while the whole sentence is -- if you ignore the trailing period . It's even better if you can do computation on backreferences and use the results to guide further parsing. E.g., recognizing Fortran Hollerith strings requires this (a string of digits, followed by "H" or "h", followed by any string of characters whose length is equal to the decimal value of the string of digits; and that's too hard for regexps too). teasingly y'rs - tim """ from mx.TextTools import * tables = [None] def opening_tag(taglist,text,l,r,subtags): # First append an entry to the taglist tagname = text[l+1:r-1] taglist.append(('open '+tagname,l,r,subtags)) # Now build a tag table that searches for tables[0] = ((None,sWordStart,TextSearch('')), ) def closing_tag(taglist,text,l,r,subtags): tagname = text[l+2:r-1] taglist.append(('close '+tagname,l,r,subtags)) TIM = ( # Check starting tag (opening_tag,Table+CallTag, ((None,Is,'<'), (None,AllInSet,alphanumeric_set), (None,Is,'>'), )), # Find closing tag ('text',TableInList,(tables,0)), # For completeness mark the closing tag too (closing_tag,Table+CallTag, ((None,Word,''), )), ) def _test(): while 1: text = raw_input('Enter a string in TIM: ') if not text: break result,taglist,next = tag(text,TIM) if result: print 'The text you gave was recognized as TIM:' print_tags(text,taglist) else: print "Sorry, but the text doesn't qualify as TIM." print 'The search stopped at:' print repr(text[:next]) + '<<<' if __name__ == '__main__': _test()