#!/usr/local/bin/perl
##
## doclist.pl  (C) 1995 Andrew Scherpbier
##
## This program will list the information in the documentdb generated by htdig.
##
## Usage: doclist.pl <documentdb-file>
##
## For every record in the database (except the "nextDocID" bookkeeping
## entries) the title, the descriptions and the URL are printed, followed
## by an empty line.
##

use strict;
use warnings;

use GDBM_File;

# Number of bytes occupied by one pack/unpack 'i' value.  'i' is the native
# C int, whose width is platform dependent, so it is measured instead of
# being hard-coded as 4.
my $INT_SIZE = length pack('i', 0);

# Character placed between the entries of list-valued fields.
# NOTE(review): the previous code appended an empty string after every entry
# and then chop()ed the result, which silently removed the last character of
# the last entry.  That chop implies a one-character separator was intended;
# "\001" is assumed here -- TODO confirm against the consumers of this output.
my $LIST_SEPARATOR = "\001";

# Field tag => record key, for fields stored as a single native int.
my %INT_FIELD = (
    0  => 'ID',
    1  => 'TIME',
    2  => 'ACCESSED',
    3  => 'STATE',
    4  => 'SIZE',
    5  => 'LINKS',
    6  => 'IMAGESIZE',
    7  => 'HOPCOUNT',
    18 => 'BACKLINKS',
    19 => 'SIG',            # SIGNATURE
);

# Field tag => record key, for fields stored as an int length followed by
# that many bytes of text.
my %STRING_FIELD = (
    8  => 'URL',
    9  => 'HEAD',
    10 => 'TITLE',
    13 => 'EMAIL',
    14 => 'NOTIFICATION',
    15 => 'SUBJECT',
    16 => 'STRING',         # ignored by the listing, but still unpacked
    17 => 'METADSC',
);

# Field tag => record key, for fields stored as an int count followed by
# that many length-prefixed strings.
my %LIST_FIELD = (
    11 => 'DESCRIPTIONS',
    12 => 'ANCHORS',
);

# Run the listing only when executed as a script; when this file is pulled
# in with require/do, only the subroutines are defined.
main(@ARGV) unless caller;

# main($dbfile)
#
# Opens the GDBM database $dbfile read-only and prints Title, Descriptions
# and URL of every record.  Dies with a usage message when no file name is
# given, or with the system error when the database cannot be opened.
sub main {
    my ($dbfile) = @_;
    die "Usage: $0 <documentdb-file>\n" unless defined $dbfile;

    my %docdb;
    tie(%docdb, 'GDBM_File', $dbfile, GDBM_READER(), 0)
        or die "Unable to open $dbfile: $!";

    while (my ($key, $value) = each %docdb) {
        next if $key =~ /^nextDocID/;

        my %record = parse_ref_record($value);

        # A record need not contain every field; print absent ones as
        # empty text instead of triggering "uninitialized" warnings.
        for my $name (qw(TITLE DESCRIPTIONS URL)) {
            $record{$name} = '' unless defined $record{$name};
        }

        print "Title:        $record{'TITLE'}\n";
        print "Descriptions: $record{'DESCRIPTIONS'}\n";
        print "URL:          $record{'URL'}\n";
        print "\n";
    }

    untie %docdb;
    return;
}

# parse_ref_record($value)
#
# Decodes one packed document record.  The record is read as a sequence of
# fields, each introduced by a one-byte tag (see the tag tables above) and
# followed by a payload whose layout depends on the tag.
#
# Returns a hash (as a list) mapping field names such as "ID", "URL" or
# "TITLE" to their decoded values.  List fields (DESCRIPTIONS, ANCHORS) are
# returned as one string with the entries joined by $LIST_SEPARATOR.
#
# Parsing stops at the first unknown tag or undecodable payload, because
# from that point on the field boundaries are no longer known; everything
# decoded before that point is still returned.
sub parse_ref_record {
    my ($value) = @_;
    my %rec;
    return %rec unless defined $value;

    while (length($value) > 0) {
        my $what = unpack('C', $value);
        substr($value, 0, 1, '');

        my ($name, $field);
        if (defined($name = $INT_FIELD{$what})) {
            $field = _take_int(\$value);
        }
        elsif (defined($name = $STRING_FIELD{$what})) {
            $field = _take_string(\$value);
        }
        elsif (defined($name = $LIST_FIELD{$what})) {
            $field = _take_list(\$value);
        }
        else {
            last;               # unknown tag: payload size is unknown
        }

        last unless defined $field;     # truncated or corrupt payload
        $rec{$name} = $field;
    }

    return %rec;
}

# _take_int(\$buffer)
#
# Removes one native int from the front of the buffer and returns it.
# Returns undef (buffer untouched) when fewer than $INT_SIZE bytes remain.
sub _take_int {
    my ($buf_ref) = @_;
    return undef if length($$buf_ref) < $INT_SIZE;

    my $number = unpack('i', $$buf_ref);
    substr($$buf_ref, 0, $INT_SIZE, '');
    return $number;
}

# _take_string(\$buffer)
#
# Removes one length-prefixed string from the front of the buffer and
# returns it with trailing spaces and NULs stripped (unpack 'A').  A length
# that runs past the end of the buffer is clamped to what is available.
# Returns undef when the length prefix is missing or negative.
sub _take_string {
    my ($buf_ref) = @_;

    my $length = _take_int($buf_ref);
    return undef if !defined $length || $length < 0;

    my $available = length $$buf_ref;
    $length = $available if $length > $available;

    my $text = unpack("A$length", $$buf_ref);
    substr($$buf_ref, 0, $length, '');
    return $text;
}

# _take_list(\$buffer)
#
# Removes a counted list of length-prefixed strings from the front of the
# buffer and returns the entries joined by $LIST_SEPARATOR (an empty string
# for an empty list).  Returns undef when the count or any entry cannot be
# decoded.
sub _take_list {
    my ($buf_ref) = @_;

    my $count = _take_int($buf_ref);
    return undef unless defined $count;

    my @items;
    for (1 .. $count) {
        my $item = _take_string($buf_ref);
        return undef unless defined $item;
        push @items, $item;
    }

    return join $LIST_SEPARATOR, @items;
}

1;