#!/usr/bin/perl -w use strict; use XML::LibXML; use IO::File; # ------------------------------------------------------------------------- # # (c) 2003 christian p. glahn # ------------------------------------------------------------------------- # # ------------------------------------------------------------------------- # # This is an example how to use the DOM interface of XML::LibXML The # script reads a XML File with a module specification. If the module # contains several classes, the script fetches them and stores the # data into different POD Files. # # Note this is just an example, to demonstrate how XML::LibXML works. # The code works for the XML::LibXML documentation, but may not work # for any other docbook file. # # If you are interested what the results are, check the README and the POD # files shipped with XML::LibXML. # ------------------------------------------------------------------------- # # ------------------------------------------------------------------------- # # SYNOPSIS: # xmllibxmldocs.pl $dokbook_file $targetdir # my $srcfile = shift @ARGV; my $targetdir = shift @ARGV; unless ( $targetdir =~ /\/$/ ) { $targetdir .= "/"; } # ------------------------------------------------------------------------- # # # ------------------------------------------------------------------------- # # init the parser my $parser = XML::LibXML->new(); $parser->load_ext_dtd(0); $parser->keep_blanks(0); # ------------------------------------------------------------------------- # # # ------------------------------------------------------------------------- # # load the document into memory. my $doc = $parser->parse_file( $srcfile ); # ------------------------------------------------------------------------- # # # ------------------------------------------------------------------------- # # good implementations would use XSLT to convert a docbook to any other # text format. Since the module does not presume libxslt installed, we # have to do the dirty job. my $ch = ChapterHandler->new($targetdir); # ------------------------------------------------------------------------- # # init the common parts in all pods my ( $bookinfo ) = $doc->findnodes( "//bookinfo" ); $ch->set_general_info( $bookinfo ); # ------------------------------------------------------------------------- # # ------------------------------------------------------------------------- # # then process each chapter of the XML::LibXML book my @chapters = $doc->findnodes( "//chapter" ); foreach my $chap ( @chapters ) { $ch->handle( $chap ); } # ------------------------------------------------------------------------- # # ------------------------------------------------------------------------- # # ------------------------------------------------------------------------- # # the class to process our dokbook file # ------------------------------------------------------------------------- # package ChapterHandler; use XML::LibXML; # ------------------------------------------------------------------------- # # the constructor # ------------------------------------------------------------------------- # sub new{ my $class = shift; my $dir = shift; my $self = bless {directory => $dir}, $class; return $self; } # ------------------------------------------------------------------------- # # ------------------------------------------------------------------------- # # set_general_info # ------------------------------------------------------------------------- # # processes the bookinfo tag of XML::LibXML to extract common information such # as version or copyright information sub set_general_info { my $self = shift; my $infonode = shift; return unless defined $infonode; my $infostr = "=head1 AUTHORS\n\n"; my @authors = $infonode->findnodes( "authorgroup/author" ); foreach my $author ( @authors ) { my ( $node_fn ) = $author->getChildrenByTagName( "firstname" ); my ( $node_sn ) = $author->getChildrenByTagName( "surname" ); if ( defined $node_fn ) { $infostr .= $node_fn->string_value(); } if ( defined $node_sn ) { $infostr .= " ". $node_sn->string_value(); } if ( defined $author->nextSibling() ) { $infostr .= ", \n"; } else { $infostr .= "\n\n"; } } my ( $version ) = $infonode->findnodes( "edition" ); if ( defined $version ) { $infostr .= "\n=head1 VERSION\n\n" . $version->string_value() . "\n\n"; } my @copyright = $infonode->findnodes( "copyright" ); if ( @copyright ) { $infostr .= "=head1 COPYRIGHT\n\n"; foreach my $copyright (@copyright) { my $node_y = $copyright->getChildrenByTagName( "year" ); my $node_h = $copyright->getChildrenByTagName( "holder" ); if ( defined $node_y ) { $infostr .= $node_y->string_value() . ", "; } if ( defined $node_h ) { $infostr .= $node_h->string_value(); } $infostr .= ".\n\n"; } $infostr .= "=cut\n"; $infostr .= "\n\n".<<'EOF'; =head1 LICENSE This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. EOF } $self->{infoblock} = $infostr; } # ------------------------------------------------------------------------- # # handle # ------------------------------------------------------------------------- # # This function opens the output file and decides how the chapter is # processed sub handle { my $self = shift; my $chapter = shift; my ( $abbr ) = $chapter->findnodes( "titleabbrev" ); if ( defined $abbr ) { # create a new file. my $filename = $abbr->string_value(); $filename =~ s/^\s*|\s*$//g; my $dir = $self->{directory}; $filename =~ s/XML\:\:LibXML//g; $filename =~ s/^-|^\:\://g; # remove the first colon or minus. $filename =~ s/\:\:/\//g; # transform remaining colons to paths. # the previous statement should work for existing modules. This could be # dangerous for nested modules, which do not exist at the time of writing # this code. unless ( length $filename ) { $dir = ""; $filename = "LibXML"; } if ( $filename ne "README" and $filename ne "LICENSE" ) { $filename .= ".pod"; } else { $dir = ""; } $self->{OFILE} = IO::File->new(); $self->{OFILE}->open(">".$dir.$filename); if ( $abbr->string_value() eq "README" or $abbr->string_value() eq "LICENSE" ) { # Text only chapters in the documentation $self->dump_text( $chapter ); } else { # print header # print synopsis # process the information itself # dump the info block $self->dump_pod( $chapter ); $self->{OFILE}->print( $self->{infoblock} ); } # close the file $self->{OFILE}->close(); # Strip trailing space. my $text = _slurp($dir.$filename); $text =~ s/[ \t]+$//gms; open my $out, '>', $dir.$filename or die "Cannot open $dir$filename for writing."; print {$out} $text; close ($out); } } sub _slurp { my $filename = shift; open my $in, '<', $filename or die "Cannot open '$filename' for slurping - $!"; local $/; my $contents = <$in>; close($in); return $contents; } # ------------------------------------------------------------------------- # # dump_text # ------------------------------------------------------------------------- # # convert the chapter into a textfile, such as README. sub dump_text { my $self = shift; my $chap = shift; if ( $chap->nodeName() eq "chapter" ) { my ( $title ) = $chap->getChildrenByTagName( "title" ); my $str = $title->string_value(); my $len = length $str; $self->{OFILE}->print( uc($str) . "\n" ); $self->{OFILE}->print( "=" x $len ); $self->{OFILE}->print( "\n\n" ); } foreach my $node ( $chap->childNodes() ) { if ( $node->nodeName() eq "para" ) { # we split at the last whitespace before 80 chars my $string = $node->string_value(); my $os = ""; my @words = split /\s+/, $string; foreach my $word ( @words ) { if ( (length( $os ) + length( $word ) + 1) < 80 ) { if ( length $os ) { $os .= " "; } $os .= $word; } else { $self->{OFILE}->print( $os . "\n" ); $os = $word; } } $self->{OFILE}->print( $os ); $self->{OFILE}->print( "\n\n" ); } elsif ( $node->nodeName() eq "sect1" ) { my ( $title ) = $node->getChildrenByTagName( "title" ); my $str = $title->string_value(); my $len = length $str; $self->{OFILE}->print( "\n" . uc($str) . "\n" ); $self->{OFILE}->print( "=" x $len ); $self->{OFILE}->print( "\n\n" ); $self->dump_text( $node ); } elsif ( $node->nodeName() eq "sect2" ) { my ( $title ) = $node->getChildrenByTagName( "title" ); my $str = $title->string_value(); my $len = length $str; $self->{OFILE}->print( "\n" . $str . "\n" ); $self->{OFILE}->print( "=" x $len ); $self->{OFILE}->print( "\n\n" ); $self->dump_text( $node ); } elsif ( $node->nodeName() eq "itemizedlist" ) { my @items = $node->findnodes( "listitem" ); my $sp= " "; foreach my $item ( @items ) { $self->{OFILE}->print( "$sp o " ); my $str = $item->string_value(); $str =~ s/^\s*|\s*$//g; $self->{OFILE}->print( $str ); $self->{OFILE}->print( "\n" ); } $self->{OFILE}->print( "\n" ); } elsif ( $node->nodeName() eq "orderedlist" ) { my @items = $node->findnodes( "listitem" ); my $i = 0; my $sp= " "; foreach my $item ( @items ) { $i++; $self->{OFILE}->print( "$sp $i " ); my $str = $item->string_value(); $str =~ s/^\s*|\s*$//g; $self->{OFILE}->print( $str ); $self->{OFILE}->print( "\n" ); } $self->{OFILE}->print( "\n" ); } elsif ( $node->nodeName() eq "programlisting" ) { my $str = $node->string_value(); $str =~ s/\n/\n> /g; $self->{OFILE}->print( "> ". $str ); $self->{OFILE}->print( "\n\n" ); } } } # ------------------------------------------------------------------------- # # dump_pod # ------------------------------------------------------------------------- # # This method is used to create the real POD files for XML::LibXML. It is not # too sophisticated, but it already does quite a good job. sub dump_pod { my $self = shift; my $chap = shift; if ( $chap->nodeName() eq "chapter" ) { my ( $title ) = $chap->getChildrenByTagName( "title" ); my ( $ttlabbr ) = $chap->getChildrenByTagName( "titleabbrev" ); my $str = $ttlabbr->string_value() . " - ".$title->string_value(); $str=~s/^\s+|\s+$//g; $self->{OFILE}->print( "=head1 NAME\n\n$str\n" ); my ($synopsis) = $chap->findnodes( "sect1[title='Synopsis']" ); my @funcs = $chap->findnodes( ".//funcsynopsis" ); if ($synopsis or scalar @funcs) { $self->{OFILE}->print( "\n=head1 SYNOPSIS\n\n" ) } if ($synopsis) { $self->dump_pod( $synopsis ); } if ( scalar @funcs ) { foreach my $s ( @funcs ) { $self->dump_pod( $s ); } # $self->{OFILE}->print( "\n\n=head1 DESCRIPTION\n\n" ); } } foreach my $node ( $chap->childNodes() ) { if ( $node->nodeType == XML_TEXT_NODE || $node->nodeType == XML_CDATA_SECTION_NODE ) { # we split at the last whitespace before 80 chars my $prev_inline = ($node->previousSibling and $node->previousSibling->nodeName !~ /^(?:itemizedlist|orderedlist|variablelist|programlisting|funcsynopsis)/) ? 1 : 0; my $str = $node->data(); $str=~s/(^|\n)[ \t]+($|\n)/$1$2/g; if ($str=~/\S/) { my $string = $str; my $space_before = ($string =~ s/^\s+//g) ? $prev_inline : 0; my $space_after = ($string =~ s/\s+$//g) ? 1 : 0; $self->{OFILE}->print( " " ) if $space_before; my $os = ""; my @words = split /\s+/, $string; foreach my $word ( @words ) { if ( (length( $os ) + length( $word ) + 1) < 80 ) { if ( length $os ) { $os .= " "; } $os .= $word; } else { $self->{OFILE}->print( $os . "\n" ); $os = $word; } } $os.=" " if $space_after; $self->{OFILE}->print( $os ); } } elsif ( $node->nodeName() eq "para" ) { $self->dump_pod( $node ); $self->{OFILE}->print( "\n\n" ); } elsif ( $node->nodeName() eq "sect1" ) { my ( $title ) = $node->getChildrenByTagName( "title" ); my $str = $title->string_value(); unless ($chap->nodeName eq "chapter" and $str eq 'Synopsis') { $self->{OFILE}->print( "\n=head1 " . uc($str) ); $self->{OFILE}->print( "\n\n" ); $self->dump_pod( $node ); } } elsif ( $node->nodeName() eq "sect2" ) { my ( $title ) = $node->getChildrenByTagName( "title" ); my $str = $title->string_value(); my $len = length $str; $self->{OFILE}->print( "\n=head2 " . $str . "\n\n" ); $self->dump_pod( $node ); } elsif ( $node->nodeName() eq "sect3" ) { my ( $title ) = $node->getChildrenByTagName( "title" ); my $str = $title->string_value(); my $len = length $str; $self->{OFILE}->print( "\n=head3 " . $str . "\n\n" ); $self->dump_pod( $node ); } elsif ( $node->nodeName() eq "itemizedlist" ) { my @items = $node->findnodes( "listitem" ); $self->{OFILE}->print( "\n=over 4\n\n" ); foreach my $item ( @items ) { $self->{OFILE}->print( "=item *\n\n" ); $self->dump_pod( $item ); $self->{OFILE}->print( "\n\n" ); } $self->{OFILE}->print( "=back\n\n" ); } elsif ( $node->nodeName() eq "orderedlist" ) { my @items = $node->findnodes( "listitem" ); my $i = 0; $self->{OFILE}->print( "\n=over 4\n\n" ); foreach my $item ( @items ) { $i++; $self->{OFILE}->print( "=item $i.\n\n" ); $self->dump_pod($item); $self->{OFILE}->print( "\n\n" ); } $self->{OFILE}->print( "=back\n\n" ); } elsif ( $node->nodeName() eq "variablelist" ) { $self->{OFILE}->print( "=over 4\n\n" ); my @nodes = $node->findnodes( "varlistentry" ); $self->dump_pod( $node ); $self->{OFILE}->print( "\n=back\n\n" ); } elsif ( $node->nodeName() eq "varlistentry" ) { my ( $term ) = $node->findnodes( "term" ); $self->{OFILE}->print( "=item " ); if ( defined $term ) { $self->dump_pod( $term ); } $self->{OFILE}->print( "\n\n" ); my @nodes =$node->findnodes( "listitem" ); foreach my $it ( @nodes ) { $self->dump_pod( $it ); } $self->{OFILE}->print( "\n" ); } elsif ( $node->nodeName() eq "programlisting" ) { my $str = $node->string_value(); $str =~ s/^\s+|\s+$//g; $str =~ s/\n/\n /g; $str=~s/(^|\n)[ \t]+($|\n)/$1$2/g; $self->{OFILE}->print( "\n\n" ); $self->{OFILE}->print( " ". $str ); $self->{OFILE}->print( "\n\n" ); } elsif ( $node->nodeName() eq "funcsynopsis") { if (($node->getAttribute('role')||'') ne 'synopsis') { $self->dump_pod($node); $self->{OFILE}->print( "\n" ); } } elsif( $node->nodeName() eq "funcsynopsisinfo" ) { my $str = $node->string_value() ; $str =~ s/\n/\n /g; $self->{OFILE}->print( " $str\n" ); } elsif( $node->nodeName() eq "title" or $node->nodeName() eq "titleabbrev" ) { # IGNORE } elsif( $node->nodeName() eq "emphasis" ) { my $str = $node->string_value() ; $str =~ s/\n/ /g; $str = pod_escape($str); $self->{OFILE}->print( "I<<<<<< $str >>>>>>" ); } elsif( $node->nodeName() eq "function" or $node->nodeName() eq "email" or $node->nodeName() eq "literal" ) { my $str = $node->string_value() ; $str =~ s/\n/ /g; $str = pod_escape($str); $self->{OFILE}->print( "C<<<<<< $str >>>>>>" ); } elsif( $node->nodeName() eq "ulink" ) { my $str = $node->string_value() ; my $url = $node->getAttribute('url'); $str =~ s/\n/ /g; if ($str eq $url) { $self->{OFILE}->print( "L<<<<<< $url >>>>>>" ); } else { $self->{OFILE}->print( "$str (L<<<<<< $url >>>>>>)" ); } } elsif( $node->nodeName() eq "xref" ) { my $linkend = $node->getAttribute('linkend'); my ($target) = $node->findnodes(qq(//*[\@id="$linkend"]/titleabbrev)); ($target) = $node->findnodes(qq(//*[\@id="$linkend"]/title)) unless $target; if ($target) { my $str = $target->string_value() ; $str =~ s/\n/ /g; $str = pod_escape($str); $self->{OFILE}->print( "L<<<<<< $str >>>>>>" ); } else { warn "WARNING: Didn't find any section with id='$linkend'\n"; $self->{OFILE}->print( "$linkend" ); } } elsif( $node->nodeName() eq "olink" ) { my $str = pod_escape($node->string_value()); my $url = $node->getAttribute('targetdoc'); if (!defined $url) { warn $node->toString(1),"\n"; } $str =~ s/\n/ /g; if ($str eq $url) { $self->{OFILE}->print( "L<<<<<< $url >>>>>>" ); } else { $self->{OFILE}->print( "$str (L<<<<<< $url >>>>>>)" ); } } else { print STDERR "Ignoring ",$node->nodeName(),"\n"; $self->dump_pod($node); } } } sub pod_escape { my ($str) = @_; my %escapes = ( '>' => 'gt', '<' => 'lt', ); my $re = join('|', keys %escapes); $str =~ s/($re)/E<$escapes{$1}>/g; return $str; } 1;