[BACK]Return to list2html.pl CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / htdocs

File: [cvs.NetBSD.org] / htdocs / Attic / list2html.pl (download)

Revision 1.66, Wed Jun 6 22:49:38 2001 UTC (22 years, 10 months ago) by kim
Branch: MAIN
Changes since 1.65: +12 -7 lines

Add support for manual page collections:  man(1.i386/NetBSD-current)
Add "-c" switch for setting the default collection.

#!/usr/bin/env perl
#
# $NetBSD: list2html.pl,v 1.66 2001/06/06 22:49:38 kim Exp $
# Process *.list files into indexed *.html files. (abs)
# Looks for these compulsory tags:
#	<LIST>			Include generated list of entries here.
#	<SECTION>Text		Introduces new section
#	</LIST>			Mark end of all special entries
#
# Plus these optional tags: (You will probably want to use <DATE> or <ENTRY>)
#	<DATE>tag date Text	Change entry, expanded to title & added to list
#	<ENTRY>tag Text		New entry, expanded to title and added to list
#	<LISTLINK>url Text	Link added to list, removed from main text
#	<ENTRYLINK>url Text	Link added to list and main text
#	<HEADING>Text		Standard heading at top of document
#	<DEVHEADING>Text	Standard heading at top of developer docs
#	<TROW>Text: Text	Table row, with two text fields
#				NOTE: <TROW> will automatically continue
#				until a line containing </table>
#	<HOMELINK>		Add flag link to NetBSD home page
#	<DOCLINK>		Add flag links to NetBSD home page & docs top
#	<DEVLINK>		Add flag links to NetBSD home page & developers
#	<PORTLINK>		Add flag links to NetBSD home page & ports top
#	<GALLINK>		Add flag links to NetBSD home page & gallery
# Continuation lines are understood (useful for the special tags)
#
# Additional links:
#	([\w.+]+)\((\d)\)	-> manpages		eg: ls(1)
#				The section may be followed by '.arch' and/or
#				'/collection', eg: man(1.i386/NetBSD-current)
#	<([-\w.]+@[-\w.]+)>	-> email address	eg: <user@host>
#	<PKGSRC>category/name	-> link to pkgsrc README.html
#	<CURRENTSRC>path	-> link to source file/dir in -current
#	<URL>[^\s<]+[^<\s.]	-> Insert link to URL
#	<RFC>RFC[0-9]+		-> link to RFC (www.normos.org)
#
# NOTE: Update htdocs/developers/htdocs.list when adding features.
#
#   (c) 1999, 2000 DKBrownlee. All rights reserved. This file may be used to
#	update the information on the NetBSD website. If you want to use it
#	for any other purpose, ask me first.. abs@mono.org
#

use strict;
use Getopt::Std;
$^W = 1;	# Enable run-time warnings (same effect as perl -w)

# File-wide settings shared by the subroutines below.
my($verbose, %extras, $months_previous, $list_date_links);
my($version, %opt, %pkgname);
my($opts_ok);

# List of pkgsrc names to 'human preferred' forms
%pkgname = qw(kde KDE gimp GIMP gnome GNOME xsane XSane);

$months_previous = 9;	# Previous months to display for DATE entries
$list_date_links = 6;	# List the first N date entries on stdout

# Reduce the RCS keyword to the bare revision number.
$version = '$Revision: 1.66 $';
$version =~ /([\d.]+)/ && ($version = $1);

# Show usage for -h, for unknown switches, or when infile/outfile are not
# both given (unless only the version was requested).
$opts_ok = &getopts('a:c:m:hV', \%opt);
if (!$opts_ok || $opt{'h'} || ( !$opt{'V'} && @ARGV != 2) )
    {
    print "list2html.pl [opts] infile outfile
[opts]	-a xxx	Define 'arch=xxx' when linking to manpages
	-c xxx	Define 'collection=xxx' when linking to manpages
	-m xxx	Set months to display for <DATE> (default $months_previous)
	-h	This help.
	-V	Display version and exit ($version - David Brownlee/abs)

list2html.pl processes .list files into .html, parsing various special tags.
.list files are intended to reduce the effort required to maintain files such
as FAQs, and change logs. More details given at the start of list2html.pl.
";
    # Only a well-formed request for help is a success; a bad invocation
    # must return a non-zero status so that make notices the failure.
    exit(($opts_ok && $opt{'h'}) ? 0 : 1);
    }
if ($opt{'V'})
    { print "$version\n"; exit; }

$verbose = 1;
if ($opt{'m'})
    { $months_previous = $opt{'m'}; }
# From here on $months_previous holds a YYYYMM cutoff, not a month count.
$months_previous = &get_minmonth($months_previous);

# Table of "extra" tags that are expanded once the document has been built.
# Each key is a regular expression matched against the document text and each
# value is the HTML that extras_process() substitutes for the match (and the
# remainder of that line).  $SUB1 in a value stands for the first capture
# group of the key.  $HOME, $DOCS, $DEVELOPERS, $PORTS and $GALLERY are
# filled in with paths by extras_generate() before the table is used.
%extras = (

# <HEADING>Text - daemon logo beside a "NetBSD Documentation:" title
'<HEADING>\s*(.*)',
'<table><tr><td>
    <a href="$HOME/Misc/daemon-copy.html"><img
     align="middle" src="$HOME/images/BSD-daemon.jpg" border="0"
     width=146 height=129 alt="BSD daemon"></a>
  </td><td align=center>
    <h1>NetBSD Documentation:</h1>
    <h1>$SUB1</h1>
</td></tr></table>
<p>'
,

# <DEVHEADING>Text - as <HEADING>, titled "NetBSD Developer Documentation:"
'<DEVHEADING>\s*(.*)',
'<table><tr><td>
    <a href="$HOME/Misc/daemon-copy.html"><img
     align="middle" src="$HOME/images/BSD-daemon.jpg" border="0"
     width=146 height=129 alt="BSD daemon"></a>
  </td><td align=center>
    <h1>NetBSD Developer Documentation:</h1>
    <h1>$SUB1</h1>
</td></tr></table>
<p>'
,

# <HOMELINK> - flag link to the home page
'<HOMELINK>',
'<table><tr><td>
    <a href="$HOME/"><img
	src="$HOME/images/NetBSD-flag.gif" border="0"
	width="91" height="42" alt=""></a>
  </td><td>
    <a href="$HOME/"><img
	src="$HOME/images/empty.gif" border="0"
	width="1" height="1" alt="NetBSD ">Home Page</a>
</td></tr></table>'
,

# <DEVLINK> - flag links to the home page and the developer documentation
'<DEVLINK>',
'<table width="100%"><tr><td>
  <table><tr><td>
    <a href="$HOME/"><img
	src="$HOME/images/NetBSD-flag.gif" border="0"  
	width="91" height="42" alt=""></a>
  </td><td>
    <a href="$HOME/"><img
	src="$HOME/images/empty.gif" border="0"
	width="1" height="1" alt="NetBSD ">Home Page</a>
  </td></tr></table>
</td><td>
  <table><tr><td>
    <a href="$DEVELOPERS"><img
	src="$HOME/images/NetBSD-flag.gif" border="0"  
	width="91" height="42" alt=""></a>
  </td><td>
    <a href="$DEVELOPERS"><img
	src="$HOME/images/empty.gif" border="0"  
	width="1" height="1" alt="NetBSD ">Developer Documentation</a>      
  </td></tr></table>
</td></tr></table>'
,

# <DOCLINK> - flag links to the home page and the documentation top level
'<DOCLINK>',
'<table width="100%"><tr><td>
  <table><tr><td>
    <a href="$HOME/"><img
	src="$HOME/images/NetBSD-flag.gif" border="0"
	width="91" height="42" alt=""></a>
  </td><td>
    <a href="$HOME/"><img
	src="$HOME/images/empty.gif" border="0"
	width="1" height="1" alt="NetBSD ">Home Page</a>
  </td></tr></table>
</td><td>
  <table><tr><td>
    <a href="$DOCS"><img
	src="$HOME/images/NetBSD-flag.gif" border="0"
	width="91" height="42" alt=""></a>
  </td><td>
    <a href="$DOCS"><img
	src="$HOME/images/empty.gif" border="0"
	width="1" height="1" alt="NetBSD ">Documentation top level</a>
  </td></tr></table>
</td></tr></table>'
,

# <PORTLINK> - flag links to the home page and the supported architectures
'<PORTLINK>',
'<table width="100%"><tr><td>
  <table><tr><td>
    <a href="$HOME/"><img
	src="$HOME/images/NetBSD-flag.gif" border="0"
	width="91" height="42" alt=""></a>
  </td><td>
    <a href="$HOME/"><img
	src="$HOME/images/empty.gif" border="0"
	width="1" height="1" alt="NetBSD ">Home Page</a>
  </td></tr></table>
</td><td>
  <table><tr><td>
    <a href="$PORTS"><img
	src="$HOME/images/NetBSD-flag.gif" border="0"
	width="91" height="42" alt=""></a>
  </td><td>
    <a href="$PORTS"><img
	src="$HOME/images/empty.gif" border="0"
	width="1" height="1" alt="NetBSD ">Supported Architectures</a>
  </td></tr></table>
</td></tr></table>'
,

# <GALLINK> - flag links to the home page and the gallery page
'<GALLINK>',
'<table width="100%"><tr><td>
  <table><tr><td>
    <a href="$HOME/"><img
	src="$HOME/images/NetBSD-flag.gif" border="0"
	width="91" height="42" alt=""></a>
  </td><td>
    <a href="$HOME/"><img
	src="$HOME/images/empty.gif" border="0"
	width="1" height="1" alt="NetBSD ">Home Page</a>
  </td></tr></table>
</td><td>
  <table><tr><td>
    <a href="$GALLERY"><img
	src="$HOME/images/NetBSD-flag.gif" border="0"     
	width="91" height="42" alt=""></a>
  </td><td>
    <a href="$GALLERY"><img
	src="$HOME/images/empty.gif" border="0"
	width="1" height="1" alt="NetBSD ">Gallery page</a>
  </td></tr></table>
</td></tr></table>'

);

# XXX Should DTRT with faqs not under Documentation

# Fill the path placeholders into the extras table, then convert the input
# file (first argument) into the output file (second argument).
my(%expanded_extras) = &extras_generate(%extras);
&makelist(@ARGV, %expanded_extras);
exit(0);

# check_date(date)
#
# Decide whether a <DATE> entry is recent enough to be displayed.
#   date	Text such as 'Jun 2001': a three letter English month
#		abbreviation followed by a year.
# Returns true when the date is later than the YYYYMM cutoff held in
# $months_previous.  Aborts via fail() if no 'word number' pair can be found
# at all; an unknown month name only produces a warning and is treated as
# December.
sub check_date
    {
    my($date) = @_;
    my($name, $year, $month, $when);
    my(%months) = ('Jan' => 1,	'Feb' => 2,	'Mar' => 3,
		   'Apr' => 4,	'May' => 5,	'Jun' => 6,
		   'Jul' => 7,	'Aug' => 8,	'Sep' => 9,
		   'Oct' => 10,	'Nov' => 11,	'Dec' => 12 );

    if ($date !~ /(\S+)\s*(\d+)/)
	{ &fail("Unable to parse date '$date'"); }
    # Copy the captures straight away so that later code (including the
    # subs called below) cannot invalidate them.
    ($name, $year) = ($1, $2);

    if (!defined($month = $months{$name}))
	{
	# warn() adds no line terminator of its own, so supply one here.
	&warn("Unable to parse month '$name'\n");
	$month = 12;
	}
    # Same YYYYMM layout as get_minmonth(), so a numeric compare works.
    $when = sprintf("%04d%02d", $year, $month);
    ( $when>$months_previous );
    }

# extract_tags(file, tags...)
#
# Scan 'file' for text matching each of the regular expressions in 'tags'.
# Returns a hash mapping each tag pattern to the text it matched; when a
# pattern matches on several lines the last matching line wins.  Patterns
# that never match have no entry.  Returns an empty list if the file cannot
# be opened (e.g. the output file does not exist yet).
sub extract_tags
    {
    my($file, @tags) = @_;
    my($tag, $line, %map);
    local(*FILE);	# Keep any FILE handle the caller has open intact

    if (!open(FILE, $file))
	{ return; }
    # Read into a private variable: 'while (<FILE>)' would overwrite the
    # caller's $_ and leave it undefined on return.
    while (defined($line = <FILE>))
	{
	foreach $tag (@tags)
	    {
	    if ( $line =~ /($tag)/ )
		{ $map{$tag} = $1; }
	    }
	}
    close(FILE);
    %map;
    }

# extras_generate(extras)
#
# Fill the path placeholders into the values of the extras table.
#   extras	Hash of tag pattern => HTML template.
# The directory part of $0 (the path used to invoke this script) is taken as
# the root: $HOME becomes that directory, and $DEVELOPERS, $DOCS, $PORTS and
# $GALLERY become the developers/, Documentation/, Ports/ and gallery/
# directories below it.  Returns the updated hash.  Aborts via fail() when
# $0 is not of the form 'dir/name.pl'.
sub extras_generate
    {
    my(%extras) = @_;
    my($home, $str, $pair, @expansions);

    # The dot is escaped so that only a real '.pl' suffix is accepted.
    if ($0 !~ m#(.*)/[^/]+\.pl#)
	{ &fail("Unable to extract path from '$0'"); }
    $home = $1;

    # Placeholder name => replacement, applied in this order.
    @expansions = (
	[ 'HOME',	$home ],
	[ 'DEVELOPERS',	"$home/developers/" ],
	[ 'DOCS',	"$home/Documentation/" ],
	[ 'PORTS',	"$home/Ports/" ],
	[ 'GALLERY',	"$home/gallery/" ],
	);

    foreach $str ( keys %extras )
	{
	foreach $pair ( @expansions )
	    {
	    my($name, $path) = @$pair;
	    $extras{$str} =~ s#\$$name#$path#g;
	    }
	}
    (%extras);
    }

# extras_process(data, extras)
#
# Expand the "extra" tags in a complete document.
#   data	The document text.
#   extras	Hash of tag pattern => HTML template.
# For every pattern that occurs in the text, the first occurrence together
# with the rest of its line is replaced by the template, in which $SUB1 and
# $SUB2 stand for the pattern's first and second capture groups.
# Returns the resulting text.
sub extras_process
    {
    my($data, %extras) = @_;

    foreach my $pattern ( keys %extras )
	{
	next unless $data =~ /$pattern/;

	my($first, $second) = ($1, $2);
	my $expansion = $extras{$pattern};
	$expansion =~ s#\$SUB1#$first#g if defined($first);
	$expansion =~ s#\$SUB2#$second#g if defined($second);
	$data =~ s/$pattern.*/$expansion/;
	}
    return $data;
    }

# fail(message...)
#
# Report a fatal error on stderr, prefixed with "ABORTING: " and followed by
# a newline, then terminate the program with exit status 3.
sub fail
    {
    my(@message) = @_;

    print STDERR "ABORTING: ", @message, "\n";
    exit(3);
    }

# get_minmonth(monthsback)
#
# Work out the month that lies 'monthsback' months before the current one
# (local time) and return it as a six digit YYYYMM string.
sub get_minmonth
    {
    my($monthsback) = @_;
    my(@now) = localtime(time);
    my($year) = $now[5];			# Years since 1900
    my($month) = $now[4] + 1 - $monthsback;	# localtime months are 0..11

    # Borrow whole years until the month is back in range.
    for (; $month < 1; $month += 12)
	{ --$year; }

    return sprintf("%04d%02d", $year + 1900, $month);
    }

# makelist(infile, outfile, extras)
#
# Convert the .list file 'infile' into the HTML file 'outfile'.
#   infile	Source .list file.
#   outfile	HTML file to write.  If it already exists, its $NetBSD...$
#		RCS tag is read first and substituted for any such tag
#		found in the input.
#   extras	Hash of tag pattern => HTML, handed to extras_process().
#
# Collect $list containing forward links as we go. In general each entry will
# generate something in $list and some expanded data in the main $data.
# Once the input is exhausted $list replaces the <LIST> tag, a "DO NOT EDIT"
# comment is inserted after the <head> tag, the extras are expanded and the
# result is written out.  A summary of the entries seen is printed on stdout.
#
# Aborts via fail() on malformed <DATE>, <ENTRY>, <LISTLINK>, <ENTRYLINK> or
# <TROW> lines, duplicate name tags, a duplicate or missing </LIST>, or a
# missing <LIST> or <head> tag; dies if either file cannot be opened.
#
sub makelist
    {
    my($infile, $outfile, %extras) = @_;
    my($data, $section, $href, $header, $list, $pre, %tags, $date_month);
    my($date_num, $date_num_used, $entry_num, $ignore, @date_links);
    my($in_entry, $in_section, $endlist);
    my($title_font) = "<font face=\"helvetica, arial, sans-serif\">";
    my($end_title_font) = "</font>";
    my(%rcsmap) = &extract_tags($outfile, '\$NetBSD.*\$');
    my($rcstag, $in_trow);

    $list = '';

    $data = $date_month = '';
    $entry_num = $date_num = $date_num_used = 0;
    open(FILE, $infile) || die("Unable to open '$infile': $!");
    foreach( <FILE> )
	{
	# Carry over the RCS tag of the existing output file.  Only the
	# keys of %rcsmap are patterns; the values are replacement text.
	foreach $rcstag (keys %rcsmap)
	    { s/$rcstag/$rcsmap{$rcstag}/; }
	if (defined($pre))		# Handle continuation lines
	    { $_ = $pre.$_; $pre = undef; }

	if (substr($_, -2) eq "\\\n")	# Handle continuation lines
	    {
	    s/\\\n$//;
	    $pre = $_;
	    next;
	    }

	if (m#^<DATE>\s*(.+\S)#)	# Changes
	    {
	    my($year, $month, $link);

	    if ($in_entry)
		{
		$data .= "</dd></dl>\n";
		$in_entry = undef;
		}
	    $ignore = undef;
	    ++$date_num;
	    $header = $1;
	    # Expected: tag day month year - text
	    if ($header !~ /^([-a-z0-9_.+]+)\s+(\d+) (\S+) (\d+) - (\S.*)/)
		{ &fail("'$header' not in expected 'date - event' format"); }
	    $href = $1;
	    $header = "$5 ($2 $3)";
	    $month = "$3 $4";
	    $link = $5;
	    if (defined($tags{$href}))
		{ &fail("Duplicate name tag '$href'"); }
	    $tags{$href} = 1;
	    if (!&check_date($month))
		{ $ignore = 1; }	# Too old: drop lines up to next tag
	    else
		{
		$_ = '';
		++$date_num_used;
		if ($month ne $date_month)	# First entry of a new month
		    {
		    if ($date_month ne '')
			{ $list .= "</ul>\n"; }
		    $list .= "<h3>$month</h3>\n<ul>\n";
		    $_ .= "<hr><h2>$month</h2><hr>\n";
		    $date_month = $month;
		    }

		$_.= "<p><h3>\n$title_font".
			"<a name=\"$href\">$header</a>$end_title_font\n".
			"<font size=\"-1\">".
			"(<a href=\"#top\">top</a>)</font>\n".
			"</h3><dl><dt><dd>\n";
		$list .= "<li>$title_font\n<a href=\"#$href\">$link</a>".
			"$end_title_font</li>\n";
		if (@date_links < $list_date_links)
		    {
		    push(@date_links, "<li><font face=\"helvetica, arial\" size=\"-1\">\n".
				      "<a href=\"Changes/#$href\">\n".
				      "    $link</a>\n".
				      "</font></li>\n");
		    }
		$in_entry = 1;
		}
	    }

	if (m#^<ENTRY>\s*(.+\S)#)
	    {
	    if (! $in_section )
		{ $list .= "<ul>\n"; }	# Start title list

	    if ($in_entry)
		{
		$data .= "</dd></dl>\n";
		$in_entry = undef;
		}

	    $ignore = undef;
	    ++$entry_num;
	    $_ = $1;
	    if (! /^([-a-z0-9_.+,]+)\s+(.*)/)
		{ &fail("Invalid <ENTRY> ($_), not ([-a-z0-9_.+,]+)\\s+(.*)"); }
	    $href = $1;
	    $header = $2;
	    if (defined($tags{$href}))
		{ &fail("Duplicate name tag '$href'"); }
	    $tags{$href} = 1;

	    $_ = "<p><h3>\n$title_font".
		    "<a name=\"$href\">$header</a>$end_title_font\n".
		    "<font size=\"-1\">".
		    "(<a href=\"#top\">top</a>)</font>\n".
		    "</h3><dl><dt><dd>\n";
	    $list .= "<li>$title_font\n<a href=\"#$href\">$header</a>".
		    "$end_title_font</li>\n";
	    $in_entry = $in_section = 1;
	    &verbose("\t$href\n");
	    }

	if (m#^<LISTLINK>\s*(.+\S)#)	# Link in the list only
	    {
	    if ( ! $in_section )
		{ $list .= "<ul>\n"; }	# Start title list

	    $ignore = undef;
	    ++$entry_num;
	    $_ = $1;
	    if (! m#^(\S+)\s+(.*)#)
		{ &fail("Invalid <LISTLINK> ($_), not (\\S+)\\s+(.*)"); }
	    $href = $1;
	    $header = $2;
	    $_ = '';
	    $list .= "<li>$title_font\n<a href=\"$href\">$header</a>".
		    "$end_title_font</li>\n";
	    $in_section = 1;
	    &verbose("\t$href\n");
	    }

	if (m#^<ENTRYLINK>\s*(.+\S)#)	# Link in the list and the main text
	    {
	    if ( ! $in_section )
		{ $list .= "<ul>\n"; }	# Start title list

	    if ($in_entry)
		{
		$data .= "</dd></dl>\n";
		$in_entry = undef;
		}

	    $ignore = undef;
	    ++$entry_num;
	    $_ = $1;
	    if (! m#^(\S+)\s+(.*)#)
		{ &fail("Invalid <ENTRYLINK> ($_), not (\\S+)\\s+(.*)"); }
	    $href = $1;
	    $header = $2;
	    $_ = "<p><h3>\n$title_font".
		    "<a href=\"$href\">$header</a>$end_title_font\n".
		    "<font size=\"-1\">".
		    "(<a href=\"#top\">top</a>)</font>\n".
		    "</h3><dl><dt><dd>\n";
	    $list .= "<li>$title_font\n<a href=\"$href\">$header</a>".
		    "$end_title_font</li>\n";
	    $in_entry = $in_section = 1;
	    &verbose("\t$href\n");
	    }

	if (m#^<SECTION>\s*(.+\S)#)
	    {
	    if ($in_entry)
		{
		$data .= "</dd></dl>\n";
		$in_entry = undef;
		}
	    else # In case no entries
		{ $data =~ s#<hr>\n<h2>.*</h2><hr>\n*$##; }
	    $ignore = undef;
	    if (defined($section))
		{
		$list .= "</ul>\n";
		$section = $1;
		$list .= "<h2>$section</h2>\n";
		$list .= "<ul>\n";	# Start title list
		}
	    else
		{	# If we have never seen <SECTION> remember top link!
		$section = $1;
		$list .= "<h2><a name=\"top\">$section</a></h2>\n".
			"<ul>\n";
		}
	    $_ = "<hr>\n<h2>$section</h2><hr>";
	    $in_section = 1;
	    &verbose("    $section\n");
	    }

	if (m#^<TROW>\s*(.*)#)
	    {
	    $_ = $1;
	    if (! m#^([^:]+:)\s+(.*)#)
		{ &fail("<TROW> should match ([^:]+:)\\s+(.*)"); }
	    $ignore = undef;
	    $_ = "<tr><th valign=top align=right>$1</th>\n  <td>$2</td></tr>\n";
	    $in_trow = 1;
	    }
	elsif ($in_trow)
	    {
	    if (m#</table>#i)
		{ $in_trow = 0; }
	    else
		{			# Append to last <TROW>
		# Insert just before the 11 character "</td></tr>\n"
		# that ends the previous row.
		substr($data, -11, 0) = ' '.&sub_external_links($_);
		$_ = '';
		}
	    }

	if (m#^</LIST>#)
	    {
	    if ($in_entry)
		{
		$data .= "</dd></dl>\n";
		$in_entry = undef;
		}
	    if ($endlist)
		{ &fail("Duplicate </LIST>"); }
	    $endlist = 1;
	    $ignore = undef;
	    $_ = "<hr>\n";
	    }

	if (! $ignore)
	    { $data .= &sub_external_links($_); }
	}

    close(FILE);
    $list .= "</ul>\n";
    if (!$endlist)
	{ &fail("Missing </LIST> tag"); }
    if ($data !~ s/<LIST>/$list/)
	{ &fail("Unable to locate <LIST> tag"); }
    $_ = "\n\n<!-- DO NOT EDIT THIS FILE. EDIT '$infile' AND RUN 'make' -->\n";
    if ($data !~ s/(<head[^>]*>)/$1$_/i)
	{ &fail("Unable to locate <head> tag"); }

    open(FILE, ">$outfile") || die("Unable to write '$outfile': $!");
    print FILE &extras_process($data, %extras);
    close(FILE);

    # Summary for the person running the script.
    if ($date_num)
	{
	print "$date_num date entr", ($date_num == 1)?'y':'ies';
	if ($date_num_used != $date_num)
	    { print " ($date_num_used used)"; }
	print ".\n";
	if (@date_links)
	    {
	    print "First $list_date_links date links (for main index.html):\n",
								@date_links;
	    }
	}
    if ($entry_num)
	{ print "$entry_num entr", ($entry_num == 1)?'y':'ies', ".\n"; }
    }

# sub_external_links(text)
#
# Turn the shorthand references in 'text' into HTML links and return the
# result.  Handled, in this order:
#   name(N), name(N.arch), name(N/collection), name(N.arch/collection)
#				-> manual page (the -a and -c switches give
#				   the default arch and collection)
#   <CURRENTSRC>path		-> file or directory in the cvsweb tree
#   <URL>url			-> the URL itself
#   <PKGSRC>category/name	-> the package's README.html
#   <RFC>RFCnnnn		-> the RFC text; the tag is simply removed
#				   when what follows is not an RFC number
#   <user@host>			-> mailto: link
# The caller's $_ is left untouched.
sub sub_external_links
    {
    my($text) = @_;
    my($remaining);

    # Man page references. As of 1.4 matches every page except '[' and 'w'.
    #
    # Output text include match string, so handle in sections: everything
    # up to and including each match is moved to $text, and only the
    # remainder is searched again.
    $remaining = $text;
    $text = '';
    while ( $remaining =~ m#([a-zA-Z_][-\w.+]*[\w+])\((\d)(?:\.(\w+))?(?:/([-\w]+))?\)# )
	{
	my($page, $section, $arch, $collection) = ($1, $2, $3, $4);
	my($before, $after) = ($`, $');
	my($link);

	$link = 'http://www.tac.eu.org/cgi-bin/man-cgi?';

	$link .= "$page+$section";

	if (defined($arch))
	    { $link .= ".$arch"; }
	elsif ($opt{'a'})
	    { $link .= ".$opt{'a'}"; }

	if (defined($collection))
	    { $link .= "+$collection"; }
	elsif ($opt{'c'})
	    { $link .= "+$opt{'c'}"; }

	$text .= $before . "<a href=\"$link\">$page($section)</a>";
	$remaining = $after;
	}
    $text .= $remaining;

    # Expand <CURRENTSRC>path
    #
    while ($text =~ m#<CURRENTSRC>([^\s<>]+\w)#)
	{
	my($path);

	# Reduce to a path relative to /usr/src, then prefix the name of
	# the repository module used in the cvsweb URL.
	$path = $1;
	$path =~ s#^/##;
	$path =~ s#^usr/##;
	$path =~ s#^src/##;
	if ($path =~ m#^(sys|share|gnu)#)
	    { $path = $1."src/$path"; }
	elsif ($path !~ m#^(doc|xsrc)#)
	    { $path = "basesrc/$path"; }
	$text =~ s#<CURRENTSRC>([^\s<>]+\w)#<a href="http://cvsweb.netbsd.org/bsdweb.cgi/$path?rev=HEAD&amp;content-type=text/x-cvsweb-markup">$1</a>#;
	}

    # Expand <URL>[^\s<]+[^<\s.]
    $text =~ s#<URL>([^\s<]+[^<\s.])#<a href="$1">$1</a>#g;

    # Expand <PKGSRC>category/name entries
    #
    while ($text =~ m#<PKGSRC>((\w+/|)([^\s<>]+\w))#)
	{
	my($n) = $3;
	if (defined($pkgname{$n}))	# Use the preferred spelling
	    { $n = $pkgname{$n}; }
	$text =~ s#<PKGSRC>((\w+/|)([^\s<>]+\w))#<a href="ftp://ftp.netbsd.org/pub/NetBSD/packages/pkgsrc/$1/README.html">$n</a>#;
	}

    # Expand <RFC>RFCxxxx entries
    #
    while ($text =~ m#<RFC>([^\s<>]+\w)#)
	{
	my($o, $n);
	$o = $n = $1;
	$n =~ s#^rfc##i;
	# \Q...\E: $o is literal text, not a pattern.  Without the quoting
	# a name containing regex metacharacters would either fail to
	# compile or never match, leaving this loop spinning forever.
	if ($n =~ /^\d+$/)
	    {
	    $text =~ s#<RFC>\Q$o\E#<a href="http://www.normos.org/ietf/rfc/rfc$n.txt">$o</a>#;
	    }
	else
	    {
	    $text =~ s#<RFC>\Q$o\E#$o#;
	    }
	}

    # Expand <user@host> email addresses
    #
    $text =~ s#<([-\w.]+@[-\w.]+)>#<a href="mailto:$1">&lt;$1&gt;</a>#g;

    $text;
    }

# verbose(text...)
#
# Print the given progress text on stdout, but only while the file-wide
# $verbose flag is set.
sub verbose
    {
    if ($verbose)
	{ print @_; }
    }

# warn(message...)
#
# Print a non-fatal problem report on stdout, prefixed with "WARNING: ".
# No newline is added; the message is printed exactly as given.
sub warn
    {
    my(@message) = @_;

    print "WARNING: ", @message;
    }