#!/usr/bin/perl

# listgroups-m.pl
# (configured to connect to U. Mannheim news server)

# listgroups.pl

# Constructs Web contents pages that show the Usenet groups available on
# a news server.

# Writes static HTML pages with links to the accompanying readnews CGI
# script, which retrieves current article headers.

# For each top level group, lists the groups and the number of messages
# in each group on one or more HTML pages, with some maximum number
# of groups listed per page.

# If a top level group has too many groups to list on one page, builds
# a sub-contents page that shows the first and last group on each
# listing page for the top level group.

# Rod Clark

# v1.54
# Feb 6, 2001
#
# now uses Socket module for better socket compatibility

# v1.53
# Jan 7, 2001
#
# Junk groups now match beginning of string (e.g. control*...), not exact
# string.

# v1.52
# Nov 21, 2000
#
# Split MainContentsPage into MainContentsPagePath and MainContentsPageURL
# to make it easier to use separate directories for each news server.

# v1.51
# March 19, 1999
#
# Truncates group names on subcontents pages, if the first and last
# group names on a line would together exceed one line.

# v1.5
# July 30, 1998
#
# More configuration items.

# v1.4
# July 29, 1998
#
# Writes a local static active file, so that readnews doesn't have to
# retrieve it via NNTP from a remote server every time it runs a
# search on newsgroup names.
# Reading the group sizes individually is now optional.
# Added some configuration items.

# v1.3
# July 27, 1998
#
# If the active file isn't available from the file system, connects to
# the news server via NNTP and retrieves it.

# v1.2
# July 24, 1998
#
# Connects to the news server via NNTP and issues a GROUP command for
# each group, to find accurate group sizes. (The active file is
# unreliable for this, as it gives only the first and last article
# numbers, and sometimes the first message is a leftover with an
# old article number that leads to a wildly inaccurate group size
# estimate.)

# v1.1
# July 20, 1998

#----------------------------------------------------------------
# Configuration
#----------------------------------------------------------------

# Display name of the news provider; interpolated into page titles,
# headings and status messages.
$ProviderName = "U. Mannheim";

# Host name (or dotted-quad address) of the NNTP server to query.
$newshost = "news.uni-mannheim.de";

# This is the directory where the script writes the Web menu pages.
# After running the script, you can manually symlink the directory's
# home page to the script-generated groups*.html page, if you wish.
# Or you can maintain a separate home page for the directory, that
# isn't automatically generated by the script.
# Both values are used as plain string prefixes (a file name is appended
# directly), so each must end with a slash.
$WebPagesDirPath = "/web/usenet/mannheim/";
$WebPagesDirURL = "/usenet/mannheim/";

# File name extension (without the dot) of the generated pages.
$PageExt = "html";

$FileAbbrev = "mh";

# FileAbbrev: If you will be using the readnews and readmsg scripts for
# more than one news server, you will need a separate copy of those scripts
# configured for each server. Make a copy of listgroups.pl (this script) for
# each server (rename to listgroups-foo.pl) and specify an abbreviation
# to add as an identifying suffix to the filenames for the readnews and
# readmsg scripts that will be configured for that specific news server.
# For example: Use $FileAbbrev = "k" for readnews-k and readmsg-k
#
# This script will then print links, in the Web contents pages that it
# generates, to call those versions of the readnews and readmsg scripts
# that you've configured to read from a particular news server.

#-----

# If the news server's files are on the same Unix file system that this
# script runs on, set this to 1. If not, set this to 0.
$NewsServerIsOnThisFileSystem = 0;

# If the above is set to 1, then you must set the path to the news
# active file:

$NewsActiveFile = "/usr/lib/news/active";

#----------------------------------------------------------------
# Check to make sure these are OK:
#----------------------------------------------------------------

# Set this to 1 to read the group list from the news server via NNTP.
# This must be set to 1 on the first run. Leave it set to 1 to refresh
# the contents pages from updated information from the remote server.
#
# Set it to 0 to re-index the HTML pages when making local revisions,
# such as changes in the maximum number of groups listed per page or the
# news provider's name.

$ReadListFromRemoteServer = 1;

# The maximum number of groups on a page might be
#    50 for <  5,000 groups
#   100 for < 10,000 groups
#   150 for < 15,000 groups
#   200 for < 20,000 groups
#   250 for > 20,000 groups

$MaxGroupsPerPage = 100;

# avoid overrunning one line on contents subpages in Lynx
$MaxContentsLineLength = 58;

# Getting the group sizes individually for each group takes a very long
# time (possibly 3 to 8 groups per second, or slower). Always set this
# to 0, unless you have your own news server and it has only a small
# number of groups on it.

$OKToGetGroupSizes = 0;

#----------------------------------------------------------------
# You shouldn't need to change anything below this line.
#----------------------------------------------------------------

# Names of the generated files. When the news server is remote, every
# name carries the $FileAbbrev suffix so that the files for several
# servers can share one directory.

if ($NewsServerIsOnThisFileSystem) {
    $StaticActiveFile = "active.txt";
    $GroupNamesTextFile = "groups.txt";
    $TopLevelTextFile = "toplevel.txt";
    $CheckedGroupsTextFile = "checked.txt";

    # The main groups list page - a plain filename, not a complete path.
    $MainContentsPagePath = "groups.html";

    # $MainContentsPageURL can be "" if you symlink index.html to the
    # above $MainContentsPagePath file.
    # Otherwise, it should be the same as that file.
    $MainContentsPageURL = "groups.html";

    $GroupListPageBegin = "list";
    $ReadnewsName = "readnews";
}
else {
    $StaticActiveFile = "active-$FileAbbrev.txt";
    $GroupNamesTextFile = "groups-$FileAbbrev.txt";
    $TopLevelTextFile = "toplevel-$FileAbbrev.txt";
    $CheckedGroupsTextFile = "checked-$FileAbbrev.txt";

    # The main groups list page - a plain filename, not a complete path.
    $MainContentsPagePath = "groups-$FileAbbrev.html";

    # $MainContentsPageURL can be "" if you symlink index.html to the
    # above $MainContentsPagePath file.
    # Otherwise, it should be the same as that file.
    $MainContentsPageURL = "groups-$FileAbbrev.html";

    $GroupListPageBegin = "$FileAbbrev";
    $ReadnewsName = "readnews-$FileAbbrev";
}

# NNTP connection parameters. $port may be a number or a service name;
# $AF_INET and $sockaddr are the address family and pack() template used
# when building socket addresses by hand.
$port = 119;
$AF_INET = 2;
$sockaddr = 'S n a4 x8';

use Socket;

# Ignore these active file groups when writing group names:

@JunkNames = ('control', 'cancel', 'junk');

#----------------------------------------------------------------
# End of configuration
#----------------------------------------------------------------

# Timestamp shown in the footer of every generated page.
$Date = `date`;

&GetGroupList;
print "  done\n";

# Stop here if no groups were read (for example the LIST command was
# rejected or the connection dropped). Every later step is destructive:
# it would overwrite the cached static active file with nothing, delete
# the existing list pages and replace them with empty ones.
if (!@GroupLines) {
    print "Error: no newsgroups were read for $ProviderName.\n";
    print "Existing pages and text files were left unchanged.\n";
    exit 1;
}

# Cache the raw active list locally, unless it was itself the source.
if (!$StaticActiveFileUsed) {
    print "Writing local static active file...\n";
    &PrintStaticActiveFile;
    print "  done\n";
}

&FindTopLevelGroupNames;

if ($OKToGetGroupSizes) {
    print "Getting group sizes...\n";
    &GetGroupSizes;
    print "  done\n";
}

print "Deleting previous list pages...\n";
&DeletePreviousGroupListPages;
print "  done\n";

print "Writing list pages...\n";
&PrintGroupListPages;
print "  done\n";

print "Writing contents page...\n";
&PrintContentsPage;
print "  done\n";

# Writing these text files is optional. The program doesn't use them.
print "Writing other text files...\n";
&PrintGroupNamesTextFile;
&PrintTopLevelTextFile;
if ($OKToGetGroupSizes) {
    &PrintCheckedGroupsTextFile;
}
print "  done\n";

#----------------------------------------------------------------
# NOTE(review): this file was received with every "<...>" sequence
# stripped. The Perl syntax lost that way has been restored below (the
# <ACTIVE> and <S> reads and the <<ENDPRINT heredoc openers), because
# the surrounding code identifies it unambiguously. The HTML markup that
# was stripped from the printed strings has NOT been reconstructed: the
# tags, and the URL scheme of the links to the $ReadnewsName CGI script,
# cannot be recovered from this file. Restore them from a pristine copy.
#
# The subroutines communicate through package globals (@GroupLines,
# @ActiveLines, $CurrentTopLevelName, the PAGEFILE handle, the socket
# handle S, ...), as in the original design.
#----------------------------------------------------------------

#----------------------------------------------------------------
# read data
#----------------------------------------------------------------

# Fills @GroupLines ("name count" strings) and @ActiveLines (raw active
# lines) from the best available source, as selected by
# $ReadListFromRemoteServer and $NewsServerIsOnThisFileSystem.
# Sets $StaticActiveFileUsed when the cached static active file was the
# source. Exits with status 1 when no source is available.
sub GetGroupList {
    if ($ReadListFromRemoteServer) {
        if ($NewsServerIsOnThisFileSystem
            && (-e $NewsActiveFile) && (-r $NewsActiveFile) && !(-z $NewsActiveFile)) {
            print "Getting group list from news active file...\n";
            &GetGroupListFromActiveFile ($NewsActiveFile);
        }
        else {
            print "Getting group list via NNTP...\n";
            &GetGroupListFromNNTP;
        }
    }
    elsif ($NewsServerIsOnThisFileSystem) {
        print "Getting group list from news active file...\n";
        &GetGroupListFromActiveFile ($NewsActiveFile);
    }
    else {
        # The static active file is written under $WebPagesDirPath (see
        # PrintStaticActiveFile), so it has to be read from there too,
        # not from whatever the current directory happens to be.
        my $StaticActivePath = "$WebPagesDirPath$StaticActiveFile";
        if ((-e $StaticActivePath) && (-r $StaticActivePath) && !(-z $StaticActivePath)) {
            print "Getting group list from static active file...\n";
            $StaticActiveFileUsed = 1;
            &GetGroupListFromActiveFile ($StaticActivePath);
        }
        else {
            print "\n\nNOTE: No local static active file exists for $ProviderName.\n";
            # The dollar sign is escaped so that the variable's name is
            # printed, not its value.
            print "--> Set the variable \$ReadListFromRemoteServer = 1;\n\n";
            exit 1;
        }
    }
}

# Parses one active-file line ("group last first flags") and, unless the
# group is filtered out, appends "group count" to @GroupLines.
# Filtered out: names without a dot, and names that begin with one of
# @JunkNames. The count is last - first + 1, floored at 0.
sub AddGroupFromActiveLine {
    my ($ThisActiveLine) = @_;
    ($GroupName, $EndMessageNumber, $StartMessageNumber, $StatusCode) =
        split (/\s/, $ThisActiveLine, 4);
    if (!defined ($GroupName) || ($GroupName !~ /\./)) {
        return;
    }
    foreach $JunkName (@JunkNames) {
        if ($GroupName =~ /^\Q$JunkName\E/) {
            return;
        }
    }
    $NumberOfMessages = ($EndMessageNumber - $StartMessageNumber) + 1;
    if ($NumberOfMessages < 0) {
        $NumberOfMessages = 0;
    }
    push (@GroupLines, "$GroupName $NumberOfMessages");
}

# Reads the active file at the given path into @ActiveLines (sorted) and
# derives @GroupLines from it. Exits with status 1 if the file cannot be
# opened.
sub GetGroupListFromActiveFile {
    my ($ThisListFile) = @_;
    if (!open (ACTIVE, "$ThisListFile")) {
        print "Can't read news active file: $ThisListFile\n";
        exit 1;
    }
    @ActiveLines = <ACTIVE>;
    close (ACTIVE);
    @ActiveLines = sort (@ActiveLines);
    foreach $ActiveLine (@ActiveLines) {
        &AddGroupFromActiveLine ($ActiveLine);
    }
}

# Retrieves the active list from the news server with the NNTP LIST
# command. Appends the raw lines (CR removed) to @ActiveLines and the
# parsed groups to @GroupLines, which is sorted afterwards.
# Problems are appended to $ErrorMessage and printed.
sub GetGroupListFromNNTP {
    &ConnectToNewsServer;
    $ListStatus = &CheckListStatus;
    if ($ListStatus != 215) {
        print "List command not accepted.\n";
        $ErrorMessage = "$ErrorMessage"."\nError: List command rejected ($status)";
        print "Error: List command rejected ($status)\n";
    }
    else {
        print "List command accepted.\n";
        my $SawListTerminator = 0;
        while (<S>) {
            # A line holding a single dot ends the list. Accept a bare
            # LF line ending as well as CR-LF.
            if (/^\.\r?\n$/) {
                $SawListTerminator = 1;
                last;
            }
            $_ =~ s/\r//g;
            push (@ActiveLines, $_);
            &AddGroupFromActiveLine ($_);
        }
        if (!$SawListTerminator) {
            $ErrorMessage = "$ErrorMessage"."\nError: Unexpected EOF on socket";
            print "Error: Unexpected EOF on socket\n";
        }
        @GroupLines = sort (@GroupLines);
    }
    &QuitNewsServer;
}

# Builds @TopLevelNames: the distinct first name components ("alt",
# "comp", ...) of @GroupLines, in order of first appearance. Relies on
# @GroupLines being sorted so that each hierarchy is contiguous.
# Leaves @TopLevelNames untouched when @GroupLines is empty.
sub FindTopLevelGroupNames {
    undef ($CurrentTopLevelName);
    foreach $GroupLine (@GroupLines) {
        ($TopLevelName, $Junk) = split (/\./, $GroupLine, 2);
        if (!defined ($CurrentTopLevelName) || ($TopLevelName ne $CurrentTopLevelName)) {
            push (@TopLevelNames, $TopLevelName);
            $CurrentTopLevelName = $TopLevelName;
        }
    }
}

# Replaces the estimated counts in @GroupLines with the counts reported
# by an NNTP GROUP command for each group.
# A 400 reply is retried a limited number of times. For any other
# failure the estimate from the active list is kept and the loop moves
# on, so it always terminates.
sub GetGroupSizes {
    my $MaxRetriesPerGroup = 5;
    my $RetryCount = 0;
    &ConnectToNewsServer;
    $NumberOfGroupLines = @GroupLines;
    $GroupIndex = 0;
    while ($GroupIndex < $NumberOfGroupLines) {
        ($GroupName, $Junk) = split (/ /, $GroupLines[$GroupIndex], 2);
        $GroupStatus = &CheckGroupSize ($GroupName);
        if ($GroupStatus == 211) {
            # $gmany is set by CheckGroupSize from the 211 reply.
            push (@CheckedGroupLines, "$GroupName $gmany");
            $GroupIndex++;
            $RetryCount = 0;
        }
        elsif (($GroupStatus == 400) && ($RetryCount < $MaxRetriesPerGroup)) {
            print "-\n";
            sleep (2);
            $RetryCount++;
        }
        else {
            # No usable reply (unknown group, retries exhausted, or the
            # connection is gone): keep the active-list estimate.
            push (@CheckedGroupLines, $GroupLines[$GroupIndex]);
            $GroupIndex++;
            $RetryCount = 0;
        }
        print "$GroupIndex\n";
    }
    &QuitNewsServer;
    @GroupLines = @CheckedGroupLines;
}

#----------------------------------------------------------------
# write text files
#----------------------------------------------------------------

# Writes the raw active lines to the local static active file.
sub PrintStaticActiveFile {
    if (!open (TEXTFILE, ">$WebPagesDirPath$StaticActiveFile")) {
        print "Can't write local active file: $WebPagesDirPath$StaticActiveFile\n";
        exit 1;
    }
    foreach $ActiveLine (@ActiveLines) {
        print TEXTFILE "$ActiveLine";
    }
    close (TEXTFILE);
    chmod (0664, "$WebPagesDirPath$StaticActiveFile");
}

# Writes the list of all group names, one per line, in the order of
# @GroupLines.
sub PrintGroupNamesTextFile {
    if (!open (TEXTFILE, ">$WebPagesDirPath$GroupNamesTextFile")) {
        print "Can't write active groups text file: $WebPagesDirPath$GroupNamesTextFile\n";
        exit 1;
    }
    foreach $GroupLine (@GroupLines) {
        ($GroupName, $Junk) = split (/ /, $GroupLine, 2);
        print TEXTFILE "$GroupName\n";
    }
    close (TEXTFILE);
    chmod (0664, "$WebPagesDirPath$GroupNamesTextFile");
}

# Writes the top level group names, one per line.
sub PrintTopLevelTextFile {
    if (!open (TOPFILE, ">$WebPagesDirPath$TopLevelTextFile")) {
        print "Can't write top level groups text file: $WebPagesDirPath$TopLevelTextFile\n";
        exit 1;
    }
    foreach $TopLevelName (@TopLevelNames) {
        print TOPFILE "$TopLevelName\n";
    }
    close (TOPFILE);
    chmod (0664, "$WebPagesDirPath$TopLevelTextFile");
}

# Writes the "name count" lines collected by GetGroupSizes.
sub PrintCheckedGroupsTextFile {
    if (!open (CHKFILE, ">$WebPagesDirPath$CheckedGroupsTextFile")) {
        print "Can't write checked groups text file: $WebPagesDirPath$CheckedGroupsTextFile\n";
        exit 1;
    }
    foreach $CheckedGroupLine (@CheckedGroupLines) {
        print CHKFILE "$CheckedGroupLine\n";
    }
    close (CHKFILE);
    chmod (0664, "$WebPagesDirPath$CheckedGroupsTextFile");
}

#----------------------------------------------------------------
# write Web files
#----------------------------------------------------------------

# Deletes the list pages of a previous run: files in $WebPagesDirPath
# named "$GroupListPageBegin-*.$PageExt".
sub DeletePreviousGroupListPages {
    if (!opendir (USENET, "$WebPagesDirPath")) {
        print "Can't read Web pages directory: $WebPagesDirPath ($!)\n";
        return;
    }
    # List context, so that a file named "0" cannot end the scan early.
    @DirList = readdir (USENET);
    closedir (USENET);
    foreach $Filename (@DirList) {
        if (($Filename =~ /^\Q$GroupListPageBegin\E-/) && ($Filename =~ /\.\Q$PageExt\E$/)) {
            # readdir returns bare names; add the directory so that the
            # right file is removed whatever the current directory is.
            unlink ("$WebPagesDirPath$Filename");
        }
    }
}

# Walks the sorted @GroupLines and writes the list page(s) for each top
# level group in turn. Does nothing when there are no groups.
sub PrintGroupListPages {
    if (!@GroupLines) {
        return;
    }
    $CurrentTopLevelName = $TopLevelNames[0];
    foreach $GroupLine (@GroupLines) {
        ($TopLevelName, $Junk) = split (/\./, $GroupLine, 2);
        if ($TopLevelName ne $CurrentTopLevelName) {
            # A new hierarchy starts: flush the one collected so far.
            &PrintThisTopLevelGroup;
            $CurrentTopLevelName = $TopLevelName;
            undef @CurrentGroupLines;
        }
        push (@CurrentGroupLines, $GroupLine);
    }
    &PrintThisTopLevelGroup;
}

# Writes @CurrentGroupLines as one page, or as several pages plus a
# sub-contents page when there are more than $MaxGroupsPerPage groups.
sub PrintThisTopLevelGroup {
    $NumberOfCurrentGroups = @CurrentGroupLines;
    if ($NumberOfCurrentGroups > $MaxGroupsPerPage) {
        &PrintBigTopLevelGroup;
    }
    else {
        &PrintSmallTopLevelGroup;
    }
}

# Writes the single list page of a top level group that fits on one page.
sub PrintSmallTopLevelGroup {
    $GroupListPagePath = "$WebPagesDirPath$GroupListPageBegin"."-$CurrentTopLevelName"."\.$PageExt";
    if (!open (PAGEFILE, ">$GroupListPagePath")) {
        print "Can't write group list page: $GroupListPagePath\n";
        exit 1;
    }
    &PrintListPageTop ($CurrentTopLevelName);
    foreach $CurrentGroupLine (@CurrentGroupLines) {
        ($GroupName, $NumberOfMessages) = split (/ /, $CurrentGroupLine, 2);
        # NOTE(review): the link markup around this text was stripped
        # from the source as received (presumably a link to the
        # $ReadnewsName script) - restore it from a pristine copy.
        print PAGEFILE "$GroupName $NumberOfMessages\n\n";
    }
    &PrintPageFooter;
    close (PAGEFILE);
    chmod (0664, "$GroupListPagePath");
}

# Splits a large top level group into evenly sized batches, writes one
# list page per batch and then the sub-contents page that lists the
# first and last group of each batch.
sub PrintBigTopLevelGroup {
    $NumberOfBatches = int ($NumberOfCurrentGroups / $MaxGroupsPerPage);
    if (($NumberOfCurrentGroups % $MaxGroupsPerPage) > 0) {
        $NumberOfBatches++;
    }
    # Spread the groups evenly instead of leaving a short last page.
    $GroupsPerBatch = int ($NumberOfCurrentGroups / $NumberOfBatches);
    if (($NumberOfCurrentGroups % $NumberOfBatches) > 0) {
        $GroupsPerBatch++;
    }
    $BatchNumber = 1;
    $BatchLineCounter = 0;
    foreach $CurrentGroupLine (@CurrentGroupLines) {
        ($GroupName, $NumberOfMessages) = split (/ /, $CurrentGroupLine, 2);
        push (@BatchGroupNames, $GroupName);
        push (@BatchNumberOfMessages, $NumberOfMessages);
        $BatchLineCounter++;
        if ($BatchLineCounter == 1) {
            $FirstGroup = $GroupName;
        }
        # Deliberately not an elsif: with one group per batch the first
        # line of a batch is also its last.
        if ($BatchLineCounter == $GroupsPerBatch) {
            &FinishCurrentBatch;
            $BatchNumber++;
            $BatchLineCounter = 0;
        }
    }
    # Write the last page if it is not a full one.
    if ($BatchLineCounter > 0) {
        &FinishCurrentBatch;
    }
    &PrintSubContentsPage;
    undef (@BatchContentsLines);
}

# Closes the batch being collected: records its "first -- last" contents
# line, writes its list page and empties the batch arrays. Expects
# $GroupName to hold the last group added to the batch.
sub FinishCurrentBatch {
    $LastGroup = $GroupName;
    &TrimGroupNames;
    $BatchContentsLine = "$FirstGroup -- $LastGroup";
    push (@BatchContentsLines, $BatchContentsLine);
    &PrintBigListPage;
    undef (@BatchGroupNames);
    undef (@BatchNumberOfMessages);
}

# Shortens $FirstGroup and/or $LastGroup so that together they are no
# longer than $MaxContentsLineLength. If cutting only the longer name is
# enough, the shorter one is left whole; otherwise both are cut to half
# of the limit.
sub TrimGroupNames {
    my $FirstLength = length ($FirstGroup);
    my $LastLength = length ($LastGroup);
    if ($FirstLength + $LastLength <= $MaxContentsLineLength) {
        return;
    }
    $LengthToTrim = ($FirstLength + $LastLength) - $MaxContentsLineLength;
    $LengthDiff = abs ($FirstLength - $LastLength);
    if ($LengthDiff >= $LengthToTrim) {
        if ($FirstLength > $LastLength) {
            $FirstGroup = substr ($FirstGroup, 0, $MaxContentsLineLength - $LastLength);
        }
        else {
            $LastGroup = substr ($LastGroup, 0, $MaxContentsLineLength - $FirstLength);
        }
    }
    else {
        $FirstGroup = substr ($FirstGroup, 0, int ($MaxContentsLineLength / 2));
        $LastGroup = substr ($LastGroup, 0, int ($MaxContentsLineLength / 2));
    }
}

# Writes list page number $BatchNumber of a large top level group from
# @BatchGroupNames and @BatchNumberOfMessages.
sub PrintBigListPage {
    $GroupListPagePath = "$WebPagesDirPath$GroupListPageBegin"."-$CurrentTopLevelName"."$BatchNumber"."\.$PageExt";
    if (!open (PAGEFILE, ">$GroupListPagePath")) {
        print "Can't write large-group list page: $GroupListPagePath\n";
        exit 1;
    }
    &PrintListPageTop ($BatchContentsLine);
    $NumberOfGroupsInBatch = @BatchGroupNames;
    for ($BatchIndex = 0; $BatchIndex < $NumberOfGroupsInBatch; $BatchIndex++) {
        $ListGroupName = $BatchGroupNames[$BatchIndex];
        $ListNumberOfMessages = $BatchNumberOfMessages[$BatchIndex];
        # NOTE(review): link markup stripped from the source as received.
        print PAGEFILE "$ListGroupName $ListNumberOfMessages\n\n";
    }
    &PrintPageFooter;
    close (PAGEFILE);
    chmod (0664, "$GroupListPagePath");
}

# Print a Web sub-contents page with a link to each grouplist page,
# for a big top-level group.
sub PrintSubContentsPage {
    $SubContentsPagePath = "$WebPagesDirPath$GroupListPageBegin"."-$CurrentTopLevelName"."\.$PageExt";
    if (!open (PAGEFILE, ">$SubContentsPagePath")) {
        print "Can't write large-group subcontents page: $SubContentsPagePath\n";
        exit 1;
    }
    &PrintListPageTop ($CurrentTopLevelName);
    $BatchPageCounter = 1;
    foreach $BatchContentsLine (@BatchContentsLines) {
        # NOTE(review): $GroupListPageURL is computed but no longer
        # appears in the printed string; presumably it was the target of
        # a link whose markup was stripped - confirm and restore.
        $GroupListPageURL = "$WebPagesDirURL$GroupListPageBegin"."-$CurrentTopLevelName"."$BatchPageCounter"."\.$PageExt";
        print PAGEFILE "$BatchContentsLine\n\n";
        $BatchPageCounter++;
    }
    &PrintPageFooter;
    close (PAGEFILE);
    chmod (0664, "$SubContentsPagePath");
}

# Print a Web contents page with a link to each grouplist page.
# The grouplist page is a sub-contents page if there are too many
# groups for one grouplist page.
sub PrintContentsPage {
    if (!open (PAGEFILE, ">$WebPagesDirPath$MainContentsPagePath")) {
        print "Can't write main contents page: $WebPagesDirPath$MainContentsPagePath\n";
        exit 1;
    }
    &PrintContentsPageTop;
    foreach $TopLevelName (@TopLevelNames) {
        # NOTE(review): as in PrintSubContentsPage, the link markup that
        # presumably used $GroupListPageURL was stripped - restore it.
        $GroupListPageURL = "$WebPagesDirURL$GroupListPageBegin"."-$TopLevelName"."\.$PageExt";
        print PAGEFILE "$TopLevelName\n\n";
    }
    &PrintPageFooter;
    close (PAGEFILE);
    chmod (0664, "$WebPagesDirPath$MainContentsPagePath");
}

# Writes the top of the main contents page to PAGEFILE.
# NOTE(review): only the text of this template survived; its HTML tags
# were stripped from the source as received.
sub PrintContentsPageTop {
    print PAGEFILE <<ENDPRINT;
SCN - Usenet: Newsgroups from $ProviderName Seattle Community Network
  Usenet

Newsgroups from $ProviderName

ENDPRINT
}

# Writes the top of a list or sub-contents page to PAGEFILE. The
# argument is the page name shown in the title and heading.
# NOTE(review): only the text of this template survived; its HTML tags
# were stripped from the source as received.
sub PrintListPageTop {
    my ($PrintPageName) = @_;
    print PAGEFILE <<ENDPRINT;
SCN - Usenet: Newsgroups: $PrintPageName Seattle Community Network
  Usenet
    Newsgroups from $ProviderName

$PrintPageName

ENDPRINT
}

# Writes the common page footer, including the update time, to PAGEFILE.
# NOTE(review): only the text of this template survived; its HTML tags
# were stripped from the source as received.
sub PrintPageFooter {
    print PAGEFILE <<ENDPRINT;

Questions about using this newsreader: webmaster\@scn.org
- Newsgroups list updated $Date
ENDPRINT
}

#----------------------------------------------------------------
# NNTP routines
#----------------------------------------------------------------

# Opens a TCP connection to $newshost:$port on the socket handle S and
# reads the server greeting. Exits with status 1 on any failure,
# including a greeting other than 200 or 201.
sub ConnectToNewsServer {
    ($name, $aliases, $proto) = getprotobyname ('tcp');

    # $port may be given as a service name instead of a number.
    if ($port !~ /^\d+$/) {
        ($name, $aliases, $port) = getservbyname ($port, 'tcp');
        if (!defined ($port)) {
            print "Error: unknown NNTP service name\n";
            exit 1;
        }
    }

    # Resolve the host: either a dotted-quad address or a host name.
    if ($newshost =~ /^\d+\.\d+\.\d+\.\d+$/) {
        $thataddr = inet_aton ($newshost);
        $name = $newshost;
    }
    elsif ($newshost =~ /\w/) {
        ($name, $aliases, $type, $len, $thataddr) = gethostbyname ($newshost);
    }
    else {
        print "Error: NNTP host not specified in proper format\n";
        exit 1;
    }
    if (!defined ($thataddr)) {
        print "Error: can't resolve NNTP host $newshost\n";
        exit 1;
    }
    print "NNTP host: $name\n";

    # Let the Socket module build the address structure; its layout is
    # platform dependent.
    $that = pack_sockaddr_in ($port, $thataddr);

    if (!socket (S, PF_INET, SOCK_STREAM, $proto)) {
        print "Error: socket failed $!\n";
        exit 1;
    }

    # No explicit bind: connect assigns the local address and port.
    if (!connect (S, $that)) {
        print "Can't connect to $newshost tcp/ip port $port. Error: $!\n";
        exit 1;
    }

    # Turn on autoflush for the socket so that commands are sent at once.
    select (S);
    $| = 1;
    select (STDOUT);

    # read server status
    $_ = <S>;
    if (!defined ($_)) {
        print "$newshost closed the connection without a greeting\n";
        exit 1;
    }
    ($status, $rest) = split (/ /, $_, 2);
    if ($status == 200 || $status == 201) {
        # 200 server ready - posting allowed
        # 201 server ready - no posting allowed
    }
    else {
        print "$newshost refused connection: status $status - $rest\n";
        exit 1;
    }
}

# Sends the LIST command and returns the numeric status of the reply
# (215 when the list follows). Also sets $status and $rest.
sub CheckListStatus {
    print "Checking list status.\n";
    # NNTP command lines end with CR-LF.
    print S "LIST\r\n";

    # read reply
    print "Reading list status reply.\n";
    $_ = <S>;
    print "Status reply: $_.\n";
    print "Splitting status reply.\n";
    ($status, $rest) = split (/ /, $_, 2);
    print "Status replies: 1) $status, 2) $rest.\n";
    return ($status);
}

# Sends a GROUP command for the given group and returns the reply
# status. On a 211 reply $gmany holds the article count; $firstnum,
# $lastnum and $groupname are set from the same reply.
sub CheckGroupSize {
    my ($CheckGroupSizeGroup) = @_;
    print S "GROUP $CheckGroupSizeGroup\r\n";

    # read reply
    $_ = <S>;
    ($status, $gmany, $firstnum, $lastnum, $groupname) = split (/ /, $_, 5);
    return ($status);
}

# Sends QUIT, reports any reply other than 205, closes the socket and
# returns the reply status.
sub QuitNewsServer {
    print S "QUIT\r\n";
    $_ = <S>;
    ($status, $rest) = split (/ /, $_, 2);
    if ($status != 205) {
        print "news server status=$status: $rest\n";
    }
    close (S);
    return ($status);
}