#!/usr/bin/perl -w eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}' if 0; # not running under some shell # StatNews - A script to generate some statistics out of a newsgroup. # $Id: statnews.pl,v 1.1.1.1 1998/12/02 12:22:55 salve Exp $ # # Copyright © Davide G. M. Salvetti , 1998. # modified by Bernard M. Piller , 1999. # modified by Jochen Striepe , 2000. # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 2 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to: The Free Software Foundation, Inc., # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # # On Debian GNU/Linux System you can find a copy of the GNU General Public # License in /usr/doc/copyright/GPL. use strict; use vars qw($SUA $BUA $CAP $DOT $FROM $PRO $REFS $SHRINK $SPOOL $TO $WIDTH); use Getopt::Long; #Getopt::Long::config(qw(bundling)); use Time::Local; # Please, don't change the following line unless you've carefully read # the GNU General Public License and you're sure you both understand # it and know what you're doing (in legal terms). my $AUTHOR = 'Copyleft (C) Davide G. M. Salvetti , 1998.'; my $REL = q$Revision: 1.1.1.1 $; chop($REL); my $MOD = "3.15"; #---------------------------# #-c-o-n-f-i-g-u-r-a-t-i-o-n-# #---------------------------# # The default directory where all newsgroups hierarchies reside. $SPOOL = '/var/spool/slrnpull/news/'; # The default terminal width. $WIDTH = 80; # The minimum allowed terminal width. $SHRINK = 70; # Whether to capitalize sender and receiver names. $CAP = 1; # Whether to leave dot or translate to slash in newsgroup names. $DOT = 0; # Whether to use References: for threading, rather than just matching subjects $REFS = 1; # Whether to create profiles of the NG's regulars $PRO = 0; # Whether to simplify user agent versions $SUA = 0; $BUA = 0; #-----------------------# #-s-u-b-r-o-u-t-i-n-e-s-# #-----------------------# # help: Prints usage information. sub help { print "StatNews generates some useful statistics out of a newsgroup.\n"; print "$AUTHOR\n"; print "$REL$MOD\n"; print "Refer to the GNU General Public License for condition of use.\n"; print "\nUsage: statnews [OPTIONS] NEWSGROUP\n"; print "\nOptions:\n"; print " --agents simplify user agent versions\n"; print " --better-agents simplify user agents at all\n"; print " --capitalize(*) whether to capitalize the name of both the sender\n"; print " and the receiver of each message\n"; print " (default is --capitalize: yes)\n"; print " --dotted(*) whether to translate \".\" to \"/\"' in NEWSGROUP\n"; print " (default is --nodotted: does translate)\n"; print " --from=DATE set the date statistics start from\n"; print " (DATE format is dd/mm/yyyy GMT)\n"; print " --help display this help summary\n"; print " --profile profile X-Headers\n"; print " --refs(*) use \"References:\" for threading (default is yes)\n"; print " --spool=SPOOLDIR search NEWSGROUP in SPOOLDIR\n"; print " (default is $SPOOL)\n"; print " --to=DATE set the date statistics end by\n"; print " (DATE format is dd/mm/yyyy GMT)\n"; print " --width=WIDTH set the terminal width to WIDTH columns\n"; print " (default is $WIDTH, with a minimum of $SHRINK)\n"; print "\nOptions may be conveniently abbreviated and prefixed by \"-\" instead\n"; print "of \"--\"; the \"=\" may be omitted or substituted with one or more blanks.\n"; print "Options listed with (*) may be negated by adding the prefix \"no\" in\n"; print "front of them (e.g., --dotted => --nodotted).\n"; print "\nThe content of the environment variable STATNEWS is prepended to the\n"; print "argument list if set. This can be used to override defaults.\n"; print "\nPlease, report bugs to .\n"; } # center: Center the argument string and return it. sub center { my ($line) = @_; return ' ' x (($WIDTH - length($line))/2) . $line . "\n"; } # dotline: Take the string and pad it right with dots. sub dotline { my ($len, $line) = @_; $len--; my $fmt = sprintf("%%.%ds %%s", $len); return sprintf($fmt, $line, '.' x ($len - length($line))); } # underline: Underline the argument string and return it. sub underline { my ($line) = @_; return sprintf("%s\n%s\n", $line, '=' x length($line)); } # fmttime: Take a time struct and returns a string. sub fmttime { my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = @_; my @days = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"); my @months = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"); # _I_ like it this way: if you want some other order, just do it. return sprintf("%s %d %s %d (GMT) %02d:%02d", $days[$wday], $mday, $months[$mon], 1900 + $year, $hour, $min); } # convertiso: Convert iso-8859-1 and iso-8859-15 chars escapes sub convertiso { my $line; ($line)= @_; my $isostart01 = "=\\?iso-8859-1\\?q\\?"; my $isostart15 = "=\\?iso-8859-15\\?q\\?"; my $isochar = "=([0-9a-f][0-9a-f])"; if (/$isostart01/i) { $line =~ s/$isostart01//i; $line =~ s/\?=//ig; while ($line =~ /$isochar/i) { my $c = chr(hex($1)); $line =~ s/$isochar/$c/i; } } elsif (/$isostart15/i) { $line =~ s/$isostart15//i; $line =~ s/\?=//ig; while ($line =~ /$isochar/i) { my $c = chr(hex($1)); $line =~ s/$isochar/$c/i; } } return $line; } #-------------------------# #-m-a-i-n---p-r-o-g-r-a-m-# #-------------------------# # Initialize start and end date. ($FROM, $TO) = (0, time); # Environment configuration. if ($_ = $ENV{STATNEWS}) {@_ = split; unshift(@ARGV, @_);} # Options. my %opt; unless (GetOptions(\%opt, "agents", "better-agents", "capitalize!", "dotted!", "from=s", "help", "profiles", "refs!", "spool=s", "to=s", "width=i")) {help(); exit(1);} if ($opt{"help"}) {help(); exit(0);} if (defined($opt{"agents"})) {$SUA = $opt{"agents"};} if (defined($opt{"better-agents"})) { $BUA = $opt{"better-agents"}; $SUA = 1; } if (defined($opt{"capitalize"})) {$CAP = $opt{"capitalize"};} if (defined($opt{"dotted"})) {$DOT = $opt{"dotted"};} if (defined($opt{"profiles"})) {$PRO = $opt{"profiles"};} if (defined($opt{"refs"})) {$REFS = $opt{"refs"};} if ($_ = $opt{"spool"}) {$SPOOL = $_ . '/';} if ($_ = $opt{"width"}) { if ($_ >= $SHRINK) {$WIDTH = $_;} else { warn "Width should be at least $SHRINK."; $WIDTH = $SHRINK;} } if ($_ = $opt{"from"}) {@_ = split m|/|; $FROM = timegm(0, 0, 0, $_[0], $_[1] - 1, $_[2] - 1900);} if ($_ = $opt{"to"}) {@_ = split m|/|; $TO = timegm(0, 0, 0, $_[0], $_[1] - 1, $_[2] - 1900);} die "$0: Start date has to be less than end date: " . gmtime($FROM) . ", " . gmtime($TO) . ".\n" unless ($FROM < $TO); unless ($_ = $ARGV[0]) {help(); exit(0);} # Translate "news.group" to "news/group", unless --dotted is set. unless ($DOT) {s|\.|/|g;} my $newsgroup = $_; # Initialize the time interval (this is intentionally reversed). my($mintime, $maxtime) = ($TO, $FROM); # Results variables, declared here for strict. my(%auth, %dest, %qchars, %qlines, %quote, %ratio, %rchars, %rlines, %subj, %wchars, %wlines, %agent, %sigcolor, %sigscolor, %sigvoter, %ref_msgid, %ref_subj, %ref_count, %ref_from, %reguser); my($auth, $chars, $days, $i, $lines, $qchars, $qlines, $agent, $sigcolor, $sigscolor, $sigvoter, $ref_msgid, $ref_subj, $ref_count, $ref_from, $reguser, $tmp); chdir($SPOOL) || die "$0: Can't chdir \`$SPOOL\': \"$!\""; opendir(DIR, $newsgroup) || die "$0: Can't opendir \`$newsgroup\': \"$!\""; my @articles = grep { /^[0-9]+$/ && -f "$newsgroup/$_" } readdir(DIR); die "$0: Empty newsgroup \`$newsgroup\'." unless (@articles); foreach my $IN (@articles) { # mtime is $stat[9]. my $mtime = (stat("$newsgroup/$IN"))[9]; # Skip articles outside the time window. next if ($mtime < $FROM || $mtime > $TO); # Get the maximum and minimum times. $mintime = ($mintime < $mtime)?$mintime:$mtime; $maxtime = ($maxtime > $mtime)?$maxtime:$mtime; my($from, $dest); my($nua_hint, $subject, $msgid, $sig_hint, $reg_hint, $references, $email) = (0, 0, 0, 0, 0, 0, 0); open(IN, "$newsgroup/$IN") || die "$0: Can't open $IN: $!"; while () { # Until headers end. last if /^$/; # I think to study is a good thing. chomp; study; if (s/^From: //) { $from = &convertiso($_); $email = $from; # Get email address. unless ($email =~ s/\(.*\)//g) {$email =~ s/.*<(.*)>/$1/g;} $email =~ s/[^\w\@\.\-]//go; # Get name from address: this is simple, but behaves mostly well. $from =~ s/\"//go; unless ($from =~ s/\s+<.*>//g) {$from =~ s/.*\((.*)\)/$1/g;} # Capitalize: this is to collect "d'Andrea" together with "D'Andrea". if ($CAP) { $from =~ s/\s(\w)/ \U$1/g; $from =~ s/^(\w)/\U$1/; } $from =~ s/ $//; # Same email address, but changed real names. if ($email) { if (($from eq "Root") or not $from) {$from = "<$email>";} if (exists($ref_from{$email})) { if ($ref_from{$email} ne $from) { if ($from =~ /^\w+(\s.*)?\s\w+$/) { $auth{$from} += $auth{$ref_from{$email}}; $qchars{$from} += $qchars{$ref_from{$email}}; $wchars{$from} += $wchars{$ref_from{$email}}; $qlines{$from} += $qlines{$ref_from{$email}}; $wlines{$from} += $wlines{$ref_from{$email}}; if ($PRO) { foreach my $key (keys %$sigvoter) { $sigvoter{$key}{$from} = 1; delete($sigvoter{$key}{$ref_from{$email}}); }; $reguser{$from} = $reguser{$ref_from{$email}}; delete($reguser{$ref_from{$email}}); }; delete($auth{$ref_from{$email}}); delete($qchars{$ref_from{$email}}); delete($wchars{$ref_from{$email}}); delete($qlines{$ref_from{$email}}); delete($wlines{$ref_from{$email}}); $ref_from{$email} = $from; } else {$from = $ref_from{$email};}; }; } else {$ref_from{$email} = $from;}; }; $auth{$from}++; } # X-Comment-To is a Fido syntax. if (s/^((X-)?Comment-)?To: //) { # Capitalize. if ($CAP) {s/\b(\w)/\U$1/g;} $dest = $_; $dest{$dest}++; } # subject: optimistic guess (unthreaded) if (s/^Subject: ([Rr]e:\s+)?//) { $subject = &convertiso($_); if (not $REFS) {$subj{$subject}++;}; } # subject: get references for educated guess (threaded) if ($REFS and s/^References: //) { $references = $_; } # sigcolor if ($PRO and s/^X-Sig(nature?)?-Colou?r: //) { $sig_hint = &convertiso($_); $sig_hint =~ tr/A-Z/a-z/; $sigcolor{$sig_hint}++; } # registered Linux user if ($PRO and s/^X-Registered-Linux-User: #?(\d+)//) { $reg_hint = $1; } # trust header and get user agent if (s/^X-Newsreader: //) {$nua_hint = &convertiso($_);}; if (s/^User-Agent: //) {$nua_hint = &convertiso($_);}; if (s/^X-Mailer: // and not $nua_hint) {$nua_hint = &convertiso($_);}; if (s/^X-FTN-Tearline: // and not $nua_hint) {$nua_hint = &convertiso($_);}; if (s/^Message-ID: //) {$msgid = $_}; } # educated guess about subject (threaded): if ($REFS and $msgid) { if ($references) { if ($references =~ /\>/g) { $ref_msgid{$msgid} = substr($references, 0, pos($references)); } else {$ref_msgid{$msgid} = $references;} } else {$ref_msgid{$msgid} = "first";} $ref_subj{$msgid} = $subject; $ref_count{$msgid} = 1; } # trying to figure out number of sig color voters: if ($PRO and $sig_hint) { # allow having several favorite colors if (not exists($sigvoter{$sig_hint}{$from})) { $sigvoter{$sig_hint}{$from} = 1; $sigscolor{$sig_hint}++; } } # figure out registered Linux users: if ($PRO and $reg_hint) {$reguser{$from} = $reg_hint;} # educated guess about user agent: if ($nua_hint) { if ($SUA) { if ($nua_hint =~ /Anawave Gravity/) {$agent{"Anawave Gravity"}++} elsif ($nua_hint =~ /APoint/) {$agent{"APoint"}++} elsif ($nua_hint =~ /Arn V /) {$agent{"Arn"}++} elsif ($nua_hint =~ /CrossPoint/) {$agent{"CrossPoint"}++} elsif ($nua_hint =~ /FakeEd/) {$agent{"FakeEd"}++} elsif ($nua_hint =~ /FEddy/) {$agent{"FEddy"}++} elsif ($nua_hint =~ /FIPS\/32/) {$agent{"FIPS\/32"}++} elsif ($nua_hint =~ /FIDOGATE/) {$agent{"FIDOGATE"}++} elsif ($nua_hint =~ /FleetStreet/i) {$agent{"FleetStreet"}++} elsif ($nua_hint =~ /Forte Agent/) { if ($BUA) {$agent{"Forte \[Free\] Agent"}++} else {$agent{"Forte Agent"}++}; } elsif ($nua_hint =~ /Forte Free Agent/) { if ($BUA) {$agent{"Forte \[Free\] Agent"}++} else {$agent{"Forte Free Agent"}++}; } elsif ($nua_hint =~ /G(old)?ED/) { $agent{"GoldED"}++; } elsif ($nua_hint =~ /Gnus/) { if ($BUA) {$agent{"Gnus"}++} elsif ($nua_hint =~ /XEmacs/i) {$agent{"Gnus/XEmacs"}++} elsif ($nua_hint =~ /Emacs/i) {$agent{"Gnus/Emacs"}++} else {$agent{"Gnus"}++}; } elsif ($nua_hint =~ /ifmail/) {$agent{"ifmail"}++} elsif ($nua_hint =~ /InterChange \(Hydra\) News/) {$agent{"InterChange (Hydra) News"}++} elsif ($nua_hint =~ /KNode/) {$agent{"KNode"}++} elsif ($nua_hint =~ /KRN/) {$agent{"KRN"}++} elsif ($nua_hint =~ /kexpress/) {$agent{"kexpress"}++} elsif ($nua_hint =~ /knews/) {$agent{"knews"}++} elsif ($nua_hint =~ /Lotus Notes/) {$agent{"Lotus Notes"}++} elsif ($nua_hint =~ /MT-NewsWatcher/) {$agent{"MT-NewsWatcher"}++} elsif ($nua_hint =~ /MacSOUP/) {$agent{"MacSOUP"}++} elsif ($nua_hint =~ /MR\/2 Internet Cruiser/) {$agent{"MR\/2 Internet Cruiser"}++} elsif ($nua_hint =~ /Msged/) {$agent{"Msged"}++} elsif ($nua_hint =~ /MicroPlanet Gravity/) {$agent{"MicroPlanet Gravity"}++} elsif ($nua_hint =~ /Microsoft (\(R\) Exchange )?Internet News/) {$agent{"MS Internet News"}++} elsif ($nua_hint =~ /Microsoft Outlook Express/) {$agent{"MS Outlook Express"}++} elsif ($nua_hint =~ /Mozilla.*compatible/) { if ($nua_hint =~ /StarOffice[ \/](\d+)\.(\d+)/i) { $tmp = "$1"; if ($BUA) {$agent{"Staroffice"}++} elsif ($nua_hint =~ /Linux/) {$agent{"Staroffice $tmp (Linux)"}++} elsif ($nua_hint =~ /Unix/) {$agent{"Staroffice $tmp (Unix)"}++} elsif ($nua_hint =~ /Win/) {$agent{"Staroffice $tmp (Win)"}++} else {$agent{$nua_hint}++}; } else {$agent{$nua_hint}++}; } elsif ($nua_hint =~ /Mozilla[ \/](\d+)\.(\d)/) { $tmp = "$1"; if ($BUA) {$agent{"Netscape"}++} elsif ($nua_hint =~ /Win/) {$agent{"Mozilla $tmp (Windows)"}++} elsif ($nua_hint =~ /Linux/) {$agent{"Mozilla $tmp (Linux)"}++} elsif ($nua_hint =~ /OS\/2/) {$agent{"Mozilla $tmp (OS/2)"}++} elsif ($nua_hint =~ /Macintosh/) {$agent{"Mozilla $tmp (Mac)"}++} elsif ($nua_hint =~ /SunOS/) {$agent{"Mozilla $tmp (Unix)"}++} elsif ($nua_hint =~ /BSD\/OS/) {$agent{"Mozilla $tmp (Unix)"}++} elsif ($nua_hint =~ /IRIX/) {$agent{"Mozilla $tmp (Unix)"}++} elsif ($nua_hint =~ /Unix/) {$agent{"Mozilla $tmp (Unix)"}++} else {$agent{$nua_hint}++}; } elsif ($nua_hint =~ /NN version/) {$agent{"NN"}++} elsif ($nua_hint =~ /News Xpress/) {$agent{"News Xpress"}++} elsif ($nua_hint =~ /Pan/) {$agent{"Pan"}++} elsif ($nua_hint =~ /pcp\//) {$agent{"Perl Cron Poster"}++} elsif ($nua_hint =~ /PMINews/) {$agent{"PMINews"}++} elsif ($nua_hint =~ /ProNews\/2 Version/) {$agent{"ProNews/2"}++} elsif ($nua_hint =~ /slrn/) {$agent{"slrn"}++} elsif ($nua_hint =~ /Sqed\/32/) {$agent{"Sqed/32"}++} elsif ($nua_hint =~ /TerMail/) {$agent{"TerMail"}++} elsif ($nua_hint =~ /Terminate/) {$agent{"Terminate"}++} elsif ($nua_hint =~ /timEd/) {$agent{"timEd"}++} elsif ($nua_hint =~ /tin/i) {$agent{"tin"}++} elsif ($nua_hint =~ /trn/) {$agent{"trn"}++} elsif ($nua_hint =~ /Turnpike/) {$agent{"Turnpike"}++} elsif ($nua_hint =~ /WinPoint/) {$agent{"WinPoint"}++} elsif ($nua_hint =~ /VSoup/) {$agent{"VSoup"}++} elsif ($nua_hint =~ /WP\/95 Rel/) {$agent{"WP/95"}++} elsif ($nua_hint =~ /WinVN/) {$agent{"WinVN"}++} elsif ($nua_hint =~ /XEmacs/) {$agent{"XEmacs"}++} elsif ($nua_hint =~ /XP/) {$agent{"XP"}++} elsif ($nua_hint =~ /Xnews/) {$agent{"Xnews"}++} elsif ($nua_hint =~ /xrn/) {$agent{"xrn"}++} elsif ($nua_hint =~ /YAM/) {$agent{"YAM"}++} elsif ($msgid) { # Dumb jokes get ignored if there's a better hint >:-) if ($msgid =~ /slrn/i) {$agent{"slrn"}++} elsif ($msgid =~ /pine/i) {$agent{"pine"}++} # Special treatments (regulars only): # Peter Bergt: elsif ($nua_hint =~ /Sorry. I forget what I was going to say. /) {$agent{"slrn"}++} else {$agent{$nua_hint}++}; } else {$agent{$nua_hint}++}; } else {$agent{$nua_hint}++}; } elsif ($msgid) { if ($msgid =~ /slrn/i) {$agent{"slrn"}++} elsif ($msgid =~ /pine/i) {$agent{"pine"}++} else {$agent{"[User agent not detected]"}++}; } else { $agent{"[User agent not detected, no Message-ID:]"}++; } # The body. my ($chars, $lines, $qchars, $qlines) = (0, 0, 0, 0); while () { # Count new-line's as well. $chars += length; # Don't count blank lines. $lines++ if ($_); # It's not a quote if it doesn't match. if (/^\s*\w{0,5}>+ /) { $qchars += length; $qlines++; } } $wlines{$from} += $lines; $qchars{$from} += $qchars; $qlines{$from} += $qlines; $wchars{$from} += $chars; # There are messages without $dest, especially in UseNet. if ($dest) { $rchars{$dest} += $chars; $rlines{$dest} += $lines; } close(IN); } closedir(DIR); # It makes sense to add one day to the time interval. $days = int(($maxtime - $mintime)/(3600 * 24)) + 1; # Grand totals. ($auth, $chars, $lines, $qchars, $qlines, $agent) = (0, 0, 0, 0, 0, 0); foreach my $key (keys %auth) { $auth += $auth{$key}; $qchars += $qchars{$key}; $qlines += $qlines{$key}; # These could not be %rchars and %rlines, # since not all chars are received, but every char is written. $chars += $wchars{$key}; $lines += $wlines{$key}; } # Evaluate threading. if ($REFS) { my $changes = 1; while ($changes) { $changes = 0; foreach my $key (keys %ref_msgid) { if ($ref_count{$key} and exists($ref_msgid{$ref_msgid{$key}})) { # credits go to daddy, but structure must be kept $ref_count{$ref_msgid{$key}} += $ref_count{$key}; $ref_count{$key} = 0; $changes = 1; } } } foreach my $key (keys %ref_msgid) { if ($ref_count{$key}) {$subj{$ref_subj{$key}} += $ref_count{$key};}; } } print center("StatNews Report"), center("($REL/$MOD)"), "\n", center($AUTHOR); print center(" Modified by Bernard M. Piller , 1999,"); print center(" modified by Jochen Striepe , 2000.\n\n"); printf("Newsgroup................: %s\n", $ARGV[0]); printf("Time stamp...............: %s\n", fmttime(gmtime())); printf("Start....................: %s\n", fmttime(gmtime(($mintime==$TO)?$FROM:$mintime))); printf("End......................: %s\n", fmttime(gmtime(($maxtime==$FROM)?$TO:$maxtime))); unless ($auth) {print "\nNo articles found!\n"; exit(0);} printf("Days.....................: %d\n", $days); printf("Messages.................: %d\n", $auth); $i = 0; foreach my $key (sort {$auth{$b} <=> $auth{$a} || $a cmp $b} keys %auth) { ++$i; } printf("Authors..................: %d\n", $i); printf("Messages per author......: %.1f\n", $auth/$i); printf("Characters...............: %d\n", $chars); printf("Average message length...: %.1f\n", $chars/$auth); printf("Messages per day.........: %.1f\n", $auth/$days); printf("Characters per day.......: %d\n", $chars/$days); printf("Quoting ratio............: %.1f%% (lines) %.1f%% (chars)\n", 100*$qlines/$lines, 100*$qchars/$chars); printf("\n\n%s\n", underline('Top 50 threads (#1: total messages, #2: mesgs/day, #3: share)')); $i = 0; foreach my $key (sort {$subj{$b} <=> $subj{$a} || $a cmp $b} keys %subj) { if ($i<50) { printf("%3d) %s: %3d %4.1f %4.1f%%\n", ++$i, dotline($WIDTH - 22, $key), $subj{$key}, $subj{$key}/$days, 100*$subj{$key}/$auth); } } printf("\n\n%s\n", underline('Top 50 authors (#1: reg. Linux user, #2: total mesgs, #3: mesgs/day, #4: share)')); $i = 0; foreach my $key (sort {$auth{$b} <=> $auth{$a} || $a cmp $b} keys %auth) { if ($i<50) { if ($reguser{$key}) {$tmp = " \(\#$reguser{$key}\) ";} else {$tmp = " "}; printf("%3d) %s%s: %3d %4.1f %4.1f%%\n", ++$i, dotline($WIDTH - (22 + length($tmp)), $key), $tmp, $auth{$key}, $auth{$key}/$days, 100*$auth{$key}/$auth); } } printf("\n\n%s\n", underline('Top 50 user agents (#1: total messages, #2: mesgs/day, #3: share)')); $i = 0; foreach my $key (sort {$agent{$b} <=> $agent{$a} || $a cmp $b} keys %agent) { if ($i<50) { printf("%3d) %s: %4d %5.1f %4.1f%%\n", ++$i, dotline($WIDTH - 24, $key), $agent{$key}, $agent{$key}/$days, 100*$agent{$key}/$auth); } } if ($PRO) { printf("\n\n%s\n", underline('Top 50 .signature colors (#1: total voters, #2: total votes)')); $i = 0; foreach my $key (sort {$sigscolor{$b} <=> $sigscolor{$a} || $sigcolor{$b} <=> $sigcolor{$a} || $a cmp $b} keys %sigscolor){ if ($i<50) { printf("%3d) %s: %4d %4d\n", ++$i, dotline($WIDTH - 17, $key), $sigscolor{$key}, $sigcolor{$key}); } } } # Quoting ratio & Co. Build the hash first to sort it over. foreach my $key (keys %qlines) {$quote{$key} = 100*$qlines{$key}/$wlines{$key};} printf("\n\n%s\n", underline('Top 50 quoting ratios (#1: qlines/wlines, #2: qchars/wchars, #3: total msg\'s)')); $i = 0; foreach my $key (sort {$quote{$b} <=> $quote{$a} || $a cmp $b} keys %quote) { if ($i<50) { printf("%3d) %s: %4.1f%% %4.1f%% %4d\n", ++$i, dotline($WIDTH - 24, $key), $quote{$key}, 100*$qchars{$key}/$wchars{$key}, $auth{$key}); } } print"\n\n"; print center("statnews.pl is downloadable as"); print center("http://sam.expmech.ing.tu-bs.de/stats/statnews.pl-$MOD.gz"); __END__ =head1 NAME statnews - generate some useful statistics out of a newsgroup =head1 SYNOPSIS statnews [OPTIONS] I =head1 DESCRIPTION The B command get some useful statistics out of a newsgroup. It displays things like how many articles each author posted, how many characters was written, how many lines were quoted, how many articles belong to each thread, the number of messages/characters per day, the average message length, and so on. =head1 OPTIONS =over 4 =item B<--capitalize>(*) Whether to capitalize the name of both the sender and the receiver of each message (default is C<--capitalize>: yes). This option is useful to collect C<"AUTHOR"> together with C<"author">, C<"Author">, and C<"AuThor">. =item B<--dotted>(*) Whether to translate C<"."> to C<"/"> in I (default is C<--nodotted>: does translate). This option may be useful if your system stores each newsgroup in a dedicate directory (e.g., F) instead that by hierarchy (e.g., F), or if your system has a news archive stored this way. =item B<--from=>I Set the date statistics start from (I format is C
, GMT). =item B<--help> Display the help summary. =item B<--spool=>I Search I in I (default is F). =item B<--to=>I Set the date statistics end by (I format is C
, GMT). =item B<--width=>I Set the terminal width to I columns (default is 80, with a minimum of 70). =back Options may be conveniently abbreviated and prefixed by "-" instead of "--"; the "=" may be omitted or substituted with one or more blanks. Options listed with (*) may be negated by adding the prefix C<"no"> in front of them (e.g., C<--dotted> => C<--nodotted>). =head1 RETURN VALUE The B command returns 0 on success and a positive integer on errors. =head1 ENVIRONMENT The environment variable I can hold a set of default options for B. These options are interpreted first by the program and can be overridden by explicit command line parameters. For example: =over 4 =item B C =item B C =back =head1 FILES The default spool directory is F. =head1 SEE ALSO L, L. =head1 BUGS There are no know bugs. =head1 UNRESTRICTIONS This program is copylefted. Refer to the GNU General Public License for conditions of use. =head1 AUTHOR This program has been written and is actively maintained by S Salvetti . =head1 HISTORY This program was originally aimed for use with FidoNet style echo areas under Debian GNU/Linux. It now can be used with Usenet newsgroups as well. More precisely, it can be used with every message base that stores each message in a file in some directory. =cut