#!/usr/bin/perl -w

# log_report.plx
#
# Report on Web visitors. Expects common or extended log
# format lines to be fed to it on STDIN (or as files named
# on the command line, since the input is read with <>).

use strict;
use Time::Local;
use Text::Wrap;

# configuration section:

my $log_format       = 'extended';  # 'common' or 'extended'
$Text::Wrap::columns = 60;
my $site_name        = 'My Web Site';
my $expire_time      = 900;         # seconds of inactivity to consider a
                                    # "visit" ended (0 = forever)
my $summary_file     = './summary.txt';
my $summary_count    = 7;           # how many script runs to summarize
my $show_detail      = 1;           # (0 or 1) show detail?

# script-wide my variable declarations:
#
# $begin_time and $end_time hold the "date:time" stamp of the first
# and last parsed log line; they stay undefined until a line matches.

my ($begin_time, $end_time);

# Running totals. These start at 0 rather than undef so that a run
# over an empty (or entirely unparseable) log still yields numbers.

my $total_hits   = 0;   # every successfully parsed line
my $total_mb     = 0;   # bytes transferred, in megabytes
my $total_views  = 0;   # parsed lines that are not image requests
my $total_visits = 0;   # incremented by new_visit

# Per-visit records, filled in by the visit-tracking subroutines.
# %agent must be a hash: new_visit stores $agent{$visit_num}, which
# does not compile under 'use strict' if only a scalar $agent is
# declared here.

my (%visit_num, %host, %first_time, %last_time, %last_seconds,
    %page_sequence, %referer, %agent);

# script proper begins

while (<>) {

    my ($host, $ident_user, $auth_user, $date, $time,
        $time_zone, $method, $url, $protocol, $status,
        $bytes, $referer, $agent);

    # The list assignment is evaluated in scalar context by 'or', so
    # it yields the number of captured fields: 0 (false) when the
    # line does not match, in which case the line is skipped.

    if ($log_format eq 'common') {
        ($host, $ident_user, $auth_user, $date, $time,
         $time_zone, $method, $url, $protocol, $status, $bytes) =
            /^(\S+) (\S+) (\S+) \[([^:]+):(\d+:\d+:\d+) ([^\]]+)\] "(\S+) (.+?) (\S+)" (\S+) (\S+)$/
            or next;
    }
    elsif ($log_format eq 'extended') {
        ($host, $ident_user, $auth_user, $date, $time,
         $time_zone, $method, $url, $protocol, $status,
         $bytes, $referer, $agent) =
            /^(\S+) (\S+) (\S+) \[([^:]+):(\d+:\d+:\d+) ([^\]]+)\] "(\S+) (.+?) (\S+)" (\S+) (\S+) "([^"]+)" "([^"]+)"$/
            or next;
    }
    else {
        die "unrecognized log format '$log_format'";
    }

    ++$total_hits;

    # the bytes field is not always a number; count anything
    # non-numeric as zero bytes
    $bytes = 0 unless $bytes =~ /^\d+$/;
    $total_mb += ($bytes / (1024 * 1024));

    # remember the first and the most recent timestamp seen
    $begin_time ||= "$date:$time";
    $end_time     = "$date:$time";

    # image requests count toward hits and megabytes (above), but
    # we don't care about them for visit-tracking purposes
    next if $url =~ /\.(gif|jpg|jpeg|png|xbm)$/i;

    ++$total_views;

    store_line($host, $date, $time, $url, $referer, $agent);
}

# done processing log file. begin output.
my @summary_lines; if (-e $summary_file) { open SUMMARY, $summary_file or die "can't open $summary_file for reading: $!\n"; @summary_lines = ; close SUMMARY or die "can't close $summary_file after reading: $!\n"; } my $summary_line = sprintf "%s %s %6u %6u %6u %6u\n", $begin_time, $end_time, $total_hits, $total_views, $total_visits, $total_mb; unshift @summary_lines, $summary_line; if (@summary_lines > $summary_count) { $#summary_lines = $summary_count - 1; } my $report = <$summary_file" or die "couldn't open $summary_file for writing: $!\n"; foreach (@summary_lines) { $report .= $_; print SUMMARY; } close SUMMARY or die "couldn't close $summary_file after writing: $!\n"; if ($show_detail) { $report .= < $expire_time)) { # this visit has expired, so start a new one &new_visit($host, $date, $time, $url, $seconds, $referer, $agent); } else { # this visit has not expired, so add to existing record &add_to_visit($host, $date, $time, $url, $seconds, $elapsed); } } else { # there is no visit currently "working" for this host &new_visit($host, $date, $time, $url, $seconds, $referer, $agent); } } BEGIN { my %date_seconds; my %month_num = ( Jan => 0, Feb => 1, Mar => 2, Apr => 3, May => 4, Jun => 5, Jul => 6, Aug => 7, Sep => 8, Oct => 9, Nov => 10, Dec => 11, ); sub get_seconds { # this subroutine accepts a date string of the form # '06/Jul/1999' and a time string of the form '12:14:00' # and returns the number of seconds since the Unix # epoch, as determined by Time::Local's timelocal # function. the subroutine caches conversions of the # date part in %date_seconds in order to improve # performance. 
my ($date, $time) = @_; my $seconds; if ($date_seconds{$date}) { $seconds = $date_seconds{$date}; } else { my ($day, $mon, $yr) = split /\//, $date; $mon = $month_num{$mon}; $yr = $yr - 1900; $seconds = $date_seconds{$date} = timelocal(0, 0, 0, $day, $mon, $yr); } my($hr, $min, $sec) = split /:/, $time; $seconds += ($hr * 3600) + ($min * 60) + $sec; } } sub new_visit { # record an entry for an access line that has been # determined to represent a new visit (either because # this is the first time this host has been seen, # or because the host's previous visit has expired) my ($host, $date, $time, $url, $seconds, $referer, $agent) = @_; my $visit_num = ++$total_visits; $visit_num{$host} = $visit_num; $host{$visit_num} = $host; $first_time{$visit_num} = "$date:$time"; $last_time{$visit_num} = "$date:$time"; $last_seconds{$visit_num} = $seconds; $page_sequence{$visit_num} = $url; if ($log_format eq 'extended') { $referer{$visit_num} = $referer; $agent{$visit_num} = $agent; } } sub add_to_visit { # append to an existing visit record, because it has been # determined that the current line contains more data to # be added to a currently "working" visit my($host, $date, $time, $url, $seconds, $elapsed) = @_; my $visit_num = $visit_num{$host}; $last_time{$visit_num} = "$date:$time"; $last_seconds{$visit_num} = $seconds; my $elapsed_string = (int ($elapsed/60)) . ':' . sprintf "%.2u", $elapsed % 60; $page_sequence{$visit_num} .= " $elapsed_string, $url"; } sub visit_detail { # returns a formatted report for a particular visit. # assumes 'use Text::Wrap;' my $visit_num = $_[0]; my $page_sequence = wrap('', ' ', $page_sequence{$visit_num}); my $detail = <