#!/usr/bin/perl -w # make_cf.plx # recreates the Main School 2000 CyberFair site, # walking the site to rewrite all pages from the latest # version of the templates, and recreating the navigational # links specific to each page. use strict; use File::Find; my $fs_root = '/w1/s/socalsail/cyberfair'; my $escaped_fs_root = quotemeta $fs_root; # for use in regex patterns my $web_root = '/cyberfair'; my %page_data; # HoH w/ primary keys: path # secondary keys: page template params # values: corresponding content my %teacher_students; # HoL, w/ keys of teacher short_name and values # of arrays of student page paths. my %student_profiles; # HoHoL, w/ primary keys of teacher/student # string, secondary key of @attribute element, # and values of leader page paths. my %cat_leaders; # HoL, w/ keys of leader cat short_names, and # values of arrays of leader page paths. my @attributes = qw(writer editor photographer illustrator); my %verb = ( writer => 'written', editor => 'edited', photographer => 'photographed', illustrator => 'illustrated', ); my %student_verb = ( writer => 'wrote', editor => 'edited', photographer => 'photographed', illustrator => 'illustrated', ); # end configuration section find(\&load_pages, $fs_root); # loads up %page_data # use Data::Dumper; # print Dumper(\%page_data); # now, make all the pages foreach my $path (sort keys %page_data) { my %build_hash = %{$page_data{$path}}; my $made_page = &build_page(%build_hash); my $link_string = my $more_string = ''; my($link_start, $link_end); if ($path =~ m{^$escaped_fs_root/leader/([^/]+)/index\.html$}o) { # this is the index page for a particular type of leader my $leader_cat = $1; $link_start = <All the leaders we profiled in this category:

\n"; my %lc_lastname; foreach my $profile_path (@{$cat_leaders{$leader_cat}}) { next if $profile_path =~ /index\.html$/; my $name = $page_data{$profile_path}{name}; my @name_ary = split /\s+/, $name; my $lc_lastname = lc (pop @name_ary); $lc_lastname{$profile_path} = $lc_lastname; } foreach my $profile_path (sort { $lc_lastname{$a} cmp $lc_lastname{$b} } keys %lc_lastname) { my $web_path = $profile_path; $web_path =~ s{$escaped_fs_root}{$web_root}o; $link_string .= <$page_data{$profile_path}{name} EndOfText } } elsif ($path =~ m{^$escaped_fs_root/leader/[^/]+/[^/]+\.html$}o) { # this is a leader profile page $link_start = "

"; $link_end = "

\n"; my $cat_path = $path; $cat_path =~ s{[^/]+$}{index.html}; my $cat_name = $page_data{$cat_path}{name}; my $web_path = $cat_path; $web_path =~ s{$escaped_fs_root}{$web_root}o; $web_path =~ s{index\.html$}{}; $more_string = "

View more $cat_name Leaders

\n"; foreach my $attribute (@attributes) { my $verb = $verb{$attribute}; if ($page_data{$path}{$attribute}) { $link_string .= "This page was $verb by "; my @params = split /\|/, $page_data{$path}{$attribute}; my @links; foreach my $param (@params) { my $page = "$web_root/student/$param.html"; my $path = "$fs_root/student/$param.html"; my $name = $page_data{$path}{name} || 'Unknown'; push @links, "$name"; } $link_string .= &join_list(@links); $link_string .= "
\n"; } } } elsif ($path =~ m{^$escaped_fs_root/student/([^/]+)/index\.html$}o) { # this is a teacher page $link_start = <The following students from this class participated in the Main School CyberFair 2000 project:

    EndOfText $link_end = "
\n"; if (my $aryref = $teacher_students{$1}) { my @student_paths = @{$aryref}; foreach my $student_path (sort @student_paths) { next if $student_path =~ /index\.html$/; my $student_name = $page_data{$student_path}{name} || 'Unknown'; $student_path =~ s{$escaped_fs_root}{$web_root}o; $link_string .= "
  • $student_name\n"; } } } elsif ($path =~ m{^$escaped_fs_root/student/([^/]+/[^/]+)\.html$}o) { # this is a student page $link_start = <I participated in the Main School CyberFair 2000 project in the following ways:

    EndOfText $link_end = "

    \n"; my $student_string = $1; my $teacher_path = $path; $teacher_path =~ s{[^/]+$}{index.html}; my $teacher_name = $page_data{$teacher_path}{name}; my $web_path = $teacher_path; $web_path =~ s{$escaped_fs_root}{$web_root}o; $web_path =~ s{index\.html$}{}; my $possessive = $teacher_name . '\''; unless ($possessive =~ /s'$/) { $possessive .= 's'; } $more_string = <View more students from $possessive class

    EndOfText if ($student_profiles{$student_string}) { # this student has leader profiles foreach my $attribute (@attributes) { if ($student_profiles{$student_string}{$attribute}) { my @ary = @{ $student_profiles{$student_string}{$attribute} }; if (@ary >= 2) { $link_string .= "I $student_verb{$attribute} the leader profiles for "; } else { $link_string .= "I $student_verb{$attribute} the leader profile for "; } foreach my $elem (@ary) { my $leader_name = $page_data{$elem}{name}; my $web_path = $elem; $web_path =~ s{$escaped_fs_root}{$web_root}o; $elem = "$leader_name"; } $link_string .= &join_list(@ary); $link_string .= "
    \n"; } } } } elsif ($path =~ m{^$escaped_fs_root/student/index\.html$}o) { # this is the "Who made this site?" page $link_start = <You can browse a list of participants in the Main School CyberFair 2000 project by following the teacher links below:

      EndOfText $link_end = "
    \n"; foreach my $teacher (sort keys %teacher_students) { my $key = $fs_root . "/student/$teacher/index.html"; my $teacher_longname = $page_data{$key}{name}; my $possessive = $teacher_longname . '\''; unless ($possessive =~ /s'$/) { $possessive .= 's'; } $link_string .= <$possessive Class EndOfText } } if ($link_string) { $link_string =~ s/
    \n$//; $link_string = "$link_start$link_string$link_end"; } $link_string .= "\n$more_string\n"; $made_page =~ s{()}{$1\n\n$link_string}i; &write_page($path, $made_page) or die "&write_page failed"; } # subroutines follow sub load_pages { return unless /\.html$/; my $file = $File::Find::name; my %page_hash = &read_page($file); return unless $page_hash{type} and $page_hash{type} eq 'cf'; $page_data{$file} = \%page_hash; if ($file =~ m{^$escaped_fs_root/student/([^/]+)/[^/]+\.html}o) { # it's a student page, so list it in the %teacher_students HoL my $teacher = $1; push @{$teacher_students{$teacher}}, $file; } elsif ($file =~ m{$escaped_fs_root/leader/([^/]+)/[^/]+\.html}o) { # it's a leader page, so list it in the %cat_leaders HoL # and the %student_profiles HoL my $cat = $1; push @{$cat_leaders{$cat}}, $file; foreach my $attribute (@attributes) { if ($page_hash{$attribute}) { my @values = split /\|/, $page_hash{$attribute}; foreach my $value (@values) { push @{ $student_profiles{$value}{$attribute} }, $file; } } } } } sub join_list { # do a grammatical joining of a list into a string, and return it my @list = @_; my $string = ''; while (@list) { $string .= shift @list; my $length = @list; if ($length > 1) { $string .= ', '; } elsif ($length == 1) { $string .= ' and '; } } $string; } sub read_page { # invoked with a full pathname as argument, # returns a hash suitable for # feeding to &build_page my $pathname = shift; my %return_hash; open IN, "$pathname" or die "Couldn't open $pathname for reading: $!"; my $page = join '', ; close IN; return unless $page; if ($page =~ m#(.*)#i) { $return_hash{title} = $1; } while ($page =~ m##gi) { $return_hash{$1} = $2; } if ($page =~ /.+\s*(.+?)\s*/s) { $return_hash{content} = $1; } %return_hash; } sub build_page { # given a suitable parameter hash, build a CyberFair page # and return it my %param = ( type => 'cf', # these are title => 'Untitled Document', # defaults... 
# NOTE(review): the pairs passed in via @_ are listed after the defaults in
# this list, so a caller-supplied key replaces the default of the same name.
description => '', keywords => '', content => '', @_, # supplied name-value pairs come in here ); # translate the various META params into a merged $meta_block for # substituting into the template. my @meta_params = qw(type description keywords name writer editor illustrator photographer); my $meta_block = ''; foreach my $meta_param (@meta_params) { if ($param{$meta_param}) { $meta_block .= < EndOfText delete $param{$meta_param}; } } $param{meta_block} = $meta_block; my $template = < %%title%% %%meta_block%%


    CyberFair 2000 Project
    Main School
    Carpinteria, California, USA

    Carpinteria Valley Leaders
    Government
    Environment
    School
    Business
    Arts & Entertainment
    Sports & Recreation
    Medical
    Community Services

    About This Site
    Who Made This Site?
    Why an Avocado Tree?
    Project Narrative Information Sources

    %%content%%
    EndOfText # replace %%quoted%% words with values in %param hash $template =~ s{ %% ( .*? ) %% } { exists( $param{$1} ) ? $param{$1} : '' }gex; $template; } sub write_page { # invoked with a full path and an HTML page, # writes the page to that file location. # will create directories as it goes, as needed. # issues a warning and returns undef (without # writing anything) if the page exists already # and is anything other than a regular text file. my($full_path, $made_page) = @_; unless ($full_path =~ /\.html$/) { warn "$full_path does not end with '.html'\n"; return; } unless ($full_path =~ /^\//) { warn "$full_path does not begin with a slash\n"; return; } if (-l $full_path) { warn "$full_path is a symbolic link\n"; return; } if (-B $full_path) { warn "$full_path is a binary file\n"; return; } # still here? good. make any needed directories... my $dir_path = $full_path; $dir_path =~ s{/[^/]+$}{}; # lose last '/' and ensuing filename &make_dirs($dir_path) or die "problem with &make_dirs on '$dir_path'..."; open OUT, ">$full_path" or die "can't open $full_path for writing: $!"; print OUT $made_page; close OUT or die "can't close $full_path filehandle: $!"; chmod 0644, $full_path or die "couldn't chmod $full_path to 0644"; 1; } sub make_dirs { # invoked with an argument consisting of a full pathname, # split it on '/' and check each component to see if it # is a currently-existing directory. If it isn't, create it # with permissions of 0755. # the last component is skipped if it contains any periods. this # is intended to avoid accidentally creating a directory out of # what should be a filename, e.g.: # '/w1/s/socalsail/foo/index.html/'; normally, though, the filename # should not be passed to the routine. # for the sake of (minimal) security, this routine will abort # if the supplied argument contains two periods in a row ('..'). my $full_path = shift; return if $full_path =~ /\.\./; # doesn't trust people passing '..' 
# Build the path up one component at a time, creating whatever is missing.
# mkdir is asked for 0777, but umask 022 is set first, which is what yields
# the 0755 directories promised in the comment at the top of this routine.
my @dirs = split(/\//, $full_path); my $last_element = pop @dirs; unless ($last_element =~ /\./) { push @dirs, $last_element; # put it back on if no '.' } my $this_dir; umask 022; foreach (@dirs) { next unless $_; # empty element $this_dir .= "/$_"; if (-e $this_dir) { unless (-d $this_dir) { warn "$this_dir path component exists but is not a directory\n"; return; } } else { mkdir $this_dir, 0777 or die "couldn't mkdir $this_dir: $!"; } } 1; }

# ---------------------------------------------------------------------------
# Review notes for make_cf.plx.  They are collected here because the
# statements above are run together on shared lines, which leaves no safe
# place to annotate them individually.
#
# NOTE(review): this copy of the script looks damaged and should not be run
# as it stands.
#   * Statements that belong on separate lines share one line.  Everything
#     on the first line follows the '#!', so perl reads that whole line,
#     configuration included, as comment text.
#   * The here-documents and several patterns appear to have lost their
#     markup: assignments such as '$link_start = <All the leaders ...' have
#     an EndOfText terminator but no '<<EndOfText;' opener, and read_page
#     and the main loop contain patterns with nothing left between the
#     delimiters ('m##gi', 's{()}{...}').
#   Presumably the markup was stripped when this copy was made -- restore
#   the file from a pristine copy rather than guessing.  The patterns in
#   read_page decide what is carried over from each existing page, and the
#   main loop writes every page back over the file it was read from.
#
# What the visible code does:
#   main        find() calls load_pages for everything under $fs_root; then
#               each path in %page_data is rebuilt with build_page, given
#               navigation links chosen by matching the path (leader
#               category index, leader profile, teacher index, student page,
#               or the top-level student index), and handed to write_page.
#               The script dies if write_page returns false.
#   load_pages  File::Find callback.  Ignores names not ending in '.html'
#               and pages whose 'type' is not 'cf'.  Stores the page hash in
#               %page_data, then files the path under its teacher directory
#               in %teacher_students, or under its leader category in
#               %cat_leaders and under each '|'-separated value of each
#               @attributes entry in %student_profiles.
#   join_list   Returns '' for an empty list, 'a' for one element, 'a and b'
#               for two, and 'a, b and c' for three (no comma before 'and').
#   read_page   Dies if the file cannot be opened; returns an empty list for
#               an empty page; otherwise returns a hash holding 'title',
#               'content' and whatever name/value pairs its patterns
#               capture.
#   build_page  Moves the non-empty META parameters out of %param into
#               $param{meta_block}, then replaces each %%name%% in the
#               template with $param{name}, or with '' when that key does
#               not exist.  Returns the finished page as a string.
#   write_page  Warns and returns nothing unless the path ends in '.html',
#               begins with '/', is not a symbolic link and is not a binary
#               file.  Otherwise creates the directories, writes the page,
#               sets the mode to 0644 and returns 1; it dies if any of those
#               steps fails.
#   make_dirs   Returns nothing if the path contains '..' or if an existing
#               component is not a directory; dies if mkdir fails; returns 1
#               otherwise.
#
# NOTE(review): things to look at once a pristine copy is in hand:
#   * read_page and write_page use two-argument open with bareword handles
#     (IN, OUT); three-argument open with lexical handles avoids having the
#     mode parsed out of the file name.
#   * In load_pages the leader pattern has no leading '^', unlike the
#     student pattern next to it.
#   * The '{name}' lookups in the main loop ($cat_name, $teacher_name,
#     $teacher_longname, $leader_name, and $name in the category index)
#     have no fallback, unlike the "|| 'Unknown'" used for students.  A
#     page that was never loaded gives undef there and adds an empty entry
#     to %page_data.
#   * write_page's header says it refuses anything other than a regular
#     text file, but the only tests made are -l and -B.
# ---------------------------------------------------------------------------