#!/usr/bin/perl -w # make_cf.plx # rewrite all the pages on the CyberFair site that have 'type' # META headers of 'cf', using the current template. use strict; use File::Find; find(\&process, '/w1/s/socalsail/cyberfair'); sub process { # this is invoked by File::Find's find function for each # file it recursively finds. return unless /\.html$/; my $filename = $File::Find::name; my %page_hash = &read_page($filename); return unless defined $page_hash{type} and $page_hash{type} eq 'cf'; &write_page($filename, &build_page(%page_hash)) or die "couldn't write_page for file '$filename'\n"; } sub read_page { # invoked with a full pathname as argument, # returns a hash suitable for # feeding to &build_page my $pathname = shift; my %return_hash; open IN, "$pathname" or die "Couldn't open $pathname for reading: $!"; my $page = join '', ; close IN; return unless $page; if ($page =~ m#(.*)#i) { $return_hash{title} = $1; } while ($page =~ m##gi) { $return_hash{$1} = $2; } if ($page =~ /.+\s*(.+?)\s*/s) { $return_hash{content} = $1; } return %return_hash; } sub build_page { # given a suitable parameter hash, build a CyberFair page # and return it my %param = ( type => 'cf', # these are title => 'Untitled Document', # defaults... description => '', keywords => '', content => '', @_, # supplied name-value pairs come in here ); # translate the various META params into a merged $meta_block for # substituting into the template. my @meta_params = qw(type description keywords); my $meta_block = ''; foreach my $meta_param (@meta_params) { if ($param{$meta_param}) { $meta_block .= < EndOfText delete $param{$meta_param}; } } $param{meta_block} = $meta_block; my $template = < %%title%% %%meta_block%%


CyberFair 2000 Project
Main School
Carpinteria, California, USA

Carpinteria Valley Leaders
Government
Environment
School
Business
Arts & Entertainment
Sports & Recreation
Medical
Community Services

About This Site
Who Made This Site?
Why an Avocado Tree?
Project Narrative Information Sources

%%content%%
EndOfText # replace %%quoted%% words with values in %param hash $template =~ s{ %% ( .*? ) %% } { exists( $param{$1} ) ? $param{$1} : '' }gex; return $template; } sub write_page { # invoked with a full path and an HTML page, # writes the page to that file location. # will create directories as it goes, as needed. # issues a warning and returns undef (without # writing anything) if the page exists already # and is anything other than a regular text file. my($full_path, $made_page) = @_; unless ($full_path =~ /\.html$/) { warn "$full_path does not end with '.html'\n"; return; } unless ($full_path =~ /^\//) { warn "$full_path does not begin with a slash\n"; return; } if (-l $full_path) { warn "$full_path is a symbolic link\n"; return; } if (-B $full_path) { warn "$full_path is a binary file\n"; return; } # still here? good. make any needed directories... my $dir_path = $full_path; $dir_path =~ s{/[^/]+$}{}; # lose last '/' and ensuing filename &make_dirs($dir_path) or die "problem with &make_dirs on '$dir_path'..."; open OUT, ">$full_path" or die "can't open $full_path for writing: $!"; print OUT $made_page; close OUT or die "can't close $full_path filehandle: $!"; chmod 0644, $full_path or die "couldn't chmod $full_path to 0644"; 1; } sub make_dirs { # invoked with an argument consisting of a full pathname, # split it on '/' and check each component to see if it # is a currently-existing directory. If it isn't, create it # with permissions of 0755. # the last component is skipped if it contains any periods. this # is intended to avoid accidentally creating a directory out of # what should be a filename, e.g.: '/foo/index.html/'. # normally, though, the filename should not be passed to # the routine. note that this will cause the subroutine to fail # to create the last directory in a path if that directory's # name contains a dot. # for the sake of (minimal) security, this routine will abort # if the supplied argument contains two periods in a row ('..'). my $full_path = shift; return if $full_path =~ /\.\./; # doesn't trust people passing '..' my @dirs = split(/\//, $full_path); my $last_element = pop @dirs; unless ($last_element =~ /\./) { push @dirs, $last_element; # put it back on if no '.' } my $this_dir; umask 022; foreach (@dirs) { next unless $_; # empty element $this_dir .= "/$_"; if (-e $this_dir) { unless (-d $this_dir) { warn "$this_dir path component exists but is not a directory\n"; return; } } else { mkdir $this_dir, 0777 or die "couldn't mkdir $this_dir: $!"; } } 1; }