#!/usr/bin/perl -w # make_exhibit.plx # this script reads a pair of data files, extracts information # relating to a group of tradeshow exhibitors, and writes # out a browseable Web-based directory of those exhibitors use strict; # configuration section: my $exhibit_file = './exhibit.txt'; my $category_file = './category.txt'; my $base_path = '/w1/s/sprocketexpo/exhibit'; my $base_url = '/exhibit'; my $show_name = 'SprocketExpo 2000'; # script-wide variables: my %listing; # key: company name ($co_name). # value: HTML-ized listing for this company. my %letter_count; # key: lowercased letter. # value: count of exhibitors starting with # that letter (for creating entries # in %listing_path). my %listing_path; # key: company name ($co_name). # value: path (relative to $base_path) of # this company's HTML listing page. # takes the form 'listings/a/a1.html'. my %index_line; # key: company name ($co_name). # value: HTML-ized
  • link to listing page. my %companies_by_category; # key: category name. # value: $co_name\n$co_name\n$co_name... my %companies_by_letter; # key: lowercased letter. # value: $co_name\n$co_name\n$co_name... my %categories_by_company; # key: company name ($co_name). # value: $category_name\n$category_name... # read and parse the main exhibitor file my @listing_lines = (); # holds current listing's lines for passing # to the &parse_exhibitor subroutine open EXHIBIT, $exhibit_file or die "Can't open $exhibit_file for reading: $!\n"; while () { if (/^\s*$/) { # this line is blank (or has nothing but space chars) if (@listing_lines) { &parse_exhibitor(@listing_lines); @listing_lines = (); } } else { # this line actually has data push @listing_lines, $_; } } # process last batch of lines, if the file didn't have a trailing # blank line to trigger it already. if (@listing_lines) { &parse_exhibitor(@listing_lines); } close EXHIBIT or die "Can't close $exhibit_file after reading: $!\n"; # read and parse the category file my @category_lines = (); # holds current category's lines for passing # to the &parse_category subroutine open CATEGORY, $category_file or die "Can't open $category_file for reading: $!\n"; while () { if (/^\s*$/) { # this line is blank (or has nothing but space chars) if (@category_lines) { &parse_category(@category_lines); @category_lines = (); } } else { # this line actually has data push @category_lines, $_; } } # process last batch of lines, if the file didn't have a trailing # blank line to trigger it already. if (@category_lines) { &parse_category(@category_lines); } close CATEGORY or die "Can't close $category_file after reading: $!\n"; # append the category information to the company listings foreach my $co_name (keys %listing) { if ($categories_by_company{$co_name}) { my @categories = split /\n/, $categories_by_company{$co_name}; $listing{$co_name} .= <<"EOF";

    Categories listed under:

      EOF foreach my $category (sort @categories) { my $cat_cleaned = &clean_name($category); my $path = "$base_url/cats/$cat_cleaned.html"; $listing{$co_name} .= "
    • $category\n"; } $listing{$co_name} .= "
    \n"; } } =comment the following debugging code has been commented out print "LISTINGS:\n\n"; foreach my $co_name (sort keys %listing) { print $listing{$co_name}, "\n"; } print "\nCATEGORIES:\n\n"; foreach my $cat (sort keys %companies_by_category) { print "$cat:\n\n$companies_by_category{$cat}\n\n"; } =cut # make sure all the directories we'll need exist my @dirs = ($base_path, "$base_path/alpha", "$base_path/cats", "$base_path/listings"); foreach my $letter ('a' .. 'z') { push @dirs, "$base_path/listings/$letter"; } umask 022; foreach my $dir (@dirs) { if (-e $dir) { unless (-d $dir) { die "$dir already exists, and isn't a directory."; } } else { mkdir $dir, 0777 or die "couldn't mkdir $dir: $!"; } } # write out each company listing foreach my $co_name (sort keys %listing) { my $path = "$base_path/$listing_path{$co_name}"; my $title = "$co_name ($show_name exhibitor listings)"; my $content = <<"EOF";

    Exhibitor Listings Index

    $listing{$co_name} EOF &write_page($path, $title, $content); } # write out the alphabetical index pages foreach my $letter ('a' .. 'z') { my $cap_letter = uc $letter; my $title = "$show_name - Exhibitors starting with '$cap_letter'"; my $path = "$base_path/alpha/$letter.html"; my $alpha_bar = &make_alpha_bar($letter); my $content = <<"EOF";

    Exhibitor Listing Index

    $alpha_bar

    $show_name - Exhibitors starting with '$cap_letter'

    EOF if ($companies_by_letter{$letter}) { # there is at least one listing for this letter $content .= "
      \n"; my @co_names = split /\n/, $companies_by_letter{$letter}; foreach my $co_name (sort {lc $a cmp lc $b} @co_names) { $content .= $index_line{$co_name}; } $content .= "
    \n"; } else { # there are no listings for this letter $content .= <<"EOF";

    (There currently are no listings for this letter)

    EOF } &write_page($path, $title, $content); } # write out the category pages my $cat_list = ''; # to hold the HTML-ized list of categories # for the top-level page foreach my $category (sort keys %companies_by_category) { my $cat_cleaned = &clean_name($category); my $path = "$base_path/cats/$cat_cleaned.html"; my $title = "$category Exhibitors ($show_name Exhibitor Listings)"; my $content = <<"EOF";

    Exhibitor Listings Index

    $show_name Exhibitor Listings - $category

      EOF my @companies = split /\n/, $companies_by_category{$category}; my $count = @companies; # how many companies in this category? $cat_list .= <<"EOF";
    • $category ($count exhibitors) EOF foreach my $co_name (sort {lc $a cmp lc $b} @companies) { $content .= $index_line{$co_name}; } $content .= "
    \n"; &write_page($path, $title, $content); } # write out the top-level page my $path = "$base_path/index.html"; my $title = "$show_name Exhibitor Listings"; my $alpha_bar = &make_alpha_bar; my $date = localtime; my $count = keys %listing; # how many exhibitors, total? my $content = <<"EOF";

    $show_name Exhibitor Listings

    Last updated $date
    $count exhibitors total

    Alphabetical Index:

    $alpha_bar

    Category Index:

      $cat_list
    EOF &write_page($path, $title, $content); # script proper ends. subroutines follow. sub parse_exhibitor { # extract the relevant information about a particular # exhibitor and store it in the appropriate hashes. # # invoked with an array of lines read from $exhibit_file. # has no return value, but instead modifies the following # script-wide variables: # # %listing # %index_line # %companies_by_letter my @lines = @_; my($co_name, $booth, $address, $address2, $phone, $fax, $email, $url, $description); my $line_count = 0; foreach my $line (@lines) { chomp $line; ++$line_count; if ($line_count == 1) { unless ($co_name = $line) { warn <<"EOF"; line_count=1, but got a false co_name. skipping exhibitor. ($exhibit_file line number $.) EOF return; } } elsif ($line_count == 2) { if ($line =~ /^Booth (\d+)/) { $booth = $1; } else { warn <<"EOF"; line_count=2, but couldn't parse booth. skipping exhibitor. (co_name '$co_name'. $exhibit_file line number $.) EOF return; } } elsif ($line_count == 3) { $address = $line; } elsif ($line_count == 4) { $address2 = $line; } elsif ($line_count == 5) { if ($line =~ /^\(\d{3}\)/) { $phone = $line; } else { warn <<"EOF"; line_count=5, but couldn't parse phone number. skipping exhibitor. (co_name '$co_name'. line '$line'. $exhibit_file line number $.) EOF return; } } elsif ($line_count == 6){ if ($line =~ /^(\(\d{3}\).+) \(fax\)$/) { $fax = $1; } else { warn <<"EOF"; line_count=6, but couldn't parse fax number. skipping exhibitor. (co_name '$co_name'. line '$line'. $exhibit_file line number $.) EOF return; } } elsif ($line =~ /^\S+@\S+$/) { $email = $line; } elsif ($line =~ /^http:\S+$/) { $url = $line; } else { $description .= "$line\n"; # append so that multi-line # descriptions work right } } # done cycling through @lines. if ($listing{$co_name}) { # we already have an entry in %listing for this $co_name, # so give an error message that we're going to be # writing over the old data. warn <<"EOF"; Parsed duplicate listing for co_name '$co_name'. Overwriting previous data. ($exhibit_file line number $.) EOF } # create the %listing entry $listing{$co_name} = <<"EOF";

    $co_name

    Booth $booth

    EOF $listing{$co_name} .= "

    "; $listing{$co_name} .= $address if $address; $listing{$co_name} .= "
    \n$address2" if $address2; $listing{$co_name} .= "
    \n$phone" if $phone; $listing{$co_name} .= "
    \n$fax (fax)" if $fax; $listing{$co_name} .= "
    \nEmail: $email" if $email; $listing{$co_name} .= "
    \nWeb: $url" if $url; $listing{$co_name} .= "\n

    \n\n"; if ($description) { $listing{$co_name} .= "

    Description:
    \n"; $listing{$co_name} .= "$description

    \n"; } # create the %listing_path entry my $first_char = lc substr $co_name, 0, 1; unless ($first_char =~ /[a-z]/) { $first_char = 'a'; } ++$letter_count{$first_char}; $listing_path{$co_name} = "listings/$first_char/$first_char$letter_count{$first_char}.html"; # create the %index_line entry $index_line{$co_name} = <<"EOF";
  • $co_name, Booth $booth EOF # append to the %companies_by_letter entry for this letter $companies_by_letter{$first_char} .= "$co_name\n"; } sub parse_category { # extract the relevant information about a particular # category and store it in the appropriate hashes. # # invoked with an array of lines read from $category_file. # has no return value, but instead modifies these script-wide # variables: # # %companies_by_category # %categories_by_company my @lines = @_; my $category; my $line_count = 0; foreach my $line (@lines) { chomp $line; ++$line_count; if ($line_count == 1) { if ($line =~ /^\[\[(.+)\]\]$/) { # line looks like '[[category name]]' $category = $1; } else { warn <<"EOF"; line_count=1, but couldn't parse category name. skipping this category. ($category_file line number $.) EOF return; } } elsif ($line =~ /^(.+), \d+$/) { my $co_name = $1; if ($listing{$co_name}) { $companies_by_category{$category} .= "$co_name\n"; $categories_by_company{$co_name} .= "$category\n"; } else { warn <<"EOF"; parsed co_name '$co_name' from category file, but couldn't find a corresponding company listing. ($category_file line number $.) EOF } } else { warn <<"EOF"; line '$line' from category file doesn't appear to be either a category or a company ($category_file line number $.) EOF } } } sub clean_name { # accepts a scalar, returns it with whitespace converted # to underscores and non-word chars deleted my $name = $_[0]; $name =~ s/\s+/_/g; $name =~ s/\W+//g; $name; } sub write_page { # write out an HTML page based on three arguments: # a path (including the filename at the end) where the # page is to be written, the title of the page, and the # the content of the page. this subroutine incorporates # a template for the SprocketExpo 2000 exhibitors directory; # the template will need to be modified for future shows. my($path, $title, $content) = @_; open OUT, ">$path" or die "can't open $path for writing: $!\n"; print OUT <<"EOF"; $title

    SprocketExpo
    2000

    Home
    Register
    Exhibitors
    Contact Us

    $content
    EOF close OUT or die "can't close filehandle for $path: $!\n"; } sub make_alpha_bar { # this makes an HTML alphabet navigation tool and # returns it. it takes one (optional) argument: a # letter of the alphabet. that letter (if supplied) is # not turned into a link in the resulting chunk of HTML, # but instead is enclosed in a tag to # indicate that that's the page the user is currently on. my $unlink_letter = lc $_[0]; my $alpha_bar = "

    "; foreach my $letter ('a' .. 'z') { my $cap_letter = uc $letter; if ($letter eq $unlink_letter) { $alpha_bar .= "$cap_letter | "; } else { $alpha_bar .= "$cap_letter | "; } if ($letter eq 'm') { # split the alpha_bar $alpha_bar =~ s{ \| $}{
    \n}; } } $alpha_bar =~ s{ \| $}{

    \n}; $alpha_bar; }