#!/usr/bin/perl

# make_exhibit.plx

# this script reads a pair of data files, extracts information
# relating to a group of tradeshow exhibitors, and writes
# out a browseable Web-based directory of those exhibitors
#
# both input files are made of "records": runs of consecutive
# non-blank lines, separated from each other by one or more blank
# (or whitespace-only) lines.

use strict;
use warnings;

# configuration section:

my $exhibit_file  = './exhibit.txt';
my $category_file = './category.txt';

# script-wide variables:

my %listing;                # key:   company name ($co_name).
                            # value: HTML-ized listing for this company.

my %companies_by_category;  # key:   category name.
                            # value: $co_name\n$co_name\n$co_name...

# read and parse the main exhibitor file, then the category file.
# the order matters: parse_category only accepts company names that
# already have an entry in %listing.

read_records($exhibit_file,  \&parse_exhibitor);
read_records($category_file, \&parse_category);

# output parsed data for debugging

print "LISTINGS:\n\n";
foreach my $co_name (sort keys %listing) {
    print $listing{$co_name}, "\n";
}

print "\nCATEGORIES:\n\n";
foreach my $cat (sort keys %companies_by_category) {
    print "$cat:\n\n$companies_by_category{$cat}\n";
}

# script proper ends. subroutines follow.

sub read_records {

    # read a file one blank-line-separated record at a time and
    # hand each record to a parsing subroutine.
    #
    # invoked with:
    #
    #   $file    - path of the file to read
    #   $handler - code reference, called once per record with the
    #              record's lines (newlines still attached) as its
    #              argument list
    #
    # has no return value. dies if $file can't be opened or closed.
    #
    # the handler runs while the file is still open, so $. inside
    # the handler is the number of the line most recently read from
    # $file (the blank separator line, or the last line of the file
    # for a final record with no trailing blank line).

    my ($file, $handler) = @_;

    my @record_lines = ();  # holds the current record's lines

    open my $fh, '<', $file
        or die "Can't open $file for reading: $!\n";

    while (my $line = <$fh>) {
        if ($line =~ /^\s*$/) {
            # this line is blank (or has nothing but space chars),
            # so it ends the record we were collecting, if any.
            if (@record_lines) {
                $handler->(@record_lines);
                @record_lines = ();
            }
        } else {
            # this line actually has data
            push @record_lines, $line;
        }
    }

    # process last batch of lines, if the file didn't have a trailing
    # blank line to trigger it already.
    $handler->(@record_lines) if @record_lines;

    close $fh
        or die "Can't close $file after reading: $!\n";

    return;
}

sub parse_exhibitor {

    # extract the relevant information about a particular
    # exhibitor and store it in the appropriate hash.
    #
    # invoked with an array of lines read from $exhibit_file.
    # has no return value, but instead modifies the following
    # script-wide variable:
    #
    # %listing
    #
    # expected record layout, by line position:
    #
    #   1   company name
    #   2   'Booth NNN'
    #   3   address
    #   4   address, second line
    #   5   phone, starting with '(NNN)'
    #   6   fax, starting with '(NNN)' and ending with ' (fax)'
    #   7+  an email address, an http: URL, or description text,
    #       recognized by pattern rather than by position
    #
    # a record that fails one of the line 1, 2, 5 or 6 checks is
    # skipped with a warning and leaves %listing untouched.

    my @lines = @_;

    return unless @lines;   # nothing to parse

    my $co_name;

    # everything except the company name defaults to the empty string,
    # so that a record with no email, URL or description (or with
    # fewer than six lines) still interpolates cleanly below.
    my ($booth, $address, $address2, $phone, $fax, $email, $url,
        $description) = ('') x 8;

    my $line_count = 0;
    foreach my $line (@lines) {
        chomp $line;
        ++$line_count;
        if ($line_count == 1) {
            # NOTE: this is a truth test, so a company literally
            # named '0' is rejected here.
            unless ($co_name = $line) {
                warn <<"EOF";
line_count=1, but got a false co_name. skipping exhibitor.
($exhibit_file line number $.)
EOF
                return;
            }
        } elsif ($line_count == 2) {
            if ($line =~ /^Booth (\d+)/) {
                $booth = $1;
            } else {
                warn <<"EOF";
line_count=2, but couldn't parse booth. skipping exhibitor.
(co_name '$co_name'. $exhibit_file line number $.)
EOF
                return;
            }
        } elsif ($line_count == 3) {
            $address = $line;
        } elsif ($line_count == 4) {
            $address2 = $line;
        } elsif ($line_count == 5) {
            if ($line =~ /^\(\d{3}\)/) {
                $phone = $line;
            } else {
                warn <<"EOF";
line_count=5, but couldn't parse phone number. skipping exhibitor.
(co_name '$co_name'. line '$line'. $exhibit_file line number $.)
EOF
                return;
            }
        } elsif ($line_count == 6) {
            if ($line =~ /^(\(\d{3}\).+) \(fax\)$/) {
                $fax = $1;  # the number only, without ' (fax)'
            } else {
                warn <<"EOF";
line_count=6, but couldn't parse fax number. skipping exhibitor.
(co_name '$co_name'. line '$line'. $exhibit_file line number $.)
EOF
                return;
            }
        } elsif ($line =~ /^\S+@\S+$/) {
            $email = $line;
        } elsif ($line =~ /^http:\S+$/) {
            $url = $line;
        } else {
            $description .= "$line\n";  # append so that multi-line
                                        # descriptions work right
        }
    }

    # done cycling through @lines.

    if (exists $listing{$co_name}) {

        # we already have an entry in %listing for this $co_name,
        # so give an error message that we're going to be
        # writing over the old data.

        warn <<"EOF";
Parsed duplicate listing for co_name '$co_name'.
Overwriting previous data. ($exhibit_file line number $.)
EOF
    }

    # create the %listing entry

    $listing{$co_name} = <<"EOF";
co_name: $co_name
booth: $booth
address: $address
address2: $address2
phone: $phone
fax: $fax
email: $email
url: $url
description: $description
EOF

    return;
}

sub parse_category {

    # extract the relevant information about a particular
    # category and store it in the appropriate hashes.
    #
    # invoked with an array of lines read from $category_file.
    # has no return value, but instead modifies this script-wide
    # variable:
    #
    # %companies_by_category
    #
    # expected record layout:
    #
    #   1   '[[category name]]'
    #   2+  'company name, NNN' (the trailing number is ignored)
    #
    # a company is only added to the category if it already has an
    # entry in %listing; anything else produces a warning and is
    # left out.

    my @lines = @_;

    return unless @lines;   # nothing to parse

    my $category;

    my $line_count = 0;
    foreach my $line (@lines) {
        chomp $line;
        ++$line_count;
        if ($line_count == 1) {
            if ($line =~ /^\[\[(.+)\]\]$/) {
                # line looks like '[[category name]]'
                $category = $1;
            } else {
                warn <<"EOF";
line_count=1, but couldn't parse category name.
skipping this category. ($category_file line number $.)
EOF
                return;
            }
        } elsif ($line =~ /^(.+), \d+$/) {
            my $co_name = $1;
            if (exists $listing{$co_name}) {
                $companies_by_category{$category} .= "$co_name\n";
            } else {
                warn <<"EOF";
parsed co_name '$co_name' from category file, but couldn't
find a corresponding company listing.
($category_file line number $.)
EOF
            }
        } else {
            warn <<"EOF";
line '$line' from category file doesn't appear to be either
a category or a company ($category_file line number $.)
EOF
        }
    }

    return;
}