#!/usr/bin/perl
#
# Filename: biblelink.pl
# Author: Eric Pement
my $version = "1.28"; # Biblelink - javascript version
my $verdate = "23 July 2003"; # Last revised date
# Requires: perl v5.6 or better
#
# Purpose: Given 1 or more input files with Bible references, create HTML
# hyperlinks wherever possible. Where the link reference is
# ambiguous, prompt the user. Execute with no arguments for a
# brief help menu, or with argument "-h" for full documentation.
# Changelog (reverse chronological order):
# 2003/07/23 : Revised -s to accept input from STDIN if no file found in @ARGV.
# : Bible version wasn't getting "version=" prefixed to string. Fixed.
# 2003/06/19 : Added the JavaScript code I use. Affects documentation only.
# 2003/03/12 : Bugfix of spaces being inserted in strings like "This12" or
# "That34". Added -m switch to make links even in pre-linked
# files; regex optimizations; some varnames and comments made
# easier to understand.
# 2002/10/30 : Clarified comments in source code and minor cosmetic changes
# 2002/10/08 : Changed references for "javascript:bible(..)" to "javascript:bible('..')"
# 2002/10/04 : Fixed skipped bare-refs; fixed hrefs with spaces which should have been +
# 2002/09/26 : Added 'Dt' to the list of valid Bible abbreviations
#
# --------------------------------------------------
# BEGIN - Settings you may want to change
# --------------------------------------------------
# use warnings; # uncomment for diags
# use diagnostics; # uncomment for diags
my $book = "Genesis";   # default book, but changeable via -b switch
my $chap = "1";         # default chapter, changeable via -c switch
my $m    = "\x10\x10";  # $m for "marker"; shows up nicely on Win2K.
my $sp   = "\x04 ";     # $sp is the string printed at the beginning
                        # of each line when the -s switch is used. It
                        # is a graphic character and single space.
#
# Quote-Left and Quote-Right chars to highlight text when prompting the
# user for input. Set to guillemets (European double-quote marks) for Win32.
my $ql = "\xAE";        # Left guillemet
my $qr = "\xAF";        # Right guillemet
# Biblelink creates hyperlinks that look like this:
#
#    <a href="javascript:bible('1+John+2:1')">1 John 2:1</a>
#
# Our 'bible' function creates a popup window of a specified size, passing
# the reference to Biblegateway to display the passage. The script goes in
# the HEAD section of the HTML file.
# NOTE(review): the JavaScript listing that belongs here is missing from
# this copy of the file.
#
# $link holds the opening of the anchor tag, up to (but not including) the
# scripture reference. The substitutions append the reference wrapped in
# {{ ... }} followed by '">'; the (' ... ') that finally enclose the
# reference are supplied later ("Trick 3" and "Trick 4").
my $link = '<a href="javascript:bible';
# If you don't want to use this script and would rather link directly to
# the Bible Gateway site, change $link above to the opening of a plain
# anchor tag whose HREF ends just before the passage reference, since
# the parens and single quotes are not needed by BibleGateway's parser.
# You _ALSO_ need to change "Trick 3" and "Trick 4" near line 590 to remove
# the (' ... ') which enclose the scripture references for the parser.
#
#
# --------------------------------------------------
# END - Settings you may want to change
# --------------------------------------------------
# --------------------------------------------------
# Potential pitfalls in this script
# --------------------------------------------------
# (1) Some input files contain very long lines, such as Microsoft Wordpad
# files, in which a newline is only used at the end of PARAGRAPHS.
# With such input files, the display may be longer than we expect.
# We may need to test the length of the 3-line buffer and truncate the
# head of the buffer if it exceeds a certain value. I will implement
# this if we get enough input files to require it.
#
# (2) The L (skip rest of line) command may confuse some users. This command
# only skips the rest of the current TYPE of matches. If we're matching
# on verses at this point, L skips all other verse references, but will
# return for chapter references or bare references, if they exist.
#
# (3) This script presumes a Win32 environment, especially the parts that
# look for the COMSPEC and try to set the $pager. I'd like to modify it
# to run under Linux, if time permits.
# --------------------------------------------------
# main program begins here
# --------------------------------------------------
use strict;
use Getopt::Std;
our($opt_a, $opt_d, $opt_h, $opt_l, $opt_m, $opt_s, $opt_p, $opt_b, $opt_c);
# getopts() only warns about an unknown switch and returns false, so its
# result has to be checked for an invalid switch to actually halt the script.
getopts('adhlmspb:c:')
    or die "Invalid switch on the command line. Type \"biblelink -h\" for help.\n";
# Foreach of (a,d,h,l,m,s,p), sets $opt_* = 1. Sets $opt_b, $opt_c to strings.
# -a = automatically link "chapter" and "verse" references
# -d = turn on diagnostic prompts
# -h = display additional help or info
# -l = corrects most mistaken uses of 'l' instead of '1'
# -m = make links even though file is linked already
# -s = send results to STDOUT instead of creating an output file; if
# no input file, accept input from STDIN and turn -a switch ON.
# -p = page display to STDOUT (implies -s)
# -b Exod. = change default book to "Exodus"
# -c 5 = change default chapt to 5
my %books;                  # hash of books of the Bible (abbreviation => full name)
my $count_1 = 0;            # count files successfully processed
my $count_2 = 0;            # count files skipped (already marked up)
my $count_3 = 0;            # count links created in any file
my $skipfile = 0;           # will be set to 1 if file should be skipped
my $n = "[1-9][0-9]{0,2}";  # numeric value, resolves from 1-999.
#----not currently activated; save for later----
# my $local_book = ""; # for books w/in parens "(Jn 1:1 and 14:6)"
# my $local_chap = ""; # likewise: "(Jn 1:1, but compare verse 14)"
# Yes, the $pager variable may be assigned up to three times. The loop
# looks for "less", then "more", on the DOS/Win path and keeps the first one
# it finds. But if 4DOS is installed, use LIST instead. And if the PAGER
# environment variable exists, use it instead.
my $pager;
my @cmds = ("less.exe", "more.exe");
my %cmdh = ("less.exe" => "less", "more.exe" => "more");
# An unset PATH simply yields an empty search list.
my @paths = split(';', defined $ENV{'PATH'} ? $ENV{'PATH'} : '');
STOP_LOOKING: foreach my $cmd (@cmds) {
    foreach my $dir (@paths) {
        if ( -e "$dir/$cmd" ) {
            $pager = $cmdh{$cmd};
            # Leave both loops: @cmds is in order of preference, so a
            # later entry ("more") must not override an earlier one ("less").
            last STOP_LOOKING;
        }
    }
} # end foreach
$pager = "$ENV{COMSPEC} /C list /s"
    if defined $ENV{'COMSPEC'} && $ENV{'COMSPEC'} =~ /4DOS|4NT|TCMD/;
$pager = $ENV{"PAGER"} || $pager; # stays undefined if none is found
# Get the abbreviation names at the EOF before processing files.
# DATA is a special filehandle that points to what is after '__END__'.
# Each useful line has the form "abbreviation:full name"; lines starting
# with '#' and lines without a colon are ignored.
while ( my $entry = <DATA> ) {
    chomp $entry;
    next if $entry =~ /^#/;
    next unless $entry =~ /:/;
    my ($abbrev, $full) = split /:/, $entry;
    $books{$abbrev} = $full;
}
close(DATA) or die "Could not close the perl script: $!\n";
if ( $opt_h ) { # user request to see docs
    system("perldoc", $0);
    exit;
} else {
    # -b: set the default book. The name is normalised with fixbookname()
    # and must be a key of %books.
    if ( $opt_b && $opt_b =~ m/^([123] ?)?[A-Za-z]+\.*/ ) {
        # Looks like a valid Bible book name.
        $opt_b = &fixbookname($opt_b); # Convert to proper case
        if ( exists($books{$opt_b}) ) { # Final test for valid book
            $book = $books{$opt_b};
        } else {
            &bad_book; # Invalid book name
        }
    } elsif ( $opt_b ) {
        &bad_book; # Not even potentially valid!!
    }
    # -c: set the default chapter. It must be a plain number from 1 to 150;
    # both bounds have to hold, and "0" or non-numeric text is an error
    # rather than being silently accepted or ignored.
    if ( defined $opt_c ) {
        if ( $opt_c =~ /^$n$/ && $opt_c >= 1 && $opt_c <= 150 ) {
            $chap = $opt_c; # Set default chapter.
        } else {
            &bad_chap; # Invalid chapter number
        }
    }
}
# By now getopts has stripped the valid switches out of @ARGV, so whatever
# is left is treated as a (possibly wildcarded) file specification.
# Invalid switches generate an error message, halting the perl script
my @tempcopy = @ARGV;   # untouched copy of the command tail, for messages
my @filelist;
# Expand every remaining argument with glob() into DOS/Windows filenames.
while (@ARGV) {
    push @filelist, glob(shift @ARGV);
}
# A command tail such as (s*.* *.htm *.h*) names some files more than once.
# Collect the names that really exist as hash keys so each is kept once.
my %uniquefiles = ();
for my $candidate ( @filelist ) {
    unless ( -e $candidate ) {
        # This \a rings the bell at the console.
        print STDERR "\a>> Diskfile \"$candidate\" does not exist! \n";
        next;
    }
    $uniquefiles{$candidate}++;
}
# From here on @filelist holds only unique, existing filenames.
@filelist = sort keys %uniquefiles;
# Arguments were given, but not one of them resolved to a file on disk.
bad_names() if ( !@filelist && @tempcopy );
# No arguments at all: an error, unless -s asks us to filter standard input.
if ( !@tempcopy ) {
    if ($opt_s) {
        push(@filelist, "std_in"); # Needed to fool foreach(), below
        $opt_a = 1; # Automatically create links without asking.
    } else {
        no_args();
    }
}
# Process each file, one at a time.
foreach my $file ( @filelist ) {
    # Never touch backups, executables, scripts, documents, images or archives.
    next if $file =~ m/\.
    (BAK|EXE|DLL|LNK|COM|SYS|BAT|PL|JS|DOC|PDF|RTF|JPG|PNG|GIF|SWF|FLA|ZIP|TAR|GZ)$/ix;
    my $old = "$file";
    if ($opt_s and $old eq "std_in") {
        open(OLD, "<-" ) or die "Cannot read from standard input: $!";
    } else {
        # Three-argument open: the filename is taken literally and is never
        # parsed for mode characters or surrounding whitespace.
        open(OLD, '<', $old) or die "Cannot read from input file, $old: $!";
    }
    my $rename_files = ""; # set to "yes" only when writing to a disk file
    my $new = "$file.NEW";
    # Now we check -s and -p switches! This is a list of all possible options
    # 1. files==1 && -s print to STDOUT
    # 2. files==1 && -p print to %PAGER
    # 3. files>1 && (-s || -p) issue ERROR message
    # 4. neither -s nor -p print to diskfile.NEW
    if ( scalar(@filelist) == 1 && $opt_s ) {
        open(NEW, ">-") or die "Could not open standard output: $!";
    } elsif ( scalar(@filelist) == 1 && $opt_p ) {
        open(NEW, "| $pager") or die "Could not open 'more' or %pager utility: $!";
    } elsif ( scalar(@filelist) > 1 && ($opt_s || $opt_p ) ) {
        &option_conflict; # issue error message and exit
    } else {
        # We want to write changes to the disk.
        open(NEW, '>', $new) or die "Cannot open output file, $new: $!";
        $rename_files = "yes";
    } # end if-scalar test
    select(NEW); # 'print' now goes to NEW if not specified
    &process('OLD');
    close(OLD) or die "Cannot close input file, $old: $!";
    # process() sets $skipfile when the file already contains Bible links.
    # Report and count the outcome the same way in every output mode, so a
    # skipped file is never also counted as successfully processed.
    if ( $skipfile == 1 ) {
        print STDERR "Skipping the file $old ... it was already done!\n";
    } else {
        $count_1++;
        print STDERR "$file - $::count_3 links created.\n";
    }
    if ( $rename_files eq "yes" ) {
        close(NEW) or die "Cannot close output file, $new: $!";
        if ( $skipfile == 1 ) { # The file was already processed
            # Silently delete the temporary file.
            unlink($new) or warn "\a The file $new could not be deleted.\n";
        } else {
            # Keep the untouched original as .BAK and put the new file in its place.
            rename($old, "$old.BAK") or die "Cannot rename $old to $old.BAK: $!";
            rename($new, $old) or die "Cannot rename $new to $old: $!";
        }
    }
    $skipfile = 0;   # reset flag for next file.
    $::count_3 = 0;  # reset counter for next file.
} # end foreach
&liststats; # cleanup
#----end of main----
# ------------------------------------------------------------------------
# Subroutines go at the end.
# ------------------------------------------------------------------------
############################################################################
sub process # 07/19/02 3:47:PM
# Parse each line of an input file
#
# Argument: the filehandle to read from (the main loop passes the name
# "OLD"). Lines are read one at a time into $_ and edited in place.
# For each line this routine:
#  - strips trailing whitespace and keeps a three-line sliding window
#    ($third, $prev, $this);
#  - dies if the line already contains the marker string $m;
#  - unless -m was given, stops reading as soon as a line matches
#    /javascript: ?bible/i, incrementing $count_2 and setting $skipfile;
#  - joins the following line onto the current one when the line ends in a
#    known book name or in ":<digits>,";
#  - with -l, rewrites a lowercase 'l' next to digits as the numeral 1;
#  - rewrites "digit?digit" as "digit-digit" and the Roman prefixes
#    I/II/III before certain book names as 1/2/3;
#  - resets the global $book and $chap from "subheading" lines;
#  - creates verse and chapter links, automatically when -a is set.
# Links created are counted in $::count_3.
#
# NOTE(review): several expressions in the second half of this routine
# (the verse substitution, the interactive branch and the end of the sub)
# appear truncated in this copy of the file -- confirm against a clean copy
# before changing any code here.
############################################################################
{
my $fh = shift @_; # Takes the filehandle "OLD"
# Three-line sliding window, oldest first: $third, $prev, $this.
my ($third, $prev, $this);
LINE: while (<$fh>) {
chomp;
s/\s+$//; # remove trailing whitespace from each line
# Sliding window. Some lines need to be appended to the previous line.
# Example: Given an input line that begins like this:
#
# Tim 3:16
# or
# 3:22-23, 4:12,
#
# we need to append such lines to the previous line to determine the
# full reference (e.g., "2 Tim 3:16" or "Acts 3:22-23, 4:12").
$third = $prev; # We'll use $third later, not now
$prev = $this; # Keep previous line
$this = $_; # Assign current line to $this
# Maybe "die" is too strong. Suppose there are multiple files...?
die "\a\n\n Input file already contains [$m] char on line $. - Aborting.\n\n" if /$m/;
# This test is skipped if $opt_m (-m switch) was true
if ( /javascript: ?bible/i && $opt_m != 1 ) { # File was already processed!
$count_2++; # Increment error counter
$skipfile = "1"; # pass value back to parent.
last LINE; # end this procedure
}
# A line ending in what looks like a book name ("2 Tim", "Acts.") may be the
# first half of a reference that continues on the next line.
PARTIAL:
while ( m/(([123] )?[A-Z][a-z]+)\.?\s*$/ ) { # While any partial match at EOL
print STDOUT "! DIAG 01: found possible partial match at EOL\n" if $opt_d;
if ( exists $books{$1} ) { # If the match is valid, then..
print STDOUT "! DIAG 1a: partial match bkname is valid\n" if $opt_d;
$this =~ s/\s+$//; # .. remove trailing whitespace,
chomp($_ = <$fh>); # .. get another line
$_ =~ s/^\s+//; # .. remove leading whitespace,
$this = $this . " " . $_; # .. reassign $this line,
$_ = $this; # .. reassign $_ .
} else {
print STDOUT "! DIAG 1b: partial match bkname is not valid\n" if $opt_d;
last PARTIAL;
}
} ## end while
while ( m/(:\d+,)$/ ) { # Second routine for partial matches. Different rule.
print STDOUT "! DIAG 1c: found trailing chars /$1/ at line $.\n" if $opt_d;
chomp($_ = <$fh>); # .. get another line
$_ =~ s/^\s+//; # .. remove leading whitespace,
$this = $this . " " . $_; # .. reassign $this line,
$_ = $this; # .. reassign $_.
} ## end while
# This position is important, because if the block occurs above the PARTIAL
# loop, some lines will be read in and not corrected.
if ( $opt_l ) {
# Try to correct most mistaken uses of l instead of 1, though this
# sometimes comes from OCR scanned documents, not always human error.
print STDOUT "! DIAG x1 - matched /$&/ \n" if ( $opt_d && m/ l(\d+[:]?)/ );
s/ l(\d+[:]?)/ 1$1/g;
print STDOUT "! DIAG x2 - matched /$&/ \n" if ( $opt_d && m/\d+l[-:\d ]/ );
s/(\d+)l([-:\d ])/$1 . "1" . $2/ge;
print STDOUT "! DIAG x3 - matched /$&/ \n" if ( $opt_d && m/\d[-:]l/ );
s/(\d[-:])l/$1 . "1"/ge;
s/ l / 1 /g;
}
# Correct Microsoft Word-to-HTML problems with inclusive numbers. In
# Microsoft Word, the en dash is not converted to a hyphen properly,
# resulting in "Gen. 1:26?28" instead of "Gen. 1:26-28" when using
# Word's "save-as-HTML" feature. NOTE: if the input file is *supposed*
# to have strings like "2?3" or "10?11", this will create problems.
s/(\d)\?(\d)/$1-$2/g;
# Convert Roman numbers in Bible refs to Arabic numerals. Side effect:
# This will affect embedded quotations from older books.
s/\b(II{0,2}) ?(Sam|Kin|Chron|Cor|Thess|Tim|Pet|John)/length($1) . " $2"/ge;
# Redefine global $book and global $chap
# --------------------------------------
# The "class=.subheading" string redefines the global chapter, but
# sometimes the closing '' tag is on the next line. The tag may look
# like:
# Ezekiel's Temple (40-48)
# The Word Became Flesh (John 1:1-18)
# Abraham's Call (12)
#
# with the title being long enough to span two lines. The text is usually
# marked with the CLASS tags before, which is why we do this extra step.
if ( m|class=['"]subheading|i && $_ !~ m|| ) {
# We cannot find a closing '' tag on the same line, so...
print STDOUT "! DIAG 02: found subheading, no closing\n" if $opt_d;
$this = $_; # .. save this line,
chomp($_ = <$fh>); # .. get the next,
s/^\s+//; # .. remove leading whitespace from the next,
$this = $this . " " . $_; # .. combine both into this,
$_ = $this; # .. and set the current line equal to $this.
}
# NOTE: Do not put a '~' in any comment, since it's the m~~ delimiter.
if ( m~
class=['"]subheading.*? # Required string
\( # Literal paren, required
( # Group 1 - optional bookname
([123]\s?)? # Group 2 - 1 Sam, 2Tim, etc.
[A-Z][a-z]\.? # Gen., Ex., Lev., Numb., etc.
)? # End optional Group 1
\s? # Optional space
(\d+) # Required Group 3 - may be (16):31-32
.* # The :31-32 including close paren
# Required closing tag, case-insensitive
~ix ) {
$book = $1 if $1; # reset the global book
$chap = $3; # reset the global chapter
print STDOUT "! DIAG 03: reset default chapter as $chap\n" if $opt_d;
# Match on bare num (12) or span (12-15), less than bare references.
$::count_3 += ($_ =~ s|(?<=\()$n(-$n)?(?=\))|$link\{{$1}}">$1|);
} ## end if-match
# Verse/chapt substitutions - may occur multiple times on a line.
if ($opt_a) { # If told to generate links automatically...
# Verse substitutions
# Count substitutions with $count_3
$::count_3 += ($_ =~ s~
( # Group 1
(?$1$3~gx );
print STDOUT "! DIAG 50: vv link\n" if $opt_d;
# Chapter substitutions
# NB: Chapter NN must occur before hash refs to permit it. Else,
# the hash ref will forbid the string "Chapter 12" or 3, etc.!
$::count_3 += ( $_ =~ s~
[Cc]hapter
\s
(\d+)(?![:<])
~$link\{{$book\+$1}}">$&~gx );
print STDOUT "! DIAG 51: chapt link\n" if $opt_d;
} else { # Do interactive verse/chapter substitutions
# Verse substitutions
s/(?$bk+!+$u_tail$m~;
print STDOUT "! DIAG 07: hashref exists, line after sub:\n -$_\n" if $opt_d;
&move_the_marker('hash'); # move marker forward to next match
print STDOUT "! DIAG 08: hashref exists, after markrmove:\n -$_\n" if $opt_d;
$::count_3++; # increment the counter
} else {
# What looked like a Bible book was invalid, e.g., Sura 4:157 or Mosiah 3:10.
# We will prevent it from being matched as a Bare Block by using '+skip+'.
print STDOUT "! DIAG 09: Bible book $tbk not found\n" if $opt_d;
s~$bk$m$cv$xlat~$bk+skip+$cv$xlat$m~;
&move_the_marker('hash'); # and forward to the next match
} ## end if-else
} # end while /Book Chap:verse/ search
# Bare references (no bookname, no prefixes).
# Examples of bare references which should be linked:
#
# The statement of the headwaiter in 2:10 was prophetic ...
# Jesus said, "Go, call your husband and come back" (4:16).
# ... the second day is in 1:29-34, the third in 1:35-43, ...
#
# The subroutine must avoid rematching:
# 1. internal A HREF links: bible('John+1:1,+8:58,+14:6')
# 2. external A HREF links: ')">John 1:1, 8:58, 14:6
#
# Bare references should always be interactive because of:
# - Multivolume bibliographic notes: ISBE, 5:15-37 (Zondervan, 1975)
# - Nonbiblical references: Moroni 9:9
#
# The task of this s/// cmd is to put a $m (marker) in front of the first
# probable bare reference it finds. We don't need to look for commas, extra
# chapters, etc., since they're not relevant to setting the marker.
#
# After setting of the marker here (only once), each time a hyperlink is
# successfully created, the marker is moved two more times. (1) by the
# function make_some_links, which moves it from the front of the reference
# to the end of a hyperlink, and by (2) move_the_marker, which moves it
# from the end of a hyperlink to the beginning of the next probable
# reference.
#
# The /x on s/// permits whitespace and #comments in the search expression.
#
# 4 types of lookaround. All are nonmatching. Lookbehind must be fixed-len.
# ------------------------------------------------------------------------
# Pos lookbehind (?<=abc) Succeeds if 'abc' CAN match to the LEFT
# Neg lookbehind (?....
} # end process() subroutine
############################################################################
sub fixbookname # normalise a would-be book name to proper case
# The argument may carry a leading digit and a trailing reference, e.g.
# "1JN. 5:12" or "2cor. 8:36". Returns a cleaned-up copy: surrounding
# whitespace trimmed, everything lowercased, the first period that follows
# a letter dropped, and the first run of two letters given a capital.
############################################################################
{
    my ($candidate) = @_;
    my $name = lc $candidate;        # lowercase everything
    $name =~ s/^\s*//;               # remove leading spaces
    $name =~ s/\s*$//;               # remove trailing spaces
    $name =~ s/([a-z])\./$1/;        # remove period after a word
    $name =~ s/[a-z][a-z]/\u$&/;     # capitalize the first letter
    return $name;
}
############################################################################
sub deldot #07/16/02 10:14:AM
# Strip one trailing period, if present: "Gen." becomes "Gen", "Rom."
# becomes "Rom". Periods elsewhere in the string are left alone.
# Returns the modified copy; the caller's value is not changed.
############################################################################
{
    (my $stripped = shift @_) =~ s/\.$//;
    return $stripped;
} ##deldot
############################################################################
sub make_some_links # 08/28/02 10:40:AM
# Interactively make a link for one verse, chapter or bare reference.
#
# Works on the current line in $_, where the marker $m stands immediately
# in front of the text $pattern. Shows the context, proposes a link and
# keeps prompting on STDIN until the user gives a usable answer.
#
# Arguments:
#   $pattern - the text as found: 'vv. 33-34'  'chapter 21'  '12:4-10'
#   $digits  - its numeric part:  '3'          '11-12'       '12:4-10'
#   $type    - kind of reference: 'verse'      'chapt'       'bare'
#   $one, $two, $thr - context lines; only $two and $thr are displayed
#              (the current line is shown from $_).
#
# May change the global defaults $book and $chap (B and C commands) and
# increments $::count_3 for each link created. Dies if STDIN reaches
# end-of-file, because the prompt could never be answered.
############################################################################
{
    my ($pattern, $digits, $type, $one, $two, $thr) = @_;
    $two ||= "";
    $thr ||= "";

    # Don't omit the STDOUT filehandle; SELECT has changed it ...
    print STDOUT "\n\nLast 3 lines of context:\n";
    print STDOUT "$thr\n$two\n$_\n"; # was $one

    PROMPT: while (1) {
        # Rebuilt on every pass because the C command may have changed $chap.
        my $ref = "";
        if    ($type eq "verse") { $ref = "$chap:$digits" }
        elsif ($type eq "chapt") { $ref = "$digits" }
        elsif ($type eq "bare" ) { $ref = "$digits" }

        print STDOUT <<"ENDBLOCK";
Should I link $ql$pattern$qr as $ql$book $ref$qr?
Press Y to accept the link we suggest,
<Enter> to skip it and go to the next item,
L to skip it and all other $type references on this line
B book to change the default bookname,
C num to change the default chapter,
ENDBLOCK
        print STDOUT " Or enter a substitute reference (eg, Acts 4:12): ";
        my $change = <STDIN>;
        # At end-of-file no answer can ever arrive; stop instead of
        # re-prompting forever.
        die "\nUnexpected end of input while waiting for an answer.\n"
            unless defined $change;
        chomp $change;

        # If $change is Y, make the hyperlink we suggested.
        # If $change is null, do not hyperlink at all.
        # If $change is L, skip the rest of this line.
        # If $change is B str set a new default bookname.
        # If $change is C str set a new default chapter.
        # If $change is N, assume that he didn't read.
        # If $change is a valid Bible reference, make the link.
        # If $change is anything else, issue error message and
        # return the user to the prompt above.
        #
        # $pattern is literal text taken from the line, so it is quoted with
        # \Q...\E wherever it is used on the matching side of s///.
        if ( $change =~ /^[yY]$/ ) {
            # Make the hyperlink we suggested
            print STDOUT "! DIAG 60: y, make the link\n" if $opt_d;
            $ref =~ s/\s+/+/g; # replace spaces in ref with literal plus
            s!$m\Q$pattern\E!$link\{{$book\+$ref}}">$pattern</a>$m!;
            $::count_3++;
            move_the_marker($type);
            update_line_one();
            last PROMPT;
        } elsif ( $change eq "" ) {
            # Do not hyperlink at all,
            # but first confirm that the user wants to skip this reference.
            # 'N, I made a mistake' sends the user back to the prompt.
            next PROMPT unless confirm_skip("$pattern");
            # User entered 'Y, I'm sure I want to skip' when prompted to confirm
            s/$m(\Q$pattern\E)/$1$m/;  # moves marker to end of this pattern
            move_the_marker($type);    # moves marker to beginning of next pattern
            last PROMPT;
        } elsif ( $change =~ /^[lL]$/ ) {
            # Skip the rest of this line, but ask for confirmation first.
            # 'N, I made a mistake' sends the user back to the prompt.
            next PROMPT unless confirm_skip2($pattern);
            # User entered 'Y, I'm sure I want to skip' when prompted to confirm
            # but note: this only skips the rest of the category (verse/chapter),
            # but does not absolutely skip of the rest of the line.
            s/$m//g; # This action will exit the caller's while() loop
            last PROMPT;
        } elsif ( $change =~ /^[bB]\s+(\w.+)$/ ) {
            # Set a new default bookname
            my $testname = fixbookname($1);
            if (exists $books{$testname}) {
                $book = $books{$testname}; # The bookname is valid. Change default book.
                print STDOUT " Default book is now $book\n";
            } else {
                # Whatever the user entered is not a valid book of the Bible.
                print STDOUT "\a\a"; # beep twice (\a for 'alert')
                print STDOUT
                " $ql$testname$qr is not a valid name/abbreviation for a book of the Bible.\n";
            }
            next PROMPT;
        } elsif ( $change =~ /^[cC]\s+($n)$/ ) {
            # Set a new default chapter
            my $testchap = $1;
            if ($testchap <= 150) {
                $chap = $testchap;
                print STDOUT " Default chapter is now $chap\n";
            } else {
                # The number the user entered is greater than 150
                print STDOUT "\a\a"; # beep twice (\a for 'alert')
                print STDOUT " The number $testchap is too large.\n";
            }
            next PROMPT;
        } elsif ( $change =~ /^[nN]$/ ) {
            # The user entered N in response to: Should I link "$pattern" as "$book $ref" ?
            # Though N is not a valid response, we assume he wants to choose another reference.
            print STDOUT
            "\a\n---If the link we suggest is wrong, type in the replacement yourself.---\n";
            next PROMPT;
        } elsif ( reference_is_valid($change) ) {
            # Create the link
            $change = fixbookname($change);
            print STDOUT "! DIAG 63: after fixbookname: /$change/\n" if $opt_d;
            $change =~ s/^(([123]\s?)?[A-Z][a-z]+)\.?/$books{$1}/e; # expand the book names
            $change =~ s/\s+/+/g; # replace spaces with '+'
            s!$m\Q$pattern\E!$link\{{$change}}">$pattern</a>$m!;
            print STDOUT "! DIAG 64: y, ref is valid, change link\n" if $opt_d;
            $::count_3++;
            move_the_marker($type);
            update_line_one();
            last PROMPT;
        } else {
            # Issue an error message and return the user to the prompt above.
            print STDOUT << "ENDERR";
\a
=======================================================================
>>> $ql$change$qr is not a valid Bible reference.
>>> Please try again.
ENDERR
            next PROMPT;
        } # end of multi-way test
    } # end PROMPT loop
    return;
} ## make_some_links
############################################################################
sub move_the_marker # Wed, 04 Sep 2002 16:26:39 -0500
# Move the marker forward to the next verse, chapt,
# bare reference or hash reference. Works on $_.
#
# Argument: the kind of reference being processed, one of the strings
# "verse", "chapt", "bare" or "hash".
#
# NOTE(review): the text of this subroutine is truncated in this copy of
# the file. The substitution in the "verse" branch breaks off mid-pattern,
# and the lines that follow it (reading a reply and returning 1 for "Y",
# otherwise 0) end with the closing comment of confirm_skip, so they are
# presumably the tail of that separate subroutine. Restore both from a
# clean copy before changing any code here.
############################################################################
{
my $typec = $_[0]; # strings: "verse chapt bare hash"
# At this point, the marker is ALREADY positioned after the last match.
# This subroutine moves the marker forward to the next match, or else
# deletes the marker if no match can be found.
if ($typec eq "verse") {
# If the first s/// cannot match, the 2nd one will be executed.
# Resolves to "either move the marker or delete the marker"
s/$m(.*?)(?);
if ($inp =~ m/^Y$/i ) {
return 1;
} else {
return 0;
}
} ##confirm_skip
############################################################################
sub confirm_skip2 #08/30/02
# Ask the user to confirm skipping the reference $_[0] together with the
# rest of the current line. The question goes to STDOUT explicitly (the
# selected handle may be the output file) and the reply is read from STDIN.
# Returns 1 if the reply is a single "Y" or "y", otherwise 0; end-of-file
# on STDIN counts as "no".
############################################################################
{
    my $ref = $_[0];
    print STDOUT "\nYou responded that you want to skip \"$ref\", and\n";
    print STDOUT "all the rest of this line. Is this correct? (Y/N) ";
    my $inp = <STDIN>;
    return 0 unless defined $inp;   # nothing left to read
    chomp $inp;
    return ( $inp =~ m/^Y$/i ) ? 1 : 0;
} ##confirm_skip2
############################################################################
sub reference_is_valid # FUNCTION written on 8/28/2002 2:25PM
# Decide whether a string typed by the user is a usable Bible reference
# such as "Mark 8:34-36". The string is first normalised by fixbookname.
# Returns 1 only when it has the shape "book chapter[:verses...]" AND the
# book part (minus any trailing period) is a key of %books; returns 0
# in every other case.
############################################################################
{
    my ($candidate) = @_;
    my $ref = fixbookname($candidate);
    # Do not put a '/' in any comment, since it's the m/// delimiter!
    my $looks_possible = ( $ref =~ m/
( # Group 1 - the full book = $bk
(?:[123]\s?)? # 1 Sam, 2 Tim, etc. noncap parens
[A-Z][a-z]+\.? # Gen, Ex., Lev., Numb., etc.
) # End Group 1
\s? # space - THE ONE DIFFERENCE BETWEEN THE
# - PROCESS SUB AND THIS ONE: NO $M !
( # Group 2 - chapt and verses = $cv
$n # Min requirement: chapter number
(?:[-:]$n[abc]? # opt verse or "12b" noncap parens
(?:-$n # opt new chapt noncap parens
(:$n[abc]?)? # opt new verse Group 3
)? # end opt chapt
)? # end opt verse
(?:[,;]\s$n # opt add-on chapter noncap parens
(?! \s[A-Z][a-z]) # Do NOT match on "; 1 Pe" or the like
(?:[-:]$n # opt 2d verse noncap parens
(?:-$n)? # opt chapt span noncap parens
)? # end opt 2d verse
)* # end opt add-on chapters
| # OR
$n # Min requirement: chapter number
(?:-$n)? # opt chap span noncap parens
(?:[;,]\s$n # opt 2nd chap noncap parens
(?:-$n)? # opt 2nd chap span noncap parens
)? # end opt 2nd chap
) # End Group 2
(\s # Group 4: translation = $xlat
(N?(KJV|ASB|ASV|LT|RSV)) # Group 5, Group 6
)? # End Group 4
/gx );

    unless ($looks_possible) {
        # The reference pattern doesn't even look possible. In other words,
        # to get here, the pattern looks like "blurk" or "23Skidoo".
        print STDOUT "! DIAG 22: bookref /$ref/ doesn't look valid\n" if $opt_d;
        return 0;
    }

    # We now have a POTENTIAL match. Check the book part against the hash.
    my $bk = $1;
    $bk = deldot($bk); # delete any trailing period
    unless ( exists($books{$bk}) ) {
        # What looked like a Bible book was invalid, eg, Moroni 9:10
        print STDOUT "! DIAG 21: bookref /$ref/, \$bk /$bk/ looks valid but isn't.\n" if $opt_d;
        return 0;
    }
    print STDOUT "! DIAG 20: book ref /$ref/ is valid\n" if $opt_d;
    return 1;
} ## reference_is_valid
############################################################################
sub update_line_one # 08/28/02
# Rebuilds the package variable $this from the current line in $_: the
# text of $_ in front of its first colon, then ": ", then all of $_.
# NOTE(review): this writes the package variable $this (declared with
# 'our'), not a 'my' variable of the same name -- confirm which is intended.
############################################################################
{
    our $this;
    my ($prefix) = split(/:/, $_, 2);   # text before the first colon
    $this = "$prefix: $_";
} ##update_line_one
############################################################################
sub liststats #07/22/02 11:00:AM
# Print the closing summary on STDERR: how many files were processed
# ($count_1) and how many were skipped ($count_2). A line is printed only
# for a count above zero, with "file" pluralised as needed.
############################################################################
{
    if ( $count_1 > 0 ) {
        my $plural = $count_1 == 1 ? "" : "s";
        print STDERR " \nBiblelink:\n $count_1 file";
        print STDERR $plural . " successfully processed.\n";
    }
    if ( $count_2 > 0 ) {
        my $plural = $count_2 == 1 ? "" : "s";
        print STDERR " $count_2 file" . $plural . " skipped.\n";
    }
} ##liststats
############################################################################
sub option_conflict #07/22/02 11:16:AM
# Explain that -s and -p accept at most one input file, then stop.
# Takes no arguments and never returns: the process exits with status 1
# so that calling scripts can detect the failure.
############################################################################
{
    print STDOUT <<"CONFLICT";
\a\a
>>> ERROR! <<<
When using the -s switch to send output to stdout, no more than
one filename should be passed on the command line (if no file is
present, input should come from stdin). The -s is mainly intended
for diagnostics before changing a file directly. To send multiple
files to the screen (not changing them on disk), use a
for..in..do command from the DOS/CMD prompt, e.g.,
for %%v in ( *.* ) do biblelink -s %%v
For more help, type "biblelink -h" .
CONFLICT
    exit 1;
} ##option_conflict
############################################################################
sub bad_book #07/22/02 4:12:PM
# Report that the value given to -b (shown from $opt_b) is not a known
# book name or abbreviation, then stop. Takes no arguments and never
# returns: the process exits with status 1 so callers can detect the error.
############################################################################
{
    print STDOUT <<"BADBOOK";
\a\a
>>> ERROR! <<<
The -b switch must be followed by a valid name or abbreviation
of one of the books of the Bible. For example, -b 2Jn is okay.
You followed it with the string <<$opt_b>>, which
is not a valid book name. Please try again.
BADBOOK
    exit 1;
} ##bad_book
############################################################################
sub bad_chap #07/22/02 4:46:PM
# Report that the value given to -c (shown from $opt_c) is not a valid
# chapter number, then stop. Takes no arguments and never returns: the
# process exits with status 1 so callers can detect the error.
############################################################################
{
    print STDOUT <<"BADCHAP";
\a\a
>>> ERROR! <<<
The -c (chapter) switch must be followed by a valid chapter
number for a Bible book. No chapter can be higher than 150.
For example, "-c 12" sets the default chapter to 12. You
used <<$opt_c>>, which is not a valid chapter number.
Please try again.
BADCHAP
    exit 1;
} ##bad_chap
############################################################################
sub bad_names # Fri, 06 Sep 2002 18:21:28 -0500
# args given, but none were filenames
# Echoes the original command tail (@tempcopy) and stops. Takes no
# arguments and never returns: the process exits with status 1 so callers
# can detect the error.
############################################################################
{
    print STDOUT "\a\a\a\nERROR! No valid filenames found on the command line!\n\n";
    print STDOUT << "BADNAMES";
The string issued on the command line was: @tempcopy
At least one of these tokens must resolve to a valid filename
for the program to execute. None of them match any files in the
current directory. (Type DIR /W at a command prompt to check.)
---------------------------------------------------------------
BADNAMES
    exit 1;
} ##bad_names
############################################################################
sub no_args # 07/19/02 3:57:PM
# Short usage screen, shown when the script is started without arguments.
# The text is assembled first (interpolating $version and $verdate), then
# written to STDOUT, after which the script exits. Never returns.
############################################################################
{
    my $usage = <<"SYNTAX";
\a\aBIBLELINK (v$version) - Create hyperlinks to Bible references on input files.
USAGE:
biblelink [-options] file1*.* [file2*.php file3* ...]
Option switches begin with a hyphen. File specs may be ambiguous (using
'*' or "?" is okay), but there must be AT LEAST ONE FILENAME on the
command line. Files are changed in-place under the same name, and the
unchanged versions are saved as "filename.typ.BAK" . Items in square
brackets [...] are optional; don't include the brackets when using the
option switches.
OPTIONS:
-a # automatically create verse/chapter links (non-interactive)
-b Eph. # set default book to Ephesians (default is Genesis)
-c 5 # set default chapter to chapt. 5
-l # change lowercase L to 1, where it should be a numeral
-m # make links, even though file has links already
-s # send output to stdout. Input from stdin or from one filename.
-p # same as s, but page output thru MORE, LESS, or %PAGER% env var.
-h # Display more complete help
-d # Diagnostic helps to STDOUT (used for debugging)
Copyleft 2002 by Eric Pement. Type "biblelink -h" for distribution terms.
Last revised: $verdate
SYNTAX
    print STDOUT $usage;
    exit;
} ##no_args
# ------------------------------------------------------------------------
# End of subroutines. Perl POD (Plain Old Documentation) goes here.
=head1 NAME
biblelink (v1.28) - Create hyperlinks to Bible references on input
files
=head1 USAGE
GENERAL: biblelink [-options] file1*.* [file2*.htm* file3.as* ...]
EXAMPLE: biblelink -b Galatians u1*.php v2*.*
biblelink -b John -c 15 -p somefile.htm
=head1 OPTIONS
Options may be clustered (e.g., "-sb Deut.").
=over 4
=item B<-a> (automatic links)
Instead of prompting the user for "chapter" and "verse" links, create
them automatically without prompting. This may result in some false
matches, which is why it is off by default. Bare references like
"cf. 14:6" are always interactive.
=item B<-b> "1 Samuel"
Change default book to 1 Samuel (default is Genesis). Abbreviations
and lowercase are okay. E.g., "1sa" or "1 sam" are recognized.
=item B<-c> 12
Change default chapter to 12 (default is 1).
=item B<-d> (debug)
Print script debug info to stdout to track down script problems.
=item B<-h> (help)
Issue help message in perldoc, and exit. Cancels all other options.
=item B<-l> (change l to 1)
Silently changes occurrences of letter 'l' (lowercase L) where the
number 1 (one) was expected. Sometimes needed where input files
come from OCR scanning.
=item B<-m> (make links anyway)
The default behavior is to skip the file if BibleLinks are detected,
since this might re-link (or double-link) some Bible references. If
the -m switch is used, this overrides the default and processes the
file normally. Use this switch when a few links are already coded
in the input file. NB: Double-linking MAY occur with this switch!
=item B<-p> (pause)
Send output to the screen, pausing each screenful. Pipes the output
to the pager specified by the environment variable %PAGER, or to LIST
(if 4DOS or 4NT are active), or to LESS or MORE (in that order). This
option does not alter the input file. When -p is used, only one
filename may be given on the command line.
=item B<-s> (stdout)
Send output to standard output device (the screen), without paging.
This option does not alter the input file. When -s is used, only one
filename may be passed on the command line. If no filename is passed
on the command line, input comes from stdin (usually, piped input).
=back
=head1 DESCRIPTION
Given a list of input files on the command line (ambiguous filespecs
okay), BIBLELINK first checks to see that the list of files is valid.
Files named *.BAK are skipped. The file is changed in-place with the same
filename. The original file is saved as "sourcefile.type.BAK". If a file
does not exist, such as a command line like:
biblelink *.htm ooops!-a-typo.htm
the mistaken word or filename is echoed to STDERR (the screen) with a
beep, while the other files are modified. Files with the following file
extensions are always skipped (case-insensitive match), even if they are
explicitly listed on the command line:
BAK EXE DLL LNK COM SYS BAT PL JS DOC PDF RTF JPG PNG GIF SWF FLA ZIP TAR GZ
Wherever a valid Bible reference occurs (e.g., Rom 6:10, 23), the entire
reference is changed to an HTML hyperlink of the following form:
Rom 6:10, 23
All standard abbreviations of books of the Bible are recognized. A
period is ignored when making the link, but is retained in the text.
To make the finished file work in the HTML output files, insert the
following script in the HEAD section of the HTML document:
SILENT CORRECTIONS: Roman numerals (I Tim., II Cor.) before books of the
Bible are automatically changed to digits. Many of our input files are
generated by Microsoft Word ("Save As Web Page" or "Save As HTML"), which
does not convert the en dash to a hyphen, resulting in "Gen. 1:26?28"
instead of "Gen. 1:26-28" as expected. Any question mark between two
digits ("2?3" or "8?4") is silently changed to a hyphen.
BIBLELINK uses a default book (Genesis) and default chapter (1). These are
used because BIBLELINK will also generate links for all these:
verse 14 vv. 19-22 (cf. 22:8-10) (10:9-10) chapters 2-3
The default book or chapter may be changed from the command line via an
option switch (-b or -c, followed by a string), or in the middle of a match
at a user prompt. The default chapter is also automatically changed each
time it finds a particular subheading declaration, described next.
SUBHEADING DEFINITIONS: A large expository document will sometimes
need to change the default chapter several times as it works through
successive portions of a book. A single input file is usually broken
into sections, each section dealing with a certain chapter or set of
chapters in one of the books of the Bible. At Moody, we use stylesheet
"classes" to change the appearance of heading titles. The heading title
usually signals when a new chapter is required. For example:
The Word Became Flesh (John 1:1-18)
The Millennial Temple (40-48)
The string "subheading" followed by digits in parentheses forms a
pattern to indicate when the default chapter should be changed. The
first string of digits with parentheses resets the default chapter.
The "subheading" pattern MUST be followed by a closing tag such as
, , . If the closing tag cannot be found, the script
reads one more line to find it. Any new default chapter continues
until the next "class=.subheading" is encountered.
NORMAL MODE OF OPERATION: The script looks for digits (2), digit spans
(2-4), chapt:verse (5:6), with spans or additions (, 7:8-9:10), and
then looks backward for books of the Bible or words like "vv",
"verse", "chapter", etc. Clear matches ("Gen. 3:15") are automatically
linked without user input.
User intervention is requested when matching "vv.", "chapter", etc., since
these usually require reading the context to determine which book and
chapter is intended. However, if the -a switch is used, chapter-and-verse
linking is done automatically, without prompting. If you use the -a
switch, you should use the -b (book) and -c (chapter) switches as well.
Thus, to set the default to John, chapter 10, linking chapter and verse
references automatically, enter this:
perl biblelink -ab John -c 10 John*.htm
This script also links "bare references", which must be specified
item-by-item from the console. A "bare reference" to a passage occurs
without a book name at all. For example:
The construction of 2:13 is significant.
The user is prompted to link "2:13" to something. The last three lines of
context are displayed on screen, and then the user is prompted with a
suggestion, using the default book value:
Should I link "2:13" as "Genesis 2:13"?
Press Y to accept the link we suggest,
to skip it and go to the next item,
L to skip it and all other bare references
on this line,
B book to change the default bookname,
C num to change the default chapter,
Or enter a substitute reference (e.g., Acts 4:12):
Note in particular that 'b luke' will change the default book to
Luke for the rest of the session. Likewise, the user may choose to skip
this particular reference or all remaining references on the line.
If the user enters a substitute reference ("Mark 51:99"), the title is
checked to see if the name of the book is valid, but no further checking
is done to confirm the validity of the chapter or verse.
As currently configured, the script looks for references that roughly
match this regular expression (in the code, it's far more complex):
([1-3] )?[A-Z][a-z]+\.? [1-9]
but skips them if the "word pattern" isn't in its table of books of the
Bible. For example, the string "Romns. 3:10" is not a valid abbreviation
for the book of Romans. The script will not prompt you for a different
spelling, and ALSO will not try to match the bare reference "3:10". As
currently configured, the script does not recognize books of the
Apocrypha, the Qur'an, the Book of Mormon, or other nonbiblical works.
MASTER TRICKS: There is no easy way to save your work halfway through
a file, quit, and resume later where you left off. Pressing Control-C
will abort the script. Any lines processed up to that point will be
saved in a temporary file called "filename.NEW", which you may examine
if you wish. The original file will be unchanged. This could be
construed as a feature, since the script halts if it finds the
"javascript:bible" pattern in the input file, in order to prevent
rematching verses that are already linked (unless the -m switch was
used).
Sometimes while generating links, you may notice a typo in the text.
To flag the error without quitting, select "Enter a substitute
reference." If the verse was James 1:5, enter "Job 99999". This
will be accepted as valid and you will proceed to the next line of
input. When finished, edit the file and search for "99999". This way,
you have marked the spot and can make any corrections necessary.
=head1 BUGS
(1) Ambiguous filenames with embedded spaces are probably supported, but
have never been tested. You shouldn't use spaces in web filenames, anyway.
(2) The -p switch often doesn't work properly. If it doesn't work well for
you, use the -s switch instead and pipe the output through MORE /E:
biblelink -s filename.htm | more /e
=head1 AUTHOR
Written by Eric Pement (eric.pement@moody.edu, pemente@northpark.edu) in
July-September 2002. Home page: http://www.student.northpark.edu/pemente/
This script was prepared for the staff of the Education Technology
Services department of Moody Bible Institute (http://www.moody.edu). It
works for us. If you decide to improve upon it, please let me know.
=head1 COPYRIGHT
Copyleft (c) 2002 by Eric Pement. This program is free software and may be
freely copied and modified under the terms of the GNU General Public
License, version 2. Details at http://www.gnu.org/licenses/gpl.html.
=cut
# Logical end of script - Array begins next
__END__
# Abbreviations of Books of the Bible, as used by
# the Chicago Manual of Style, Society for Biblical Literature (SBL),
# the Online Bible, and other common forms.
#
# Format - abbreviation:complete book name
#
# A complete book name maps to itself, spaces becoming '+' ("1 Samuel:1+Samuel")
# Trailing periods (Gen., Ex.) are ignored in the script.
#
# Uses ':' to split into a hash (an associative array)
# Blank lines and lines beginning with '#' are ignored
# ------------------------------------------------------------------------
Ge:Genesis
Gen:Genesis
Genesis:Genesis
Ex:Exodus
Exod:Exodus
Exodus:Exodus
Le:Leviticus
Lev:Leviticus
Leviticus:Leviticus
Nu:Numbers
Num:Numbers
Numbers:Numbers
De:Deuteronomy
Dt:Deuteronomy
Deut:Deuteronomy
Deuteronomy:Deuteronomy
Jos:Joshua
Josh:Joshua
Joshua:Joshua
Jdg:Judges
Judg:Judges
Judges:Judges
Ru:Ruth
Ruth:Ruth
1Sa:1+Samuel
1Sam:1+Samuel
1 Sa:1+Samuel
1 Sam:1+Samuel
1 Samuel:1+Samuel
2Sa:2+Samuel
2Sam:2+Samuel
2 Sa:2+Samuel
2 Sam:2+Samuel
2 Samuel:2+Samuel
1Ki:1+Kings
1Kgs:1+Kings
1 Ki:1+Kings
1 Kgs:1+Kings
1 Kings:1+Kings
2Ki:2+Kings
2Kgs:2+Kings
2 Ki:2+Kings
2 Kgs:2+Kings
2 Kings:2+Kings
1Ch:1+Chronicles
1Chr:1+Chronicles
1Chron:1+Chronicles
1 Ch:1+Chronicles
1 Chr:1+Chronicles
1 Chron:1+Chronicles
1 Chronicles:1+Chronicles
2Ch:2+Chronicles
2Chr:2+Chronicles
2Chron:2+Chronicles
2 Ch:2+Chronicles
2 Chr:2+Chronicles
2 Chron:2+Chronicles
2 Chronicles:2+Chronicles
Ezr:Ezra
Ezra:Ezra
Ne:Nehemiah
Neh:Nehemiah
Nehemiah:Nehemiah
Es:Esther
Esth:Esther
Esther:Esther
Job:Job
Ps:Psalms
Pss:Psalms
Psalm:Psalms
Psalms:Psalms
Pr:Proverbs
Prov:Proverbs
Proverbs:Proverbs
Ec:Ecclesiastes
Ecc:Ecclesiastes
Eccl:Ecclesiastes
Qoh:Ecclesiastes
Ecclesiastes:Ecclesiastes
So:Song+of+Solomon
Ss:Song+of+Solomon
Song:Song+of+Solomon
Sos:Song+of+Solomon
Cant:Song+of+Solomon
Song of Songs:Song+of+Solomon
Song of Solomon:Song+of+Solomon
Isa:Isaiah
Isaiah:Isaiah
Jer:Jeremiah
Jeremiah:Jeremiah
La:Lamentations
Lam:Lamentations
Lamentations:Lamentations
Eze:Ezekiel
Ezek:Ezekiel
Ezekiel:Ezekiel
Da:Daniel
Dan:Daniel
Daniel:Daniel
Ho:Hosea
Hos:Hosea
Hosea:Hosea
Joel:Joel
Am:Amos
Amos:Amos
Ob:Obadiah
Obad:Obadiah
Obadiah:Obadiah
Jon:Jonah
Jonah:Jonah
Mic:Micah
Micah:Micah
Na:Nahum
Nah:Nahum
Nahum:Nahum
Hab:Habakkuk
Habakkuk:Habakkuk
Zep:Zephaniah
Zeph:Zephaniah
Zephaniah:Zephaniah
Hag:Haggai
Haggai:Haggai
Zec:Zechariah
Zech:Zechariah
Zechariah:Zechariah
Mal:Malachi
Malachi:Malachi
#
# --- cosmetic break between OT and NT books ----
#
Mt:Matthew
Matt:Matthew
Matthew:Matthew
Mk:Mark
Mark:Mark
Lu:Luke
Lk:Luke
Luke:Luke
Jn:John
Joh:John
John:John
Ac:Acts
Acts:Acts
Ro:Romans
Rom:Romans
Romans:Romans
1Co:1+Corinthians
1Cor:1+Corinthians
1Corinth:1+Corinthians
1 Co:1+Corinthians
1 Cor:1+Corinthians
1 Corinth:1+Corinthians
1 Corinthians:1+Corinthians
2Co:2+Corinthians
2Cor:2+Corinthians
2Corinth:2+Corinthians
2 Co:2+Corinthians
2 Cor:2+Corinthians
2 Corinth:2+Corinthians
2 Corinthians:2+Corinthians
Ga:Galatians
Gal:Galatians
Galatians:Galatians
Ep:Ephesians
Eph:Ephesians
Ephesians:Ephesians
Php:Philippians
Phil:Philippians
Philippians:Philippians
Co:Colossians
Col:Colossians
Colossians:Colossians
1Th:1+Thessalonians
1Thess:1+Thessalonians
1 Th:1+Thessalonians
1 Thess:1+Thessalonians
1 Thessalonians:1+Thessalonians
2Th:2+Thessalonians
2Thess:2+Thessalonians
2 Th:2+Thessalonians
2 Thess:2+Thessalonians
2 Thessalonians:2+Thessalonians
1Ti:1+Timothy
1Tim:1+Timothy
1 Ti:1+Timothy
1 Tim:1+Timothy
1 Timothy:1+Timothy
2Ti:2+Timothy
2Tim:2+Timothy
2 Ti:2+Timothy
2 Tim:2+Timothy
2 Timothy:2+Timothy
Tit:Titus
Titus:Titus
Phm:Philemon
Philem:Philemon
Philemon:Philemon
Heb:Hebrews
Hebrews:Hebrews
Jas:James
James:James
1Pe:1+Peter
1Pet:1+Peter
1 Pe:1+Peter
1 Pet:1+Peter
1 Peter:1+Peter
2Pe:2+Peter
2Pet:2+Peter
2 Pe:2+Peter
2 Pet:2+Peter
2 Peter:2+Peter
1Jn:1+John
1 Jn:1+John
1 John:1+John
2Jn:2+John
2 Jn:2+John
2 John:2+John
3Jn:3+John
3 Jn:3+John
3 John:3+John
Jude:Jude
Re:Revelation
Rev:Revelation
Revelation:Revelation
#
#--- true end of script ---