Return to the notes... - Run the program - View dwd.pm module code - View dwd.conf external config file
#!/usr/local/bin/perl -w
#
# getpage.pl (c) Andy Belcher DandyWebDesign 2006
#
# This program will display a simple form which invites you to enter
# a URL. On submission the program will GET the requested page and
# display its source code. Strictly speaking this is not a useful
# application as it stands, however it is a precursor to so many other
# programs that require pages to be fetched across the 'net. Once the
# source code of a page has been fetched it can be parsed in many ways.
# The most obvious use is as a spider: all of the links to other pages
# are located and added to a list to be scanned. The page source code
# can also be passed to an analyser, perhaps to check the code or build
# keyword lists; the list of possibilities is endless.
#
######################################################################
use strict;
use CGI qw(:all);
use LWP::UserAgent;
use dwd;
# INITIALISE PROGRAM ENVIRONMENT
# %G and %arg are handed to initprogenv() (from the dwd module) together
# with the list of CGI parameter names this program accepts.
my %G; # Stores global values within the program.
my %arg; # Stores the CGI form/URL parameters
my @parameters = qw(requrl);
initprogenv(\%G, \%arg, \@parameters);

# DEFINE PROGRAM SPECIFIC VALUES
$G{'pagetitle'}   = "Get Page";
$G{'title'}       = "$G{'title'} $G{'pagetitle'}";
$G{'returnurl'}   = "/tech/web/webgetpage.shtml";
$G{'sourceurl'}   = "/tech/web/getpage_source.html";
$G{'pmsourceurl'} = "/tech/web/dwdpm_source.html";
$G{'requrl'}      = "";
my $error   = "";
my $content = "";

# GET PAGE
# When a URL was submitted, fetch it and turn the returned source into a
# displayable listing; otherwise pre-fill the form with a default URL.
# NOTE(review): the URL is fetched exactly as supplied by the user, so this
# script will request any address the server can reach - consider
# restricting schemes/hosts before exposing it publicly.
if ($arg{'requrl'}) {
    $G{'requrl'} = $arg{'requrl'};
    if (useragent_fetchpage(\%G)) {
        $content = dehtml_string($G{'uacontent'});
        # Each run of CR/LF characters becomes a single HTML line break.
        $content =~ s{[\r\n]+}{\n<br />}g;
        $content = "<div class=\"codelisting\">$content</div><!--codelisting-->\n";
        $content = "<h4>Returned page content...</h4>\n$content";
    }
    else {
        # NOTE(review): $G{'uaerror'} is emitted as-is; confirm in dwd.pm
        # that it cannot carry attacker-controlled markup.
        $error = "<p class=\"loud\"><strong>ERROR!</strong> $G{'uaerror'}</p>";
    }
}
else {
    $arg{'requrl'} = "http://dwd/tech/web/web_index.shtml";
}

# The requested URL is untrusted input that is echoed back inside an HTML
# attribute, so it must be entity-escaped to prevent markup injection.
# escapeHTML() is provided by "use CGI qw(:all)".
# NOTE(review): assumes initprogenv() does not already HTML-escape the
# parameters - verify against dwd.pm to avoid double encoding.
my $requrl_html = escapeHTML($arg{'requrl'});

# OUTPUT PAGE
do_topsection();
print<<HERE;
<div class="main">
<p>Enter a valid URL below...</p>
<p><form method="post" action="$G{'progname'}">
<input type="text" name="requrl" size="50" value="$requrl_html" />
<br /><input type="submit" value="Get the page..." class="button"
/></form></p>
</div><!--main-->
$error
$content
HERE
do_bottom();
# END PROGRAM
#
# SUB-ROUTINES
######################################################################
# do_top: start the HTML page. Calls do_std_head() with the global
# settings, prints the tags that close the head and open the body, then
# calls do_std_header(). Both helpers are defined outside this file.
sub do_top
{
    do_std_head(\%G);
    print "</head>\n<body>\n";
    do_std_header(\%G);
}
######################################################################
# do_bottom: finish the page by passing $G{'footerssi'} to
# do_std_bottom() (defined outside this file), then terminate the
# program. This sub never returns to its caller.
sub do_bottom
{
    do_std_bottom($G{'footerssi'});
    exit;
}
######################################################################
# do_topsection: emit everything that precedes the page's main content.
# Calls do_top() to start the page, then prints the path bar, the
# right-hand "Application Links" list and the page heading. The link
# targets and heading come from %G (returnurl, sourceurl, pmsourceurl,
# pagetitle), which the main program sets before calling this sub.
sub do_topsection
{
    # Called with explicit parentheses so the caller's @_ is not
    # implicitly shared with do_top(), as the "&do_top;" form would do.
    do_top();
    print<<HERE;
<div id="pathbar"><script type="text/javascript">drawPathBar(
'tech','/tech/tech.shtml','To the Technical pages index...',
'web applications','/tech/web/web_index.shtml','To the web applications index...',
'program notes','$G{'returnurl'}','The accompanying notes for this program...',
'getpage.pl');//</script></div><!--pathbar-->
<div id="content">
<div id="rightbar"><dl class="nbar">
<dt>Application Links</dt>
<dd><a href="$G{'returnurl'}"
title="Return to the documentation for this application..."
>Program Notes</a></dd>
<dd><a href="$G{'sourceurl'}"
title="View the source code for this application..."
>source code</a></dd>
<dd><a href="$G{'pmsourceurl'}"
title="View the perl module source code..."
>dwd.pm source</a></dd>
</dl></div><!--rightbar-->
<h2>$G{'pagetitle'}</h2>
HERE
}
######################################################################
# EOF