Application Source Code

Get Page /cgi-bin/getpage.pl

Return to the notes... - Run the program - View dwd.pm module code - View dwd.conf external config file

#!/usr/local/bin/perl -w
#
#  getpage.pl    (c) Andy Belcher DandyWebDesign 2006
#
#  This program will display a simple form which invites you to enter
#  a URL. On submission the program will GET the requested page and
#  display its source code. Strictly speaking this is not a useful
#  application as it stands; however, it is a precursor to so many other
#  programs that require pages to be fetched across the 'net. Once the
#  source code of a page has been fetched it can be parsed in many ways.
#  The most obvious use is as a spider: all of the links to other pages
#  are located and added to a list to be scanned. The page source code
#  can also be passed to an analyser, perhaps to check the code, build
#  keyword lists and so on; the list of possibilities is endless.
#
######################################################################
use strict;
use CGI qw(:all);
use LWP::UserAgent;
use dwd;

#  INITIALISE PROGRAM ENVIRONMENT
#  %G carries the program-wide values and %arg the CGI form/URL
#  parameters. Both are handed by reference to initprogenv() (not defined
#  in this file; presumably exported by dwd) together with the list of
#  parameter names this program uses.
my (%G, %arg);
my @parameters = ('requrl');
initprogenv(\%G, \%arg, \@parameters);

#  DEFINE PROGRAM SPECIFIC VALUES
#  The page title is appended to the existing $G{'title'}, which is
#  expected to be set already (presumably by initprogenv -- confirm).
$G{'pagetitle'} = "Get Page";
$G{'title'}     = $G{'title'} . " " . $G{'pagetitle'};

#  Link targets used by the navigation, plus the (initially empty)
#  URL that will be requested.
@G{qw(returnurl sourceurl pmsourceurl requrl)} = (
    "/tech/web/webgetpage.shtml",
    "/tech/web/getpage_source.html",
    "/tech/web/dwdpm_source.html",
    "",
);

#  HTML fragments printed after the form; they stay empty unless the
#  GET PAGE step below fills one of them in.
my ($error, $content) = ("", "");
#  GET PAGE
#  When a URL was submitted, copy it into %G and ask useragent_fetchpage()
#  to fetch it. On success $content receives the page source as an HTML
#  listing; on failure $error receives an error paragraph. When no URL
#  was submitted, the form field is pre-filled with a default address.
#
#  NOTE(review): the submitted URL is passed on without any validation
#  here. Confirm that useragent_fetchpage() restricts what may be fetched
#  (e.g. rejects file: URLs and internal-only hosts).
if ($arg{'requrl'}) {
    $G{'requrl'} = $arg{'requrl'};
    if (useragent_fetchpage(\%G)) {
        #  dehtml_string() presumably makes the markup displayable as
        #  text (confirm in dwd.pm). Each run of CR/LF characters then
        #  becomes one newline followed by <br /> so the listing keeps
        #  its line breaks in the browser.
        my $listing = dehtml_string($G{'uacontent'});
        $listing =~ s{[\r\n]+}{\n<br />}g;
        $content = "<h4>Returned page content...</h4>\n"
                 . "<div class=\"codelisting\">$listing</div><!--codelisting-->\n";
    }
    else {
        #  The error text results from fetching a user-supplied URL and
        #  may echo parts of it, so escape it before placing it in the
        #  page. NOTE(review): assumes $G{'uaerror'} is plain text; if
        #  dwd already HTML-escapes it this will double-escape -- confirm.
        my $reason = escapeHTML($G{'uaerror'});
        $error = "<p class=\"loud\"><strong>ERROR!</strong> $reason</p>";
    }
}
else {
    $arg{'requrl'} = "http://dwd/tech/web/web_index.shtml";
}

#  OUTPUT PAGE
#  Print the top of the page, the URL entry form, then whatever error
#  message or page listing the GET PAGE step produced, and finally the
#  page bottom (do_bottom also ends the program).
do_topsection();

#  The URL comes straight from the request, so it must be HTML-escaped
#  before being echoed back into the value="..." attribute; otherwise a
#  double quote or angle bracket in the input could break out of the
#  attribute and inject markup or script into the page.
my $requrl_html = escapeHTML($arg{'requrl'});

print<<HERE;
<div class="main">
<p>Enter a valid URL below...</p>
<p><form method="post" action="$G{'progname'}">
<input type="text" name="requrl" size="50" value="$requrl_html" />
<br /><input type="submit" value="Get the page..." class="button"
  /></form></p>
</div><!--main-->
$error
$content
HERE
do_bottom();
#  END PROGRAM
#
#  SUB-ROUTINES
######################################################################
#  Start the page: call do_std_head(), close the <head> and open the
#  <body>, then call do_std_header(). Both helpers receive a reference
#  to %G. Takes no arguments.
sub do_top {
    do_std_head(\%G);
    print "</head>\n<body>\n";
    do_std_header(\%G);
}

######################################################################
#  Finish the page by calling do_std_bottom() with $G{'footerssi'}, then
#  end the program. Takes no arguments and never returns to the caller.
sub do_bottom {
    do_std_bottom($G{'footerssi'});
    exit;
}

######################################################################
#  Print everything that precedes the main content: the page top (via
#  do_top), the path bar, the right-hand "Application Links" bar built
#  from the URLs stored in %G, and the page heading. Takes no arguments.
sub do_topsection {
    #  Called with explicit parentheses: the bare "&do_top;" form would
    #  pass this sub's own @_ through to do_top implicitly.
    do_top();
    print<<HERE;
<div id="pathbar"><script type="text/javascript">drawPathBar(
  'tech','/tech/tech.shtml','To the Technical pages index...',
  'web applications','/tech/web/web_index.shtml','To the web applications index...',
  'program notes','$G{'returnurl'}','The accompanying notes for this program...',
  'getpage.pl');//</script></div><!--pathbar-->
<div id="content">
<div id="rightbar"><dl class="nbar">
<dt>Application Links</dt>
  <dd><a href="$G{'returnurl'}"
    title="Return to the documentation for this application..."
    >Program Notes</a></dd>
  <dd><a href="$G{'sourceurl'}"
    title="View the source code for this application..."
    >source code</a></dd>
  <dd><a href="$G{'pmsourceurl'}"
    title="View the perl module source code..."
    >dwd.pm source</a></dd>
</dl></div><!--rightbar-->
<h2>$G{'pagetitle'}</h2>
HERE
}

######################################################################
#  EOF