# Main routine: read test.html and report the closing tag found on each line.
#
# The file is opened with the three-argument form of open and a lexical
# handle, and the script stops with a clear message when the file cannot be
# opened (previously a missing file silently produced no output at all).
open(my $in, '<', 'test.html') or die "cannot open test.html: $!";
@file = <$in>;    # read all lines at once; @file stays a package variable
close $in;

# $line is deliberately not declared with "my": foreach then uses the package
# variable $line, so any code that reads that global while the loop runs
# still sees the current line.
foreach $line (@file) {
    find_closing_tags($line);
}
# end of main
# find_closing_tags($line)
#
# Prints the first closing HTML tag (such as </p>) found in $line, followed
# by a newline. Prints nothing when the line contains no closing tag. Only
# the first closing tag of a line is reported.
#
# The line is taken from the argument list (@_) rather than from a global
# variable, so the subroutine works no matter what the caller's variable is
# called.
sub find_closing_tags {
    my $line = $_[0];
    # Non-greedy .*? stops at the first ">" after "</".
    if ($line =~ m{(</.*?>)}) {
        print "$1\n";
    }
}
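Note: Instead of my $line = $line; it is better to write my $line = $_[0]; because arguments are passed to subroutines in the @_ array. The form my $line = $line; merely copies the global variable $line into the subroutine: it is easier to remember and works in many cases, but it gives the wrong result as soon as the subroutine is called with a value that is not stored in a global variable of that name.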
The main routine sends $file as argument to the subroutine read_file and receives @text from it as return value. The subroutine sends values to the main routine via the "return" function.
Each subroutine should use its own set of variables so that the different subroutines do not interfere with each other. "Global variables" are variables that are used in the main routine and in subroutines. In the previous script $file and @textfile are global variables. But in the subroutines they are renamed to $inputfile, $outputfile and @text. All variables in subroutines should be defined with "my".
Variables declared with "local" are visible in the subroutine and all subroutines that are called from the subroutine.
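If "use strict" is used, undeclared global variables are not permitted: every variable must be declared with "my" or "our", or be written with its full package name. In that case values should be passed to subroutines through the @_ array.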
#!/usr/local/bin/perl -w
############# beginning of main routine ####################
use IO::Socket;
use CGI qw(:standard -debug);
# --- CGI input ---------------------------------------------------------
# Both values come from the request parameters of this CGI script.
my $url     = param('url');
my $keyword = param('keyword');

# --- Processing pipeline -----------------------------------------------
# 1. emit the HTTP header and the opening markup of the result page
start_webpage();

# 2. split the URL into host name and document path
($host, $document) = parse_input( $url );

# 3. fetch the document; the response arrives as a list of lines
@page_content = read_page( $host, $document );

# 4. print the result heading for the keyword followed by the page content
search( $keyword, @page_content );
############## end of main routine ###########################
# start_webpage()
#
# Prints the CGI header returned by header() and then the opening markup of
# the result page, up to and including the "Search Results" heading. Takes
# no arguments.
sub start_webpage {
    # One element per output line of the opening markup.
    my @opening_markup = (
        "<HTML>",
        "<HEAD>",
        "<TITLE>Search Results</TITLE>",
        "</HEAD>",
        "<BODY>",
        "<H3>Search Results</h3>",
    );

    print header();
    # Lines are separated by newlines; nothing is printed after the last one.
    print join("\n", @opening_markup);
}
##############################################################
# parse_input($url)
#
# Splits a URL into its host part and its document path.
#
#   $url - address such as "http://www.example.com/dir/page.html"; the
#          leading "http://" is optional and matched case-insensitively.
#
# Returns the list ($host, $document). $document is "/" when the URL names
# no document. An undefined $url is treated as an empty string.
#
# All working variables are lexical ("my"), so calling this subroutine does
# not overwrite variables of the same name in the main routine.
sub parse_input {
    my ($current_url) = @_;
    $current_url = '' unless defined $current_url;

    # Every part of the pattern is optional, so the match always succeeds:
    # the host is everything up to the first "/", the document is the rest.
    my ($host, $document) = $current_url =~ m{\A(?:http://)?([^/]*)(.*)}i;

    $document = "/" if $document eq "";
    return ($host, $document);
}
########################################################################
# read_page($current_host, $current_doc)
#
# Fetches a document over plain HTTP and returns the response.
#
#   $current_host - host name (or address) of the web server
#   $current_doc  - path of the document on that server, e.g. "/index.html"
#
# Returns the lines received from the server as a list. Nothing is stripped
# from the response, so everything the server sends is included.
#
# Dies with "cannot connect to http daemon on <host>: <reason>" when the
# connection cannot be established. The message names the host that was
# passed in, not a global variable.
sub read_page {
    my ($current_host, $current_doc) = @_;

    my $remote = IO::Socket::INET->new(
        Proto    => "tcp",
        PeerAddr => $current_host,
        # Service name with a numeric fallback, in IO::Socket::INET's
        # "service(port)" notation.
        PeerPort => "http(80)",
    );
    if (!$remote) {
        # IO::Socket::INET reports the reason for the failure in $@.
        die "cannot connect to http daemon on $current_host: $@";
    }
    $remote->autoflush(1);

    # Minimal HTTP/1.0 request; the empty line ends the request header.
    print {$remote} "GET $current_doc HTTP/1.0\r\n";
    print {$remote} "Host: $current_host\r\n\r\n";

    my @output = <$remote>;
    close $remote;
    return @output;
}
##################################################################
# search($term, @text)
#
# Prints the result heading for $term followed by all lines of @text.
# Note that @text is printed unfiltered: no matching against $term is done
# here.
#
#   $term - search term shown in the heading; it originates from user input,
#           so the characters & < > " are replaced by HTML entities before
#           printing. An undefined term is shown as an empty string.
#   @text - lines to print after the heading, printed unchanged
sub search {
    my ($term, @text) = @_;
    $term = '' unless defined $term;

    # Escape the characters that would otherwise be interpreted as markup.
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    (my $safe_term = $term) =~ s/([&<>"])/$entity_for{$1}/g;

    print "<p>The results for $safe_term are:";
    print @text;
}