# Simple Brand Monitoring using Perl
# Proof of concept/sample code for
#  - Phishing Sites
#  - Web References
#
# Usage: perl brandmonitor.pl inifilename
#
# Reads a key=value ini file, runs its "querystring" against Google web
# search (asking for confirmation before every page fetch), compares the
# result URLs with the ones recorded by the previous run, writes an HTML
# report of the new results to <inibase>.out.html and records all current
# results in <inibase>.data for the next run.

use strict;
use warnings;

# Load required modules
use LWP::UserAgent;    # Used for Web Page Query
use Crypt::SSLeay;     # Used for SSL Queries
use URI::Escape;
use Time::localtime;

# NOTE(review): the HTML-tag portions of these three patterns were lost from
# the copy of the script this was restored from. Only the closing fragments
# (`</h2>`, `</a>`, `</div>Next</a>`) and the number of capture groups are
# certain; the opening tags and attributes below are a reconstruction.
# TODO confirm against the markup Google actually returns.
my $RESULT_BLOCK_RE = qr{<h2 class=r>(.*?)</h2>}is;             # one search hit
my $RESULT_LINK_RE  = qr{<a href="(.*?)" class=l>(.*)</a>};     # $1 = URL, $2 = title
my $NEXT_PAGE_RE    = qr{<div id=nn></div>Next</a>};            # "Next" pager link

my $RESULTS_PER_PAGE = 10;

# Timestamp for the report and the data file. The two spaces between date
# and time are kept so the data-file format stays unchanged.
my $tm  = localtime;
my $now = sprintf("%04d-%02d-%02d", $tm->year + 1900, $tm->mon + 1, $tm->mday)
    . " "
    . sprintf(" %02d:%02d:%02d", $tm->hour, $tm->min, $tm->sec);

# Read in the ini file
my $inifile = $ARGV[0];
if (!defined $inifile || $inifile eq '') {
    print "brandmonitor.pl Monitor Google search engine for hits matching a specific string\n";
    print "To use, create an inifile, and then run \"perl brandmonitor.pl inifilename\"\n";
    print "\nSample inifile contents:\n";
    print " querystring=\"KEVIN BONG\"\n smtpserver=localhost.localdomain.com\n mailfrom=webmaster\@petsits.com\n mailto=info\@petsits.com\n sendmail=no\n";
    exit 1;    # nothing to do without an ini file
}

if (!-e $inifile) {
    print "INI File $inifile not found\n";
    exit 1;
}

my %ini;
open my $ini_fh, '<', $inifile
    or die "Could not open INI file $inifile, $!\n";
while (my $line = <$ini_fh>) {
    $line =~ s/\r?\n\z//;
    # Split on the FIRST '=' so values may themselves contain '='.
    if ($line =~ m/^\s*([^=]*?)\s*=(.*)$/) {
        $ini{$1} = $2;
    }
}
close $ini_fh;

# Pick the output file names: the ini path with everything from the first
# dot of its file-name component removed. Dots in directory components
# (e.g. "./brand.ini") are left alone.
my $basename = $inifile;
$basename =~ s{\.[^/\\]*\z}{};
my $datafile   = $basename . ".data";
my $reportfile = $basename . ".out.html";

if (!defined $ini{"querystring"} || $ini{"querystring"} eq '') {
    print("Querystring not found in ini file $inifile\n");
    exit 1;
}
my $searchstring = $ini{"querystring"};

# Load the previous run. Keys are result URLs, plus metadata keys taken from
# "METADATA<key>\t<value>" lines (e.g. LastRunDate).
my %lastrunini;
if (open my $last_fh, '<', $datafile) {    # ok if it fails, no last run
    while (my $line = <$last_fh>) {
        $line =~ s/\r?\n\z//;
        if ($line =~ m/^METADATA([^\t]*)\t(.*)$/) {
            $lastrunini{$1} = $2;
        }
        elsif ($line =~ m/^([^\t]*)\t(.*)$/) {
            $lastrunini{$1} = $2;
        }
    }
    close $last_fh;
}

my $onmoreresultsstart = 0;

# &filter=0 causes it not to omit search results that are similar to those
# already displayed.
my $searchurl = "http://www.google.com/search?q="
    . uri_escape($searchstring)
    . "&filter=0&btnG=Search";

my %searchresultshash;    # every URL found this run => title
my %newresultshash;       # URLs not present in the previous run => title

print "Searching using $searchurl\n";
my $allcontents = geturlcontents($searchurl);
print "Fetching first 10 results\n";

while ($allcontents) {
    my @searchresults = ($allcontents =~ m/$RESULT_BLOCK_RE/g);
    foreach my $resultmatch (@searchresults) {
        if ($resultmatch =~ $RESULT_LINK_RE) {
            my ($url, $title) = ($1, $2);
            $searchresultshash{$url} = $title;
            # exists() so a recorded title of "0" or "" still counts as seen
            if (!exists $lastrunini{$url}) {
                $newresultshash{$url} = $title;
            }
        }
        else {
            throwerror("Match $resultmatch does not match format");
        }
    }

    if ($allcontents =~ $NEXT_PAGE_RE) {
        $onmoreresultsstart += $RESULTS_PER_PAGE;
        print "Fetching results $onmoreresultsstart through "
            . ($onmoreresultsstart + $RESULTS_PER_PAGE) . "\n";
        $allcontents = geturlcontents($searchurl . "&start=" . $onmoreresultsstart);
    }
    else {
        $allcontents = 0;
    }
}

# Write the HTML report. (The sample ini file lists mail settings, but this
# script does not send mail.)
# NOTE(review): the HTML markup inside these strings was also lost; <br> and
# &nbsp; are placed where the surviving text shows line breaks. Titles and
# URLs are written exactly as captured from the result page, i.e. as HTML.
my $lastrundate = defined $lastrunini{"LastRunDate"} ? $lastrunini{"LastRunDate"} : '';

open my $report_fh, '>', $reportfile
    or die "Could not write reportfile $reportfile $!\n";
print {$report_fh} "Perl Brand Monitor Run " . "<br>\n";
print {$report_fh} "Search for:" . escape_html($searchstring) . "<br>\n";
print {$report_fh} "Report Created $now<br>\n";
print {$report_fh} "Last Run on: " . $lastrundate . "<br>\n\n";

my $totalmatches = keys %searchresultshash;
print {$report_fh} "Total Search Results: $totalmatches<br>&nbsp;<br>\n";
$totalmatches = keys %newresultshash;
print {$report_fh} "Total New Search Results: $totalmatches<br>&nbsp;<br>\n";

foreach my $matchurl (sort keys %newresultshash) {
    print {$report_fh} "$newresultshash{$matchurl}<br>" . $matchurl . "<br>&nbsp;<br>\n";
}
close $report_fh
    or die "Could not write reportfile $reportfile $!\n";
print "\nReport complete.  Output saved in $reportfile\n\n";

# Record every result of this run (not only the new ones) so the next run
# can tell which URLs it has already reported.
open my $data_fh, '>', $datafile
    or die "Could not write last run datafile $datafile $!\n";
print {$data_fh} "METADATALastRunDate\t$now\n";
foreach my $matchurl (sort keys %searchresultshash) {
    print {$data_fh} "$matchurl\t" . $searchresultshash{$matchurl} . "\n";
}
close $data_fh
    or die "Could not write last run datafile $datafile $!\n";

exit;

# Report a non-fatal problem on STDERR and carry on.
#   $errmsg - text to show; further arguments are ignored.
sub throwerror {
    my ($errmsg, @rest) = @_;
    warn "$errmsg\n";
    return;
}

# Escape the characters that are special in HTML text.
#   $text - plain text; returns the escaped copy.
sub escape_html {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# Fetch a URL after asking the user for confirmation on STDIN.
#   $url - address to GET; further arguments are ignored.
# Returns the response body on success and 0 on an HTTP failure.
# Exits the program if the answer does not start with "y" (or on EOF).
sub geturlcontents {
    my ($url, @rest) = @_;

    # The agent attribute is the header VALUE; it must not repeat the
    # "User-Agent: " header name.
    my $ua = LWP::UserAgent->new(
        agent => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.8.1.4) Gecko/20070515 Firefox/2.0.0.4',
    );
    my $req = HTTP::Request->new('GET', $url);

    print "\nRetrieve web page $url ?\n(y/n)";
    my $proceed = <STDIN>;
    if (!defined $proceed || $proceed !~ m/^\s*y/i) {
        print "\n\nQuitting\n";
        exit;
    }

    my $res = $ua->request($req);
    if ($res->is_success) {
        return $res->content;
    }

    warn "Request for $url failed: " . $res->status_line . "\n";
    return 0;
}