#!/usr/local/bin/perl -w use strict; # Where Are They Coming From? # A Small but Useful (tm) perl script to see how people got here. # Truly awful, but what the hell. Version 1.0 -- Ph1rs+ r3l35s3! # December 18, 2004 # Copyright 2004 Hugh Brown # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, # Boston, MA 02111-1307, USA. # $Id: referers.cgi,v 1.5 2004/12/19 01:55:02 aardvark Exp aardvark $ # # $Log: referers.cgi,v $ # Revision 1.5 2004/12/19 01:55:02 aardvark # First release! Also, no longer depends on get_referers.sh. # # Revision 1.4 2004/12/19 01:41:18 aardvark # Add deyahoo section. # # Revision 1.3 2004/01/21 15:16:14 aardvark # *** empty log message *** # # You'll want to customize these two: my $website = "saintaardvarkthecarpeted.com"; my $logfile = "/home/aardvark/logs/aardvark.access.log"; # And maybe check the paths on these. I use gawk and the GNU versions # of grep, sort and uniq; I haven't tested them with other versions (like FreeBSD's, say) # but I don't think I'm using any options that are too weird. my $grep = "/usr/bin/grep"; my $awk = "/usr/bin/awk"; my $sort = "/usr/bin/sort"; my $uniq = "/usr/bin/uniq"; my $date = `tail -1 $logfile | awk '{print \$4}' | awk -F":" '{print \$1}' | tr -d "["`; chomp $date; #grep "$DATE" $LOG | awk '{print $11}' | grep -v '"-"' | grep -v "saintaardvarkthecarpeted.com/" open (IN, "$grep $date $logfile | $awk '{print \$11}' | $grep -v '\"-\"' | $grep -v $website | $sort | $uniq -c | $sort -n|"); print "Content-type: text/html\n\n"; print "Where are they coming from?"; print ""; print "

Where are they coming from?

\n"; print "\n"; while () { chomp; my ($number, $url) = split; print ""; print "\n"; } print "
Number of timesURL
$number"; $url =~ y/"//d; if ($url =~ /google/) { $url = °oogle($url); } elsif ($url =~ /yahoo/) { $url = &deyahoo($url); } print "$url

"; print ""; sub degoogle { my $url = shift; # http://www.google.com/search?q=event-id:1000+%2B+access+denied&hl=no&lr=&ie=UTF-8&oe=UTF-8&start=10&sa=N # http://www.google.com/search?hl=en&lr=&ie=UTF-8&oe=UTF-8&q=install+office+2000+.cab+file+error+1311 # First, canonify the name. Use Google.[country code] if not .com. #$url =~ s#^http://(www\.)?google\.com/#Google:QQQ#; $url =~ s#^http://(www\.)?google(\.|\w+)+?/#Google.$2:QQQ#; #$url =~ s#Google.com:QQQ#Google:QQQ#; # Now the query: $url =~ s#QQQ.*q=(.*?)(&.*|$)#QQQ$1# or warn "I can't do that!\n"; # Is there an offset in the results? #$url =~ s#RRR.*(start=\d+)\&.*#RRR($1)SSS#; # Now pretty it up: $url =~ s/(QQQ|RRR|SSS)/ /g; $url =~ s/%([a-fA-F0-9]{2,2})/chr(hex($1))/eg; $url =~ s/\+/ /g; #$url =~ s/%20/ /g; return $url; } sub deyahoo { my $url = shift; # First, canonify the name. Use Yahoo.[country code] if not .com. #$url =~ s#^http://(search\.)?yahoo\.com/#Yahoo:QQQ#; $url =~ s#^http://(\w+\.)?(search\.)?yahoo.com/#$1Yahoo.com:QQQ#; if ($url !~ /^search\.Yahoo.com/) { $url =~ s/^(\w+)\.Yahoo.com/Yahoo.$1/; } else { $url =~ s/^search.Yahoo.com/Yahoo.com/; } #$url =~ s#Yahoo.com:QQQ#Yahoo:QQQ#; # Now the query: $url =~ s#QQQ.*search\?p=(.*?)(&.*|$)#QQQ$1# or warn "I can't do that!\n"; if ($url =~ m#search/images#) { $url =~ s#QQQ.*search/images\?p=(.*?)(&.*|$)#QQQ$1# or warn "I can't do that!\n"; } #else { #} # Is there an offset in the results? #$url =~ s#RRR.*(start=\d+)\&.*#RRR($1)SSS#; # Now pretty it up: $url =~ s/(QQQ|RRR|SSS)/ /g; $url =~ s/%([a-fA-F0-9]{2,2})/chr(hex($1))/eg; $url =~ s/\+/ /g; #$url =~ s/%20/ /g; if ($url =~ m#search/images#) { $url =~ s#^([\w\.].*?):.*search/images\?p=(.*?)(&.*|$)#$1 image search: $2# or warn "I can't do that!\n"; } return $url; }