#!/usr/bin/perl -w
# WebWatchDaemon
# Copyright 2003-2006 John M. Grohol
# Written: December 23, 2003
# Last modified: April 9, 2006
# v1.1
# -------------------------------------
# WebWatchDaemon allows you to not only
# check to see if a Web site is functioning, but to ensure
# that the correct page is online and contains a specific
# piece of content (called the keyphrase). This allows you
# to check not only that your site is operational, but
# that your application is functioning as expected.
#
# Run this on a server that has Internet connectivity as a cron job, every
# X minutes you want to check on your server. Sample crontab line:
#
# 05 * * * * perl /home/user/johnsmith/webwatch.pl
#
# This line would run the script every hour at :05. If your
# watch page is very important to your site,
# you might run it every minute or every 5 minutes.
#
# Please note: You can run this script on your Web server.
# But for more robust applications, you should consider
# running it on a Web server external to your network.
# This allows the script to test not only for content
# changes in the target page, but also network connectivity.
#
# -------------------------------------
# License: This software is copyright (C) 2003-2006 John M. Grohol. It is distributed
# under the terms of the GNU General Public License (GPL). Because it is licensed
# free of charge, there is NO WARRANTY, it is provided AS IS. The author cannot
# be held liable for any damage that might arise from the use of this software.
# Use it at your own risk. See http://www.gnu.org/ for details and more information.
# -------------------------------------
# Requires the following perl modules to be installed:
use strict;
use warnings;
use Fcntl qw(:flock :seek);    # LOCK_EX / SEEK_END constants for the logfile
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;

# Set your watch URL here
my $watch_url = "http://www.yourdomain.com/specific_page_to_be_watched.html";

# Set your keyphrase here. It is matched as literal text, not as a regular
# expression, so punctuation such as . ? ( ) needs no escaping.
my $keyphrase = "This is text that appears on the target watch URL";

# Alert and logging options
my $alert    = 1;    # Set to 1 to turn email alerts on; 0 to turn them off
my $email    = "your\@contact_email_address.com";    # Keep the backslash before the at sign
my $mailprog = '/usr/lib/sendmail';
my $logging  = 0;    # Set to 1 to turn logging on; 0 to turn it off
my $logfile  = "/home/www/logs/www_watch.txt";       # Set to your logfile

# Other variables that may need changing
$ENV{"TZ"} = "EST5EDT";    # Change to reflect your time zone if necessary
my $t = time;              # Time of this check; used for the log timestamp

# -------------- End Configuration --------------------------

# -----------------------------------------------------------
# Main
# -----------------------------------------------------------

# This just sets the program to pretend to be a specific Web browser version
my $br_ver = pick_browser_version();

# Go get the watch content page
my $content = get_watch_page($watch_url, $br_ver);

# We only alert/log failures
if (!page_has_keyphrase($content, $keyphrase)) {

    # If logging is enabled, write the event to the logfile. This is done
    # before mailing so that a broken mail program cannot suppress the entry.
    log_failure($logfile, scalar(localtime($t)), $watch_url) if $logging;

    # If alerts are enabled, send an email to the administrator's contact address
    send_alert($mailprog, $email, $watch_url) if $alert;
}

exit(0);

# ---------------------------------------------
# Subroutine: Pick a pseudo-random MSIE 5.x minor version
#   Returns "5", "01" or "0", which get_watch_page appends to "MSIE 5."
#   to form the User-Agent (5.5, 5.01 or 5.0).
# ---------------------------------------------
sub pick_browser_version {
    my $rand_num = int(rand(50));    # 0..49; Perl seeds rand() automatically

    # Numeric comparisons: the string operators gt/lt would rank "5" above "30".
    return "5"  if $rand_num > 30;
    return "01" if $rand_num < 20;
    return "0";
}

# ---------------------------------------------
# Subroutine: Check whether the page text contains the keyphrase
#   Arguments: page content, keyphrase
#   Returns:   1 if the keyphrase occurs literally in the content, else 0.
#              Undefined or empty content or keyphrase counts as "not found".
# ---------------------------------------------
sub page_has_keyphrase {
    my ($text, $phrase) = @_;

    return 0 unless defined $text   && length $text;
    return 0 unless defined $phrase && length $phrase;

    # index() does a plain substring search, so regex metacharacters in the
    # configured keyphrase cannot change or break the match.
    return index($text, $phrase) >= 0 ? 1 : 0;
}

# ---------------------------------------------
# Subroutine: Mail an alert about the watched page
#   Arguments: path to the mail program, contact address, watched URL
#   Dies if the mail program cannot be started; warns if it exits abnormally.
# ---------------------------------------------
sub send_alert {
    my ($prog, $to, $url) = @_;

    # List-form pipe open runs the mail program directly, without a shell.
    open(my $mail, '|-', $prog, '-t') or die "Can't open $prog!\n";

    print {$mail} "To: $to\n";
    print {$mail} "From: WebWatchDaemon <$to>\n";
    print {$mail} "X-Priority: High\n";
    print {$mail} "Subject: Alert! Content is unreachable\n\n";
    print {$mail} qq~
Hi! I'm the WebWatchDaemon for your Website.
On my most recent watch, I noticed that the following page
did not have the required text returned, suggesting it may
not be available:

   $url

I suggest you look into it at your earliest convenience, thank you!

- WebWatchDaemon
~;

    # close() on a pipe reports the child's exit status.
    close($mail) or warn "$prog did not exit cleanly: $! (status $?)\n";
    return;
}

# ---------------------------------------------
# Subroutine: Append a failure record to the logfile
#   Arguments: logfile path, timestamp string, watched URL
#   Writes one line: timestamp|url|Failure
#   Logging is best-effort: problems are reported with warn, never die.
# ---------------------------------------------
sub log_failure {
    my ($file, $stamp, $url) = @_;

    my $log;
    unless (open($log, '>>', $file)) {
        warn "Can't open logfile $file: $!\n";
        return;
    }

    # Serialize concurrent runs, then re-seek in case another process
    # appended while we were waiting for the lock.
    flock($log, LOCK_EX) or warn "Can't lock logfile $file: $!\n";
    seek($log, 0, SEEK_END);

    print {$log} "$stamp|$url|Failure\n";
    close($log) or warn "Can't close logfile $file: $!\n";
    return;
}

# ---------------------------------------------
# Subroutine: Get the watched content page
#   Arguments: URL to fetch, MSIE 5.x minor version for the User-Agent
#              (defaults to "0" when omitted)
#   Returns:   the response body on a successful (2xx) response; the empty
#              string otherwise, so that an error page which happens to
#              contain the keyphrase is still treated as a failure.
# ---------------------------------------------
sub get_watch_page {
    my ($url, $br_ver) = @_;
    $br_ver = "0" unless defined $br_ver;

    my $ua     = LWP::UserAgent->new();
    my $ag_ver = "Mozilla/4.0 (compatible; MSIE 5." . $br_ver . "; Windows 98)";
    $ua->agent($ag_ver);

    my $req      = HTTP::Request->new(GET => $url);
    my $response = $ua->request($req);

    return '' unless $response->is_success();

    my $body = $response->content();
    return defined $body ? $body : '';
}

1;