<?PHP
#
#   FILE:  BotDetector.php
#
#   Part of the Collection Workflow Integration System (CWIS)
#   Copyright 2002-2013 Edward Almasy and Internet Scout Research Group
#   http://scout.wisc.edu/cwis/
#

/**
* Provides support for detecting whether a page was loaded by a person or by an
* automated program, e.g., a web crawler or spider.
*/
class BotDetector extends Plugin {

    /**
    * Fill in the descriptive attributes of this plugin (name, version,
    * description, author contact details), the CWISCore version it
    * requires, and whether it is enabled by default.
    */
    function Register()
    {
        # what the plugin is
        $this->Name = "Bot Detector";
        $this->Version = "1.0.0";
        $this->Description = 'Provides support for detecting whether the'
                .' current page load is by an actual person or by an automated'
                .' <a href="http://en.wikipedia.org/wiki/Web_crawler"'
                .' target="_blank">web crawler or spider</a>.';

        # who maintains it
        $this->Author = "Internet Scout";
        $this->Email = "scout@scout.wisc.edu";
        $this->Url = "http://scout.wisc.edu/cwis/";

        # dependencies and default state
        $this->Requires = array("CWISCore" => "2.1.0");
        $this->EnabledByDefault = TRUE;
    }

    /**
    * List the events that this plugin makes available.
    * @return Returns an array with the event name for the index and the
    *      application framework event type for the value.
    */
    function DeclareEvents()
    {
        $Events = array();
        $Events["BotDetector_EVENT_CHECK_FOR_BOT"] =
                ApplicationFramework::EVENTTYPE_FIRST;
        return $Events;
    }

    /**
    * List the events that this plugin wants to be hooked to.
    * @return Returns an array with the event name for the index and the
    *      name of the method to hook to that event for the value.
    */
    function HookEvents()
    {
        $Hooks = array();
        $Hooks["BotDetector_EVENT_CHECK_FOR_BOT"] = "CheckForBot";
        return $Hooks;
    }

    /**
    * Compare the user-agent string sent with the current request against a
    * list of known robot signatures (case-insensitively).
    * @return Returns TRUE if the user-agent matches one of the robot
    *      signatures, or FALSE if it matches none of them or if no
    *      user-agent was supplied.
    */
    function CheckForBot()
    {
        # without a user-agent there is nothing to test against, so the
        # visitor is assumed to be a person
        if (!isset($_SERVER['HTTP_USER_AGENT']))
        {
            return FALSE;
        }

        # regular expression fragments identifying known robots, several of
        # them taken from the awstats list of common robots
        # (lib/robots.pm line 334 in awstats-7.0)
        $Signatures = array(
            'appie', 'architext', 'jeeves', 'bjaaland', 'contentmatch',
            'ferret', 'googlebot', 'google\-sitemaps', 'gulliver',
            'virus[_+ ]detector', 'harvest', 'htdig', 'linkwalker',
            'lilina', 'lycos[_+ ]', 'moget', 'muscatferret', 'myweb',
            'nomad', 'scooter', 'slurp', '^voyager\/', 'weblayers',
            'antibot', 'bruinbot', 'digout4u', 'echo!',
            'fast\-webcrawler', 'ia_archiver\-web\.archive\.org',
            'ia_archiver', 'jennybot', 'mercator', 'netcraft',
            'msnbot\-media', 'msnbot', 'petersnews', 'relevantnoise\.com',
            'unlost_web_crawler', 'voila', 'webbase', 'webcollage',
            'cfetch', 'zyborg', 'wisenutbot', 'bingbot', 'yandexbot',
            'blexbot');

        # join the fragments into one alternation so that a single match
        # attempt answers whether any of the signatures is present
        $Pattern = '/'.implode('|', $Signatures).'/i';

        return preg_match($Pattern, $_SERVER['HTTP_USER_AGENT']) === 1;
    }

}
