<?PHP

class UrlChecker extends Plugin
{

    /**
     * @const FLAG_OFF_VALUE value used by the Resource class when a flag is
     *      off; also used as the value half of the "FieldId:Value" keys of
     *      the "Don't check URLs if" options built in Register()
     */
    const FLAG_OFF_VALUE = 0;

    /**
     * @const FLAG_ON_VALUE value used by the Resource class when a flag is
     *      on; also used as the value half of the "FieldId:Value" keys of
     *      the "Don't check URLs if" options built in Register()
     */
    const FLAG_ON_VALUE = 1;

    /**
    * The number of times a URL has to be found invalid to be considered an
    * invalid URL. NOTE(review): the statistics queries in GetInformation()
    * treat a URL as invalid only when TimesInvalid is strictly greater than
    * this value -- confirm that the strict comparison is intended.
    */
    const INVALIDATION_THRESHOLD = 4;

    /**
    * The timeout value in seconds for URL checking connections. The value is
    * a float; it is truncated with intval() where it is used as the default
    * of the Time column of the resource history table.
    */
    const CONNECTION_TIMEOUT = 5.0;

    /**
    * The maximum number of URLs to check at a time. Reported as "NumToCheck"
    * by GetInformation().
    */
    const NUM_TO_CHECK = 500;

    /**
     * Set the descriptive attributes of the plugin (name, version, author,
     * requirements, ...) and define its configuration settings.
     */
    public function Register()
    {
        # descriptive attributes
        $this->Name = "URL Checker";
        $this->Version = "2.1.11";
        $this->Description = "Periodically validates URL field values."
            ." <i>System Administrator</i> or <i>Collection Administrator</i>"
            ." privilege is required to view the results.";
        $this->Author = "Internet Scout";
        $this->Url = "http://scout.wisc.edu/cwis/";
        $this->Email = "scout@scout.wisc.edu";
        $this->Requires = array("CWISCore" => "2.9.0");
        $this->EnabledByDefault = FALSE;

        # build the choices shared by the "Don't check if..." settings: an
        # empty choice followed by an off and an on entry for each flag field,
        # keyed by "FieldId:FlagValue"
        $Schema = new MetadataSchema();
        $RuleChoices = array("" => "--");
        foreach ($Schema->GetFields(MetadataSchema::MDFTYPE_FLAG) as $FlagField)
        {
            $KeyPrefix = $FlagField->Id().":";
            $LabelPrefix = $FlagField->Name()." is set to \"";

            $RuleChoices[$KeyPrefix.self::FLAG_OFF_VALUE] =
                $LabelPrefix.$FlagField->FlagOffLabel()."\"";
            $RuleChoices[$KeyPrefix.self::FLAG_ON_VALUE] =
                $LabelPrefix.$FlagField->FlagOnLabel()."\"";
        }

        # priority used for the queued URL checking tasks
        $this->CfgSetup["TaskPriority"] = array(
            "Type" => "Option",
            "Label" => "Task Priority",
            "Help" => "Priority of the URL checking tasks in the task queue.",
            "AllowMultiple" => FALSE,
            "Options" => array(
                ApplicationFramework::PRIORITY_BACKGROUND => "Background",
                ApplicationFramework::PRIORITY_LOW => "Low",
                ApplicationFramework::PRIORITY_MEDIUM => "Medium",
                ApplicationFramework::PRIORITY_HIGH => "High"));

        # the three exclusion rules only differ in the word of their help text
        # that describes where the other rules are located
        $RulePositions = array(
            "DontCheck1" => "following",
            "DontCheck2" => "surrounding",
            "DontCheck3" => "preceding");
        foreach ($RulePositions as $SettingName => $Position)
        {
            $this->CfgSetup[$SettingName] = array(
                "Type" => "Option",
                "Label" => "Don't check URLs if",
                "Help" => "Don't check the URLs of resources that match this"
                    ." rule or any of the ".$Position." ones.",
                "Options" => $RuleChoices);
        }

        # switch for the additional developer pages
        $this->CfgSetup["EnableDeveloper"] = array(
            "Type" => "Flag",
            "Label" => "Enable Developer Interface",
            "Help" => "Enable an additional developer interface to aid in debugging the plugin.",
            "OnLabel" => "Yes",
            "OffLabel" => "No");
    }

    /**
    * Copy the three "Don't check URLs if" settings into the Rules data
    * member, which is the only member kept by __sleep(), so that they are
    * available when running in the background.
    */
    public function Initialize()
    {
        $Rules = array();

        foreach (array("DontCheck1", "DontCheck2", "DontCheck3") as $SettingName)
        {
            $Rules[] = $this->ConfigSetting($SettingName);
        }

        $this->Rules = $Rules;
    }

    /**
     * Create the database tables used by this plugin, if they do not exist
     * yet, and store the default values of its settings.
     * @return NULL if everything went OK or an error message otherwise
     */
    public function Install()
    {
        $Database = new Database();

        # one row per resource: when it was last checked and the time value
        # recorded for it, which defaults to the connection timeout
        $ResourceHistorySql = "
            CREATE TABLE IF NOT EXISTS UrlChecker_ResourceHistory (
                ResourceId     INT,
                CheckDate      TIMESTAMP,
                Time           INT DEFAULT ".intval(self::CONNECTION_TIMEOUT).",
                PRIMARY KEY    (ResourceId)
            );";
        if ($Database->Query($ResourceHistorySql) === FALSE)
        {
            return "Could not create the resource history table";
        }

        # one row per resource/field pair holding the recorded URL data
        $UrlHistorySql = "
            CREATE TABLE IF NOT EXISTS UrlChecker_UrlHistory (
                ResourceId          INT,
                FieldId             INT,
                Hidden              INT,
                CheckDate           TIMESTAMP,
                TimesInvalid        INT,
                Url                 TEXT,
                StatusCode          SMALLINT,
                ReasonPhrase        TEXT,
                IsFinalUrlInvalid   INT,
                FinalUrl            TEXT,
                FinalStatusCode     SMALLINT,
                FinalReasonPhrase   TEXT,
                PRIMARY KEY         (ResourceId, FieldId)
            );";
        if ($Database->Query($UrlHistorySql) === FALSE)
        {
            return "Could not create the URL history table";
        }

        # store the default value of each setting
        $Defaults = array(
            "EnableDeveloper" => FALSE,
            "NextNormalUrlCheck" => 0,
            "NextInvalidUrlCheck" => 0,
            "TaskPriority" => ApplicationFramework::PRIORITY_BACKGROUND);
        foreach ($Defaults as $SettingName => $DefaultValue)
        {
            $this->ConfigSetting($SettingName, $DefaultValue);
        }

        return NULL;
    }

    /**
    * Restrict serialization of the plugin to the Rules data member, which is
    * populated by Initialize().
    * @return array names of the data members to save when serializing
    */
    public function __sleep()
    {
        $MembersToSave = array("Rules");

        return $MembersToSave;
    }

    /**
     * Uninstall the plugin by removing the database tables created by
     * Install().
     * @return NULL|string NULL if successful or an error message otherwise
     */
    public function Uninstall()
    {
        $Database = new Database();

        # IF EXISTS keeps the uninstall repeatable: a table that is already
        # gone (e.g., after a partially completed uninstall) is not an error,
        # mirroring the IF NOT EXISTS used when the tables are created

        # resource history table
        if (FALSE === $Database->Query(
            "DROP TABLE IF EXISTS UrlChecker_ResourceHistory;"))
        { return "Could not remove the resource history table"; }

        # URL history table
        if (FALSE === $Database->Query(
            "DROP TABLE IF EXISTS UrlChecker_UrlHistory;"))
        { return "Could not remove the URL history table"; }

        # explicitly report success, as documented
        return NULL;
    }

    /**
     * Upgrade from a previous version. Every upgrade step whose target
     * version is newer than the previous version is applied, oldest first.
     * @param string $PreviousVersion version of the plugin being upgraded from
     * @return NULL|string NULL if successful or an error message otherwise
     */
    public function Upgrade($PreviousVersion)
    {
        # upgrade from versions < 2.0.0 to 2.0.0
        if (version_compare($PreviousVersion, "2.0.0", "<"))
        {
            $DB = new Database();

            # make the upgrade process fault tolerant. each query pattern may
            # appear only once as a key because PHP keeps just the last value
            # of a duplicated array key, so the errors to ignore for one kind
            # of query are combined into a single alternation pattern
            $DB->SetQueryErrorsToIgnore(array(
                '/ALTER\s+TABLE\s+[^\s]+\s+CHANGE\s+.+/i'
                  => '/(Unknown\s+column\s+[^\s]+\s+in\s+[^\s]+'
                    .'|Table\s+[^\s]+\s+doesn\'t\s+exist)/i',
                '/ALTER\s+TABLE\s+[^\s]+\s+ADD\s+.+/i'
                  => '/(Duplicate\s+column\s+name\s+[^\s]+'
                    .'|Table\s+[^\s]+\s+doesn\'t\s+exist)/i',
                '/RENAME\s+TABLE\s+[^\s]+\s+TO\s+[^\s]+/i'
                  => '/Table\s+[^\s]+\s+already\s+exists/i',
                '/CREATE\s+TABLE\s+[^\s]+\s+\([^)]+\)/i'
                  => '/Table\s+[^\s]+\s+already\s+exists/i'));

            # rename columns
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE DateChecked CheckDate TIMESTAMP"))
            { return "Could not update the URL history CheckDate column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE TimesFailed TimesInvalid INT"))
            { return "Could not update the TimesInvalid column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE StatusNo StatusCode INT"))
            { return "Could not update the StatusCode column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE StatusText ReasonPhrase TEXT"))
            { return "Could not update the ReasonPhrase column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE DataOne FinalStatusCode INT DEFAULT -1"))
            { return "Could not update the FinalStatusCode column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE DataTwo FinalUrl TEXT"))
            { return "Could not update the FinalUrl column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_History
                CHANGE DateChecked CheckDate TIMESTAMP"))
            { return "Could not update the resource history CheckDate column"; }

            # add columns
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                ADD Hidden INT DEFAULT 0
                AFTER FieldId"))
            { return "Could not add the Hidden column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                ADD IsFinalUrlInvalid INT DEFAULT 0
                AFTER ReasonPhrase"))
            { return "Could not add the IsFinalUrlInvalid column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                ADD FinalReasonPhrase TEXT"))
            { return "Could not add the FinalReasonPhrase column"; }

            # rename history tables
            if (FALSE === $DB->Query("
                RENAME TABLE UrlChecker_Failures
                TO UrlChecker_UrlHistory"))
            { return "Could not rename the URL history table"; }
            if (FALSE === $DB->Query("
                RENAME TABLE UrlChecker_History
                TO UrlChecker_ResourceHistory"))
            { return "Could not rename the resource history table"; }

            # remove any garbage data
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_UrlHistory
                WHERE ResourceId < 0"))
            { return "Could not remove stale data from the URL history"; }
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_ResourceHistory
                WHERE ResourceId < 0"))
            { return "Could not remove stale data from the resource history"; }

            # add settings table
            if (FALSE === $DB->Query("
                CREATE TABLE UrlChecker_Settings (
                    NextNormalUrlCheck     INT,
                    NextInvalidUrlCheck    INT
                );"))
            { return "Could not create the settings table"; }

            # repair and optimize tables after the changes. if this isn't done,
            # weird ordering issues might pop up
            if (FALSE === $DB->Query("
                REPAIR TABLE UrlChecker_UrlHistory"))
            { return "Could not repair the URL history table"; }
            if (FALSE === $DB->Query("
                REPAIR TABLE UrlChecker_ResourceHistory"))
            { return "Could not repair the resource history table"; }
            if (FALSE === $DB->Query("
                OPTIMIZE TABLE UrlChecker_UrlHistory"))
            { return "Could not optimize the URL history table"; }
            if (FALSE === $DB->Query("
                OPTIMIZE TABLE UrlChecker_ResourceHistory"))
            { return "Could not optimize the resource history table"; }
        }

        # upgrade from version 2.0.0 to 2.1.0
        if (version_compare($PreviousVersion, "2.1.0", "<"))
        {
            $DB = new Database();

            # make the upgrade process fault tolerant
            $DB->SetQueryErrorsToIgnore(array(
                '/ALTER\s+TABLE\s+[^\s]+\s+ADD\s+.+/i'
                  => '/Duplicate\s+column\s+name\s+[^\s]+/i',
                '/ALTER\s+TABLE\s+[^\s]+\s+DROP\s+.+/i'
                  => '/Can\'t\s+DROP\s+[^\s;]+;\s+check\s+that\s+column\/key\s+exists/i'));

            # get old settings data
            if (FALSE === $DB->Query("
                SELECT * FROM UrlChecker_Settings LIMIT 1"))
            { return "Could not get settings data"; }

            if ($DB->NumRowsSelected())
            {
                # the old columns are missing from the row if this step is
                # run again after they were dropped, so fall back to zero
                # instead of reading an undefined index
                $Row = $DB->FetchRow();
                $NextNormalUrlCheck = isset($Row["NextNormalUrlCheck"])
                    ? $Row["NextNormalUrlCheck"] : 0;
                $NextInvalidUrlCheck = isset($Row["NextInvalidUrlCheck"])
                    ? $Row["NextInvalidUrlCheck"] : 0;
            }

            else
            {
                $NextNormalUrlCheck = 0;
                $NextInvalidUrlCheck = 0;
            }

            # add column
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Settings
                ADD Name Text"))
            { return "Could not add the Name column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Settings
                ADD Value Text"))
            { return "Could not add the Value column"; }

            # remove old columns
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Settings
                DROP NextNormalUrlCheck"))
            { return "Could not remove the NextNormalUrlCheck Column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Settings
                DROP NextInvalidUrlCheck"))
            { return "Could not remove the NextInvalidUrlCheck Column"; }

            # remove any garbage data from the tables
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_UrlHistory
                WHERE ResourceId < 0"))
            { return "Could not remove stale data from the URL history"; }
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_ResourceHistory
                WHERE ResourceId < 0"))
            { return "Could not remove stale data from the resource history"; }

            # this makes sure that no garbage rows exist
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_Settings"))
            { return "Could not remove stale data from the settings table"; }

            # add settings back into the table
            if (FALSE === $DB->Query("
                INSERT INTO UrlChecker_Settings (Name, Value)
                VALUES
                ('NextNormalUrlCheck', '".addslashes($NextNormalUrlCheck)."'),
                ('NextInvalidUrlCheck', '".addslashes($NextInvalidUrlCheck)."'),
                ('EnableDeveloper', '0')"))
            { return "Could not initialize the updated settings"; }

            # repair and optimize the settings table after the changes
            if (FALSE === $DB->Query("
                REPAIR TABLE UrlChecker_Settings"))
            { return "Could not repair the settings table"; }
            if (FALSE === $DB->Query("
                OPTIMIZE TABLE UrlChecker_Settings"))
            { return "Could not optimize the settings table"; }
        }

        # upgrade from version 2.1.0 to 2.1.1
        if (version_compare($PreviousVersion, "2.1.1", "<"))
        {
            $DB = new Database();

            # remove old garbage data: anything that is not an http(s) URL
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_UrlHistory
                WHERE Url NOT REGEXP '^https?:\/\/'"))
            { return "Could not remove stale data from the URL history"; }
        }

        # upgrade to version 2.1.4
        if (version_compare($PreviousVersion, "2.1.4", "<"))
        {
            $this->ConfigSetting("TaskPriority", ApplicationFramework::PRIORITY_BACKGROUND);
        }

        # upgrade to version 2.1.10
        if (version_compare($PreviousVersion, "2.1.10", "<"))
        {
            $DB = new Database();

            # make the upgrade process fault tolerant
            $DB->SetQueryErrorsToIgnore(array(
                '/DROP\s+.+/i'
                  => '/Unknown\s+table/i',
                '/SELECT\s+.+/i'
                  => '/doesn\'t\s+exist/i'));

            # get old settings data if possible
            $Result = $DB->Query("SELECT * FROM UrlChecker_Settings");

            $OldSettings = array();

            # if the query succeeded
            if ($Result)
            {
                # add the old settings to the array
                while (FALSE !== ($Row = $DB->FetchRow()))
                {
                    $OldSettings[$Row["Name"]] = intval($Row["Value"]);
                }
            }

            # migrate the data to the settings for the plugin
            $this->ConfigSetting("EnableDeveloper", (bool) GetArrayValue($OldSettings, "EnableDeveloper", FALSE));
            $this->ConfigSetting("NextNormalUrlCheck", GetArrayValue($OldSettings, "NextNormalUrlCheck", 0));
            $this->ConfigSetting("NextInvalidUrlCheck", GetArrayValue($OldSettings, "NextInvalidUrlCheck", 0));

            # remove the old settings table if possible
            $DB->Query("DROP TABLE UrlChecker_Settings;");
        }

        # upgrade to version 2.1.11
        if (version_compare($PreviousVersion, "2.1.11", "<"))
        {
            $DB = new Database();

            # make the upgrade process fault tolerant
            $DB->SetQueryErrorsToIgnore(array(
                '/ALTER\s+.+/i'
                  => '/Duplicate\s+column\s+name/i'));

            # add the Time column if possible
            $DB->Query("
                ALTER TABLE UrlChecker_ResourceHistory
                ADD Time INT DEFAULT ".intval(self::CONNECTION_TIMEOUT));

            # reset the check times (invalid less than normal to make sure an
            # invalid check is performed first)
            $this->ConfigSetting("NextNormalUrlCheck", 1);
            $this->ConfigSetting("NextInvalidUrlCheck", 0);
        }

        return NULL;
    }

    /**
     * Declare the events this plugin provides to the application framework.
     * @return array event types, indexed by the names of the events
     */
    public function DeclareEvents()
    {
        $First = ApplicationFramework::EVENTTYPE_FIRST;
        $Default = ApplicationFramework::EVENTTYPE_DEFAULT;

        $Provided = array();

        # this event should get hooked by an outside plugin
        $Provided["URLCHECKER_SET_RESOURCE_RELEASE_CALLBACKS"] = $First;

        # the remaining events are hooked by this plugin in HookEvents()
        $Provided["URLCHECKER_USING_CUSTOM_RELEASE_CALLBACKS"] = $First;
        $Provided["URLCHECKER_GET_INFORMATION"] = $First;
        $Provided["URLCHECKER_CHECK_RESOURCE_URLS"] = $Default;
        $Provided["URLCHECKER_GET_INVALID_COUNT"] = $First;
        $Provided["URLCHECKER_GET_INVALID_URLS"] = $First;
        $Provided["URLCHECKER_GET_INVALID_URL"] = $First;
        $Provided["URLCHECKER_IS_RESOURCE_RELEASED"] = $First;
        $Provided["URLCHECKER_RELEASE_RESOURCE"] = $Default;
        $Provided["URLCHECKER_WITHHOLD_RESOURCE"] = $Default;
        $Provided["URLCHECKER_HIDE_URL"] = $Default;
        $Provided["URLCHECKER_UNHIDE_URL"] = $Default;
        $Provided["URLCHECKER_GET_NEXT_RESOURCES_TO_BE_CHECKED"] = $First;
        $Provided["URLCHECKER_GET_NEXT_URLS_TO_BE_CHECKED"] = $First;

        return $Provided;
    }

    /**
     * Hook the events into the application framework.
     * @return array names of the methods to call, indexed by the names of the
     *      events they are hooked to
     */
    public function HookEvents()
    {
        $DeveloperEnabled = $this->ConfigSetting("EnableDeveloper");

        # framework events (for debugging only, QueueResourceCheckTasks can
        # temporarily be hooked to EVENT_HTML_FILE_LOAD_COMPLETE as well)
        $Hooks = array();
        $Hooks["EVENT_COLLECTION_ADMINISTRATION_MENU"] = "DeclareColAdminPages";
        $Hooks["EVENT_PAGE_LOAD"] = "SetResourceReleaseCallbacks";
        $Hooks["EVENT_PERIODIC"] = "QueueResourceCheckTasks";

        # events provided by this plugin
        $Hooks["URLCHECKER_USING_CUSTOM_RELEASE_CALLBACKS"] = "UsingCustomReleaseCallbacks";
        $Hooks["URLCHECKER_GET_INFORMATION"] = "GetInformation";
        $Hooks["URLCHECKER_CHECK_RESOURCE_URLS"] = "CheckResourceUrls";
        $Hooks["URLCHECKER_GET_INVALID_COUNT"] = "GetInvalidCount";
        $Hooks["URLCHECKER_GET_INVALID_URLS"] = "GetInvalidUrls";
        $Hooks["URLCHECKER_GET_INVALID_URL"] = "GetInvalidUrl";
        $Hooks["URLCHECKER_IS_RESOURCE_RELEASED"] = "IsResourceReleased";
        $Hooks["URLCHECKER_RELEASE_RESOURCE"] = "ReleaseResource";
        $Hooks["URLCHECKER_WITHHOLD_RESOURCE"] = "WithholdResource";
        $Hooks["URLCHECKER_HIDE_URL"] = "HideUrl";
        $Hooks["URLCHECKER_UNHIDE_URL"] = "UnhideUrl";
        $Hooks["URLCHECKER_GET_NEXT_RESOURCES_TO_BE_CHECKED"] = "GetNextResourcesToBeChecked";
        $Hooks["URLCHECKER_GET_NEXT_URLS_TO_BE_CHECKED"] = "GetNextUrlsToBeChecked";

        # the developer page is only offered when the developer interface is
        # enabled
        if ($DeveloperEnabled)
        {
            $Hooks["EVENT_SYSTEM_ADMINISTRATION_MENU"] = "DeclareSysAdminPages";
        }

        return $Hooks;
    }

    /**
     * Add page hooks for the collection administration section. The hidden
     * URLs page is only included when the developer interface is enabled.
     * @return map page name to page title for the application framework
     */
    public function DeclareColAdminPages()
    {
        $Pages = array("Results" => "URL Checker Results");

        if ($this->ConfigSetting("EnableDeveloper"))
        {
            $Pages["HiddenUrls"] = "URL Checker Hidden URLs";
        }

        return $Pages;
    }

    /**
     * Add page hooks for the system administration section. HookEvents()
     * only hooks this method when EnableDeveloper is TRUE.
     * @return map page name to page title for the application framework
     */
    public function DeclareSysAdminPages()
    {
        $Pages = array();
        $Pages["Developer"] = "URL Checker Developer Page";

        return $Pages;
    }

    /**
     * Signal URLCHECKER_SET_RESOURCE_RELEASE_CALLBACKS on page load and, if
     * the result is an array of exactly three callables, save them as the
     * custom "is released", "release", and "withhold" callbacks (in that
     * order). Any other result is ignored.
     */
    public function SetResourceReleaseCallbacks()
    {
        $Handlers = $GLOBALS["AF"]->SignalEvent(
            "URLCHECKER_SET_RESOURCE_RELEASE_CALLBACKS");

        # exactly three values are expected
        if (!is_array($Handlers) || count($Handlers) != 3)
        {
            return;
        }

        # and every one of them has to be callable
        foreach (array(0, 1, 2) as $Index)
        {
            if (!is_callable($Handlers[$Index]))
            {
                return;
            }
        }

        $this->IsResourceReleasedCallback = $Handlers[0];
        $this->ReleaseResourceCallback = $Handlers[1];
        $this->WithholdResourceCallback = $Handlers[2];
    }

    /**
     * Return whether or not custom callbacks are set.
     * @return TRUE if custom callbacks are set, FALSE otherwise
     */
    public function UsingCustomReleaseCallbacks()
    {
        # SetResourceReleaseCallbacks() sets this data member together with
        # the other two callbacks, so checking it alone is sufficient
        $CallbacksAreSet = isset($this->IsResourceReleasedCallback);

        return $CallbacksAreSet;
    }

    /**
    * Queue tasks to check resource URLs for resources that need to be checked.
    * Either the resources of the next invalid URLs or the next resources to
    * be checked are queued, and the summed approximate check time of the
    * queued resources is saved in the matching setting.
    * @return Returns the amount of time before this should be called again, in
    *      minutes.
    */
    public function QueueResourceCheckTasks()
    {
        # nothing to do when the schema has no URL fields; retry in an hour
        if (count($this->GetUrlFields()) == 0)
        {
            return 60;
        }

        $AF = $GLOBALS["AF"];
        $CheckCallback = array($this, "CheckResourceUrls");
        $TaskDescription = "Validate the URLs associated with a resource";

        # previously queued checks are still pending; retry in five minutes
        if ($AF->GetQueuedTaskCount($CheckCallback))
        {
            return 5;
        }

        $NormalEstimate = $this->ConfigSetting("NextNormalUrlCheck");
        $InvalidEstimate = $this->ConfigSetting("NextInvalidUrlCheck");

        # invalid URLs are rechecked only when their estimate is set and is
        # smaller than the estimate for the normal check
        $RecheckInvalidUrls =
            ($InvalidEstimate != 0 && $InvalidEstimate < $NormalEstimate);

        if (!$RecheckInvalidUrls)
        {
            # check the resources that haven't been checked in awhile
            $NormalEstimate = 0;

            foreach ($this->GetNextResourcesToBeChecked() as $Candidate)
            {
                # only resources that will actually be checked add to the
                # time estimate
                if (!$this->ShouldNotCheckUrls($Candidate))
                {
                    $NormalEstimate += $Candidate->GetApproximateCheckTime();
                }

                $AF->QueueUniqueTask(
                    $CheckCallback, array($Candidate),
                    $this->ConfigSetting("TaskPriority"), $TaskDescription);
            }

            $this->ConfigSetting("NextNormalUrlCheck", $NormalEstimate);
        }

        else
        {
            # recheck the resources the invalid URLs belong to
            $InvalidEstimate = 0;

            foreach ($this->GetNextUrlsToBeChecked() as $InvalidUrl)
            {
                $Candidate = new UrlChecker_Resource($InvalidUrl->ResourceId);

                # only resources that will actually be checked add to the
                # time estimate
                if (!$this->ShouldNotCheckUrls($Candidate))
                {
                    $InvalidEstimate += $Candidate->GetApproximateCheckTime();
                }

                $AF->QueueUniqueTask(
                    $CheckCallback, array($Candidate),
                    $this->ConfigSetting("TaskPriority"), $TaskDescription);
            }

            $this->ConfigSetting("NextInvalidUrlCheck", $InvalidEstimate);
        }

        $Sooner = min($NormalEstimate, $InvalidEstimate);
        $Later = max($NormalEstimate, $InvalidEstimate);

        # come back in an hour if there is nothing to check at all
        if ($Sooner == 0 && $Later == 0)
        {
            return 60;
        }

        # otherwise come back at the sooner estimate, unless it is zero (that
        # check isn't going to check anything), in which case the later
        # estimate is used; the estimates are converted to whole minutes
        $Seconds = ($Sooner == 0) ? $Later : $Sooner;

        return intval(round($Seconds/60));
    }

    /**
     * Get information/stats of the various data saved: plugin settings,
     * hard-coded limits, counts from the history tables, check dates, and
     * version information. Stale data is removed before anything is counted.
     * @return array of various information
     */
    public function GetInformation()
    {
        $this->RemoveStaleData();

        $Database = new Database();
        $Threshold = self::INVALIDATION_THRESHOLD;
        $Stats = array();

        # values saved in the plugin settings
        foreach (array("NextNormalUrlCheck", "NextInvalidUrlCheck",
            "EnableDeveloper") as $SettingName)
        {
            $Stats[$SettingName] = intval($this->ConfigSetting($SettingName));
        }

        # values hard-coded in the class constants
        $Stats["Timeout"] = self::CONNECTION_TIMEOUT;
        $Stats["Threshold"] = $Threshold;
        $Stats["NumToCheck"] = self::NUM_TO_CHECK;

        # how many resources have a history record, i.e., have been checked
        $Database->Query("SELECT COUNT(*) as NumChecked FROM UrlChecker_ResourceHistory");
        $Stats["NumResourcesChecked"] = intval($Database->FetchField("NumChecked"));

        # how many resources have not been checked yet (resources with IDs < 0
        # are left out of the total since they're probably bad)
        $Database->Query("
            SELECT COUNT(*) as NumResources
            FROM Resources
            WHERE ResourceId >= 0");
        $TotalResources = intval($Database->FetchField("NumResources"));
        $Stats["NumResourcesUnchecked"] =
            $TotalResources - $Stats["NumResourcesChecked"];

        # how many URLs are past the threshold and not hidden
        $Database->Query("
            SELECT COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE Hidden = 0
            AND TimesInvalid > ".$Threshold);
        $Stats["NumInvalid"] = intval($Database->FetchField("NumInvalid"));

        # how many URLs are past the threshold and hidden
        $Database->Query("
            SELECT COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE Hidden = 1
            AND TimesInvalid > ".$Threshold);
        $Stats["NumInvalidAndHidden"] = intval($Database->FetchField("NumInvalid"));

        # how many URLs are recorded but not past the threshold yet
        $Database->Query("
            SELECT COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE TimesInvalid <= ".$Threshold);
        $Stats["NumPossiblyInvalid"] = intval($Database->FetchField("NumInvalid"));

        # per status code counts of the invalid URLs that are not hidden
        $VisibleByStatus = array();
        $Database->Query("
            SELECT StatusCode, COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE Hidden = 0
            AND TimesInvalid > ".$Threshold."
            GROUP BY StatusCode");
        while (($Row = $Database->FetchRow()) !== FALSE)
        {
            $StatusCode = intval($Row["StatusCode"]);
            $VisibleByStatus[$StatusCode] = intval($Row["NumInvalid"]);
        }
        $Stats["InvalidUrlsForStatusCodes"] = $VisibleByStatus;

        # per status code counts of the invalid URLs that are hidden
        $HiddenByStatus = array();
        $Database->Query("
            SELECT StatusCode, COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE Hidden = 1
            AND TimesInvalid > ".$Threshold."
            GROUP BY StatusCode");
        while (($Row = $Database->FetchRow()) !== FALSE)
        {
            $StatusCode = intval($Row["StatusCode"]);
            $HiddenByStatus[$StatusCode] = intval($Row["NumInvalid"]);
        }
        $Stats["HiddenInvalidUrlsForStatusCodes"] = $HiddenByStatus;

        # whether custom release callbacks are in use, as a display string
        if ($this->UsingCustomReleaseCallbacks())
        {
            $Stats["UsingCustomReleaseCallbacks"] = "Yes";
        }
        else
        {
            $Stats["UsingCustomReleaseCallbacks"] = "No";
        }

        # the date of the most recent resource check
        $Database->Query("
            SELECT *
            FROM UrlChecker_ResourceHistory
            ORDER BY CheckDate DESC LIMIT 1");
        $Stats["DateLastResourceChecked"] = $Database->FetchField("CheckDate");

        # the date the next check will be performed
        $Stats["DateOfNextCheck"] = $this->GetDateOfNextCheck();

        # versions of the plugin, CWIS, and PHP
        $Stats["Version"] = $this->Version;
        $Stats["CwisVersion"] = CWIS_VERSION;
        $Stats["PhpVersion"] = PHP_VERSION;

        return $Stats;
    }

    /**
     * Check all of the URL metadata field values for the given resource.
     * @param $Resource resource
     */
    public function CheckResourceUrls(Resource $Resource)
    {
        # Summary: for every URL field of the resource, fetch the URL's HTTP
        # status and either clear its UrlChecker_UrlHistory row (URL is fine
        # or was not checked) or rewrite the row with an updated failure
        # count. finally the time spent is stored via UpdateResourceHistory()

        $DB = new Database();

        # IDs are numeric, so coerce them once instead of string-escaping them
        $ResourceId = intval($Resource->Id());

        # the URLs for the resource should not be checked
        if ($this->ShouldNotCheckUrls($Resource))
        {
            # to estimate the approximate time without having checked the URLs,
            # use the connection timeout, making sure it's at least 1
            $ApproximateCheckTime = max(1,
                intval(round(self::CONNECTION_TIMEOUT)));

            # record that the resource was checked
            $this->UpdateResourceHistory($Resource, $ApproximateCheckTime);

            # clear out the URL history
            $DB->Query("
                DELETE FROM UrlChecker_UrlHistory
                WHERE ResourceId = '".$ResourceId."'");

            # don't check any URLs
            return;
        }

        # record when the operation started
        $Start = microtime(true);

        foreach ($this->GetUrlFields() as $Field)
        {
            $FieldId = intval($Field->Id());
            $Url = $Resource->Get($Field);

            # get the url's http status
            $Info = $this->GetHttpInformation($Url);

            # remove old failure data, if any, if the url is ok (-1 means the
            # URL was not checked at all)
            if ($Info["StatusCode"] == -1 || ($Info["StatusCode"] == 200
                && $this->HasValidContent($Url)))
            {
                $DB->Query("LOCK TABLES UrlChecker_UrlHistory WRITE");
                $DB->Query("
                    DELETE FROM UrlChecker_UrlHistory
                    WHERE ResourceId = '".$ResourceId."'
                    AND FieldId = '".$FieldId."'");
                $DB->Query("UNLOCK TABLES");
                continue;
            }

            # there was a problem, so a failure is recorded from here on

            # inspect the content behind the final URL before taking the table
            # lock so that the lock is not held during an HTTP fetch
            $IsFinalUrlInvalid = ($Info["FinalStatusCode"] == 200
                && !$this->HasValidContent($Info["FinalUrl"])) ? 1 : 0;

            $DB->Query("LOCK TABLES UrlChecker_UrlHistory WRITE");
            $DB->Query("
                SELECT * FROM UrlChecker_UrlHistory
                WHERE ResourceId = '".$ResourceId."'
                AND FieldId = '".$FieldId."'");

            # try to use an existing TimesInvalid value if possible and the
            # HTTP info is not too different
            $TimesInvalid = 1;
            $Hidden = 0;
            $Row = $DB->FetchRow();

            if (FALSE !== $Row
                && $Row["StatusCode"] == strval($Info["StatusCode"])
                && $Row["FinalStatusCode"] == strval($Info["FinalStatusCode"]))
            {
                $SameFailure = FALSE;

                # the URL hasn't changed at all
                if ($Row["FinalUrl"] == $Info["FinalUrl"])
                {
                    $SameFailure = TRUE;
                }

                # if the server uses cookies, and there is a redirect, the
                # URL is likely to change every time a check takes place.
                # thus, only check the host portions if those conditions are
                # true. substr() is used rather than a string offset so that
                # an empty status value cannot raise an offset error
                else if (substr(strval($Row["StatusCode"]), 0, 1) == "3"
                    && $Info["UsesCookies"])
                {
                    $DbUrl = @parse_url($Row["FinalUrl"]);
                    $NewUrl = @parse_url($Info["FinalUrl"]);

                    $SameFailure = ($DbUrl && $NewUrl
                        && isset($DbUrl["host"]) && isset($NewUrl["host"])
                        && $DbUrl["host"] == $NewUrl["host"]);
                }

                if ($SameFailure)
                {
                    $TimesInvalid = intval($Row["TimesInvalid"]) + 1;
                    $Hidden = intval($Row["Hidden"]);
                }
            }

            # replace the row with the updated info. delete/insert is used
            # because it avoids sync issues and self-heals if they do arise
            $DB->Query("
                DELETE FROM UrlChecker_UrlHistory
                WHERE ResourceId = '".$ResourceId."'
                AND FieldId = '".$FieldId."'");
            $DB->Query("
                INSERT INTO UrlChecker_UrlHistory SET
                ResourceId = '".$ResourceId."',
                FieldId = '".$FieldId."',
                Hidden = '".intval($Hidden)."',
                TimesInvalid = ".intval($TimesInvalid).",
                Url = '".addslashes($Url)."',
                StatusCode = '".intval($Info["StatusCode"])."',
                ReasonPhrase = '".addslashes($Info["ReasonPhrase"])."',
                IsFinalUrlInvalid = '".$IsFinalUrlInvalid."',
                FinalUrl = '".addslashes($Info["FinalUrl"])."',
                FinalStatusCode = '".intval($Info["FinalStatusCode"])."',
                FinalReasonPhrase = '".addslashes($Info["FinalReasonPhrase"])."'");
            $DB->Query("UNLOCK TABLES");
        }

        # record when the operation stopped
        $Stop = microtime(true);

        # get a rough estimate of how long it takes to test the URL, making sure
        # it's at least one second
        $TimeTaken = max(1, round($Stop - $Start));

        # record that the resource was checked
        $this->UpdateResourceHistory($Resource, $TimeTaken);
    }

    /**
     * Get the number of invalid URLs that match the given constraints
     * @param $Constraints array of constraints
     * @return the number of invalid URLs that match the constraints
     */
    public function GetInvalidCount(array $Constraints = array())
    {
        # Summary: counts the UrlChecker_UrlHistory rows whose TimesInvalid is
        # above the invalidation threshold. each UrlChecker_ConstraintList in
        # $Constraints becomes one AND-ed group; the groups are OR-ed together

        $this->RemoveStaleData();

        $DB = new Database();
        $ValidRelations = array("=", "!=", "<", ">", "<=", ">=");

        # construct the where constraint
        $Where = " WHERE URH.TimesInvalid > ".self::INVALIDATION_THRESHOLD." ";
        $OuterGroup = "";
        foreach ($Constraints as $ConstraintList)
        {
            # skip invalid constraints
            if (!($ConstraintList instanceof UrlChecker_ConstraintList))
            {
                continue;
            }

            $Conditions = array();
            foreach ($ConstraintList as $Constraint)
            {
                $Key = $Constraint->Key;
                $Value = $Constraint->Value;
                $Relation = $Constraint->Relation;

                # skip if the relation is invalid. the comparison is strict so
                # that non-string values cannot match by type juggling and end
                # up in the SQL
                if (!in_array($Relation, $ValidRelations, TRUE))
                {
                    continue;
                }

                # Resource table constraint. Status() is called as a method,
                # the same way the rest of this class queries a field's status
                if ($Key instanceof MetadataField
                    && $Key->Status() == MetadataSchema::MDFSTAT_OK)
                {
                    $Column = "R.".$Key->DBFieldName();
                }

                # UrlChecker_History table constraint. the key is used as a
                # column name and cannot be quoted like a value, so only plain
                # identifiers are accepted
                else if (is_string($Key)
                    && preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $Key))
                {
                    $Column = "URH.".$Key;
                }

                # otherwise ignore the invalid key value
                else
                {
                    continue;
                }

                $Conditions[] =
                    $Column." ".$Relation." '".addslashes($Value)."'";
            }

            if (count($Conditions))
            {
                $OuterGroup .= (strlen($OuterGroup)) ? " OR " : "";
                $OuterGroup .= " ( ".implode(" AND ", $Conditions)." ) ";
            }
        }

        if (strlen($OuterGroup))
        {
            $Where .= " AND " . $OuterGroup;
        }

        # get the url data
        $DB->Query("
            SELECT COUNT(*) AS NumInvalid
            FROM UrlChecker_UrlHistory URH
            LEFT JOIN Resources R
            ON URH.ResourceId = R.ResourceId
            ".$Where);

        return intval($DB->FetchField("NumInvalid"));
    }

    /**
     * Get the invalid URLs that match the given constraints.
     * @param $Constraints array of constraints
     * @param $OrderBy field by which the URLs should be sorted
     * @param $OrderDirection direction in which the URLs should be sorted
     * @param $Limit how many URLs should be returned
     * @param $Offset where the result set should begin
     * @param $Options various other options
     * @return an array of UrlChecker_InvalidUrl objects
     */
    public function GetInvalidUrls(array $Constraints=array(), $OrderBy="StatusCode",
        $OrderDirection="DESC", $Limit=15, $Offset=0, array $Options=array())
    {
        # Summary: selects the UrlChecker_UrlHistory rows whose TimesInvalid is
        # above the invalidation threshold, joined with Resources, and wraps
        # each row in an UrlChecker_InvalidUrl. each UrlChecker_ConstraintList
        # becomes one AND-ed group; the groups are OR-ed together. $Options is
        # accepted but not used by this method

        $this->RemoveStaleData();

        $DB = new Database();
        $ValidGetConstraints = array(
            "ResourceId", "FieldId", "TimesInvalid", "Url", "CheckDate",
            "StatusCode", "ReasonPhrase", "FinalUrl", "FinalStatusCode",
            "FinalReasonPhrase", "Hidden");
        $ValidRelations = array("=", "!=", "<", ">", "<=", ">=");

        # construct the where constraint
        $Where = " WHERE URH.TimesInvalid > ".self::INVALIDATION_THRESHOLD." ";
        $OuterGroup = "";
        foreach ($Constraints as $ConstraintList)
        {
            # skip invalid constraints
            if (!($ConstraintList instanceof UrlChecker_ConstraintList))
            {
                continue;
            }

            $Conditions = array();
            foreach ($ConstraintList as $Constraint)
            {
                $Key = $Constraint->Key;
                $Value = $Constraint->Value;
                $Relation = $Constraint->Relation;

                # skip if the relation is invalid. the comparison is strict so
                # that non-string values cannot match by type juggling and end
                # up in the SQL
                if (!in_array($Relation, $ValidRelations, TRUE))
                {
                    continue;
                }

                # Resource table constraint. Status() is called as a method,
                # the same way the ordering code below queries a field's status
                if ($Key instanceof MetadataField
                    && $Key->Status() == MetadataSchema::MDFSTAT_OK)
                {
                    $Column = "R.".$Key->DBFieldName();
                }

                # UrlChecker_History table constraint. the key is used as a
                # column name and cannot be quoted like a value, so only plain
                # identifiers are accepted
                else if (is_string($Key)
                    && preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $Key))
                {
                    $Column = "URH.".$Key;
                }

                # otherwise ignore the invalid key value
                else
                {
                    continue;
                }

                $Conditions[] =
                    $Column." ".$Relation." '".addslashes($Value)."'";
            }

            if (count($Conditions))
            {
                $OuterGroup .= (strlen($OuterGroup)) ? " OR " : "";
                $OuterGroup .= " ( ".implode(" AND ", $Conditions)." ) ";
            }
        }

        if (strlen($OuterGroup))
        {
            $Where .= " AND " . $OuterGroup;
        }

        # valid UrlChecker_History table order (strict, strings only, so that
        # neither objects nor numbers are compared loosely against the names)
        if (is_string($OrderBy) && in_array($OrderBy, $ValidGetConstraints, TRUE))
        {
            $OrderBy = "URH.".$OrderBy;
        }

        # valid Resource table order
        else if ($OrderBy instanceof MetadataField
                && $OrderBy->Status() == MetadataSchema::MDFSTAT_OK)
        {
            $OrderBy = "R.".$OrderBy->DBFieldName();
        }

        # otherwise default the StatusCode field of the UrlChecker_History table
        else
        {
            $OrderBy = "URH.StatusCode";
        }

        # make sure order direction is valid
        if (!in_array($OrderDirection, array("ASC", "DESC"), TRUE))
        {
            $OrderDirection = "DESC";
        }

        # get the url data
        $DB->Query("
            SELECT * FROM UrlChecker_UrlHistory URH
            LEFT JOIN Resources R
            ON URH.ResourceId = R.ResourceId
            ".$Where."
            ORDER BY ".$OrderBy." ".$OrderDirection."
            LIMIT ".intval($Limit)."
            OFFSET ".intval($Offset));

        # create url objects
        $Urls = array();
        while (FALSE !== ($Row = $DB->FetchRow()))
        {
            $Urls[] = new UrlChecker_InvalidUrl($Row);
        }

        return $Urls;
    }

    /**
     * Get the invalid URL that is associated with the given resource and
     * metadata field, or NULL if one doesn't exist.
     * @param $Resource resource
     * @param $Field metadata field
     * @return an UrlChecker_InvalidUrl object or NULL
     */
    public function GetInvalidUrl(Resource $Resource, MetadataField $Field)
    {
        # Summary: looks up the UrlChecker_UrlHistory row for the given
        # resource/field pair and wraps it in an UrlChecker_InvalidUrl

        $DB = new Database();

        # both IDs are coerced to integers before being placed in the query,
        # as is done by the other history queries in this class
        $DB->Query("
            SELECT *
            FROM UrlChecker_UrlHistory
            WHERE ResourceId = ".intval($Resource->Id())."
            AND FieldId = ".intval($Field->Id()));

        # no history row means there is no recorded failure for the URL
        if (!$DB->NumRowsSelected())
        {
            return NULL;
        }

        return new UrlChecker_InvalidUrl($DB->FetchRow());
    }

    /**
     * Determine whether or not the resource is "released". By default, this
     * means whether or not the Release Flag value is set to TRUE or not, but
     * may be different if a custom callback has been set.
     * @param $Resource resource
     * @return TRUE if the resource is released, FALSE otherwise
     */
    public function IsResourceReleased(Resource $Resource)
    {
        $Schema = new MetadataSchema($Resource->SchemaId());

        # a custom callback, when one is set, makes the decision by itself
        if (isset($this->IsResourceReleasedCallback))
        {
            return call_user_func($this->IsResourceReleasedCallback, $Resource);
        }

        # the flag can only be consulted if it exists, is OK, and is enabled
        $Flag = $Schema->GetFieldByName("Release Flag");
        $FlagIsUsable = NULL !== $Flag
            && $Flag->Status() == MetadataSchema::MDFSTAT_OK
            && $Flag->Enabled();

        # without a usable flag the resource is assumed to be released
        if (!$FlagIsUsable)
        {
            return TRUE;
        }

        return (bool) $Resource->Get("Release Flag");
    }

    /**
     * Release the given resource. By default, this means that the Release Flag
     * value for the resource will be set to TRUE, but may be different if a
     * custom callback has been set.
     * @param $Resource resource
     */
    public function ReleaseResource(Resource $Resource)
    {
        $Schema = new MetadataSchema($Resource->SchemaId());

        # a custom callback, when one is set, does the releasing
        if (isset($this->ReleaseResourceCallback))
        {
            call_user_func($this->ReleaseResourceCallback, $Resource);
            return;
        }

        # the flag can only be set if it exists, is OK, and is enabled
        $Flag = $Schema->GetFieldByName("Release Flag");
        $FlagIsUsable = NULL !== $Flag
            && $Flag->Status() == MetadataSchema::MDFSTAT_OK
            && $Flag->Enabled();

        if ($FlagIsUsable)
        {
            $Resource->Set("Release Flag", TRUE);
        }
    }

    /**
     * Withhold the given resource. By default, this means that the Release Flag
     * value for the resource will be set to NULL, but may be different if a
     * custom callback has been set.
     * @param $Resource resource
     */
    public function WithholdResource(Resource $Resource)
    {
        $Schema = new MetadataSchema($Resource->SchemaId());

        # a custom callback, when one is set, does the withholding
        if (isset($this->WithholdResourceCallback))
        {
            call_user_func($this->WithholdResourceCallback, $Resource);
            return;
        }

        # the flag can only be cleared if it exists, is OK, and is enabled
        $Flag = $Schema->GetFieldByName("Release Flag");
        $FlagIsUsable = NULL !== $Flag
            && $Flag->Status() == MetadataSchema::MDFSTAT_OK
            && $Flag->Enabled();

        if ($FlagIsUsable)
        {
            $Resource->Set("Release Flag", NULL);
        }
    }

    /**
     * Hide the URL associated with the given resource and metadata field so
     * that it doesn't show up on the results page.
     * @param $Resource resource
     * @param $Field metadata field
     */
    public function HideUrl(Resource $Resource, MetadataField $Field)
    {
        $DB = new Database();

        # the resource/field pair identifies the history row to flag
        $ResourceId = intval($Resource->Id());
        $FieldId = intval($Field->Id());

        $Statement = "
            UPDATE UrlChecker_UrlHistory
            SET Hidden = 1
            WHERE ResourceId = '".$ResourceId."'
            AND FieldId = '".$FieldId."'";

        $DB->Query($Statement);
    }

    /**
     * "Unhide" the URL associated with the given resource and metadata field so
     * that it shows up on the results page.
     * @param $Resource resource
     * @param $Field metadata field
     */
    public function UnhideUrl(Resource $Resource, MetadataField $Field)
    {
        $DB = new Database();

        # the resource/field pair identifies the history row to unflag
        $ResourceId = intval($Resource->Id());
        $FieldId = intval($Field->Id());

        $Statement = "
            UPDATE UrlChecker_UrlHistory
            SET Hidden = 0
            WHERE ResourceId = '".$ResourceId."'
            AND FieldId = '".$FieldId."'";

        $DB->Query($Statement);
    }

    /**
     * Get a subset of the resources that haven't been checked or haven't been
     * checked in at least a day.
     * @return an array of UrlChecker_Resource objects
     */
    public function GetNextResourcesToBeChecked()
    {
        $this->RemoveStaleData();

        $DB = new Database();
        $Queue = array();

        # first choice: resources that have no resource history row at all
        $DB->Query("
            SELECT R.*
            FROM Resources R
            LEFT JOIN UrlChecker_ResourceHistory URH
            ON R.ResourceId = URH.ResourceId
            WHERE URH.ResourceId IS NULL
            AND R.ResourceId >= 0
            LIMIT ".self::NUM_TO_CHECK);

        $UncheckedCount = $DB->NumRowsSelected();

        for ($Row = $DB->FetchRow(); FALSE !== $Row; $Row = $DB->FetchRow())
        {
            $Queue[] = new UrlChecker_Resource($Row["ResourceId"], "N/A");
        }

        # if the batch isn't full yet, top it up with the resources whose last
        # check is at least a day old, least recently checked first
        if ($UncheckedCount < self::NUM_TO_CHECK)
        {
            $Cutoff = date("Y-m-d H:i:s", strtotime("-1 day"));
            $DB->Query("
                SELECT *
                FROM UrlChecker_ResourceHistory
                WHERE CheckDate <= '".strval($Cutoff)."'
                ORDER BY CheckDate ASC
                LIMIT ".(self::NUM_TO_CHECK - $UncheckedCount));

            for ($Row = $DB->FetchRow(); FALSE !== $Row; $Row = $DB->FetchRow())
            {
                $Queue[] = new UrlChecker_Resource(
                    $Row["ResourceId"], $Row["CheckDate"]);
            }
        }

        return $Queue;
    }

    /**
     * Get a subset of the invalid URLS that haven't been checked in over a
     * day and are below the threshold, or haven't been checked in over a week
     * and are over the threshold.
     * @return an array of UrlChecker_InvalidUrl objects
     */
    public function GetNextUrlsToBeChecked()
    {
        $this->RemoveStaleData();

        $DB = new Database();

        # cutoffs: URLs at or below the threshold are rechecked after a day,
        # URLs above the threshold only after a week
        $DayCutoff = date("Y-m-d H:i:s", strtotime("-1 day"));
        $WeekCutoff = date("Y-m-d H:i:s", strtotime("-1 week"));

        $DB->Query("
            SELECT *
            FROM UrlChecker_UrlHistory
            WHERE
              (TimesInvalid <= ".intval(self::INVALIDATION_THRESHOLD)."
               AND CheckDate <= '".strval($DayCutoff)."')
              OR
              (TimesInvalid > ".intval(self::INVALIDATION_THRESHOLD)."
               AND CheckDate <= '".strval($WeekCutoff)."')
            ORDER BY CheckDate ASC
            LIMIT ".self::NUM_TO_CHECK);

        # wrap every selected row in an invalid URL object
        $Pending = array();
        for ($Row = $DB->FetchRow(); FALSE !== $Row; $Row = $DB->FetchRow())
        {
            $Pending[] = new UrlChecker_InvalidUrl($Row);
        }

        return $Pending;
    }

    /**
     * Determine whether or not the URLs for the given resource should be
     * checked.
     * @param Resource $Resource resource
     * @return bool TRUE if the URLs should not be checked and FALSE otherwise
     */
    protected function ShouldNotCheckUrls(Resource $Resource)
    {
        # Summary: each rule is a "<field ID>:<flag value>" string. the URLs
        # of the resource are skipped as soon as one rule matches the
        # resource's value for that field

        $Schema = new MetadataSchema($Resource->SchemaId());
        $Rules = isset($this->Rules) ? $this->Rules : array();

        # check if the resource matches any of the rules
        foreach ($Rules as $Rule)
        {
            # skip rules that are not set
            if (empty($Rule))
            {
                continue;
            }

            # parse out the field ID and flag value, skipping rules that do
            # not have the "ID:flag" form instead of reading a missing offset
            $Parts = is_string($Rule) ? explode(":", $Rule) : array();

            if (count($Parts) < 2)
            {
                continue;
            }

            list($FieldId, $Flag) = $Parts;

            # NOTE(review): assumes GetField() returns NULL for a field ID
            # that no longer resolves (e.g. a deleted flag field) -- confirm.
            # such a rule cannot match anything, so it is skipped
            $Field = $Schema->GetField($FieldId);

            if (is_null($Field))
            {
                continue;
            }

            $Value = $Resource->Get($Field);

            # the rule matches if the field value equals the flag value
            # specified in the rule. the checks with empty() are used in case
            # NULLs are in the database, which are assumed to be "off"
            if ($Value == $Flag || (empty($Value) && !$Flag))
            {
                return TRUE;
            }
        }

        return FALSE;
    }

    /**
    * Update the resource history for the given resource.
    * @param Resource $Resource The resource for which to update the history.
    * @param int $TimeTaken The time it took to check the resource, in seconds.
    */
    protected function UpdateResourceHistory(Resource $Resource, $TimeTaken)
    {
        $DB = new Database();

        # the ID is numeric, so coerce it to an integer like the other history
        # queries in this class do, rather than string-escaping it
        $ResourceId = intval($Resource->Id());

        # delete/insert record (fragmentation? mysql: prob. not, pgsql: no)
        # avoids any sync issues and self-heals if sync issues do arise
        $DB->Query("LOCK TABLES UrlChecker_ResourceHistory WRITE");
        $DB->Query("
            DELETE FROM UrlChecker_ResourceHistory
            WHERE ResourceId = '".$ResourceId."'");
        $DB->Query("
            INSERT INTO UrlChecker_ResourceHistory
            SET ResourceId = '".$ResourceId."',
            Time = '".intval($TimeTaken)."'");
        $DB->Query("UNLOCK TABLES");
    }

    /**
     * Get an URL's status info. If there is no redirection, this will be the
     * status line for the URL. If there are redirects, this will be the status
     * line for the URL and the status line for the last URL after redirection.
     * @param $Url URL
     * @return an array with the same fields as an UrlChecker_HttpInfo object
     */
    public function GetHttpInformation($Url)
    {
        # Summary: checks the URL and then follows up to five redirects. the
        # "Final*" entries of the result describe the last URL in the redirect
        # chain and keep their defaults when there is no redirect

        # information for the URL
        list($Info, $Redirect) = $this->GetHttpInformationAux($Url);

        # no redirect, so there is nothing more to look up
        if (is_null($Redirect))
        {
            return $Info;
        }

        # follow the redirects, giving up after a fixed number of hops so that
        # redirect loops terminate
        $MaxIterations = 5;

        while (!is_null($Redirect) && --$MaxIterations >= 0)
        {
            $FinalUrl = $Redirect;
            list($FinalInfo, $Redirect) =
                $this->GetHttpInformationAux($FinalUrl);

            # cookies set anywhere along the chain count
            $Info["UsesCookies"] =
                $Info["UsesCookies"] || $FinalInfo["UsesCookies"];
        }

        # report the status of the last URL that was requested, not the
        # status of the original URL
        $Info["FinalUrl"] = $FinalUrl;
        $Info["FinalStatusCode"] = $FinalInfo["StatusCode"];
        $Info["FinalReasonPhrase"] = $FinalInfo["ReasonPhrase"];

        return $Info;
    }

    /**
     * Auxiliary function for self::GetHttpInformation(). Gets the HTTP
     * information on one URL. Note that this only supports HTTP and HTTPS.
     * @param $Url URL
     * @return an array with the same fields as an UrlChecker_HttpInfo object
     */
    private function GetHttpInformationAux($Url)
    {
        # Summary: sends one GET request for the URL over a raw socket and
        # reads only the response headers. returns array($Info, $Location),
        # where $Location is the absolute redirect target or NULL. status
        # code -1 means "not checked" and 0 means "could not connect"

        # this should be an UrlChecker_HttpInfo object but some versions of PHP
        # segfault when using them, for an unknown reason
        $Info = array("Url" => "", "StatusCode" => -1, "ReasonPhrase" => "",
            "FinalUrl" => "", "FinalStatusCode" => -1, "FinalReasonPhrase" => "",
            "UsesCookies" => FALSE);

        # blank url (code defaults to -1, i.e., not checked)
        if (!strlen(trim($Url)))
        {
            return array($Info, NULL);
        }

        # default to HTTP if no protocol is specified
        if (!@preg_match('/^[a-z]+:/', $Url))
        {
            $Url = "http://".$Url;
        }

        # only check HTTP/HTTPS URLs
        if (!@preg_match('/^https?:\/\//', $Url))
        {
            return array($Info, NULL);
        }

        # assume that we can't connect to the URL
        $Info["Url"] = $Url;
        $Info["StatusCode"] = 0;

        # make sure there are no spaces in the url and parse it
        $ParsedUrl = @parse_url(str_replace(" ", "%20", $Url));

        if (!$ParsedUrl || !isset($ParsedUrl["host"]))
        {
            return array($Info, NULL);
        }

        $Host = $ParsedUrl["host"];
        $IsSecure = isset($ParsedUrl["scheme"])
            && $ParsedUrl["scheme"] == "https";

        # use the port from the URL, falling back to the scheme's default
        $DefaultPort = $IsSecure ? 443 : 80;
        $Port = isset($ParsedUrl["port"])
            ? intval($ParsedUrl["port"]) : $DefaultPort;
        $PortSuffix = ($Port != $DefaultPort) ? ":".$Port : "";

        # a username and password in the URL are sent as HTTP basic
        # authentication; they must not be part of the host name given to
        # fsockopen, which only understands a host
        $HasCredentials = isset($ParsedUrl["user"])
            && isset($ParsedUrl["pass"]);

        # HTTPS needs to use the ssl:// protocol with fsockopen
        $Target = ($IsSecure ? "ssl://" : "").$Host;

        # can't connect. the timeout only applies to connecting
        if (FALSE === ($Stream = @fsockopen($Target, $Port, $ErrNo,
            $ErrStr, self::CONNECTION_TIMEOUT)))
        {
            return array($Info, NULL);
        }

        # construct the path that's going to be GET'ed. the query string is
        # kept even when the URL has no path component
        $BasePath = (isset($ParsedUrl["path"]) && strlen($ParsedUrl["path"]))
            ? $ParsedUrl["path"] : "/";
        $Path = $BasePath;

        if (isset($ParsedUrl["query"]))
        {
            $Path .= "?".$ParsedUrl["query"];
        }

        # basic headers required for HTTP version 1.1
        $RequestHeaders = "GET ".$Path." HTTP/1.1\r\n";
        $RequestHeaders .= "Host: ".$Host.$PortSuffix."\r\n";

        if ($HasCredentials)
        {
            $RequestHeaders .= "Authorization: Basic ".base64_encode(
                rawurldecode($ParsedUrl["user"]).":"
                .rawurldecode($ParsedUrl["pass"]))."\r\n";
        }

        # set the User-Agent header since some servers erroneously require it
        $RequestHeaders .= "User-Agent: URL-Checker/".$this->Version." "
           ."CWIS/".CWIS_VERSION." PHP/".PHP_VERSION."\r\n";

        # some servers erroneously require the Accept header too
        $RequestHeaders .= "Accept: text/html,application/xhtml+xml,"
            ."application/xml;q=0.9,*/*;q=0.8\r\n";

        # final newline to signal that we're done sending headers
        $RequestHeaders .= "\r\n";

        if (FALSE === fwrite($Stream, $RequestHeaders))
        {
            # couldn't send anything
            fclose($Stream);
            return array($Info, NULL);
        }

        # HTTP status line
        if (!feof($Stream) && FALSE !== ($Line = fgets($Stream)))
        {
            # remove trailing newline from the HTTP status line
            $StatusLine = new UrlChecker_StatusLine(trim($Line));
            $Info["StatusCode"] = $StatusLine->GetStatusCode();
            $Info["ReasonPhrase"] = $StatusLine->GetReasonPhrase();
        }

        else
        {
            # the server responded with nothing so mark the URL as an internal
            # server error (500)
            fclose($Stream);
            $Info["StatusCode"] = 500;
            $Info["ReasonPhrase"] = "Internal Server Error";
            return array($Info, NULL);
        }

        # read the remaining headers. this might cause hangs for line > 8KB
        $Location = NULL;

        while (!feof($Stream) && FALSE !== ($Line = fgets($Stream)))
        {
            # remove the trailing newline
            $Line = trim($Line);

            # stop before reading any content
            if ($Line == "")
            {
                break;
            }

            # a Location header (header names are case-insensitive)
            if (strncasecmp($Line, "Location:", 9) == 0)
            {
                $Location = trim(substr($Line, 9));
            }

            # a Set-Cookie header
            if (strncasecmp($Line, "Set-Cookie:", 11) == 0)
            {
                $Info["UsesCookies"] = TRUE;
            }
        }

        # only the headers are needed, so release the connection now
        fclose($Stream);

        # an empty Location value is not a redirect
        if (!is_null($Location) && !strlen($Location))
        {
            $Location = NULL;
        }

        # given a Location value; need to make sure it's absolute
        if (!is_null($Location) && substr($Location, 0, 4) != "http")
        {
            $Scheme = $IsSecure ? "https" : "http";

            # "//host/path" form: only the scheme is taken from this URL
            if (substr($Location, 0, 2) == "//")
            {
                $Location = $Scheme.":".$Location;
            }

            else
            {
                # relative path: resolve it against the directory portion of
                # the requested path, i.e., everything up to the last slash
                if ($Location[0] != "/")
                {
                    $LastSlash = strrpos($BasePath, "/");
                    $Directory = (FALSE === $LastSlash)
                        ? "/" : substr($BasePath, 0, $LastSlash + 1);
                    $Location = $Directory.$Location;
                }

                # keep the credentials and any non-default port so that the
                # follow-up request reaches the same server the same way
                $Authority = $Host.$PortSuffix;

                if ($HasCredentials)
                {
                    $Authority = $ParsedUrl["user"].":".$ParsedUrl["pass"]
                        ."@".$Authority;
                }

                $Location = $Scheme."://".$Authority.$Location;
            }
        }

        return array($Info, $Location);
    }

    /**
     * Determine if a given URL has valid content, that is, if it doesn't match
     * some rudimentary regular expressions. Checks for "Page Not Found"-type
     * strings.
     * @param $Url URL
     * @return TRUE if the content for the given URL is valid, FALSE otherwise
     */
    private function HasValidContent($Url)
    {
        # Summary: downloads up to roughly 500 KB of the page and looks for
        # "page not found"-type phrases in its title and body text. anything
        # that cannot be fetched or does not look like HTML is treated as
        # valid, since no judgement can be made about it

        # apply the same URL normalization as the HTTP status check: default
        # to HTTP if no protocol is given and only ever fetch HTTP/HTTPS, so
        # that a value without a protocol is never opened as a local file
        if (!preg_match('/^[a-z]+:/', $Url))
        {
            $Url = "http://".$Url;
        }

        if (!preg_match('/^https?:\/\//', $Url))
        {
            return TRUE;
        }

        $Options = array("http" => array(
            # set the default protocol version to 1.1, this may cause issues
            # with PHP < 5.3 if the request isn't HTTP 1.1 compliant
            "protocol_version" => 1.1,

            # timeout
            "timeout" => self::CONNECTION_TIMEOUT,

            # set the User-Agent HTTP header since some servers erroneously
            # require it
            "user_agent" => "URL-Checker/".$this->Version." "
                ."CWIS/".CWIS_VERSION." PHP/".PHP_VERSION,

            # some servers erroneously require the Accept header too. also
            # try to prevent hangs in feof by telling the server to close the
            # connection after retrieving all of the content
            "header" => "Accept: text/html,application/xhtml+xml,"
                ."application/xml;q=0.9,*/*;q=0.8"
                ."\r\nConnection: close",

            # fetch content even when the HTTP status code is not 200
            "ignore_errors" => TRUE));

        $Context = stream_context_create($Options);

        # escape spaces so that we don't mess up the http method header line
        $Url = str_replace(" ", "%20", $Url);

        if (FALSE === ($Handle = @fopen($Url, "r", FALSE, $Context)))
        {
            return TRUE;
        }

        # sleep for 0.15s to allow some of the content to buffer to avoid having
        # the opening HTML tag not show up in the first fread
        usleep(150000);

        # get the first 8KB and do a basic check to see if the file is HTML.
        # since fread might stop before getting 8KB, e.g., if a packet is
        # received or the server is slow, there is a chance that the file is
        # HTML, but its opening tag won't have arrived in the first fread, and
        # therefore won't be checked. this should be OK since it probably means
        # the server is really slow and it shouldn't be checked anyway. the
        # tag is matched without regard to case so that "<HTML>" is recognized
        $Html = @fread($Handle, 8192);

        if (FALSE === $Html || FALSE === stripos($Html, "<html"))
        {
            fclose($Handle);
            return TRUE;
        }

        # this will be used to prevent hangs in feof in case the server doesn't
        # support the Connection header
        $Time = microtime(TRUE);

        # read until the end of the file, the timeout is reached, or if at least
        # 500 KB have been read
        $Failsafe = 1000;
        while (!feof($Handle)
               && (microtime(TRUE) - $Time) < self::CONNECTION_TIMEOUT
               && strlen($Html) < 512000 # strlen can't always be trusted
               && $Failsafe--)
        {
            # the chunk is checked before it is appended, because the result
            # of an append is always a string and would hide a failed read
            $Chunk = @fread($Handle, 8192);

            if (FALSE === $Chunk)
            {
                fclose($Handle);
                return TRUE;
            }

            $Html .= $Chunk;
        }

        fclose($Handle);

        # parse out the title and the body to search within
        $Title = (preg_match('/<title[^>]*>(.*?)<\/title>/is', $Html, $Matches))
            ? trim($Matches[1]) : "";
        $Body = (preg_match('/<body[^>]*>(.*?)<\/body>/is', $Html, $Matches))
            ? trim($Matches[1]) : "";
        $Html = $Title." ".$Body;

        # strip out tags that contain data that is probably not HTML
        $Html = preg_replace('/<(script|noscript|style)[^>]*>.*?<\/\1>/is',
            '', $Html);

        # remove HTML tags so we only have text to search
        $Html = strip_tags($Html);

        # e.g. "page could not be found", "file was not found"
        if (preg_match('/(file|url|page|document)\s+([^\s]+\s+)?(couldn\'t\s+be|'
            .'could\s+not\s+be|cannot\s+be|can\'t\s+be|was\s+not)\s+found/i', $Html))
        {
            return FALSE;
        }

        # e.g. "page not found", "error 404"
        else if (preg_match('/(file|url|page|404|document)\s+not\s+found|'
            .'(http|error)\s+404/i', $Html))
        {
            return FALSE;
        }

        # e.g. "could not find the page"
        else if (preg_match('/(couldn\'t|could\s+not|cannot|can\'t)\s+find\s+'
            .'(the|that)\s+(file|url|page|document)/i', $Html))
        {
            return FALSE;
        }

        return TRUE;
    }

    /**
     * Remove any stale data from deleted resources or changed URLs.
     */
    private function RemoveStaleData()
    {
        # flag that persists across calls so the cleanup queries below are
        # issued at most once per page load
        static $AlreadyCleaned;

        if (isset($AlreadyCleaned))
        {
            return;
        }

        $AlreadyCleaned = TRUE;
        $Database = new Database();

        # queries that purge history rows orphaned by deleted resources (both
        # history tables) and by deleted metadata fields (URL history only),
        # executed in the order listed
        $OrphanQueries = array(
            "
            DELETE URH
            FROM UrlChecker_ResourceHistory URH
            LEFT JOIN Resources R
            ON URH.ResourceId = R.ResourceId
            WHERE R.ResourceId IS NULL",
            "
            DELETE UUH
            FROM UrlChecker_UrlHistory UUH
            LEFT JOIN Resources R
            ON UUH.ResourceId = R.ResourceId
            WHERE R.ResourceId IS NULL",
            "
            DELETE UUH
            FROM UrlChecker_UrlHistory UUH
            LEFT JOIN MetadataFields M
            ON UUH.FieldId = M.FieldId
            WHERE M.FieldId IS NULL");

        foreach ($OrphanQueries as $OrphanQuery)
        {
            $Database->Query($OrphanQuery);
        }

        # for every URL field, drop history whose recorded URL no longer
        # matches the value currently stored for the resource
        foreach ($this->GetUrlFields() as $UrlField)
        {
            # BINARY is applied to the URL comparison in both queries so that
            # the match is case sensitive

            # resource history rows tied to a URL history entry that differs
            # from the resource's current value for this field
            $ResourceHistoryCleanup = "
                DELETE URH
                FROM UrlChecker_ResourceHistory URH
                LEFT JOIN (Resources R, UrlChecker_UrlHistory UUH)
                ON (UUH.ResourceId = R.ResourceId
                  AND UUH.ResourceId = URH.ResourceId)
                WHERE UUH.ResourceId IS NOT NULL AND (
                  BINARY UUH.Url != R.".$UrlField->DBFieldName()."
                  AND UUH.FieldId = ".intval($UrlField->Id())."
                )";
            $Database->Query($ResourceHistoryCleanup);

            # the out-of-date URL history rows themselves
            $UrlHistoryCleanup = "
                DELETE UUH
                FROM UrlChecker_UrlHistory UUH
                LEFT JOIN Resources R
                ON UUH.ResourceId = R.ResourceId
                WHERE
                BINARY UUH.Url != R.".$UrlField->DBFieldName()."
                AND UUH.FieldId = ".intval($UrlField->Id());
            $Database->Query($UrlHistoryCleanup);
        }
    }

    /**
     * Get all the URL metadata fields.
     * @return array of all the metadata fields
     */
    private function GetUrlFields()
    {
        # memoized result, kept in a static so it survives between calls
        static $UrlFields;

        # reuse the earlier lookup when one has already produced a value
        if (isset($UrlFields))
        {
            return $UrlFields;
        }

        # otherwise ask the metadata schema for every field of type URL
        $MetadataSchema = new MetadataSchema();
        $UrlFields = $MetadataSchema->GetFields(MetadataSchema::MDFTYPE_URL);

        return $UrlFields;
    }

    /**
    * Get the date/time that the URL checking method will run.
    * @return Returns the date/time that the URL checking method will run.
    */
    private function GetDateOfNextCheck()
    {
        # textual form of the periodic callback that queues the URL checks
        $TargetCallback = "UrlChecker::QueueResourceCheckTasks";

        $KnownEvents = $GLOBALS["AF"]->GetKnownPeriodicEvents();

        foreach ($KnownEvents as $Event)
        {
            $Handler = $Event["Callback"];

            # only array callbacks whose first element is a PluginCaller can
            # refer to a plugin method
            if (!is_array($Handler) || !($Handler[0] instanceof PluginCaller))
            {
                continue;
            }

            # ignore periodic events that belong to other callbacks
            if ($Handler[0]->GetCallbackAsText() != $TargetCallback)
            {
                continue;
            }

            # found the URL checking event, so report when it runs next
            return date("Y-m-d H:i:s", $Event["NextRun"]);
        }

        # the URL checking event is not among the known periodic events
        return NULL;
    }

    /**
     * Custom callbacks stored on the plugin instance. Judging by their names
     * they are presumably used to test whether a resource is released, to
     * release a resource, and to withhold a resource, respectively -- where
     * they are assigned and invoked is not visible in this part of the file
     * (TODO confirm against the rest of the class).
     * @var $IsResourceReleasedCallback custom callback
     * @var $ReleaseResourceCallback custom callback
     * @var $WithholdResourceCallback custom callback
     */
    private $IsResourceReleasedCallback;
    private $ReleaseResourceCallback;
    private $WithholdResourceCallback;

}
