<?PHP

class UrlChecker extends Plugin
{

    /**
     * Register information about this plugin.
     */
    public function Register()
    {
        # the description is written across several lines for readability;
        # every run of whitespace is collapsed to a single space and the ends
        # are trimmed before it is stored
        $RawDescription = '
            Periodically validates URL field values.
            <i>System Administrator</i> or <i>Collection Administrator</i> privilege
            is required to view the results.';
        $Description = trim(preg_replace('/\s+/', ' ', $RawDescription));

        # what the plugin is
        $this->Name = "URL Checker";
        $this->Version = "2.1.2";
        $this->Description = $Description;

        # who maintains it
        $this->Author = "Internet Scout";
        $this->Url = "http://scout.wisc.edu/cwis/";
        $this->Email = "scout@scout.wisc.edu";

        # what it depends on and whether it starts out enabled
        $this->Requires = array("CWISCore" => "2.0.0");
        $this->EnabledByDefault = FALSE;
    }

    /**
     * Constructor: initialize objects and settings.
     */
    public function __construct()
    {
        # fixed scalar settings. Threshold is the TimesInvalid value a URL
        # must exceed before the queries in this class count it as invalid.
        # NOTE(review): Timeout is presumably in seconds and NumToCheck a
        # per-run batch size -- confirm against the code that uses them
        $Defaults = array(
            "Timeout" => 5.0,
            "Threshold" => 4,
            "NumToCheck" => 10);
        foreach ($Defaults as $Member => $Value)
        {
            $this->$Member = $Value;
        }

        # names recognized as constraint keys
        $this->ValidGetConstraints = array(
            "ResourceId", "FieldId", "TimesInvalid", "Url", "CheckDate",
            "StatusCode", "ReasonPhrase", "FinalUrl", "FinalStatusCode",
            "FinalReasonPhrase", "Hidden");

        # helper objects shared by the methods of this plugin
        $this->DB = new Database();
        $this->Schema = new MetadataSchema();
    }

    /**
     * Need this so that the callback and some other data members aren't
     * serialized. This is necessary to get the next time that
     * self::CheckDelegation() will be called since these other data members
     * affect the signature of this plugin when set.
     * @return the data members to serialize
     */
    public function __sleep()
    {
        # only the members named here are serialized; anything else set on
        # the object later (such as the release callbacks) is left out
        $Objects = array("DB", "Schema");
        $Constants = array("Timeout", "Threshold", "NumToCheck",
            "ValidGetConstraints");

        return array_merge($Objects, $Constants);
    }

    /**
     * Create the database tables necessary to use this plugin.
     * @return NULL if everything went OK or an error message otherwise
     */
    public function Install()
    {
        # the installation steps in the order they must run, keyed by the
        # message to return if the step's query fails: the resource history
        # table, the URL history table, the settings table, and finally the
        # default settings rows
        $Steps = array(
            "Could not create the resource history table" => "
            CREATE TABLE IF NOT EXISTS UrlChecker_ResourceHistory (
                ResourceId     INT,
                CheckDate      TIMESTAMP,
                PRIMARY KEY    (ResourceId)
            );",
            "Could not create the URL history table" => "
            CREATE TABLE IF NOT EXISTS UrlChecker_UrlHistory (
                ResourceId          INT,
                FieldId             INT,
                Hidden              INT,
                CheckDate           TIMESTAMP,
                TimesInvalid        INT,
                Url                 TEXT,
                StatusCode          SMALLINT,
                ReasonPhrase        TEXT,
                IsFinalUrlInvalid   INT,
                FinalUrl            TEXT,
                FinalStatusCode     SMALLINT,
                FinalReasonPhrase   TEXT,
                PRIMARY KEY         (ResourceId, FieldId)
            );",
            "Could not create the settings table" => "
            CREATE TABLE IF NOT EXISTS UrlChecker_Settings (
                Name      TEXT,
                Value    TEXT
            );",
            "Could not initialize the default settings" => "
            INSERT INTO UrlChecker_Settings (Name, Value)
            VALUES
            ('NextNormalUrlCheck', '0'),
            ('NextInvalidUrlCheck', '0'),
            ('EnableDeveloper', '0')");

        # stop at the first failing step and report it
        foreach ($Steps as $ErrorMessage => $Query)
        {
            if ($this->DB->Query($Query) === FALSE)
            {
                return $ErrorMessage;
            }
        }

        # every step succeeded
        return NULL;
    }

    /**
     * Upgrade from a previous version.
     * @param $PreviousVersion previous version
     * @return NULL if everything went OK or an error message otherwise
     */
    public function Upgrade($PreviousVersion)
    {
        # Each section below runs when $PreviousVersion is older than the
        # version the section upgrades to, so an old installation is walked
        # forward through every step in order. The method returns an error
        # message as soon as a query fails and NULL when everything succeeds.

        # upgrade from versions < 2.0.0 to 2.0.0
        if (version_compare($PreviousVersion, "2.0.0", "<"))
        {
            $DB = new Database();

            # make the upgrade process fault tolerant. the keys look like
            # patterns for queries and the values like patterns for error
            # messages that may be ignored for those queries (presumably how
            # Database::SetQueryErrorsToIgnore() interprets them). each query
            # pattern must appear only once: PHP keeps just the last value
            # for a repeated array key, so alternatives for the same query
            # are combined into a single error pattern
            $DB->SetQueryErrorsToIgnore(array(
                '/ALTER\s+TABLE\s+[^\s]+\s+CHANGE\s+.+/i'
                  => '/(Unknown\s+column\s+[^\s]+\s+in\s+[^\s]+'
                    .'|Table\s+[^\s]+\s+doesn\'t\s+exist)/i',
                '/ALTER\s+TABLE\s+[^\s]+\s+ADD\s+.+/i'
                  => '/(Duplicate\s+column\s+name\s+[^\s]+'
                    .'|Table\s+[^\s]+\s+doesn\'t\s+exist)/i',
                '/RENAME\s+TABLE\s+[^\s]+\s+TO\s+[^\s]+/i'
                  => '/Table\s+[^\s]+\s+already\s+exists/i',
                '/CREATE\s+TABLE\s+[^\s]+\s+\([^)]+\)/i'
                  => '/Table\s+[^\s]+\s+already\s+exists/i'));

            # rename columns
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE DateChecked CheckDate TIMESTAMP"))
            { return "Could not update the URL history CheckDate column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE TimesFailed TimesInvalid INT"))
            { return "Could not update the TimesInvalid column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE StatusNo StatusCode INT"))
            { return "Could not update the StatusCode column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE StatusText ReasonPhrase TEXT"))
            { return "Could not update the ReasonPhrase column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE DataOne FinalStatusCode INT DEFAULT -1"))
            { return "Could not update the FinalStatusCode column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                CHANGE DataTwo FinalUrl TEXT"))
            { return "Could not update the FinalUrl column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_History
                CHANGE DateChecked CheckDate TIMESTAMP"))
            { return "Could not update the resource history CheckDate column"; }

            # add columns
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                ADD Hidden INT DEFAULT 0
                AFTER FieldId"))
            { return "Could not add the Hidden column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                ADD IsFinalUrlInvalid INT DEFAULT 0
                AFTER ReasonPhrase"))
            { return "Could not add the IsFinalUrlInvalid column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Failures
                ADD FinalReasonPhrase TEXT"))
            { return "Could not add the FinalReasonPhrase column"; }

            # rename history tables
            if (FALSE === $DB->Query("
                RENAME TABLE UrlChecker_Failures
                TO UrlChecker_UrlHistory"))
            { return "Could not rename the URL history table"; }
            if (FALSE === $DB->Query("
                RENAME TABLE UrlChecker_History
                TO UrlChecker_ResourceHistory"))
            { return "Could not rename the resource history table"; }

            # remove any garbage data
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_UrlHistory
                WHERE ResourceId < 0"))
            { return "Could not remove stale data from the URL history"; }
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_ResourceHistory
                WHERE ResourceId < 0"))
            { return "Could not remove stale data from the resource history"; }

            # add settings table (in its 2.0.0 layout: one column per setting)
            if (FALSE === $DB->Query("
                CREATE TABLE UrlChecker_Settings (
                    NextNormalUrlCheck     INT,
                    NextInvalidUrlCheck    INT
                );"))
            { return "Could not create the settings table"; }

            # repair and optimize tables after the changes. if this isn't done,
            # weird ordering issues might pop up
            if (FALSE === $DB->Query("
                REPAIR TABLE UrlChecker_UrlHistory"))
            { return "Could not repair the URL history table"; }
            if (FALSE === $DB->Query("
                REPAIR TABLE UrlChecker_ResourceHistory"))
            { return "Could not repair the resource history table"; }
            if (FALSE === $DB->Query("
                OPTIMIZE TABLE UrlChecker_UrlHistory"))
            { return "Could not optimize the URL history table"; }
            if (FALSE === $DB->Query("
                OPTIMIZE TABLE UrlChecker_ResourceHistory"))
            { return "Could not optimize the resource history table"; }
        }

        # upgrade from version 2.0.0 to 2.1.0
        if (version_compare($PreviousVersion, "2.1.0", "<"))
        {
            $DB = new Database();

            # make the upgrade process fault tolerant
            $DB->SetQueryErrorsToIgnore(array(
                '/ALTER\s+TABLE\s+[^\s]+\s+ADD\s+.+/i'
                  => '/Duplicate\s+column\s+name\s+[^\s]+/i',
                '/ALTER\s+TABLE\s+[^\s]+\s+DROP\s+.+/i'
                  => '/Can\'t\s+DROP\s+[^\s;]+;\s+check\s+that\s+column\/key\s+exists/i'));

            # get old settings data
            if (FALSE === $DB->Query("
                SELECT * FROM UrlChecker_Settings LIMIT 1"))
            { return "Could not get settings data"; }

            # carry the old values over when they are available. the columns
            # are checked individually because the row will not have them if
            # the table is already in the Name/Value layout
            $NextNormalUrlCheck = 0;
            $NextInvalidUrlCheck = 0;
            if ($DB->NumRowsSelected())
            {
                $Row = $DB->FetchRow();

                if (isset($Row["NextNormalUrlCheck"]))
                {
                    $NextNormalUrlCheck = $Row["NextNormalUrlCheck"];
                }

                if (isset($Row["NextInvalidUrlCheck"]))
                {
                    $NextInvalidUrlCheck = $Row["NextInvalidUrlCheck"];
                }
            }

            # add column
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Settings
                ADD Name Text"))
            { return "Could not add the Name column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Settings
                ADD Value Text"))
            { return "Could not add the Value column"; }

            # remove old columns
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Settings
                DROP NextNormalUrlCheck"))
            { return "Could not remove the NextNormalUrlCheck Column"; }
            if (FALSE === $DB->Query("
                ALTER TABLE UrlChecker_Settings
                DROP NextInvalidUrlCheck"))
            { return "Could not remove the NextInvalidUrlCheck Column"; }

            # remove any garbage data from the tables
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_UrlHistory
                WHERE ResourceId < 0"))
            { return "Could not remove stale data from the URL history"; }
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_ResourceHistory
                WHERE ResourceId < 0"))
            { return "Could not remove stale data from the resource history"; }

            # this makes sure that no garbage rows exist
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_Settings"))
            { return "Could not remove stale data from the settings table"; }

            # add settings back into the table
            if (FALSE === $DB->Query("
                INSERT INTO UrlChecker_Settings (Name, Value)
                VALUES
                ('NextNormalUrlCheck', '".addslashes($NextNormalUrlCheck)."'),
                ('NextInvalidUrlCheck', '".addslashes($NextInvalidUrlCheck)."'),
                ('EnableDeveloper', '0')"))
            { return "Could not initialize the updated settings"; }

            # repair and optimize the settings table after the changes
            if (FALSE === $DB->Query("
                REPAIR TABLE UrlChecker_Settings"))
            { return "Could not repair the settings table"; }
            if (FALSE === $DB->Query("
                OPTIMIZE TABLE UrlChecker_Settings"))
            { return "Could not optimize the settings table"; }
        }

        # upgrade from version 2.1.0 to 2.1.1
        if (version_compare($PreviousVersion, "2.1.1", "<"))
        {
            $DB = new Database();

            # remove old garbage data: history rows whose URL does not start
            # with http:// or https://
            if (FALSE === $DB->Query("
                DELETE FROM UrlChecker_UrlHistory
                WHERE Url NOT REGEXP '^https?:\/\/'"))
            { return "Could not remove stale data from the URL history"; }
        }

        return NULL;
    }

    /**
     * Declare the events this plugin provides to the application framework.
     * @return an array of the events this plugin provides
     */
    public function DeclareEvents()
    {
        # the two event types used by this plugin's events
        $First = ApplicationFramework::EVENTTYPE_FIRST;
        $Default = ApplicationFramework::EVENTTYPE_DEFAULT;

        return array(
            # meant to be hooked by an outside plugin rather than by this one
            "URLCHECKER_SET_RESOURCE_RELEASE_CALLBACKS" => $First,

            # events hooked by this plugin itself
            "URLCHECKER_USING_CUSTOM_RELEASE_CALLBACKS" => $First,
            "URLCHECKER_GET_INFORMATION" => $First,
            "URLCHECKER_CHECK_RESOURCE_URLS" => $Default,
            "URLCHECKER_GET_INVALID_COUNT" => $First,
            "URLCHECKER_GET_INVALID_URLS" => $First,
            "URLCHECKER_GET_INVALID_URL" => $First,
            "URLCHECKER_IS_RESOURCE_RELEASED" => $First,
            "URLCHECKER_RELEASE_RESOURCE" => $Default,
            "URLCHECKER_WITHHOLD_RESOURCE" => $Default,
            "URLCHECKER_HIDE_URL" => $Default,
            "URLCHECKER_REPORT_URL" => $First,
            "URLCHECKER_IS_DEVELOPER_ENABLED" => $First,
            "URLCHECKER_ENABLE_DEVELOPER" => $Default,
            "URLCHECKER_DISABLE_DEVELOPER" => $Default,
            "URLCHECKER_GET_NEXT_RESOURCES_TO_BE_CHECKED" => $First,
            "URLCHECKER_GET_NEXT_URLS_TO_BE_CHECKED" => $First);
    }

    /**
     * Hook the events into the application framework.
     * @return an array of events to be hooked into the application framework
     */
    public function HookEvents()
    {
        # framework events handled by this plugin. (for debugging only,
        # "EVENT_HTML_FILE_LOAD_COMPLETE" can be mapped to "CheckDelegation"
        # as well; it shouldn't be used otherwise)
        $Hooks = array(
            "EVENT_COLLECTION_ADMINISTRATION_MENU" => "DeclareColAdminPages",
            "EVENT_PAGE_LOAD" => "SetResourceReleaseCallbacks",
            "EVENT_PERIODIC" => "CheckDelegation");

        # the plugin's own events, each mapped to the method that handles it
        $Hooks += array(
            "URLCHECKER_USING_CUSTOM_RELEASE_CALLBACKS" => "UsingCustomReleaseCallbacks",
            "URLCHECKER_GET_INFORMATION" => "GetInformation",
            "URLCHECKER_CHECK_RESOURCE_URLS" => "CheckResourceUrls",
            "URLCHECKER_GET_INVALID_COUNT" => "GetInvalidCount",
            "URLCHECKER_GET_INVALID_URLS" => "GetInvalidUrls",
            "URLCHECKER_GET_INVALID_URL" => "GetInvalidUrl",
            "URLCHECKER_IS_RESOURCE_RELEASED" => "IsResourceReleased",
            "URLCHECKER_RELEASE_RESOURCE" => "ReleaseResource",
            "URLCHECKER_WITHHOLD_RESOURCE" => "WithholdResource",
            "URLCHECKER_HIDE_URL" => "HideUrl",
            "URLCHECKER_REPORT_URL" => "ReportUrl",
            "URLCHECKER_IS_DEVELOPER_ENABLED" => "IsDeveloperEnabled",
            "URLCHECKER_ENABLE_DEVELOPER" => "EnableDeveloper",
            "URLCHECKER_DISABLE_DEVELOPER" => "DisableDeveloper",
            "URLCHECKER_GET_NEXT_RESOURCES_TO_BE_CHECKED" => "GetNextResourcesToBeChecked",
            "URLCHECKER_GET_NEXT_URLS_TO_BE_CHECKED" => "GetNextUrlsToBeChecked");

        # the system administration menu is hooked only while the
        # EnableDeveloper setting is on
        $Settings = $this->GetSettings();
        $DeveloperEnabled = $Settings["EnableDeveloper"];

        if ($DeveloperEnabled)
        {
            $Hooks["EVENT_SYSTEM_ADMINISTRATION_MENU"] = "DeclareSysAdminPages";
        }

        return $Hooks;
    }

    /**
     * Add page hooks for the collection administration section.
     * @return map page name to page title for the application framework
     */
    public function DeclareColAdminPages()
    {
        # the results page is always listed
        $Pages = array("Results" => "URL Checker Results");

        # the hidden URLs page is listed only while the EnableDeveloper
        # setting is on
        $Settings = $this->GetSettings();
        if ($Settings["EnableDeveloper"])
        {
            $Pages["HiddenUrls"] = "URL Checker Hidden URLs";
        }

        return $Pages;
    }

    /**
     * Add page hooks for the system administration section. This should only
     * be called if EnableDeveloper is TRUE.
     * @return map page name to page title for the application framework
     */
    public function DeclareSysAdminPages()
    {
        # a single page: page name => page title
        $Pages = array();
        $Pages["Developer"] = "URL Checker Developer Page";

        return $Pages;
    }

    /**
     * Signal to set custom resource releasing/withholding callbacks on page
     * load.
     */
    public function SetResourceReleaseCallbacks()
    {
        global $AF;

        # ask whoever hooked the event for replacement callbacks
        $Result = $AF->SignalEvent("URLCHECKER_SET_RESOURCE_RELEASE_CALLBACKS");

        # the answer is only usable if it is an array of exactly three items
        if (!is_array($Result) || count($Result) != 3)
        {
            return;
        }

        # ...and each of the items, looked up by position, must be callable
        for ($Index = 0; $Index < 3; $Index++)
        {
            if (!is_callable($Result[$Index]))
            {
                return;
            }
        }

        # remember the callbacks, in the order: "is released" test, release,
        # withhold
        $this->IsResourceReleasedCallback = $Result[0];
        $this->ReleaseResourceCallback = $Result[1];
        $this->WithholdResourceCallback = $Result[2];
    }

    /**
     * Return whether or not custom callbacks are set.
     * @return TRUE if custom callbacks are set, FALSE otherwise
     */
    public function UsingCustomReleaseCallbacks()
    {
        # SetResourceReleaseCallbacks() assigns this member only when it
        # accepts a set of custom callbacks, so its presence answers the
        # question
        $CallbacksAreSet = isset($this->IsResourceReleasedCallback);

        return $CallbacksAreSet;
    }

    /**
     * Delegate checks between normal and invalid URLs.
     * @return how long until this should be called again; the computed value
     *      is in minutes (the seconds until the next check divided by 60)
     */
    public function CheckDelegation()
    {
        # without any URL fields there is nothing to check
        $UrlFields = $this->GetUrlFields();
        if (count($UrlFields) == 0)
        {
            return 60;
        }

        # when each kind of check is next due, as stored in the settings
        $Settings = $this->GetSettings();
        $NextNormalUrlCheck = $Settings["NextNormalUrlCheck"];
        $NextInvalidUrlCheck = $Settings["NextInvalidUrlCheck"];

        # the invalid URLs go first only when their check is due strictly
        # earlier; on a tie the normal URLs are checked
        $InvalidUrlsAreDueFirst = $NextInvalidUrlCheck < $NextNormalUrlCheck;

        if ($InvalidUrlsAreDueFirst)
        {
            $NextInvalidUrlCheck = $this->CheckInvalidUrls();
            $this->UpdateSetting("NextInvalidUrlCheck", $NextInvalidUrlCheck);
        }

        else
        {
            $NextNormalUrlCheck = $this->CheckNormalUrls();
            $this->UpdateSetting("NextNormalUrlCheck", $NextNormalUrlCheck);
        }

        # the time left until the earlier of the two checks, converted from
        # seconds to whole minutes
        $SecondsUntilNextCheck = min($NextNormalUrlCheck, $NextInvalidUrlCheck)
            - time();

        return floor($SecondsUntilNextCheck / 60);
    }

    /**
     * Get information/stats of the various data saved.
     * @return array of various information
     */
    public function GetInformation()
    {
        # let RemoveStaleData() run before any statistics are gathered
        $this->RemoveStaleData();

        $Info = array();

        # values kept in the settings table, reported as integers
        $Settings = $this->GetSettings();
        foreach (array("NextNormalUrlCheck", "NextInvalidUrlCheck",
            "EnableDeveloper") as $SettingName)
        {
            $Info[$SettingName] = intval($Settings[$SettingName]);
        }

        # values fixed in the constructor
        $DB = $this->DB;
        $Threshold = $this->Threshold;
        $Info["Timeout"] = $this->Timeout;
        $Info["Threshold"] = $Threshold;
        $Info["NumToCheck"] = $this->NumToCheck;

        # how many resources have a row in the resource history
        $DB->Query("SELECT COUNT(*) as NumChecked FROM UrlChecker_ResourceHistory");
        $Info["NumResourcesChecked"] = intval($DB->FetchField("NumChecked"));

        # how many resources are left: all resources with a non-negative ID
        # (negative IDs are probably bad) minus the ones counted above
        $DB->Query("
            SELECT COUNT(*) as NumResources
            FROM Resources
            WHERE ResourceId >= 0");
        $NumResources = intval($DB->FetchField("NumResources"));
        $Info["NumResourcesUnchecked"] = $NumResources
            - $Info["NumResourcesChecked"];

        # URLs past the threshold, first the ones that are not hidden and
        # then the hidden ones
        $CountKeys = array("NumInvalid" => 0, "NumInvalidAndHidden" => 1);
        foreach ($CountKeys as $InfoKey => $HiddenFlag)
        {
            $DB->Query("
            SELECT COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE Hidden = ".$HiddenFlag."
            AND TimesInvalid > ".$Threshold);
            $Info[$InfoKey] = intval($DB->FetchField("NumInvalid"));
        }

        # URLs that have failed but are not past the threshold
        $DB->Query("
            SELECT COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE TimesInvalid <= ".$Threshold);
        $Info["NumPossiblyInvalid"] = intval($DB->FetchField("NumInvalid"));

        # URLs past the threshold broken down by status code, again first
        # the ones that are not hidden and then the hidden ones
        $BreakdownKeys = array(
            "InvalidUrlsForStatusCodes" => 0,
            "HiddenInvalidUrlsForStatusCodes" => 1);
        foreach ($BreakdownKeys as $InfoKey => $HiddenFlag)
        {
            $Info[$InfoKey] = array();
            $DB->Query("
            SELECT StatusCode, COUNT(*) as NumInvalid
            FROM UrlChecker_UrlHistory
            WHERE Hidden = ".$HiddenFlag."
            AND TimesInvalid > ".$Threshold."
            GROUP BY StatusCode");

            $Row = $DB->FetchRow();
            while ($Row !== FALSE)
            {
                $StatusCode = intval($Row["StatusCode"]);
                $Info[$InfoKey][$StatusCode] = intval($Row["NumInvalid"]);
                $Row = $DB->FetchRow();
            }
        }

        # whether custom release callbacks are in use, as display text
        if ($this->UsingCustomReleaseCallbacks())
        {
            $Info["UsingCustomReleaseCallbacks"] = "Yes";
        }

        else
        {
            $Info["UsingCustomReleaseCallbacks"] = "No";
        }

        # the most recent check date in the resource history
        $DB->Query("
            SELECT *
            FROM UrlChecker_ResourceHistory
            ORDER BY CheckDate DESC LIMIT 1");
        $Info["DateLastResourceChecked"] = $DB->FetchField("CheckDate");

        # look up the periodic event for CheckDelegation(). the signature is
        # built the same way the application framework builds it at the
        # moment, so changes there may break this lookup
        $Caller = new PluginCaller("UrlChecker", "CheckDelegation");
        $Signature = md5(serialize($Caller))."::CallPluginMethod";
        $DB->Query("
            SELECT * FROM PeriodicEvents
            WHERE Signature = '".addslashes($Signature)."'");
        $Info["DateOfNextCheck"] = $DB->FetchField("LastRunAt");

        # version information
        $Info["Version"] = $this->Version;
        $Info["CwisVersion"] = CWIS_VERSION;
        $Info["PhpVersion"] = PHP_VERSION;

        return $Info;
    }

    /**
     * Check all of the URL metadata field values for the given resource.
     * @param $Resource resource
     */
    public function CheckResourceUrls(Resource $Resource)
    {
        # For every URL field of the resource: fetch the HTTP status of the
        # field's value, then either clear the URL's failure record (the URL
        # is fine) or write an updated failure record. Afterwards the
        # resource itself is recorded as checked. Nothing is returned.

        # invalid or bad resource so don't check it
        if ($Resource->Status() != 1 || $Resource->Id() < 0)
        {
            return;
        }

        $ResourceId = intval($Resource->Id());

        foreach ($this->GetUrlFields() as $Field)
        {
            $Url = $Resource->Get($Field);

            # default to HTTP if no protocol is specified
            if (!@preg_match('/^[a-z]+:\/\//', $Url))
            {
                $Url = "http://".$Url;
            }

            # only check HTTP URLs
            if (!@preg_match('/^https?:\/\//', $Url))
            {
                continue;
            }

            $FieldId = intval($Field->Id());

            # get the url's http status
            $Info = $this->GetHttpInformation($Url);

            # remove old failure data, if any, if the url is ok. a status
            # code of -1 is treated like success here, as is a 200 whose
            # content passes HasValidContent()
            if ($Info["StatusCode"] == -1 || ($Info["StatusCode"] == 200
                && $this->HasValidContent($Resource->Get($Field))))
            {
                # delete/insert record (fragmentation? mysql: prob. not, pgsql: no)
                # avoids any sync issues and self-heals if sync issues do arise
                $this->DB->Query("LOCK TABLES UrlChecker_UrlHistory WRITE");
                $this->DB->Query("
                    DELETE FROM UrlChecker_UrlHistory
                    WHERE ResourceId = '".$ResourceId."'
                    AND FieldId = '".$FieldId."'");
                $this->DB->Query("UNLOCK TABLES");
            }

            # record a failure since there was a problem
            else
            {
                $this->DB->Query("LOCK TABLES UrlChecker_UrlHistory WRITE");
                $this->DB->Query("
                    SELECT * FROM UrlChecker_UrlHistory
                    WHERE ResourceId = '".$ResourceId."'
                    AND FieldId = '".$FieldId."'");

                # try to use an existing TimesInvalid value if possible and the
                # HTTP info is not too different. otherwise the count starts
                # over at one and the URL is no longer hidden
                $TimesInvalid = 1;
                $Hidden = 0;
                if (FALSE !== ($Row = $this->DB->FetchRow())
                    && $Row["StatusCode"] == strval($Info["StatusCode"])
                    && $Row["FinalStatusCode"] == strval($Info["FinalStatusCode"]))
                {
                    # the first character of the stored status code tells
                    # whether it was a redirect (3xx). substr() is used
                    # instead of the {0} string offset syntax, which PHP 8
                    # no longer accepts, and it also copes with an empty
                    # value
                    $WasRedirect =
                        (substr(strval($Row["StatusCode"]), 0, 1) === "3");

                    # the URL hasn't changed at all
                    if ($Row["FinalUrl"] == $Info["FinalUrl"])
                    {
                        $TimesInvalid = intval($Row["TimesInvalid"]) + 1;
                        $Hidden = intval($Row["Hidden"]);
                    }

                    # if the server uses cookies, and there is a redirect, the
                    # URL is likely to change every time a check takes place.
                    # thus, only check the host portions if those conditions are
                    # true
                    else if ($WasRedirect && $Info["UsesCookies"])
                    {
                        $DbUrl = @parse_url($Row["FinalUrl"]);
                        $NewUrl = @parse_url($Info["FinalUrl"]);

                        if ($DbUrl && $NewUrl && isset($DbUrl["host"])
                            && isset($NewUrl["host"])
                            && $DbUrl["host"] == $NewUrl["host"])
                        {
                            $TimesInvalid = intval($Row["TimesInvalid"]) + 1;
                            $Hidden = intval($Row["Hidden"]);
                        }
                    }
                }

                # the final URL is flagged when it answers with a 200 but its
                # content does not pass HasValidContent()
                if ($Info["FinalStatusCode"] == 200
                    && !$this->HasValidContent($Info["FinalUrl"]))
                {
                    $IsFinalUrlInvalid = 1;
                }

                else
                {
                    $IsFinalUrlInvalid = 0;
                }

                # add the new row with the updated info. the URL is stored
                # as it appears in the field, without any added protocol
                $this->DB->Query("
                    DELETE FROM UrlChecker_UrlHistory
                    WHERE ResourceId = '".$ResourceId."'
                    AND FieldId = '".$FieldId."'");
                $this->DB->Query("
                    INSERT INTO UrlChecker_UrlHistory SET
                    ResourceId = '".$ResourceId."',
                    FieldId = '".$FieldId."',
                    Hidden = '".$Hidden."',
                    TimesInvalid = ".intval($TimesInvalid).",
                    Url = '".addslashes($Resource->Get($Field))."',
                    StatusCode = '".intval($Info["StatusCode"])."',
                    ReasonPhrase = '".addslashes($Info["ReasonPhrase"])."',
                    IsFinalUrlInvalid = '".$IsFinalUrlInvalid."',
                    FinalUrl = '".addslashes($Info["FinalUrl"])."',
                    FinalStatusCode = '".intval($Info["FinalStatusCode"])."',
                    FinalReasonPhrase = '".addslashes($Info["FinalReasonPhrase"])."'");
                $this->DB->Query("UNLOCK TABLES");
            }
        }

        # record that the resource was checked
        # delete/insert record (fragmentation? mysql: prob. not, pgsql: no)
        # avoids any sync issues and self-heals if sync issues do arise
        $this->DB->Query("LOCK TABLES UrlChecker_ResourceHistory WRITE");
        $this->DB->Query("
            DELETE FROM UrlChecker_ResourceHistory
            WHERE ResourceId = '".$ResourceId."'");
        $this->DB->Query("
            INSERT INTO UrlChecker_ResourceHistory
            SET ResourceId = '".$ResourceId."'");
        $this->DB->Query("UNLOCK TABLES");
    }

    /**
     * Get the number of invalid URLs that match the given constraints
     * @param $Constraints array of constraints
     * @return the number of invalid URLs that match the constraints
     */
    public function GetInvalidCount(array $Constraints = array())
    {
        # Counts the rows of UrlChecker_UrlHistory that are past the invalid
        # threshold and match the constraints. Each UrlChecker_ConstraintList
        # in $Constraints becomes one group whose constraints are joined
        # with AND; the groups are joined with OR. Anything unusable (a
        # non-list entry, an unknown relation, an unusable key) is skipped.

        $this->RemoveStaleData();

        $ValidRelations = array("=", "!=", "<", ">", "<=", ">=");

        # construct the where constraint
        $Where = " WHERE URH.TimesInvalid > ".intval($this->Threshold)." ";
        $OuterGroup = "";
        foreach ($Constraints as $ConstraintList)
        {
            # skip invalid constraints
            if (!($ConstraintList instanceof UrlChecker_ConstraintList))
            {
                continue;
            }

            $InnerGroup = "";
            foreach ($ConstraintList as $Constraint)
            {
                $Key = $Constraint->Key;
                $Value = $Constraint->Value;
                $Relation = $Constraint->Relation;

                # skip if the relation is invalid. the comparison is strict
                # so that a non-string value cannot loosely match an operator
                if (!in_array($Relation, $ValidRelations, TRUE))
                {
                    continue;
                }

                # Resource table constraint
                # NOTE(review): Status is read as a property here while other
                # objects in this class expose Status() as a method -- confirm
                # that MetadataField really has a Status property
                if ($Key instanceof MetadataField
                    && $Key->Status == MetadataSchema::MDFSTAT_OK)
                {
                    $LogicOperator = (strlen($InnerGroup)) ? "AND" : "";
                    $InnerGroup .= " ".$LogicOperator." R.".$Key->DBFieldName();
                    $InnerGroup .= " ".$Relation." '".addslashes($Value)."'";
                }

                # UrlChecker_UrlHistory table constraint. the key is placed in
                # the query as a column name, which cannot be escaped like a
                # value, so only plain identifiers are accepted
                else if (is_string($Key)
                    && preg_match('/^[A-Za-z_][A-Za-z0-9_]*\z/', $Key))
                {
                    $LogicOperator = (strlen($InnerGroup)) ? "AND" : "";
                    $InnerGroup .= " ".$LogicOperator." URH.".$Key;
                    $InnerGroup .= " ".$Relation." '".addslashes($Value)."'";
                }

                # otherwise ignore the invalid key value
            }

            # only lists that produced at least one condition form a group
            if (strlen($InnerGroup))
            {
                $OuterGroup .= (strlen($OuterGroup)) ? " OR " : "";
                $OuterGroup .= " ( " . $InnerGroup . " ) ";
            }
        }

        if (strlen($OuterGroup))
        {
            $Where .= " AND " . $OuterGroup;
        }

        # get the url data
        $this->DB->Query("
            SELECT COUNT(*) AS NumInvalid
            FROM UrlChecker_UrlHistory URH
            LEFT JOIN Resources R
            ON URH.ResourceId = R.ResourceId
            ".$Where);

        return intval($this->DB->FetchField("NumInvalid"));
    }

    /**
     * Get the invalid URLs that match the given constraints.
     * @param $Constraints array of constraints
     * @param $OrderBy field by which the URLs should be sorted
     * @param $OrderDirection direction in which the URLs should be sorted
     * @param $Limit how many URLs should be returned
     * @param $Offset where the result set should begin
     * @param $Options various other options
     * @return an array of UrlChecker_InvalidUrl objects
     */
    public function GetInvalidUrls(array $Constraints=array(), $OrderBy="StatusCode",
        $OrderDirection="DESC", $Limit=15, $Offset=0, array $Options=array())
    {
        # NOTE: $Options is accepted for interface compatibility but is not
        # read anywhere in this method

        # drop history rows for deleted resources/fields and changed URLs
        # first so that they are not returned
        $this->RemoveStaleData();

        # the only comparison operators that may be placed in the SQL
        $ValidRelations = array("=", "!=", "<", ">", "<=", ">=");

        # conditions within one constraint list are AND-ed together and the
        # resulting groups are OR-ed together
        $Groups = array();
        foreach ($Constraints as $ConstraintList)
        {
            # skip invalid constraints
            if (!($ConstraintList instanceof UrlChecker_ConstraintList))
            {
                continue;
            }

            $Conditions = array();
            foreach ($ConstraintList as $Constraint)
            {
                $Key = $Constraint->Key;
                $Value = $Constraint->Value;
                $Relation = $Constraint->Relation;

                # skip if the relation is invalid (strict, so that values
                # such as 0 can't loosely match one of the operator strings)
                if (!in_array($Relation, $ValidRelations, TRUE))
                {
                    continue;
                }

                # Resource table constraint. Status() is a method, as it is
                # used for $OrderBy below; reading it as a property made
                # this branch unreachable
                if ($Key instanceof MetadataField
                    && $Key->Status() == MetadataSchema::MDFSTAT_OK)
                {
                    $Column = "R.".$Key->DBFieldName();
                }

                # UrlChecker_UrlHistory table constraint. the key is placed
                # in the query as a column name, so only accept a plain
                # identifier
                else if (is_string($Key)
                    && preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $Key))
                {
                    $Column = "URH.".$Key;
                }

                # otherwise ignore the invalid key value
                else
                {
                    continue;
                }

                $Conditions[] =
                    $Column." ".$Relation." '".addslashes($Value)."'";
            }

            if (count($Conditions))
            {
                $Groups[] = " ( ".implode(" AND ", $Conditions)." ) ";
            }
        }

        # only URLs that have been invalid more often than the threshold
        $Where = " WHERE URH.TimesInvalid > ".$this->Threshold." ";

        # the OR-ed groups are wrapped in parentheses because AND binds more
        # tightly than OR in SQL: without them every group after the first
        # would bypass the threshold condition
        if (count($Groups))
        {
            $Where .= " AND ( ".implode(" OR ", $Groups)." ) ";
        }

        # valid UrlChecker_UrlHistory table order (strict string match so
        # that non-string values can't loosely match a column name)
        if (is_string($OrderBy)
            && in_array($OrderBy, $this->ValidGetConstraints, TRUE))
        {
            $OrderBy = "URH.".$OrderBy;
        }

        # valid Resource table order
        else if ($OrderBy instanceof MetadataField
                && $OrderBy->Status() == MetadataSchema::MDFSTAT_OK)
        {
            $OrderBy = "R.".$OrderBy->DBFieldName();
        }

        # otherwise default to the StatusCode field of the
        # UrlChecker_UrlHistory table
        else
        {
            $OrderBy = "URH.StatusCode";
        }

        # make sure order direction is valid
        if ($OrderDirection !== "ASC" && $OrderDirection !== "DESC")
        {
            $OrderDirection = "DESC";
        }

        # get the url data
        $this->DB->Query("
            SELECT * FROM UrlChecker_UrlHistory URH
            LEFT JOIN Resources R
            ON URH.ResourceId = R.ResourceId
            ".$Where."
            ORDER BY ".$OrderBy." ".$OrderDirection."
            LIMIT ".intval($Limit)."
            OFFSET ".intval($Offset));

        # create url objects, one per selected row
        $Urls = array();
        while (FALSE !== ($Row = $this->DB->FetchRow()))
        {
            $Urls[] = new UrlChecker_InvalidUrl($Row);
        }

        return $Urls;
    }

    /**
     * Get the invalid URL that is associated with the given resource and
     * metadata field, or NULL if one doesn't exist.
     * @param $Resource resource
     * @param $Field metadata field
     * @return an UrlChecker_InvalidUrl object or NULL
     */
    public function GetInvalidUrl(Resource $Resource, MetadataField $Field)
    {
        # both IDs are cast to integers before being placed in the query, as
        # HideUrl() does for the same two columns
        $this->DB->Query("
            SELECT *
            FROM UrlChecker_UrlHistory
            WHERE ResourceId = ".intval($Resource->Id())."
            AND FieldId = ".intval($Field->Id()));

        # no history row for this resource/field pair
        if (!$this->DB->NumRowsSelected())
        {
            return NULL;
        }

        return new UrlChecker_InvalidUrl($this->DB->FetchRow());
    }

    /**
     * Determine whether or not the resource is "released". By default, this
     * means whether or not the Release Flag value is set to TRUE or not, but
     * may be different if a custom callback has been set.
     * @param $Resource resource
     * @return TRUE if the resource is released, FALSE otherwise
     */
    public function IsResourceReleased(Resource $Resource)
    {
        # a custom callback, when one is set, makes the decision by itself
        if (isset($this->IsResourceReleasedCallback))
        {
            return call_user_func($this->IsResourceReleasedCallback, $Resource);
        }

        $ReleaseFlag = $this->Schema->GetFieldByName("Release Flag");

        # the flag can only be consulted when the field exists, has an OK
        # status, and is enabled
        $FlagIsUsable = NULL !== $ReleaseFlag
            && $ReleaseFlag->Status() == MetadataSchema::MDFSTAT_OK
            && $ReleaseFlag->Enabled();

        # without a usable flag the resource is assumed to be released
        return $FlagIsUsable ? (bool) $Resource->Get("Release Flag") : TRUE;
    }

    /**
     * Release the given resource. By default, this means that the Release Flag
     * value for the resource will be set to TRUE, but may be different if a
     * custom callback has been set.
     * @param $Resource resource
     */
    public function ReleaseResource(Resource $Resource)
    {
        # a custom callback, when one is set, replaces the default behavior
        if (isset($this->ReleaseResourceCallback))
        {
            call_user_func($this->ReleaseResourceCallback, $Resource);
            return;
        }

        $ReleaseFlag = $this->Schema->GetFieldByName("Release Flag");

        # only set the flag when the field exists, has an OK status, and is
        # enabled; otherwise do nothing
        if (NULL !== $ReleaseFlag
            && $ReleaseFlag->Status() == MetadataSchema::MDFSTAT_OK
            && $ReleaseFlag->Enabled())
        {
            $Resource->Set("Release Flag", TRUE);
        }
    }

    /**
     * Withhold the given resource. By default, this means that the Release Flag
     * value for the resource will be set to NULL, but may be different if a
     * custom callback has been set.
     * @param $Resource resource
     */
    public function WithholdResource(Resource $Resource)
    {
        # a custom callback, when one is set, replaces the default behavior
        if (isset($this->WithholdResourceCallback))
        {
            call_user_func($this->WithholdResourceCallback, $Resource);
            return;
        }

        $ReleaseFlag = $this->Schema->GetFieldByName("Release Flag");

        # only clear the flag when the field exists, has an OK status, and is
        # enabled; otherwise do nothing
        if (NULL !== $ReleaseFlag
            && $ReleaseFlag->Status() == MetadataSchema::MDFSTAT_OK
            && $ReleaseFlag->Enabled())
        {
            $Resource->Set("Release Flag", NULL);
        }
    }

    /**
     * Hide the URL associated with the given resource and metadata field so
     * that it doesn't show up on the results page.
     * @param $Resource resource
     * @param $Field metadata field
     */
    public function HideUrl(Resource $Resource, MetadataField $Field)
    {
        # integer forms of the two IDs that identify the history row
        $ResourceId = intval($Resource->Id());
        $FieldId = intval($Field->Id());

        # flag the matching row as hidden
        $this->DB->Query("
            UPDATE UrlChecker_UrlHistory
            SET Hidden = 1
            WHERE ResourceId = '".$ResourceId."'
            AND FieldId = '".$FieldId."'");
    }

    /**
     * Report an URL to the Internet Scout Project. Sends the following
     * information: resource ID, resource title, metadata field id, metadata
     * field name, URL, invalid URL information (if available), portal name, URL
     * checker version, CWIS version, PHP
     * version, and any provided notes.
     * @param $Resource resource
     * @param $Field metadata field
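     * @param $Note optional note to include in the report (HTML tags are removed)
     * @return the result of mail(), or NULL if the resource or field is invalid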
     */
    public function ReportUrl(Resource $Resource, MetadataField $Field, $Note=NULL)
    {
        # nothing is sent (and NULL is returned) unless the resource status
        # is 1 and the field is an OK URL field
        if ($Resource->Status() != 1
          || $Field->Status() != MetadataSchema::MDFSTAT_OK
          || $Field->Type() != MetadataSchema::MDFTYPE_URL)
        {
            return;
        }

        # the address is stored with "[]" in place of "@"
        $Recipient = str_replace("[]", "@", $this->ReportAddress);

        $Header = 'MIME-Version: 1.0' . "\r\n";
        $Header .= 'Content-type: text/html; charset=UTF-8' . "\r\n";
        $Header .= 'From: URL Feedback Reporter <do-not-reply@hidden>' . "\r\n";

        $To = 'CWIS URL Checker - URL Feedback Report <'.$Recipient.'>';

        $Subject = 'CWIS URL Checker Report';

        # the message is declared as UTF-8 above, so entities are encoded
        # with that charset explicitly instead of relying on the PHP default
        $Message = "
<html>
<head></head>
<body>
  <h2>Information</h2>
  <table>
    <tr>
      <th align=\"left\">Resource ID</th>
      <td>".$Resource->Id()."</td>
    </tr>
    <tr>
      <th align=\"left\">Resource Title</th>
      <td>".strip_tags($Resource->Get($this->Schema->GetFieldByMappedName("Title")))."</td>
    </tr>
    <tr>
      <th align=\"left\">Metadata Field ID</th>
      <td>".$Field->Id()."</td>
    </tr>
    <tr>
      <th align=\"left\">Metadata Field Name</th>
      <td>".strip_tags($Field->Name())."</td>
    </tr>
    <tr>
      <th align=\"left\">URL</th>
      <td>".htmlentities($Resource->Get($Field), ENT_QUOTES, "UTF-8")."</td>
    </tr>
";

        $InvalidUrl = $this->GetInvalidUrl($Resource, $Field);

        # add invalid URL info if available
        if (!is_null($InvalidUrl))
        {
            $Message .= "
    <tr>
      <th align=\"left\">Check Date</th>
      <td>".$InvalidUrl->CheckDate."</td>
    </tr>
    <tr>
      <th align=\"left\">Number of Times Invalid</th>
      <td>".$InvalidUrl->TimesInvalid."</td>
    </tr>
    <tr>
      <th align=\"left\">Status Code</th>
      <td>".$InvalidUrl->StatusCode."</td>
    </tr>
    <tr>
      <th align=\"left\">Reason Phrase</th>
      <td>".strip_tags($InvalidUrl->ReasonPhrase)."</td>
    </tr>
    <tr>
      <th align=\"left\">Final URL</th>
      <td>".htmlentities($InvalidUrl->FinalUrl, ENT_QUOTES, "UTF-8")."</td>
    </tr>
    <tr>
      <th align=\"left\">Final Status Code</th>
      <td>".$InvalidUrl->FinalStatusCode."</td>
    </tr>
    <tr>
      <th align=\"left\">Final Reason Phrase</th>
      <td>".strip_tags($InvalidUrl->FinalReasonPhrase)."</td>
    </tr>
    <tr>
      <th align=\"left\">Hidden?</th>
      <td>".(($InvalidUrl->Hidden) ? "Yes" : "No")."</td>
    </tr>
    <tr>
      <th align=\"left\">Is Final URL Invalid?</th>
      <td>".(($InvalidUrl->IsFinalUrlInvalid) ? "Yes" : "No")."</td>
    </tr>
";
        }

        # version information and the note; the document is now closed with
        # </html> (it previously ended with a second opening <html> tag)
        $Message .= "
    <tr>
      <th align=\"left\">URL Checker Version</th>
      <td>".$this->Version."</td>
    </tr>
    <tr>
      <th align=\"left\">CWIS Version</th>
      <td>".CWIS_VERSION."</td>
    </tr>
    <tr>
      <th align=\"left\">PHP Version</th>
      <td>".PHP_VERSION."</td>
    </tr>
  </table>
  <h2>Note</h2>
  <p>".((!is_null($Note)) ? htmlentities(strip_tags($Note), ENT_QUOTES, "UTF-8") : "N/A")."</p>
</body>
</html>";

        # try to mail the message; mail() warnings are deliberately
        # suppressed and its boolean result is handed back to the caller
        return @mail($To, $Subject, $Message, $Header);
    }

    /**
     * Return whether or not the developer pages and settings are enabled.
     * @return TRUE if developer pages and settings are enabled, FALSE otherwise
     */
    public function IsDeveloperEnabled()
    {
        $Settings = $this->GetSettings();

        # !empty() gives the same result as a boolean cast but doesn't raise
        # an undefined index error when the settings table has rows but none
        # of them is named "EnableDeveloper"
        return !empty($Settings["EnableDeveloper"]);
    }

    /**
     * Enable developer pages and settings.
     */
    public function EnableDeveloper()
    {
        # the setting is stored as 1 when the developer features are on
        $Enabled = 1;
        $this->UpdateSetting("EnableDeveloper", $Enabled);
    }

    /**
     * Disable developer pages and settings.
     */
    public function DisableDeveloper()
    {
        # the setting is stored as 0 when the developer features are off
        $Enabled = 0;
        $this->UpdateSetting("EnableDeveloper", $Enabled);
    }

    /**
     * Get a subset of the resources that haven't been checked or haven't been
     * checked in at least a day.
     * @return an array of UrlChecker_Resource objects
     */
    public function GetNextResourcesToBeChecked()
    {
        $this->RemoveStaleData();

        $Resources = array();

        # upper bound on the number of resources returned; cast once so that
        # only an integer is ever placed in the LIMIT clauses
        $MaxToCheck = intval($this->NumToCheck);

        # never been checked, i.e., no row in the resource history table.
        # only the resource ID is read from each row, so only that column is
        # selected instead of every column of the Resources table
        $this->DB->Query("
            SELECT R.ResourceId
            FROM Resources R
            LEFT JOIN UrlChecker_ResourceHistory URH
            ON R.ResourceId = URH.ResourceId
            WHERE URH.ResourceId IS NULL
            AND R.ResourceId >= 0
            LIMIT ".$MaxToCheck);

        $NumNew = $this->DB->NumRowsSelected();

        # there is no check date for these, so "N/A" is passed instead
        while (FALSE !== ($Row = $this->DB->FetchRow()))
        {
            $Resources[] = new UrlChecker_Resource($Row["ResourceId"], "N/A");
        }

        # still some space left for more resources to check
        if ($NumNew < $MaxToCheck)
        {
            # resources that haven't been checked in at least one day, sorted
            # by the last time they were checked (oldest first)
            $Yesterday = date("Y-m-d H:i:s", strtotime("-1 day"));
            $this->DB->Query("
                SELECT ResourceId, CheckDate
                FROM UrlChecker_ResourceHistory
                WHERE CheckDate <= '".$Yesterday."'
                ORDER BY CheckDate ASC
                LIMIT ".($MaxToCheck - $NumNew));

            while (FALSE !== ($Row = $this->DB->FetchRow()))
            {
                $Resources[] = new UrlChecker_Resource($Row["ResourceId"],
                    $Row["CheckDate"]);
            }
        }

        return $Resources;
    }

    /**
     * Get a subset of the invalid URLS that haven't been checked in over a
     * day and are below the threshold, or haven't been checked in over a week
     * and are over the threshold.
     * @return an array of UrlChecker_InvalidUrl objects
     */
    public function GetNextUrlsToBeChecked()
    {
        $this->RemoveStaleData();

        # cut-off dates and the threshold used by the two halves of the query
        $Yesterday = date("Y-m-d H:i:s", strtotime("-1 day"));
        $WeekAgo = date("Y-m-d H:i:s", strtotime("-1 week"));
        $Threshold = intval($this->Threshold);

        # (at or below the threshold and last checked a day or more ago) OR
        # (above the threshold and last checked a week or more ago),
        # oldest check first
        $Query = "
            SELECT *
            FROM UrlChecker_UrlHistory
            WHERE
              (TimesInvalid <= ".$Threshold."
               AND CheckDate <= '".$Yesterday."')
              OR
              (TimesInvalid > ".$Threshold."
               AND CheckDate <= '".$WeekAgo."')
            ORDER BY CheckDate ASC
            LIMIT ".$this->NumToCheck;
        $this->DB->Query($Query);

        # wrap every selected row in an invalid URL object
        $Urls = array();
        $Row = $this->DB->FetchRow();
        while (FALSE !== $Row)
        {
            $Urls[] = new UrlChecker_InvalidUrl($Row);
            $Row = $this->DB->FetchRow();
        }

        return $Urls;
    }

    /**
     * Check a subset of the resources that haven't been checked or haven't been
     * checked in at least a day.
     */
    private function CheckNormalUrls()
    {
        global $AF;

        # description attached to every task queued below
        $Description = "Validate the URLs associated with the given resource"
            ." (normal)";

        $Resources = $this->GetNextResourcesToBeChecked();

        foreach ($Resources as $Resource)
        {
            # only queue a task for resources with a status of 1 and a
            # non-negative ID; anything else is skipped
            if ($Resource->Status() == 1 && $Resource->Id() >= 0)
            {
                $AF->QueueUniqueTask(array($this, "CheckResourceUrls"),
                    array($Resource), ApplicationFramework::PRIORITY_BACKGROUND,
                    $Description);
            }
        }

        # a full batch means there are probably more resources waiting, so
        # come back in 2 minutes; otherwise wait 60 minutes
        $Delay = (count($Resources) == $this->NumToCheck) ? 120 : 3600;

        return time() + $Delay;
    }

    /**
     * Check a subset of the invalid URLS that haven't been checked in over a
     * day and are below the threshold, or haven't been checked in over a week
     * and are over the threshold.
     */
    private function CheckInvalidUrls()
    {
        global $AF;

        # description attached to every task queued below
        $Description = "Validate the URLs associated with the given resource"
            ." (invalid)";

        $Urls = $this->GetNextUrlsToBeChecked();

        foreach ($Urls as $Url)
        {
            $Resource = new Resource($Url->ResourceId);

            # only queue a task for resources with a status of 1 and a
            # non-negative ID; anything else is skipped
            if ($Resource->Status() == 1 && $Resource->Id() >= 0)
            {
                $AF->QueueUniqueTask(array($this, "CheckResourceUrls"),
                    array($Resource), ApplicationFramework::PRIORITY_BACKGROUND,
                    $Description);
            }
        }

        # a full batch means there are probably more URLs waiting, so come
        # back in 5 minutes; otherwise wait 60 minutes
        $Delay = (count($Urls) == $this->NumToCheck) ? 300 : 3600;

        return time() + $Delay;
    }

    /**
     * Get an URL's status info. If there is no redirection, this will be the
     * status line for the URL. If there are redirects, this will be the status
     * line for the URL and the status line for the last URL after redirection.
     * @param $Url URL
     * @return an array with the same fields as an UrlChecker_HttpInfo object
     */
    public function GetHttpInformation($Url)
    {
        # information for the URL itself, plus where it redirects to (if
        # anywhere)
        list($Info, $Redirect) = $this->GetHttpInformationAux($Url);

        # no redirect, so the Final* fields keep the values that the
        # auxiliary function gave them
        if (is_null($Redirect))
        {
            return $Info;
        }

        # follow at most this many redirects so that a redirect loop can't
        # keep this going forever
        $RedirectsLeft = 5;

        while (!is_null($Redirect) && $RedirectsLeft-- > 0)
        {
            $FinalUrl = $Redirect;
            list($FinalInfo, $Redirect) =
                $this->GetHttpInformationAux($FinalUrl);

            # cookies set anywhere along the redirect chain count (this
            # previously OR-ed the value with itself, so cookies set by a
            # redirect target were never recorded)
            $Info["UsesCookies"] =
                $Info["UsesCookies"] || $FinalInfo["UsesCookies"];
        }

        # the final fields describe the last URL that was requested (these
        # were previously copied from the first response, not the last one)
        $Info["FinalUrl"] = $FinalUrl;
        $Info["FinalStatusCode"] = $FinalInfo["StatusCode"];
        $Info["FinalReasonPhrase"] = $FinalInfo["ReasonPhrase"];

        return $Info;
    }

    /**
     * Auxiliary function for self::GetHttpInformation(). Gets the HTTP
     * information on one URL. Note that this only supports HTTP and HTTPS.
     * @param $Url URL
     * @return an array with the same fields as an UrlChecker_HttpInfo object
     */
    private function GetHttpInformationAux($Url)
    {
        # returns array($Info, $Redirect) where $Redirect is the absolute
        # URL from the Location header or NULL if there wasn't one.
        # status codes set here: -1 = blank URL (not checked), 0 = couldn't
        # parse/connect/send, 500 = connected but nothing came back

        # this should be an UrlChecker_HttpInfo object but some versions of PHP
        # segfault when using them, for an unknown reason
        $Info = array("Url" => "", "StatusCode" => -1, "ReasonPhrase" => "",
            "FinalUrl" => "", "FinalStatusCode" => -1, "FinalReasonPhrase" => "",
            "UsesCookies" => FALSE);

        # blank url (code defaults to -1, i.e., not checked)
        if (!strlen(trim($Url)))
        {
            return array($Info, NULL);
        }

        # assume that we can't connect to the URL
        $Info["Url"] = $Url;
        $Info["StatusCode"] = 0;

        # make sure there are no spaces in the url and parse it
        $ParsedUrl = @parse_url(str_replace(" ", "%20", $Url));

        if (!$ParsedUrl || !isset($ParsedUrl["host"]))
        {
            return array($Info, NULL);
        }

        $Host = $ParsedUrl["host"];
        $IsHttps = isset($ParsedUrl["scheme"])
            && strtolower($ParsedUrl["scheme"]) == "https";

        # use the port from the URL if there is one, otherwise the default
        # port for the scheme
        $HasPort = isset($ParsedUrl["port"]);
        $Port = $HasPort ? intval($ParsedUrl["port"]) : ($IsHttps ? 443 : 80);

        # "host[:port]" as used in the Host header and in rebuilt URLs
        $HostAndPort = $HasPort ? $Host.":".$Port : $Host;

        # username and password specified in the URL. these can't be part of
        # the name given to fsockopen (it isn't a host name any more), so
        # they are sent in an Authorization header instead
        $UserInfo = NULL;
        if (isset($ParsedUrl["user"]) && isset($ParsedUrl["pass"]))
        {
            $UserInfo = $ParsedUrl["user"].":".$ParsedUrl["pass"];
        }

        # HTTPS needs to use the ssl:// protocol with fsockopen
        $Target = ($IsHttps ? "ssl://" : "").$Host;

        # can't connect. the timeout only applies to connecting
        if (FALSE === ($Stream = @fsockopen($Target, $Port, $ErrNo,
            $ErrStr, $this->Timeout)))
        {
            return array($Info, NULL);
        }

        # apply the timeout to reads as well so that a server that accepts
        # the connection but never answers can't hang the check
        if ($this->Timeout > 0)
        {
            $Seconds = (int) floor($this->Timeout);
            $Microseconds = (int) (($this->Timeout - $Seconds) * 1000000);
            stream_set_timeout($Stream, $Seconds, $Microseconds);
        }

        # construct the path that's going to be GET'ed. the query string is
        # kept even when the URL has no path
        $Path = isset($ParsedUrl["path"]) ? $ParsedUrl["path"] : "/";

        if (isset($ParsedUrl["query"]))
        {
            $Path .= "?".$ParsedUrl["query"];
        }

        # basic headers required for HTTP version 1.1
        $RequestHeaders = "GET ".$Path." HTTP/1.1\r\n";
        $RequestHeaders .= "Host: ".$HostAndPort."\r\n";

        # set the User-Agent header since some servers erroneously require it
        $RequestHeaders .= "User-Agent: URL-Checker/".$this->Version." "
           ."CWIS/".CWIS_VERSION." PHP/".PHP_VERSION."\r\n";

        # some servers erroneously require the Accept header too
        $RequestHeaders .= "Accept: text/html,application/xhtml+xml,"
            ."application/xml;q=0.9,*/*;q=0.8\r\n";

        # credentials from the URL, if any (decoded since parse_url returns
        # them as they appear in the URL)
        if (!is_null($UserInfo))
        {
            $RequestHeaders .= "Authorization: Basic "
                .base64_encode(rawurldecode($UserInfo))."\r\n";
        }

        # only the response headers are read, so the connection doesn't need
        # to be kept open
        $RequestHeaders .= "Connection: close\r\n";

        # final newline to signal that we're done sending headers
        $RequestHeaders .= "\r\n";

        if (FALSE === fwrite($Stream, $RequestHeaders))
        {
            # couldn't send anything
            fclose($Stream);
            return array($Info, NULL);
        }

        # HTTP status line. the result of fgets has to be checked before it
        # is trimmed because trim() turns FALSE into an empty string
        $StatusText = fgets($Stream);

        if (FALSE === $StatusText)
        {
            # the server responded with nothing so mark the URL as an internal
            # server error (500)
            fclose($Stream);
            $Info["StatusCode"] = 500;
            $Info["ReasonPhrase"] = "Internal Server Error";
            return array($Info, NULL);
        }

        $StatusLine = new UrlChecker_StatusLine(trim($StatusText));
        $Info["StatusCode"] = $StatusLine->GetStatusCode();
        $Info["ReasonPhrase"] = $StatusLine->GetReasonPhrase();

        # read the remaining headers. header names are case-insensitive, so
        # they are compared that way
        $Location = NULL;

        while (!feof($Stream) && FALSE !== ($RawLine = fgets($Stream)))
        {
            $Line = trim($RawLine);

            # stop before reading any content
            if ($Line === "")
            {
                break;
            }

            # a Location header
            if (0 === strncasecmp($Line, "Location:", 9))
            {
                $Location = trim(substr($Line, 9));
            }

            # a Set-Cookie header
            else if (0 === strncasecmp($Line, "Set-Cookie:", 11))
            {
                $Info["UsesCookies"] = TRUE;
            }
        }

        # done with the connection
        fclose($Stream);

        # no redirect, or a Location header without a value
        if (is_null($Location) || !strlen($Location))
        {
            return array($Info, NULL);
        }

        # given a Location value; need to make sure it's absolute
        if (!preg_match('/^[a-z][a-z0-9+.\-]*:\/\//i', $Location))
        {
            $Scheme = $IsHttps ? "https" : "http";

            # same scheme, different host ("//host/path")
            if (substr($Location, 0, 2) == "//")
            {
                $Location = $Scheme.":".$Location;
            }

            else
            {
                # relative path, so resolve it against the directory of the
                # requested path (everything up to and including the last
                # slash)
                if ($Location[0] != "/")
                {
                    $BasePath = isset($ParsedUrl["path"])
                        ? $ParsedUrl["path"] : "/";
                    $LastSlash = strrpos($BasePath, "/");
                    $Directory = (FALSE === $LastSlash)
                        ? "/" : substr($BasePath, 0, $LastSlash + 1);
                    $Location = $Directory.$Location;
                }

                # same scheme, credentials, host, and port as the request
                $Authority = (is_null($UserInfo) ? "" : $UserInfo."@")
                    .$HostAndPort;
                $Location = $Scheme."://".$Authority.$Location;
            }
        }

        return array($Info, $Location);
    }

    /**
     * Determine if a given URL has valid content, that is, if it doesn't match
     * some rudimentary regular expressions. Checks for "Page Not Found"-type
     * strings.
     * @param $Url URL
     * @return TRUE if the content for the given URL is valid, FALSE otherwise
     */
    private function HasValidContent($Url)
    {
        # TRUE is returned whenever the content can't be fetched or isn't
        # recognized as HTML; FALSE is only returned when the text of the
        # page matches one of the "not found" patterns at the bottom

        $Options = array("http" => array());

        # set the default protocol version to 1.1, this may cause issues with
        # PHP < 5.3 if the request isn't HTTP 1.1 compliant
        $Options["http"]["protocol_version"] = 1.1;

        # timeout
        $Options["http"]["timeout"] = $this->Timeout;

        # set the User-Agent HTTP header since some servers erroneously require
        # it
        $Options["http"]["user_agent"] = "URL-Checker/".$this->Version." "
           ."CWIS/".CWIS_VERSION." PHP/".PHP_VERSION;

        # some servers erroneously require the Accept header too
        $Options["http"]["header"] = "Accept: text/html,application/xhtml+xml,"
            ."application/xml;q=0.9,*/*;q=0.8";

        # try to prevent hangs in feof by telling the server to close the
        # connection after retrieving all of the content
        $Options["http"]["header"] .= "\r\nConnection: close";

        # fetch content even when the HTTP status code is not 200
        $Options["http"]["ignore_errors"] = TRUE;

        $Context = stream_context_create($Options);

        # escape spaces so that we don't mess up the http method header line
        $Url = str_replace(" ", "%20", $Url);

        if (FALSE === ($Handle = @fopen($Url, "r", FALSE, $Context)))
        {
            return TRUE;
        }

        # sleep for 0.15s to allow some of the content to buffer to avoid having
        # the opening HTML tag not show up in the first fread
        usleep(150000);

        # get the first 8KB and do a basic check to see if the file is HTML.
        # since fread might stop before getting 8KB, e.g., if a packet is
        # received or the server is slow, there is a chance that the file is
        # HTML, but its opening tag won't have arrived in the first fread, and
        # therefore won't be checked. this should be OK since it probably means
        # the server is really slow and it shouldn't be checked anyway.
        # the tag is matched without regard to case so that "<HTML>" counts
        $Html = @fread($Handle, 8192);

        if (FALSE === $Html || FALSE === stripos($Html, "<html"))
        {
            fclose($Handle);
            return TRUE;
        }

        # this will be used to prevent hangs in feof in case the server doesn't
        # support the Connection header
        $Time = microtime(TRUE);

        # read until the end of the file, the timeout is reached, or if at least
        # 500 KB have been read
        $Failsafe = 1000;
        while (!feof($Handle)
               && (microtime(TRUE) - $Time) < $this->Timeout
               && strlen($Html) < 512000 # strlen can't always be trusted
               && $Failsafe--)
        {
            # the chunk is checked before it is appended since the result of
            # appending is always a string and so never FALSE
            $Chunk = @fread($Handle, 8192);

            if (FALSE === $Chunk)
            {
                fclose($Handle);
                return TRUE;
            }

            $Html .= $Chunk;
        }

        fclose($Handle);

        # parse out the title and the body to search within
        $Title = (preg_match('/<title[^>]*>(.*?)<\/title>/is', $Html, $Matches))
            ? trim($Matches[1]) : "" ;
        $Body = (preg_match('/<body[^>]*>(.*?)<\/body>/is', $Html, $Matches))
            ? trim ($Matches[1]) : "";
        $Html = $Title." ".$Body;

        # strip out tags that contain data that is probably not HTML
        $Html = preg_replace('/<(script|noscript|style)[^>]*>.*?<\/\1>/is',
            '', $Html);

        # remove HTML tags so we only have text to search
        $Html = strip_tags($Html);

        # e.g., "page could not be found", "file was not found"
        if (preg_match('/(file|url|page|document)\s+([^\s]+\s+)?(couldn\'t\s+be|'
            .'could\s+not\s+be|cannot\s+be|can\'t\s+be|was\s+not)\s+found/i', $Html))
        {
            return FALSE;
        }

        # e.g., "page not found", "404 not found", "error 404"
        else if (preg_match('/(file|url|page|404|document)\s+not\s+found|'
            .'(http|error)\s+404/i', $Html))
        {
            return FALSE;
        }

        # e.g., "could not find the page"
        else if (preg_match('/(couldn\'t|could\s+not|cannot|can\'t)\s+find\s+'
            .'(the|that)\s+(file|url|page|document)/i', $Html))
        {
            return FALSE;
        }

        return TRUE;
    }

    /**
     * Remove any stale data from deleted resources or changed URLs.
     */
    private function RemoveStaleData()
    {
        # so that the following queries are executed only once per load. the
        # flag is stored on this object, so it is tracked per instance
        if (isset($this->RemovedStaleData))
        {
            return;
        }

        $this->RemovedStaleData = TRUE;

        # clean history tables of data from deleted resources, i.e., history
        # rows whose resource ID no longer has a row in the Resources table
        $this->DB->Query("
            DELETE URH
            FROM UrlChecker_ResourceHistory URH
            LEFT JOIN Resources R
            ON URH.ResourceId = R.ResourceId
            WHERE R.ResourceId IS NULL");
        $this->DB->Query("
            DELETE UUH
            FROM UrlChecker_UrlHistory UUH
            LEFT JOIN Resources R
            ON UUH.ResourceId = R.ResourceId
            WHERE R.ResourceId IS NULL");

        # clean URL history table of data from deleted fields, i.e., rows
        # whose field ID no longer has a row in the MetadataFields table
        $this->DB->Query("
            DELETE UUH
            FROM UrlChecker_UrlHistory UUH
            LEFT JOIN MetadataFields M
            ON UUH.FieldId = M.FieldId
            WHERE M.FieldId IS NULL");

        # clean history tables of data from URLs that have changed, one URL
        # field at a time
        foreach ($this->GetUrlFields() as $Field)
        {
            # both of the following queries use BINARY when checking the URL
            # field to force a case sensitive search

            # remove the resource history row of any resource whose recorded
            # URL for this field differs from the value now in the Resources
            # table. resources without a resource history row are selected as
            # never having been checked by GetNextResourcesToBeChecked().
            # this query has to run before the next one because it joins
            # against the URL history rows that the next query deletes
            $this->DB->Query("
                DELETE URH
                FROM UrlChecker_ResourceHistory URH
                LEFT JOIN (Resources R, UrlChecker_UrlHistory UUH)
                ON (UUH.ResourceId = R.ResourceId
                  AND UUH.ResourceId = URH.ResourceId)
                WHERE UUH.ResourceId IS NOT NULL AND (
                  BINARY UUH.Url != R.".$Field->DBFieldName()."
                  AND UUH.FieldId = ".intval($Field->Id())."
                )");

            # remove the URL history rows for this field whose recorded URL
            # differs from the value now in the Resources table
            $this->DB->Query("
                DELETE UUH
                FROM UrlChecker_UrlHistory UUH
                LEFT JOIN Resources R
                ON UUH.ResourceId = R.ResourceId
                WHERE
                BINARY UUH.Url != R.".$Field->DBFieldName()."
                AND UUH.FieldId = ".intval($Field->Id()));
        }
    }

    /**
     * Get all the URL metadata fields.
     * @return array of all the metadata fields
     */
    private function GetUrlFields()
    {
        # function-level cache so that the schema is only asked for the URL
        # fields until a non-NULL result has been stored
        static $Fields;

        if (isset($Fields))
        {
            return $Fields;
        }

        $Fields = $this->Schema->GetFields(MetadataSchema::MDFTYPE_URL);

        return $Fields;
    }

    /**
     * Get the URL checker settings
     * @return array of settings
     */
    private function GetSettings()
    {
        # if the settings have already been loaded and are unmodified
        if (isset($this->Settings))
        {
            return $this->Settings;
        }

        # start from the default settings so that every known setting has a
        # value even when the table only holds rows for some of them (the
        # defaults were previously applied only when the table was empty,
        # leaving keys undefined once any single setting had been saved)
        $Settings = array(
          "NextNormalUrlCheck" => 0,
          "NextInvalidUrlCheck" => 0,
          "EnableDeveloper" => 0);

        # values stored in the database override the defaults. all values
        # are stored and returned as integers
        $this->DB->Query("SELECT * FROM UrlChecker_Settings");
        while (FALSE !== ($Row = $this->DB->FetchRow()))
        {
            $Settings[$Row["Name"]] = intval($Row["Value"]);
        }

        # cache the result until self::UpdateSetting() clears it
        $this->Settings = $Settings;

        return $this->Settings;
    }

    /**
     * Store a setting in the settings table, replacing any value previously
     * saved under the same name. The cached settings are discarded afterwards
     * so that they are read from the database again when needed.
     * @param $Name setting name
     * @param $Value setting value
     */
    private function UpdateSetting($Name, $Value)
    {
        # the escaped name is needed by both queries
        $EscapedName = addslashes($Name);

        # remove whatever row currently holds this setting
        $this->DB->Query("
            DELETE FROM UrlChecker_Settings
            WHERE Name = '{$EscapedName}'");

        # add the row carrying the new value
        $EscapedValue = addslashes($Value);
        $this->DB->Query("
            INSERT INTO UrlChecker_Settings (Name, Value)
            VALUES ('{$EscapedName}', '{$EscapedValue}')");

        # drop the cached copy so that self::GetSettings reloads the settings
        # the next time it is called
        unset($this->Settings);
    }

    /**
     * With the exception of $ReportAddress, these are the data members named
     * by self::__sleep(), i.e., the only ones that are saved when the URL
     * checker is serialized to run as a task, e.g., during
     * self::CheckDelegation. $ReportAddress is not listed by self::__sleep(),
     * so it is not part of the serialized data.
     * @var $DB database instance (created in the constructor)
     * @var $Schema MetadataSchema object (created in the constructor)
     * @var $Timeout connection timeout (set to 5.0 in the constructor;
     *      presumably seconds -- TODO confirm)
     * @var $Threshold threshold for invalid URLs (set to 4 in the constructor)
     * @var $NumToCheck set to 10 in the constructor (presumably the number of
     *      URLs to check per run -- TODO confirm)
     * @var $ValidGetConstraints valid constraints (list of column names set
     *      in the constructor)
     * @var $ReportAddress recipient of report e-mails [] auto replaced
     *      (presumably "[]" stands in for "@" -- TODO confirm)
     */
    private $DB;
    private $Schema;
    private $Timeout;
    private $Threshold;
    private $NumToCheck;
    private $ValidGetConstraints;
    private $ReportAddress = "tbaumgard[]scout.wisc.edu";

    /**
     * These aren't and shouldn't be saved when serialized; none of them is
     * listed by self::__sleep().
     * @var $Settings settings in database, cached by self::GetSettings() and
     *      cleared by self::UpdateSetting()
     * @var $IsResourceReleasedCallback custom callback
     * @var $ReleaseResourceCallback custom callback
     * @var $WithholdResourceCallback custom callback
     * @var $RemovedStaleData variable used to run RemoveStaleData once
     */
    private $Settings;
    private $IsResourceReleasedCallback;
    private $ReleaseResourceCallback;
    private $WithholdResourceCallback;
    private $RemovedStaleData;

}
