<?PHP

#
#   FILE:  Scout--RSSClient.php
#
#   METHODS PROVIDED:
#       RSSClient()
#           - constructor
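#       ServerUrl($NewValue = NULL)
#           - get/set feed URL (setting a new URL reloads and reparses the feed)
#       Encoding($NewValue = NULL)
#           - get/set character encoding handed to the XML parser
#       AutodetectEncoding()
#           - detect the feed encoding and apply it
#       GetItems($NumberOfItems = NULL, $ChannelName = NULL)
#           - retrieve RSS items
#       GetChannelTitle(), GetChannelLink(), GetChannelDescription()
#           - retrieve channel info as given in feed
#       UsedCachedData()
#           - tell caller whether cached data was used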
#
#   AUTHOR:  Edward Almasy
#
#   Copyright 2005 Internet Scout Project
#   http://scout.wisc.edu
#

class RSSClient {

    # ---- PUBLIC INTERFACE --------------------------------------------------

    /**
     * Object constructor.  Loads the feed XML (from the cache if a fresh copy
     * is available, otherwise from the server) and parses it.
     * @param $ServerUrl URL of the RSS feed.
     * @param $CacheDB Database object used to cache feed content, or NULL for
     *       no caching.  (OPTIONAL, defaults to NULL)
     * @param $RefreshTime Number of seconds a cached copy is considered fresh.
     *       (OPTIONAL, defaults to 600)
     * @param $Encoding Character encoding passed to the XML parser.
     *       (OPTIONAL, defaults to "UTF-8")
     * @param $DebugLevel Debug output level; levels above 3 are also passed
     *       on (minus 3) to the XML parser.  (OPTIONAL, defaults to 0)
     */
    function __construct($ServerUrl, $CacheDB = NULL, $RefreshTime = 600, $Encoding = "UTF-8", $DebugLevel = 0)
    {
        # set default debug level
        $this->DebugLevel = $DebugLevel;

        # set default encoding
        $this->Encoding = $Encoding;

        # save cache details
        $this->CacheDB = $CacheDB;
        $this->RefreshTime = $RefreshTime;

        # query server (or cache) for XML text
        $this->XmlText = $this->QueryServerWithCaching(
            $ServerUrl, $CacheDB, $RefreshTime);

        # create XML parser and parse text
        # (debug level must be set on the parser object itself, not on an
        # undefined local variable)
        $this->Parser = new XMLParser($this->Encoding);
        if ($this->DebugLevel > 3)
        {
            $this->Parser->SetDebugLevel($this->DebugLevel - 3);
        }
        $this->Parser->ParseText($this->XmlText);

        if ($this->DebugLevel)
        {
            print("RSSClient->RSSClient() returned ".strlen((string)$this->XmlText)." characters from server query<br>\n");
        }
    }

    /**
     * Legacy constructor name, kept so that code calling RSSClient()
     * explicitly (e.g. parent::RSSClient() in a subclass) keeps working and
     * so the class still initializes on interpreters that only recognize
     * class-named constructors.  Takes the same parameters as __construct().
     */
    function RSSClient($ServerUrl, $CacheDB = NULL, $RefreshTime = 600, $Encoding = "UTF-8", $DebugLevel = 0)
    {
        $this->__construct($ServerUrl, $CacheDB, $RefreshTime, $Encoding, $DebugLevel);
    }

    /**
     * Get or set the URL of the RSS feed.  Supplying a new (different) URL
     * reloads the feed from the server or cache and reparses it.
     * @param $NewValue New feed URL.  (OPTIONAL; NULL or any other value
     *       loosely equal to NULL leaves the URL unchanged)
     * @return Current feed URL.
     */
    function ServerUrl($NewValue = NULL)
    {
        # if new RSS server URL supplied
        if (($NewValue != NULL) && ($NewValue != $this->ServerUrl))
        {
            # save new value
            $this->ServerUrl = $NewValue;

            # re-read XML from server at new URL
            $this->XmlText = $this->QueryServerWithCaching(
                $NewValue,
                $this->CacheDB,
                $this->RefreshTime);

            # create new XML parser (using the configured encoding, as the
            # constructor and Encoding() do) and parse text
            $this->Parser = new XMLParser($this->Encoding);
            if ($this->DebugLevel > 3)
            {
                $this->Parser->SetDebugLevel($this->DebugLevel - 3);
            }
            $this->Parser->ParseText($this->XmlText);

            # discard channel info loaded from the previous feed so that the
            # GetChannel*() methods reload it from the new one
            $this->ChannelTitle = NULL;
            $this->ChannelLink = NULL;
            $this->ChannelDescription = NULL;
        }

        # return RSS server URL to caller
        return $this->ServerUrl;
    }

    /**
     * Get or set the character encoding passed to the XML parser.  Supplying
     * a new (different) encoding reloads the feed from the server or cache
     * and reparses it with that encoding.
     * @param $NewValue New encoding name.  (OPTIONAL; NULL or any other value
     *       loosely equal to NULL leaves the encoding unchanged)
     * @return Current encoding.
     */
    function Encoding($NewValue = NULL)
    {
        # if new encoding supplied
        if (($NewValue != NULL) && ($NewValue != $this->Encoding))
        {
            # save new value
            $this->Encoding = $NewValue;

            # re-read XML from server
            $this->XmlText = $this->QueryServerWithCaching(
                $this->ServerUrl,
                $this->CacheDB,
                $this->RefreshTime);

            # create new XML parser and parse text
            # (debug level must be set on the parser object itself, not on an
            # undefined local variable)
            $this->Parser = new XMLParser($this->Encoding);
            if ($this->DebugLevel > 3)
            {
                $this->Parser->SetDebugLevel($this->DebugLevel - 3);
            }
            $this->Parser->ParseText($this->XmlText);

            # discard channel info parsed under the old encoding so that the
            # GetChannel*() methods reload it
            $this->ChannelTitle = NULL;
            $this->ChannelLink = NULL;
            $this->ChannelDescription = NULL;
        }

        # return encoding to caller
        return $this->Encoding;
    }

    /**
     * Try to automatically detect and set the encoding of the RSS feed. The
     * precedence is as follows: encoding declared in the XML file, charset
     * parameter in the Content-Type HTTP response header (read back from the
     * cache table, so only consulted when a cache database is in use), then
     * ISO-8859-1.
     */
    function AutodetectEncoding()
    {
        # if neither the XML file nor the HTTP response headers specify an
        # encoding, there is an overwhelming chance that it's ISO-8859-1, so
        # use it as the default
        $Encoding = "ISO-8859-1";

        # only match up to the encoding portion of the XML declaration
        # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-prolog-dtd
        # (S is one or more whitespace characters; Eq allows optional
        # whitespace on either side of the equals sign)
        $S = '[ \t\r\n]+';
        $Eq = '[ \t\r\n]*=[ \t\r\n]*';
        $VersionNum = '1\.[0-9]+';
        $EncName = '[A-Za-z][A-Za-z0-9._-]*';
        $VersionInfo = "{$S}version{$Eq}(?:'{$VersionNum}'|\"{$VersionNum}\")";

        # group 1 is the opening quote, group 2 is the bare encoding name
        $EncodingDecl = "{$S}encoding{$Eq}(['\"])({$EncName})\\1";
        $XMLDecl = "<\\?xml{$VersionInfo}(?:{$EncodingDecl})?";

        # the declaration is only meaningful at the very start of the text, so
        # anchor there (tolerating a UTF-8 byte order mark and stray leading
        # whitespace) rather than matching a look-alike inside the content
        $RegEx = "/^(?:\\xEF\\xBB\\xBF)?[ \\t\\r\\n]*{$XMLDecl}/";

        # try to find the encoding, index 2 will be set if encoding is declared
        $Matches = array();
        $Found = preg_match($RegEx, (string)$this->XmlText, $Matches);

        # give precedence to the encoding specified within the XML file since
        # a RSS feed publisher might not have access to HTTP response headers
        if ($Found && isset($Matches[2]) && strlen($Matches[2]))
        {
            $Encoding = $Matches[2];
        }

        # then give precedence to the charset parameter in the Content-Type
        # response header
        else if ($this->CacheDB)
        {
            # look up the cache entry for the current feed
            $DB = $this->CacheDB;
            $ServerUrl = addslashes((string)$this->ServerUrl);
            $DB->Query("
                SELECT * FROM RSSClientCache
                WHERE ServerUrl = '".$ServerUrl."'");

            # if cached and charset parameter was given in the response headers
            if ($DB->NumRowsSelected() > 0)
            {
                $Cache = $DB->FetchRow();
                if (isset($Cache["Charset"]) && strlen($Cache["Charset"]))
                {
                    $Encoding = $Cache["Charset"];
                }
            }
        }

        # apply the detected encoding (reparses the feed if it changed)
        $this->Encoding($Encoding);
    }

    /**
     * Retrieve items from the feed.
     * @param $NumberOfItems Maximum number of items to return.  (OPTIONAL;
     *       NULL or any value loosely equal to NULL means no limit)
     * @param $ChannelName Accepted but not used by this method.  (OPTIONAL)
     * @return Array of items, each an array with "title", "description",
     *       "link", and "enclosure" entries.  Empty if no items were found.
     */
    function GetItems($NumberOfItems = NULL, $ChannelName = NULL)
    {
        # position parser on the element that contains the items: inside
        # <channel> when an <rss> element is found, otherwise <rdf:RDF>
        $Feed = $this->Parser;
        $Feed->SeekToRoot();
        if ($Feed->SeekTo("rss") !== NULL)
        {
            $Feed->SeekTo("channel");
        }
        else
        {
            $Feed->SeekTo("rdf:RDF");
        }

        # bail out with an empty list if there are no items
        $Found = array();
        if (!$Feed->SeekTo("item"))
        {
            return $Found;
        }

        # collect items until the parser runs out or the limit is reached
        while (TRUE)
        {
            $Found[] = array(
                "title" => $Feed->GetData("title"),
                "description" => $Feed->GetData("description"),
                "link" => $Feed->GetData("link"),
                "enclosure" => $Feed->GetAttributes("enclosure"));

            # advance first, then check the limit
            if (!$Feed->NextItem())
            {
                break;
            }
            if (($NumberOfItems != NULL) && !(count($Found) < $NumberOfItems))
            {
                break;
            }
        }

        return $Found;
    }

    /**
     * Retrieve site name as given in feed, loading the channel info first if
     * no title is currently set.
     * @return Channel title.
     */
    function GetChannelTitle()
    {
        if (isset($this->ChannelTitle) === FALSE)
        {
            $this->LoadChannelInfo();
        }

        return $this->ChannelTitle;
    }

    /**
     * Retrieve site link as given in feed, loading the channel info first if
     * no link is currently set.
     * @return Channel link.
     */
    function GetChannelLink()
    {
        if (isset($this->ChannelLink) === FALSE)
        {
            $this->LoadChannelInfo();
        }

        return $this->ChannelLink;
    }

    /**
     * Retrieve site description as given in feed, loading the channel info
     * first if no description is currently set.
     * @return Channel description.
     */
    function GetChannelDescription()
    {
        if (isset($this->ChannelDescription) === FALSE)
        {
            $this->LoadChannelInfo();
        }

        return $this->ChannelDescription;
    }

    /**
     * Tell caller whether client is using cached data.
     * @return Value of the cached-data flag recorded by the most recent
     *       feed load.
     */
    function UsedCachedData()
    {
        $WasCached = $this->CachedDataWasUsed;
        return $WasCached;
    }


    # ---- PRIVATE INTERFACE -------------------------------------------------

    # database object used for the feed cache (NULL/empty when not caching)
    var $CacheDB;
    # age in seconds below which a cache entry is still used
    var $RefreshTime;
    # URL of the feed most recently loaded
    var $ServerUrl;
    # NOTE(review): $MetadataPrefix and $SetSpec are not referenced by any
    # method in this class -- confirm they are unused before removing
    var $MetadataPrefix;
    var $SetSpec;
    # debug output level (levels above 3 are also passed on to the XML parser)
    var $DebugLevel;
    # character encoding passed to the XML parser
    var $Encoding;
    # raw XML text of the feed as returned by the server or cache
    var $XmlText;
    # XMLParser object holding the parsed feed
    var $Parser;
    # channel info, filled in by LoadChannelInfo() on first request
    var $ChannelTitle;
    var $ChannelLink;
    var $ChannelDescription;
    # whether the most recent feed load was satisfied from the cache
    var $CachedDataWasUsed;

    /**
     * Set current debug output level.
     * @param $NewLevel New debug output level (0-9).
     */
    function SetDebugLevel($NewLevel)
    {
        $Level = $NewLevel;
        $this->DebugLevel = $Level;
    }

    /**
     * Get the XML text at the given URL, along with the type and charset of the
     * text.  The type and charset are taken from the Content-Type header of
     * the final HTTP response, and are only reported when that header carries
     * a charset parameter.
     * @param $Url URL of XML text
     * @return array(
     *                XML text or FALSE on failure,
     *                Type or NULL on failure or if no charset was given,
     *                Charset or NULL on failure or if not set)
     */
    function GetXmlInfo($Url)
    {
        $Text = @file_get_contents($Url);
        $Type = NULL;
        $Charset = NULL;

        # response headers are only available after a successful fetch made
        # through the HTTP wrapper; this check must come after
        # file_get_contents() and before any other remote fetching is done
        if (($Text === FALSE)
                || !isset($http_response_header)
                || !is_array($http_response_header))
        {
            return array($Text, $Type, $Charset);
        }
        $Headers = $http_response_header;

        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
        # (the hyphen in the token class is escaped so that it is a literal
        # hyphen rather than a range that would also admit a comma, and the
        # quoted-pair pattern is a literal backslash followed by any ASCII
        # character)
        $LWS = '([ \t]*|\r\n[ \t]+)';
        $Token = '[!\x23-\x27*+\-.\x30-\x39\x41-\x5A\x5E-\x7A|~]+';
        $QuotedPair = '\\\\[\x00-\x7F]';
        $QdText = "([^\\x00-\\x1F\\x7F\"]|{$LWS})";
        $QuotedString = "\"({$QdText}|{$QuotedPair})*\"";
        $Value = "({$Token}|{$QuotedString})";
        $Parameter = "{$Token}{$LWS}={$LWS}{$Value}";

        # these make the Content-Type regex specific to Content-Type
        # values with charset parameters in them, but make capturing
        # the charset much easier
        $BasicParameter = "(;{$LWS}{$Parameter})*";
        $CharsetParameter = "(;{$LWS}charset{$LWS}={$LWS}{$Value})";
        $ModParameter = "{$BasicParameter}{$CharsetParameter}{$BasicParameter}";
        $MediaType = "({$Token}{$LWS}\\/{$LWS}{$Token}){$LWS}{$ModParameter}";

        # back to the spec
        $ContentType = "Content-Type{$LWS}:{$LWS}{$MediaType}{$LWS}";
        $RegEx = "/^{$ContentType}$/i";

        foreach ($Headers as $Header)
        {
            # a status line begins the headers of a new response (there is
            # more than one when redirects were followed), so forget anything
            # gleaned from an earlier response
            if (strncasecmp($Header, "HTTP/", 5) == 0)
            {
                $Type = NULL;
                $Charset = NULL;
                continue;
            }

            # group 3 is the media type, group 19 is the charset value
            $Matches = array();
            if (preg_match($RegEx, $Header, $Matches)
                    && isset($Matches[3]) && isset($Matches[19]))
            {
                $Type = $Matches[3];

                # the charset may be given as a quoted string
                $Charset = trim($Matches[19], '"');
            }
        }

        return array($Text, $Type, $Charset);
    }

    /**
     * Load feed XML from the cache (if a fresh enough entry exists) or from
     * the server.  Also records the URL on this object, and sets the flag
     * reported by UsedCachedData().
     * @param $ServerUrl URL of the RSS feed.
     * @param $CacheDB Database object to use for caching, or NULL/empty to
     *       keep using whatever cache database is already set (if any).
     * @param $RefreshTime Maximum age in seconds of a usable cache entry.
     * @return XML text of the feed, or an empty string if it could not be
     *       retrieved.
     */
    function QueryServerWithCaching($ServerUrl, $CacheDB, $RefreshTime)
    {
        # save RSS server URL
        $this->ServerUrl = $ServerUrl;

        # save caching info (if any)
        if ($CacheDB)
        {
            $this->CacheDB = $CacheDB;
        }

        # with no cache database there is nothing to look up or store, so
        # go straight to the server (previously this case returned an
        # undefined value without ever querying the server)
        if (!$this->CacheDB)
        {
            $this->CachedDataWasUsed = FALSE;
            list($Text, $Type, $Charset) = $this->GetXmlInfo($ServerUrl);
            return ($Text !== FALSE) ? $Text : "";
        }

        $DB = $this->CacheDB;
        $EscapedUrl = addslashes((string)$ServerUrl);

        # look up cached information for this server
        # NOTE(review): the cutoff uses PHP's clock/time zone while the stored
        # timestamp comes from the database's NOW() -- confirm they agree
        $QueryTimeCutoff = date("Y-m-d H:i:s", (time() - $RefreshTime));
        $DB->Query("
                SELECT * FROM RSSClientCache
                WHERE ServerUrl = '".$EscapedUrl."'
                AND LastQueryTime > '".$QueryTimeCutoff."'");

        # if we have cached info that has not expired, use it
        if ($CachedXml = $DB->FetchField("CachedXml"))
        {
            $this->CachedDataWasUsed = TRUE;
            return $CachedXml;
        }

        # otherwise query server for XML text
        $this->CachedDataWasUsed = FALSE;
        list($Text, $Type, $Charset) = $this->GetXmlInfo($ServerUrl);
        if ($Text === FALSE)
        {
            return "";
        }

        # clear out any old cache entries
        $DB->Query("
                DELETE FROM RSSClientCache
                WHERE ServerUrl = '".$EscapedUrl."'");

        # save info in cache (type and charset may be NULL, so cast them to
        # strings before escaping)
        $DB->Query("
                INSERT INTO RSSClientCache
                (ServerUrl, CachedXml, Type, Charset, LastQueryTime)
                VALUES (
                    '".$EscapedUrl."',
                    '".addslashes($Text)."',
                    '".addslashes((string)$Type)."',
                    '".addslashes((string)$Charset)."',
                    NOW())");

        # return query result to caller
        return $Text;
    }

    /**
     * Read the channel title, link, and description out of the parsed feed
     * and store them on this object.
     */
    function LoadChannelInfo()
    {
        $Feed = $this->Parser;

        # start from the top: look for <rss> first, falling back to <rdf:RDF>
        $Feed->SeekToRoot();
        if ($Feed->SeekTo("rss") === NULL)
        {
            $Feed->SeekTo("rdf:RDF");
        }

        # channel info lives under <channel> in either case
        $Feed->SeekTo("channel");

        $this->ChannelTitle = $Feed->GetData("title");
        $this->ChannelLink = $Feed->GetData("link");
        $this->ChannelDescription = $Feed->GetData("description");
    }
}
