5 #   Part of the ScoutLib application support library 
    6 #   Copyright 2002-2013 Edward Almasy and Internet Scout Research Group 
    7 #   http://scout.wisc.edu/ 
   15     # ---- PUBLIC INTERFACE -------------------------------------------------- 
   31         # set default debug level 
   34         # set default encoding 
   41         # query server (or cache) for XML text 
   45         # create XML parser and parse text 
   47         if ($this->DebugLevel > 3) {  
$Parser->SetDebugLevel($this->DebugLevel - 3);  }
 
   48         $this->Parser->ParseText($this->XmlText);
 
   50         if ($this->DebugLevel) {  print(
"RSSClient->RSSClient() returned ".strlen($this->XmlText).
" characters from server query<br>\n");  }
 
   60         # if new RSS server URL supplied 
   61         if (($NewValue != NULL) && ($NewValue != $this->
ServerUrl))
 
   66             # re-read XML from server at new URL 
   72             # create new XML parser and parse text 
   74             if ($this->DebugLevel > 3) {  
$Parser->SetDebugLevel($this->DebugLevel - 3);  }
 
   75             $this->Parser->ParseText($this->XmlText);
 
   78         # return RSS server URL to caller 
   90         # if new encoding supplied 
   91         if (($NewValue != NULL) && ($NewValue != $this->
Encoding))
 
   96             # re-read XML from server 
  102             # create new XML parser and parse text 
  104             if ($this->DebugLevel > 3) {  
$Parser->SetDebugLevel($this->DebugLevel - 3);  }
 
  105             $this->Parser->ParseText($this->XmlText);
 
  108         # return encoding to caller 
  119         # if neither the XML file nor the HTTP response headers specify an 
  120         # encoding, there is an overwhelming chance that it's ISO-8859-1, so 
  121         # use it as the default 
  124         # only get up to the encoding portion of the XML declaration 
  125         # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-prolog-dtd 
  129         $EncName = 
'[A-Za-z]([A-Za-z0-9._]|-)*';
 
  130         $VersionInfo = 
"{$S}version{$Eq}('{$VersionNum}'|\"{$VersionNum}\")";
 
  131         $EncodingDecl = 
"{$S}encoding{$Eq}('{$EncName}'|\"{$EncName}\")";
 
  132         $XMLDecl = 
"<\?xml{$VersionInfo}({$EncodingDecl})?";
 
  133         $RegEx = 
"/{$XMLDecl}/";
 
  135         # try to find the encoding, index 3 will be set if encoding is declared 
  136         preg_match($RegEx, $this->XmlText, $Matches);
 
  138         # give precedence to the encoding specified within the XML file since 
  139         # an RSS feed publisher might not have access to HTTP response headers 
  140         if (count($Matches) >= 4)
 
  142             # also need to strip off the quotes 
  146         # then give precedence to the charset parameter in the Content-Type 
  148         else if ($this->CacheDB)
 
  150             # create cache table if it doesn't exist 
  154             # get the cache value 
  156                 SELECT * FROM RSSClientCache 
  158             $Exists = ($DB->NumRowsSelected() > 0);
 
  159             $Cache = $DB->FetchRow();
 
  161             # if cached and charset parameter was given in the response headers 
  162             if ($Exists && strlen($Cache[
"Charset"]))
 
  179     function GetItems($NumberOfItems = NULL, $ChannelName = NULL)
 
  181         # start by assuming no items will be found 
  184         # move parser to area in XML with items 
  187         $Result = 
$Parser->SeekTo(
"rss");
 
  188         if ($Result === NULL)
 
  190             $Result = 
$Parser->SeekTo(
"rdf:RDF");
 
  198         $ItemCount = 
$Parser->SeekTo(
"item");
 
  207                 $Items[$Index][
"description"] = 
$Parser->GetData(
"description");
 
  209                 $Items[$Index][
"enclosure"] = 
$Parser->GetAttributes(
"enclosure");
 
  213             while (
$Parser->NextItem() && (($NumberOfItems == NULL) || ($Index < $NumberOfItems)));
 
  216         # return records to caller 
  259     # ---- PRIVATE INTERFACE ------------------------------------------------- 
  282         $this->DebugLevel = $NewLevel;
 
  296         $Text = @file_get_contents($Url);
 
  300         # get the type and charset if the fetch was successful 
  303             # this must come after file_get_contents() and before any other remote 
  305             $Headers = $http_response_header;
 
  307             # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17 
  308             $LWS = 
'([ \t]*|\r\n[ \t]+)';
 
  309             $Token = 
'[!\x23-\x27*+-.\x30-\x39\x41-\x5A\x5E-\x7A|~]+';
 
  310             $QuotedPair = 
'\\[\x00-\x7F]';
 
  311             $QdText = 
"([^\\x00-\\x1F\\x7F\"]|{$LWS})";
 
  312             $QuotedString = 
"\"({$QdText}|{$QuotedPair})*\"";
 
  313             $Value = 
"({$Token}|{$QuotedString})";
 
  314             $Parameter = 
"{$Token}{$LWS}={$LWS}{$Value}";
 
  316             # these make the Content-Type regex specific to Content-Type 
  317             # values with charset parameters in them, but make capturing 
  318             # the charset much easier 
  319             $BasicParameter = 
"(;{$LWS}{$Parameter})*";
 
  320             $CharsetParameter = 
"(;{$LWS}charset{$LWS}={$LWS}{$Value})";
 
  321             $ModParameter = 
"{$BasicParameter}{$CharsetParameter}{$BasicParameter}";
 
  322             $MediaType = 
"({$Token}{$LWS}\\/{$LWS}{$Token}){$LWS}{$ModParameter}";
 
  325             $ContentType = 
"Content-Type{$LWS}:{$LWS}{$MediaType}{$LWS}";
 
  326             $RegEx = 
"/^{$ContentType}$/i";
 
  328             foreach ($Headers as $Header)
 
  330                 preg_match($RegEx, $Header, $Matches);
 
  332                 if (isset($Matches[3]) && isset($Matches[19]))
 
  335                     $Charset = $Matches[19];
 
  341         return array($Text, $Type, $Charset);
 
  356         # save RSS server URL 
  359         # save caching info (if any) 
  365         # if caching info was supplied 
  370             # look up cached information for this server 
  371             $QueryTimeCutoff = date(
"Y-m-d H:i:s", (time() - $RefreshTime));
 
  373                 SELECT * FROM RSSClientCache 
  374                 WHERE ServerUrl = '".addslashes($ServerUrl).
"' 
  375                 AND LastQueryTime > '".$QueryTimeCutoff.
"'");
 
  377             # if we have cached info that has not expired 
  378             if ($CachedXml = $DB->FetchField(
"CachedXml"))
 
  381                 $QueryResult = $CachedXml;
 
  382                 $this->CachedDataWasUsed = TRUE;
 
  386                 $this->CachedDataWasUsed = FALSE;
 
  388                 # query server for XML text 
  389                 list($Text, $Type, $Charset) = $this->
GetXmlInfo($ServerUrl);
 
  392                 # if query was successful 
  395                     $QueryResult = $Text;
 
  397                     # clear out any old cache entries 
  399                         DELETE FROM RSSClientCache 
  400                         WHERE ServerUrl = '".addslashes($ServerUrl).
"'");
 
  404                         INSERT INTO RSSClientCache 
  405                         (ServerUrl, CachedXml, Type, Charset, LastQueryTime) 
  407                             '".addslashes($ServerUrl).
"', 
  408                             '".addslashes($Text).
"', 
  409                             '".addslashes($Type).
"', 
  410                             '".addslashes($Charset).
"', 
  416         # return query result to caller 
  427         $Parser->SeekToRoot();
 
  428         $Result = $Parser->SeekTo(
"rss");
 
  429         if ($Result === NULL)
 
  431             $Result = $Parser->SeekTo(
"rdf:RDF");
 
  433         $Parser->SeekTo(
"channel");
 
  434         $this->ChannelTitle = $Parser->GetData(
"title");
 
  435         $this->ChannelLink = $Parser->GetData(
"link");
 
  436         $this->ChannelDescription = $Parser->GetData(
"description");