CWIS Developer Documentation
RSSClient.php
Go to the documentation of this file.
1 <?PHP
2 #
3 # FILE: RSSClient.php
4 #
5 # Part of the ScoutLib application support library
6 # Copyright 2002-2013 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu/
8 #
9 
13 class RSSClient
14 {
15  # ---- PUBLIC INTERFACE --------------------------------------------------
16 
/**
 * Object constructor.  Retrieves the feed XML (from the cache when a
 * cache database is supplied and holds an unexpired copy, otherwise from
 * the server) and parses it.
 * @param string $ServerUrl URL of the RSS feed.
 * @param object $CacheDB Database object used for caching feed content.
 *       (OPTIONAL, defaults to NULL)
 * @param int $RefreshTime Maximum age, in seconds, at which cached
 *       content is still used.  (OPTIONAL, defaults to 600)
 * @param string $Encoding Character encoding handed to the XML parser.
 *       (OPTIONAL, defaults to "UTF-8")
 * @param int $DebugLevel Debugging output level.  Any non-zero value
 *       prints a summary line; values above 3 are also passed on (less 3)
 *       to the XML parser.  (OPTIONAL, defaults to 0)
 */
public function __construct($ServerUrl, $CacheDB = NULL, $RefreshTime = 600,
        $Encoding = "UTF-8", $DebugLevel = 0)
{
    # set default debug level
    $this->DebugLevel = $DebugLevel;

    # set default encoding
    $this->Encoding = $Encoding;

    # save cache details
    $this->CacheDB = $CacheDB;
    $this->RefreshTime = $RefreshTime;

    # query server (or cache) for XML text
    $this->XmlText = $this->QueryServerWithCaching(
            $ServerUrl, $CacheDB, $RefreshTime);

    # create XML parser and parse text
    $this->Parser = new XMLParser($this->Encoding);
    if ($this->DebugLevel > 3)
    {
        # the parser lives in $this->Parser (there is no local $Parser here)
        $this->Parser->SetDebugLevel($this->DebugLevel - 3);
    }
    $this->Parser->ParseText($this->XmlText);

    if ($this->DebugLevel)
    {
        print("RSSClient->RSSClient() returned ".strlen($this->XmlText)
                ." characters from server query<br>\n");
    }
}
57 
/**
 * Get or set the RSS feed URL.  Supplying a URL that differs from the
 * current one re-reads the XML (from cache or server) and re-parses it.
 * @param string $NewValue New RSS feed URL.  (OPTIONAL)
 * @return string Current RSS feed URL.
 */
public function ServerUrl($NewValue = NULL)
{
    # if new RSS server URL supplied
    if (($NewValue != NULL) && ($NewValue != $this->ServerUrl))
    {
        # save new value
        $this->ServerUrl = $NewValue;

        # re-read XML from server at new URL
        $this->XmlText = $this->QueryServerWithCaching(
                $NewValue,
                $this->CacheDB,
                $this->RefreshTime);

        # create new XML parser (with the current encoding, as the
        #       constructor and Encoding() do) and parse text
        $this->Parser = new XMLParser($this->Encoding);
        if ($this->DebugLevel > 3)
        {
            $this->Parser->SetDebugLevel($this->DebugLevel - 3);
        }
        $this->Parser->ParseText($this->XmlText);

        # discard channel info loaded from the previous feed so that the
        #       GetChannel*() methods reload it from the new one
        $this->ChannelTitle = NULL;
        $this->ChannelLink = NULL;
        $this->ChannelDescription = NULL;
    }

    # return RSS server URL to caller
    return $this->ServerUrl;
}
89 
/**
 * Get or set the character encoding of the RSS feed.  Supplying an
 * encoding that differs from the current one re-reads the XML (from cache
 * or server) and re-parses it with the new encoding.
 * @param string $NewValue New character encoding.  (OPTIONAL)
 * @return string Current character encoding.
 */
public function Encoding($NewValue = NULL)
{
    # if new encoding supplied
    if (($NewValue != NULL) && ($NewValue != $this->Encoding))
    {
        # save new value
        $this->Encoding = $NewValue;

        # re-read XML from server
        $this->XmlText = $this->QueryServerWithCaching(
                $this->ServerUrl,
                $this->CacheDB,
                $this->RefreshTime);

        # create new XML parser and parse text
        $this->Parser = new XMLParser($this->Encoding);
        if ($this->DebugLevel > 3)
        {
            # the parser lives in $this->Parser (there is no local $Parser)
            $this->Parser->SetDebugLevel($this->DebugLevel - 3);
        }
        $this->Parser->ParseText($this->XmlText);

        # discard channel info parsed with the previous encoding so that
        #       the GetChannel*() methods reload it from the new parse
        $this->ChannelTitle = NULL;
        $this->ChannelLink = NULL;
        $this->ChannelDescription = NULL;
    }

    # return encoding to caller
    return $this->Encoding;
}
122 
/**
 * Try to automatically detect and set the encoding of the RSS feed.  The
 * encoding named in the XML declaration of the current feed text is used
 * if there is one; otherwise, when a cache database is in use, the charset
 * stored with the cached copy of the feed; otherwise ISO-8859-1.  The
 * result is applied via Encoding(), which re-reads and re-parses the feed
 * when the encoding changes.
 */
public function AutodetectEncoding()
{
    # if neither the XML file nor the HTTP response headers specify an
    # encoding, there is an overwhelming chance that it's ISO-8859-1, so
    # use it as the default
    $Encoding = "ISO-8859-1";

    # only match up to the encoding portion of the XML declaration
    # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-prolog-dtd
    # (S is one or more whitespace characters, and the dot in the version
    # number is a literal dot)
    $S = '[ \t\r\n]+';
    $Eq = '[ \t\r\n]*=[ \t\r\n]*';
    $VersionNum = '1\.[0-9]+';
    $EncName = '[A-Za-z][A-Za-z0-9._\-]*';
    $VersionInfo = "{$S}version{$Eq}(?:'{$VersionNum}'|\"{$VersionNum}\")";
    $EncodingDecl = "{$S}encoding{$Eq}"
            ."(?P<quote>['\"])(?P<name>{$EncName})(?P=quote)";
    $RegEx = "/<\\?xml{$VersionInfo}{$EncodingDecl}/";

    # give precedence to the encoding specified within the XML file since
    # a RSS feed publisher might not have access to HTTP response headers
    if (preg_match($RegEx, (string)$this->XmlText, $Matches))
    {
        # the named group excludes the surrounding quotes
        $Encoding = $Matches["name"];
    }

    # then give precedence to the charset parameter in the Content-Type
    # response header, as saved in the cache
    elseif ($this->CacheDB)
    {
        $DB = $this->CacheDB;
        $ServerUrl = addslashes((string)$this->ServerUrl);

        # get the cache value
        $DB->Query("
                SELECT * FROM RSSClientCache
                WHERE ServerUrl = '".$ServerUrl."'");
        $Exists = ($DB->NumRowsSelected() > 0);
        $Cache = $DB->FetchRow();

        # if cached and charset parameter was given in the response headers
        if ($Exists && isset($Cache["Charset"]) && strlen($Cache["Charset"]))
        {
            $Encoding = $Cache["Charset"];
        }
    }

    $this->Encoding($Encoding);
}
181 
/**
 * Retrieve the RSS items from the RSS feed.
 * @param int $NumberOfItems Maximum number of items to return; all items
 *       are returned when this is NULL.  At least one item is returned
 *       whenever the feed has any.  (OPTIONAL)
 * @param string $ChannelName Not used by this method.  (OPTIONAL)
 * @return array Items in feed order, each an array with "title",
 *       "description", "link", and "enclosure" entries, or an empty array
 *       when no items are found.
 */
public function GetItems($NumberOfItems = NULL, $ChannelName = NULL)
{
    $Feed = $this->Parser;

    # position the parser on the element that holds the items: the
    #       channel inside "rss" when there is one, "rdf:RDF" otherwise
    $Feed->SeekToRoot();
    if ($Feed->SeekTo("rss") !== NULL)
    {
        $Feed->SeekTo("channel");
    }
    else
    {
        $Feed->SeekTo("rdf:RDF");
    }

    # bail out with an empty list if there are no items
    $Items = array();
    if (!$Feed->SeekTo("item"))
    {
        return $Items;
    }

    # collect items until the parser runs out or the limit is reached
    do
    {
        $Items[] = array(
                "title" => $Feed->GetData("title"),
                "description" => $Feed->GetData("description"),
                "link" => $Feed->GetData("link"),
                "enclosure" => $Feed->GetAttributes("enclosure"),
                );
    }
    while ($Feed->NextItem()
            && (($NumberOfItems == NULL) || (count($Items) < $NumberOfItems)));

    return $Items;
}
231 
/**
 * Retrieve the channel title as given in the RSS feed.  Channel info is
 * loaded from the parsed feed on demand, the first time it is needed.
 * @return string Channel title.
 */
public function GetChannelTitle()
{
    # load channel info if no title has been loaded yet
    if ($this->ChannelTitle === NULL)
    {
        $this->LoadChannelInfo();
    }

    return $this->ChannelTitle;
}
241 
/**
 * Retrieve the URL to the site of the channel in the RSS feed.  Channel
 * info is loaded from the parsed feed on demand, the first time it is
 * needed.
 * @return string Channel link.
 */
public function GetChannelLink()
{
    # load channel info if no link has been loaded yet
    if ($this->ChannelLink === NULL)
    {
        $this->LoadChannelInfo();
    }

    return $this->ChannelLink;
}
251 
/**
 * Get the description of the channel as given in the RSS feed.  Channel
 * info is loaded from the parsed feed on demand, the first time it is
 * needed.
 * @return string Channel description.
 */
public function GetChannelDescription()
{
    # load channel info if no description has been loaded yet
    if ($this->ChannelDescription === NULL)
    {
        $this->LoadChannelInfo();
    }

    return $this->ChannelDescription;
}
261 
/**
 * Determine whether the RSS client is using cached data.
 * @return bool TRUE if the most recent feed retrieval was satisfied from
 *       the cache, otherwise FALSE (including when no retrieval has
 *       recorded a result).
 */
public function UsedCachedData()
{
    # the flag is NULL until a retrieval sets it, so normalize to a boolean
    return ($this->CachedDataWasUsed === TRUE);
}
270 
271  # ---- PRIVATE INTERFACE -------------------------------------------------
272 
# database object used for caching feed content (NULL if not caching)
private $CacheDB;
# maximum age (in seconds) at which cached feed content is still used
private $RefreshTime;
# URL of the RSS feed
private $ServerUrl;
# debugging output level
private $DebugLevel;
# character encoding passed to the XML parser
private $Encoding;
# raw XML text of the feed
private $XmlText;
# XML parser holding the parsed feed
private $Parser;
# channel info (NULL until loaded by LoadChannelInfo())
private $ChannelTitle;
private $ChannelLink;
private $ChannelDescription;
# whether the most recent feed retrieval was satisfied from the cache
private $CachedDataWasUsed;
286 
/**
 * Set the current level of debugging output.
 * @param int $NewLevel New debugging output level.
 */
private function SetDebugLevel($NewLevel)
{
    # takes effect for later checks of the level; the XML parser's own
    #       debug level is not touched here
    $this->DebugLevel = $NewLevel;
}
296 
/**
 * Fetch the content at a URL along with the media type and charset given
 * in the Content-Type header of the response, if any.
 * @param string $Url URL to fetch.
 * @return array Three values, in order: the fetched text (FALSE if the
 *       fetch failed), the media type, and the charset.  The media type
 *       and charset are both NULL unless the final response carried a
 *       Content-Type header that includes a charset parameter.
 */
private function GetXmlInfo($Url)
{
    $Type = NULL;
    $Charset = NULL;

    # fetch the content (warnings are suppressed; failure is reported to
    #       the caller through the FALSE text value)
    $Text = @file_get_contents($Url);
    if ($Text === FALSE)
    {
        return array($Text, $Type, $Charset);
    }

    # this must come after file_get_contents() and before any other remote
    # fetching is done (the variable is only populated by the HTTP
    # wrapper, so it may not exist for other kinds of URLs)
    $Headers = (isset($http_response_header)
            && is_array($http_response_header))
            ? $http_response_header : array();

    # when redirects were followed the header list covers every response,
    #       so keep only the headers after the last status line
    $FinalHeaders = array();
    foreach ($Headers as $Header)
    {
        if (preg_match('/^HTTP\//i', $Header))
        {
            $FinalHeaders = array();
        }
        else
        {
            $FinalHeaders[] = $Header;
        }
    }

    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.17
    $LWS = '(?:[ \t]*|\r\n[ \t]+)';
    # (the hyphen is escaped so that "+-." is not read as a range, which
    # would wrongly admit the "," separator)
    $Token = '[!\x23-\x27*+\-.\x30-\x39\x41-\x5A\x5E-\x7A|~]+';
    # (four backslashes in the PHP literal yield the regex "\\", i.e. one
    # literal backslash followed by any ASCII character)
    $QuotedPair = '\\\\[\x00-\x7F]';
    $QdText = '(?:[^\x00-\x1F\x7F"]|[ \t]|\r\n[ \t]+)';
    $QuotedString = "\"(?:{$QuotedPair}|{$QdText})*\"";
    $Value = "(?:{$Token}|{$QuotedString})";
    $Parameter = "{$Token}{$LWS}={$LWS}{$Value}";

    # these make the Content-Type regex specific to Content-Type
    # values with charset parameters in them, but make capturing
    # the charset much easier
    $BasicParameter = "(?:;{$LWS}{$Parameter})*";
    $CharsetParameter =
            ";{$LWS}charset{$LWS}={$LWS}(?P<charset>{$Value})";
    $ModParameter = "{$BasicParameter}{$CharsetParameter}{$BasicParameter}";
    $MediaType = "(?P<type>{$Token}{$LWS}\\/{$LWS}{$Token})"
            ."{$LWS}{$ModParameter}";

    # back to the spec
    $ContentType = "Content-Type{$LWS}:{$LWS}{$MediaType}{$LWS}";
    $RegEx = "/^{$ContentType}$/i";

    foreach ($FinalHeaders as $Header)
    {
        if (preg_match($RegEx, $Header, $Matches))
        {
            $Type = $Matches["type"];
            $Charset = $Matches["charset"];

            # a quoted-string value is reported without its quotes and
            #       with quoted pairs unescaped
            if ((strlen($Charset) >= 2) && ($Charset[0] == '"'))
            {
                $Charset = stripslashes(substr($Charset, 1, -1));
            }
            break;
        }
    }

    return array($Text, $Type, $Charset);
}
354 
/**
 * Retrieve the XML text of a feed, using the cache database (if any) to
 * avoid querying the server when an unexpired cached copy exists, and
 * saving freshly fetched content back to the cache.  Also records the
 * feed URL and whether cached data was used.
 * @param string $ServerUrl URL of the RSS feed.
 * @param object $CacheDB Database object to use for caching, or NULL to
 *       keep using any cache database already set.
 * @param int $RefreshTime Maximum age, in seconds, at which cached
 *       content is still used.
 * @return string XML text of the feed, or an empty string if the server
 *       query failed.
 */
private function QueryServerWithCaching($ServerUrl, $CacheDB, $RefreshTime)
{
    # save RSS server URL
    $this->ServerUrl = $ServerUrl;

    # save caching info (if any)
    if ($CacheDB)
    {
        $this->CacheDB = $CacheDB;
    }
    $DB = $this->CacheDB;

    # assume the cache will not be used until it actually is
    $this->CachedDataWasUsed = FALSE;

    # if caching info was supplied
    if ($DB)
    {
        # look up cached information for this server
        # NOTE(review): values are escaped with addslashes() as elsewhere
        #       in this class -- confirm whether the database object
        #       offers a safer escaping or parameter mechanism
        $QueryTimeCutoff = date("Y-m-d H:i:s", (time() - $RefreshTime));
        $DB->Query("
                SELECT * FROM RSSClientCache
                WHERE ServerUrl = '".addslashes($ServerUrl)."'
                AND LastQueryTime > '".$QueryTimeCutoff."'");

        # if we have cached info that has not expired
        if ($CachedXml = $DB->FetchField("CachedXml"))
        {
            # use cached info
            $this->CachedDataWasUsed = TRUE;
            return $CachedXml;
        }
    }

    # query server for XML text (this also happens when there is no
    #       cache database, so that uncached clients still get the feed)
    list($Text, $Type, $Charset) = $this->GetXmlInfo($ServerUrl);

    # if query failed
    if ($Text === FALSE)
    {
        return "";
    }

    # if caching is in use
    if ($DB)
    {
        # clear out any old cache entries
        $DB->Query("
                DELETE FROM RSSClientCache
                WHERE ServerUrl = '".addslashes($ServerUrl)."'");

        # save info in cache (type and charset may be NULL, so they are
        #       cast to strings before escaping)
        $DB->Query("
                INSERT INTO RSSClientCache
                (ServerUrl, CachedXml, Type, Charset, LastQueryTime)
                VALUES (
                '".addslashes($ServerUrl)."',
                '".addslashes($Text)."',
                '".addslashes((string)$Type)."',
                '".addslashes((string)$Charset)."',
                NOW())");
    }

    # return query result to caller
    return $Text;
}
430 
/**
 * Load the channel title, link, and description from the parsed feed
 * into the corresponding object properties.
 */
private function LoadChannelInfo()
{
    $Xml = $this->Parser;

    # move to the top-level feed element ("rss", or failing that "rdf:RDF")
    $Xml->SeekToRoot();
    if ($Xml->SeekTo("rss") === NULL)
    {
        $Xml->SeekTo("rdf:RDF");
    }

    # read the channel values
    $Xml->SeekTo("channel");
    $this->ChannelTitle = $Xml->GetData("title");
    $this->ChannelLink = $Xml->GetData("link");
    $this->ChannelDescription = $Xml->GetData("description");
}
449 }
UsedCachedData()
Determine whether the RSS client is using cached data.
Definition: RSSClient.php:266
GetItems($NumberOfItems=NULL, $ChannelName=NULL)
Retrieve the RSS items from the RSS feed.
Definition: RSSClient.php:190
GetChannelTitle()
Retrieve the channel title as given in the RSS feed.
Definition: RSSClient.php:236
Implements an RSS client for fetching, parsing, and caching RSS feeds.
Definition: RSSClient.php:13
GetChannelLink()
Retrieve the URL to the site of the channel in the RSS feed.
Definition: RSSClient.php:246
ServerUrl($NewValue=NULL)
Get or set the RSS feed URL.
Definition: RSSClient.php:63
Encoding($NewValue=NULL)
Get or set the character encoding of the RSS feed.
Definition: RSSClient.php:96
AutodetectEncoding()
Try to automatically detect and set the encoding of the RSS feed.
Definition: RSSClient.php:128
__construct($ServerUrl, $CacheDB=NULL, $RefreshTime=600, $Encoding="UTF-8", $DebugLevel=0)
Object constructor.
Definition: RSSClient.php:29
GetChannelDescription()
Get the description of the channel as given in the RSS feed.
Definition: RSSClient.php:256