4 #   FILE:  Scout--OAIClient.php 
    5 #     Provides a client for pulling data from OAI-PMH providers 
    6 #     For protocol documentation, see: 
    7 #     http://www.openarchives.org/OAI/openarchivesprotocol.html 
   10 #       OAIClient(ServerUrl, Cache) 
   13 #           - Change the base url of the remote repository 
   14 #       MetadataPrefix($pfx) 
   15 #           - Set the schema we will request from remote 
   17 #           - Restrict queries to a single set 
   19 #             http://www.openarchives.org/OAI/openarchivesprotocol.html#Set 
   21 #           - Fetch identifying information about the remote repository 
   23 #           - Fetch information about what schemas remote can serve 
   24 #       GetRecords($start,$end) 
   25 #           - Pull records in batches, optionally with date restrictions 
   27 #           - Pull a single record using a unique identifier 
   28 #       MoreRecordsAvailable() 
   29 #           - Determine if a batch pull is complete or not 
   30 #       ResetRecordPointer() 
   31 #           - Restart a batch pull from the beginning 
   33 #           - Set verbosity of debug output 
   35 #   Copyright 2008 Edward Almasy and Internet Scout 
   36 #   http://scout.wisc.edu 
   39 require_once(
"XMLParser.php");
 
   44     # ---- PUBLIC INTERFACE -------------------------------------------------- 
   54         # set default debug level 
   55         $this->DebugLevel = 0;
 
   60         # set default metadata prefix 
   63         # set default set specification for queries 
   66         $this->CacheSequenceNumber = 0;
 
   69             $this->Cache = $Cache;
 
   70             $this->UsingCache = is_dir($Cache);
 
   71             if ($this->UsingCache == FALSE )
 
   86         if ($NewValue != NULL)
 
   90         return $this->ServerUrl;
 
  101         if ($NewValue != NULL)
 
  105         return $this->MetadataPrefix;
 
  114     function SetSpec($NewValue = 
"X-NOSETSPECVALUE-X")
 
  116         if ($NewValue != 
"X-NOSETSPECVALUE-X")
 
  120         return $this->SetSpec;
 
  132         # query server for XML text 
  133         $XmlText = $this->PerformQuery(
"Identify");
 
  134         $this->DebugOutVar(8,__METHOD__,
"XmlText",htmlspecialchars($XmlText));
 
  136         # convert XML text into object 
  137         $Xml = simplexml_load_string($XmlText);
 
  138         $this->DebugOutVar(9, __METHOD__, 
"Xml", $Xml);
 
  140         # if identification info was found 
  142         if (isset($Xml->Identify))
 
  145             $Ident = $Xml->Identify;
 
  146             $this->GetValFromXml($Ident, 
"repositoryName", 
"Name", $Info);
 
  147             $this->GetValFromXml($Ident, 
"adminEmail", 
"Email", $Info);
 
  148             $this->GetValFromXml($Ident, 
"baseURL", 
"URL", $Info);
 
  151         # return info to caller 
  162         # query server for XML text 
  163         $XmlText = $this->PerformQuery(
"ListMetadataFormats");
 
  164         $this->DebugOutVar(8,__METHOD__,
"XmlText",htmlspecialchars($XmlText));
 
  166         # convert XML text into object 
  167         $Xml = simplexml_load_string($XmlText);
 
  168         $this->DebugOutVar(9, __METHOD__, 
"Xml", $Xml);
 
  170         # if format info was found 
  172         if (isset($Xml->ListMetadataFormats->metadataFormat))
 
  176             foreach ($Xml->ListMetadataFormats->metadataFormat as $Format)
 
  178                 $this->GetValFromXml(
 
  179                         $Format, 
"metadataPrefix", 
"Name", $Formats[$Index]);
 
  180                 $this->GetValFromXml(
 
  181                         $Format, 
"schema", 
"Schema", $Formats[$Index]);
 
  182                 $this->GetValFromXml(
 
  183                         $Format, 
"metadataNamespace", 
"Namespace",
 
  189         # return info to caller 
  202         if( $this->Cache != NULL )
 
  204             $cache_fname = sprintf(
"%s/%010x",
 
  206                                    $this->CacheSequenceNumber);
 
  207             $this->CacheSequenceNumber++;
 
  210         if( $this->Cache == NULL or $this->UsingCache == FALSE )
 
  212             # if we have resumption token from prior query 
  213             if (isset($this->ResumptionToken))
 
  215                 # use resumption token as sole argument 
  216                 $Args[
"resumptionToken"] = $this->ResumptionToken;
 
  220                 # set up arguments for query 
  221                 $Args[
"metadataPrefix"] = $this->MetadataPrefix;
 
  222                 if ($StartDate) {  $Args[
"from"] = $StartDate;  }
 
  223                 if ($EndDate)   {  $Args[
"until"] = $EndDate;  }
 
  224                 if ($this->
SetSpec) {  $Args[
"set"] = $this->SetSpec;  }
 
  227             # query server for XML text 
  228             $XmlText = $this->PerformQuery(
"ListRecords", $Args);
 
  230             if( $this->Cache != NULL )
 
  232                 file_put_contents( $cache_fname, $XmlText );
 
  237             # Get XML text from the cache 
  238             $XmlText = file_get_contents( $cache_fname );
 
  241         $this->DebugOutVar(8, __METHOD__,
"XmlText",htmlspecialchars($XmlText));
 
  243         return $this->GetRecordsFromXML($XmlText, 
"listrecords" );
 
  262         $Args[
"metadataPrefix"] = $this->MetadataPrefix;
 
  263         $Args[
"identifier"] = $Id;
 
  265         # query server for XML text 
  266         $XmlText = $this->PerformQuery(
"GetRecord", $Args);
 
  267         $this->DebugOutVar(8, __METHOD__,
"XmlText",htmlspecialchars($XmlText));
 
  269         return $this->GetRecordsFromXML($XmlText, 
"getrecord" );
 
  279         return isset($this->ResumptionToken) ? TRUE : FALSE;
 
  287         unset($this->ResumptionToken);
 
  288         $this->CacheSequenceNumber = 0;
 
  298         $this->DebugLevel = $NewLevel;
 
  302     # ---- PRIVATE INTERFACE ------------------------------------------------- 
  305     private $MetadataPrefix;
 
  308     private $ResumptionToken;
 
  311     private $CacheSequenceNumber;
 
  313     # perform OAI query and return resulting data to caller 
  314     private function PerformQuery($QueryVerb, $Args = NULL)
 
  316         # open stream to OAI server 
  318         if (strpos($this->
ServerUrl, 
"?") === FALSE)
 
  320             $QueryUrl = $this->
ServerUrl.
"?verb=".$QueryVerb;
 
  324             $QueryUrl = $this->
ServerUrl.
"&verb=".$QueryVerb;
 
  329             foreach ($Args as $ArgName => $ArgValue)
 
  331                 $QueryUrl .= 
"&".urlencode($ArgName).
"=".urlencode($ArgValue);
 
  334         $FHndl = fopen($QueryUrl, 
"r");
 
  336         # if stream was successfully opened 
  338         if ($FHndl !== FALSE)
 
  340             # while lines left in response 
  341             while (!feof($FHndl))
 
  343                 # read next chunk (up to 10000000 bytes) from server and append it to text to be parsed 
  344                 $Text .= fread($FHndl, 10000000);
 
  348         # close OAI server stream 
  351         # return query result data to caller 
  355     # set array value if available in simplexml object 
  356     private function GetValFromXml($Xml, $SrcName, $DstName, &$Results)
 
  358         if (isset($Xml->$SrcName))
 
  360             $Results[$DstName] = trim($Xml->$SrcName);
 
  364     # print variable contents if debug is above specified level 
  365     private function DebugOutVar($Level, $MethodName, $VarName, $VarValue)
 
  367         if ($this->DebugLevel >= $Level)
 
  369             print(
"\n<pre>".$MethodName.
"()  ".$VarName.
" = \n");
 
  375     # Recursively dump tags inside a metadata section, flattening them 
  377     private function DumpTagsRecursive(&$Records, $Index, $Parser, $ParentTagName=NULL)
 
  379         $TagName = $Parser->GetTagName();
 
  382             $StorageTagName = ($ParentTagName!==NULL) ?
 
  383                 $ParentTagName.
"/".$TagName : $TagName;
 
  385             if ($Parser->SeekToChild() ){
 
  386                 $this->DumpTagsRecursive( $Records, $Index, $Parser, $StorageTagName );
 
  387                 $Parser->SeekToParent();
 
  391                 $Records[$Index][
"metadata"][$StorageTagName][] = $Parser->GetData();
 
  393         } 
while ($TagName = $Parser->NextTag());
 
  396     # Query has been sent, we need to retrieve records that came from it. 
  397     private function GetRecordsFromXML($XmlText, $ParseTo ){
 
  398         # create XML parser and pass it text 
  400         $Parser->ParseText($XmlText);
 
  402         $this->DebugOutVar(9, __METHOD__, 
"Parser", $Parser);
 
  404         # if records were found 
  406         $ItemCount = $Parser->SeekTo(
"oai-pmh", $ParseTo, 
"record");
 
  413                 # grab record identifier and date 
  414                 $Records[$Index][
"identifier"]=$Parser->GetData(
"header",
 
  416                 $Records[$Index][
"datestamp"]=$Parser->GetData(
"header",
 
  420                 $SeekResult = $Parser->SeekTo(
"metadata");
 
  423                     $SeekResult = $Parser->SeekToChild();
 
  426                         $Records[$Index][
"format"] = $Parser->GetTagName();
 
  427                         $SeekResult = $Parser->SeekToChild();
 
  430                             $this->DumpTagsRecursive($Records, $Index, $Parser);
 
  431                             $Parser->SeekToParent();
 
  433                         $Parser->SeekToParent();
 
  435                     $Parser->SeekToParent();
 
  438                 # grab search info (if any) 
  439                 $SeekResult = $Parser->SeekTo(
"about");
 
  442                     $SeekResult = $Parser->SeekTo(
"searchInfo");
 
  445                         $SeekResult = $Parser->SeekToChild();
 
  448                             $TagName = $Parser->GetTagName();
 
  451                                 $Records[$Index][
"about"][
"SEARCHINFO"][$TagName][] =
 
  453                             } 
while ($TagName = $Parser->NextTag());
 
  454                             $Parser->SeekToParent();
 
  456                         $Parser->SeekToParent();
 
  458                     $Parser->SeekToParent();
 
  463             while ($Parser->NextItem());
 
  466         # look for resumption token and save if found 
  467         $Parser->SeekToRoot();
 
  468         $SeekResult = $Parser->SeekTo(
 
  469                 "oai-pmh", 
"listrecords", 
"resumptiontoken");
 
  470         if ($SeekResult !== NULL)
 
  472             $this->ResumptionToken = $Parser->GetData();
 
  476             unset($this->ResumptionToken);
 
  479         # return records to caller 
ResetRecordPointer()
Clear any additional records available after last GetRecords(). 
ServerUrl($NewValue=NULL)
Get or set URL of target OAI repository server. 
OAIClient($ServerUrl, $Cache=NULL)
Class constructor. 
GetRecord($Id)
Get a single record from a repository server. 
MoreRecordsAvailable()
Check whether more records are available after last GetRecords(). 
GetRecords($StartDate=NULL, $EndDate=NULL)
Retrieve records from repository server. 
MetadataPrefix($NewValue=NULL)
Get or set metadata schema for records being retrieved. 
SetSpec($NewValue="X-NOSETSPECVALUE-X")
Get or set specification of subset of records to be retrieved. 
GetIdentification()
Retrieve identification information from repository server. 
SetDebugLevel($NewLevel)
Set current debug output level. 
GetFormats()
Retrieve list of available metadata formats from repository server.