SPTSearchEngine.php

Go to the documentation of this file.
00001 <?PHP
00002 #
00003 #   FILE:  SPTSearchEngine.php
00004 #
00005 #   FUNCTIONS PROVIDED:
00006 #       SPTSearchEngine->SPTSearchEngine()
00007 #           - constructor
00008 #       (see Scout--SearchEngine.php for other public methods)
00009 #
00010 #   AUTHOR:  Edward Almasy
00011 #
00012 #   Part of the Scout Portal Toolkit
00013 #   Copyright 2002-2004 Internet Scout Project
00014 #   http://scout.wisc.edu
00015 #
00016 
#
# Search engine specialization for Scout Portal Toolkit resources.  Wires the
# generic SearchEngine parent class up to the metadata schema and supplies the
# overloaded methods that retrieve field content, phrase matches, comparison
# matches, and sorted item IDs.
#
# NOTE(review): search values are escaped with addslashes() and comparison
# operators are interpolated into SQL verbatim; this assumes callers only pass
# operators that have already been validated -- confirm against the parent
# class before exposing these methods to untrusted input.
#
class SPTSearchEngine extends SearchEngine {

    # ---- PUBLIC INTERFACE --------------------------------------------------

    # Object constructor.  Creates a database handle, initializes the parent
    # SearchEngine with it (plus the "Resources" / "ResourceId" config
    # values), and registers every schema field that maps to a search field
    # type.  Exits with an error message if the schema contains a field type
    # this class does not know about.
    public function SPTSearchEngine()
    {
        # create a database handle
        $DB = new SPTDatabase();

        # pass database handle and config values to real search engine object
        $this->SearchEngine($DB, "Resources", "ResourceId");

        # for each field defined in schema
        $this->Schema = new MetadataSchema();
        foreach ($this->Schema->GetFields() as $Field)
        {
            # determine field type for searching
            switch ($Field->Type())
            {
                case MetadataSchema::MDFTYPE_TEXT:
                case MetadataSchema::MDFTYPE_PARAGRAPH:
                case MetadataSchema::MDFTYPE_USER:
                case MetadataSchema::MDFTYPE_TREE:
                case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                case MetadataSchema::MDFTYPE_OPTION:
                case MetadataSchema::MDFTYPE_IMAGE:
                case MetadataSchema::MDFTYPE_FILE:
                case MetadataSchema::MDFTYPE_URL:
                    $FieldType = SEARCHFIELD_TEXT;
                    break;

                case MetadataSchema::MDFTYPE_NUMBER:
                case MetadataSchema::MDFTYPE_FLAG:
                    $FieldType = SEARCHFIELD_NUMERIC;
                    break;

                case MetadataSchema::MDFTYPE_DATE:
                    $FieldType = SEARCHFIELD_DATERANGE;
                    break;

                case MetadataSchema::MDFTYPE_TIMESTAMP:
                    $FieldType = SEARCHFIELD_DATE;
                    break;

                case MetadataSchema::MDFTYPE_POINT:
                    # (point fields are not added to the search engine)
                    $FieldType = NULL;
                    break;

                default:
                    exit("ERROR: unknown field type in SPT--SearchEngine.php");
            }

            # add field to search engine if it has a search type
            if ($FieldType !== NULL)
            {
                $this->AddField($Field->Name(), $Field->DBFieldName(), $FieldType,
                        $Field->SearchWeight(), $Field->IncludeInKeywordSearch());
            }
        }
    }

    # Overloaded version of method to retrieve text from DB.
    #   $ItemId - ID of resource to retrieve content for
    #   $FieldName - name of metadata field to retrieve
    # Returns the value of Resource::Get() for the field (text, including
    # variants).
    public function GetFieldContent($ItemId, $FieldName)
    {
        # get resource object
        $Resource = new Resource($ItemId);

        # retrieve text (including variants) from resource object and return to caller
        return $Resource->Get($FieldName, FALSE, TRUE);
    }

    # Overloaded version of method to retrieve resource/phrase match list.
    #   $FieldName - name of metadata field to search
    #   $Phrase - phrase to look for (matched case-insensitively)
    # Returns a list of IDs of resources whose field contains the phrase, or
    # an empty array if the field is unknown, the field type is not searchable
    # by phrase, or nothing matched.
    public function SearchFieldForPhrases($FieldName, $Phrase)
    {
        # bail out if field is unknown (mirrors the check made in
        #       SearchFieldsForComparisonMatches())
        $Field = $this->Schema->GetFieldByName($FieldName);
        if (!is_object($Field)) {  return array();  }

        # normalize and escape search phrase for use in SQL query
        $SearchPhrase = strtolower(addslashes($Phrase));

        # build query (or pair of queries) based on field type
        $QueryString = NULL;
        $SecondQueryString = NULL;
        switch ($Field->Type())
        {
            case MetadataSchema::MDFTYPE_TEXT:
            case MetadataSchema::MDFTYPE_PARAGRAPH:
            case MetadataSchema::MDFTYPE_FILE:
            case MetadataSchema::MDFTYPE_URL:
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                            ." IN LOWER(`".$Field->DBFieldName()."`)) ";
                break;

            case MetadataSchema::MDFTYPE_IMAGE:
                # (image fields are searched via their alt text column)
                $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."'"
                            ." IN LOWER(`".$Field->DBFieldName()."AltText`)) ";
                break;

            case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                # first query matches controlled names themselves
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();

                # second query matches variants of controlled names
                $SecondQueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames, VariantNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(VariantName)) "
                        ."AND VariantNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.ControlledNameId"
                                ." = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_OPTION:
                $QueryString = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNames.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_TREE:
                $QueryString = "SELECT DISTINCT ResourceClassInts.ResourceId "
                        ."FROM ResourceClassInts, Classifications "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ClassificationName)) "
                        ."AND Classifications.ClassificationId = ResourceClassInts.ClassificationId "
                        ."AND Classifications.FieldId = ".$Field->Id();
                break;

            case MetadataSchema::MDFTYPE_USER:
                # look up user whose login or real name contains the phrase
                $UserId = $this->DB->Query("SELECT UserId FROM APUsers "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(UserName)) "
                        ."OR POSITION('".$SearchPhrase."' IN LOWER(RealName))", "UserId");
                if ($UserId != NULL)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                            ."WHERE `".$Field->DBFieldName()."` = ".$UserId;
                }
                break;

            case MetadataSchema::MDFTYPE_NUMBER:
                # cast once and test the integer, so that the outcome does
                #       not depend on string/number comparison rules (which
                #       differ between PHP versions for non-numeric strings)
                $NumericValue = (int)$SearchPhrase;
                if ($NumericValue > 0)
                {
                    $QueryString = "SELECT DISTINCT ResourceId FROM Resources "
                            ."WHERE `".$Field->DBFieldName()."` = ".$NumericValue;
                }
                break;

            case MetadataSchema::MDFTYPE_FLAG:
            case MetadataSchema::MDFTYPE_DATE:
            case MetadataSchema::MDFTYPE_TIMESTAMP:
                # (these types not yet handled by search engine for phrases)
                break;
        }

        # if no query could be built then nothing can match
        if ($QueryString === NULL) {  return array();  }

        # build match list based on results returned from DB
        $MatchList = $this->FetchIdsForPhraseQuery(
                $QueryString, "phrase search query");
        if ($SecondQueryString !== NULL)
        {
            # combine the two lists with array_merge() rather than the "+"
            #       operator, which would discard any second-query entry
            #       whose numeric key already exists in the first list
            #       (assumes FetchColumn() returns a numerically-indexed
            #       list, as the array_merge() use in
            #       SearchFieldsForComparisonMatches() implies)
            $SecondMatchList = $this->FetchIdsForPhraseQuery(
                    $SecondQueryString, "second phrase search query");
            $MatchList = array_values(array_unique(
                    array_merge($MatchList, $SecondMatchList)));
        }

        # return list of matching resources to caller
        return $MatchList;
    }

    # Search fields for records that meet comparisons.
    #   $FieldNames - names of metadata fields to compare against
    #   $Operators - SQL comparison operators, indexed the same as $FieldNames
    #   $Values - values to compare with, indexed the same as $FieldNames
    # Comparisons are combined with AND or OR according to the current
    # default search logic.  Unknown field names are skipped.
    # Returns a list of IDs of matching resources (empty if no comparison
    # could be turned into a query).
    public function SearchFieldsForComparisonMatches($FieldNames, $Operators, $Values)
    {
        # use SQL keyword appropriate to current search logic for combining operations
        $CombineWord = ($this->DefaultSearchLogic == SEARCHLOGIC_AND) ? " AND " : " OR ";

        # queries being assembled (string, or array of strings for
        #       multi-part queries) and flags for queries needing a closing paren
        $Queries = array();
        $CloseQuery = array();

        # for each comparison
        foreach ($FieldNames as $Index => $FieldName)
        {
            $Operator = $Operators[$Index];
            $Value = $Values[$Index];

            # skip comparison if field is unknown
            $Field = $this->Schema->GetFieldByName($FieldName);
            if ($Field == NULL) {  continue;  }

            # determine query based on field type
            switch ($Field->Type())
            {
                case MetadataSchema::MDFTYPE_TEXT:
                case MetadataSchema::MDFTYPE_PARAGRAPH:
                case MetadataSchema::MDFTYPE_NUMBER:
                case MetadataSchema::MDFTYPE_FLAG:
                case MetadataSchema::MDFTYPE_USER:
                case MetadataSchema::MDFTYPE_URL:
                    # user fields are compared by the ID of the named user
                    if ($Field->Type() == MetadataSchema::MDFTYPE_USER)
                    {
                        $User = new SPTUser($Value);
                        $Value = $User->Id();
                    }
                    $this->AddResourcesCondition($Queries, $CombineWord,
                            "`".$Field->DBFieldName()."` ".$Operator
                                    ." '".addslashes($Value)."' ");
                    break;

                case MetadataSchema::MDFTYPE_CONTROLLEDNAME:
                    $QueryIndex = "ResourceNameInts".$Field->Id();

                    # part A compares against controlled names themselves
                    if (!isset($Queries[$QueryIndex]["A"]))
                    {
                        $Queries[$QueryIndex]["A"] =
                                "SELECT DISTINCT ResourceId"
                                ." FROM ResourceNameInts, ControlledNames "
                                ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                ." AND ( ";
                        $CloseQuery[$QueryIndex]["A"] = TRUE;
                    }
                    else
                    {
                        $Queries[$QueryIndex]["A"] .= $CombineWord;
                    }
                    $Queries[$QueryIndex]["A"] .=
                            "((ResourceNameInts.ControlledNameId"
                                    ." = ControlledNames.ControlledNameId"
                            ." AND ControlledName "
                                    .$Operator." '".addslashes($Value)."'))";

                    # part B compares against variants of controlled names
                    if (!isset($Queries[$QueryIndex]["B"]))
                    {
                        $Queries[$QueryIndex]["B"] =
                                "SELECT DISTINCT ResourceId"
                                . " FROM ResourceNameInts, ControlledNames,"
                                        ." VariantNames "
                                ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                ." AND ( ";
                        $CloseQuery[$QueryIndex]["B"] = TRUE;
                    }
                    else
                    {
                        $Queries[$QueryIndex]["B"] .= $CombineWord;
                    }
                    $Queries[$QueryIndex]["B"] .=
                            "((ResourceNameInts.ControlledNameId"
                                    ." = ControlledNames.ControlledNameId"
                            ." AND ResourceNameInts.ControlledNameId"
                                    ." = VariantNames.ControlledNameId"
                            ." AND VariantName "
                                    .$Operator." '".addslashes($Value)."'))";
                    break;

                case MetadataSchema::MDFTYPE_OPTION:
                    $QueryIndex = "ResourceNameInts".$Field->Id();
                    if (!isset($Queries[$QueryIndex]))
                    {
                        $Queries[$QueryIndex] =
                                "SELECT DISTINCT ResourceId FROM ResourceNameInts, ControlledNames "
                                ." WHERE ControlledNames.FieldId = ".$Field->Id()
                                ." AND ( ";
                        $CloseQuery[$QueryIndex] = TRUE;
                    }
                    else
                    {
                        $Queries[$QueryIndex] .= $CombineWord;
                    }
                    $Queries[$QueryIndex] .= "(ResourceNameInts.ControlledNameId = ControlledNames.ControlledNameId"
                            ." AND ControlledName ".$Operator." '".addslashes($Value)."')";
                    break;

                case MetadataSchema::MDFTYPE_TREE:
                    $QueryIndex = "ResourceClassInts".$Field->Id();
                    if (!isset($Queries[$QueryIndex]))
                    {
                        $Queries[$QueryIndex] = "SELECT DISTINCT ResourceId FROM ResourceClassInts, Classifications "
                                ." WHERE ResourceClassInts.ClassificationId = Classifications.ClassificationId"
                                ." AND Classifications.FieldId = ".$Field->Id()." AND ( ";
                        $CloseQuery[$QueryIndex] = TRUE;
                    }
                    else
                    {
                        $Queries[$QueryIndex] .= $CombineWord;
                    }
                    $Queries[$QueryIndex] .= " ClassificationName ".$Operator." '".addslashes($Value)."'";
                    break;

                case MetadataSchema::MDFTYPE_TIMESTAMP:
                    # if value appears to have time component or text description
                    #       (strpos() result is compared against FALSE so that
                    #       a colon at offset 0 still counts as present)
                    if ((strpos($Value, ":") !== FALSE)
                            || strstr($Value, "day")
                            || strstr($Value, "week")
                            || strstr($Value, "month")
                            || strstr($Value, "year")
                            || strstr($Value, "hour")
                            || strstr($Value, "minute"))
                    {
                        # flip operator if necessary ("less than 3 days ago"
                        #       means a timestamp later than that point in time)
                        if (strstr($Value, "ago"))
                        {
                            $OperatorFlipMap = array(
                                    "<" => ">=",
                                    ">" => "<=",
                                    "<=" => ">",
                                    ">=" => "<",
                                    );
                            $Operator = isset($OperatorFlipMap[$Operator])
                                    ? $OperatorFlipMap[$Operator] : $Operator;
                        }

                        # use strtotime method to build condition
                        $TimestampValue = strtotime($Value);
                        if (($TimestampValue !== FALSE) && ($TimestampValue != -1))
                        {
                            # extend a bare date to end of day for "<=" so
                            #       that the whole day is included
                            if ((date("H:i:s", $TimestampValue) == "00:00:00")
                                    && (strpos($Value, "00:00") === FALSE)
                                    && ($Operator == "<="))
                            {
                                $NormalizedValue =
                                        date("Y-m-d", $TimestampValue)." 23:59:59";
                            }
                            else
                            {
                                $NormalizedValue = date("Y-m-d H:i:s", $TimestampValue);
                            }
                        }
                        else
                        {
                            # (value could not be parsed, so pass it through escaped)
                            $NormalizedValue = addslashes($Value);
                        }
                        $this->AddResourcesCondition($Queries, $CombineWord,
                                " ( `".$Field->DBFieldName()."` "
                                .$Operator
                                ." '".$NormalizedValue."' ) ");
                    }
                    else
                    {
                        # use Date object method to build condition
                        $Date = new Date($Value);
                        if ($Date->Precision())
                        {
                            $this->AddResourcesCondition($Queries, $CombineWord,
                                    " ( ".$Date->SqlCondition(
                                            $Field->DBFieldName(), NULL, $Operator)." ) ");
                        }
                    }
                    break;

                case MetadataSchema::MDFTYPE_DATE:
                    $Date = new Date($Value);
                    if ($Date->Precision())
                    {
                        $this->AddResourcesCondition($Queries, $CombineWord,
                                " ( ".$Date->SqlCondition(
                                        $Field->DBFieldName()."Begin",
                                        $Field->DBFieldName()."End", $Operator)." ) ");
                    }
                    break;

                case MetadataSchema::MDFTYPE_IMAGE:
                case MetadataSchema::MDFTYPE_FILE:
                    # (these types not yet handled by search engine for comparisons)
                    break;
            }
        }

        # for each assembled query
        $Results = NULL;
        foreach ($Queries as $QueryIndex => $Query)
        {
            # treat single-part queries as a one-element list of parts
            $QueryParts = is_array($Query) ? $Query : array($Query);

            # for each part of query
            $ResourceIds = array();
            foreach ($QueryParts as $PartQuery)
            {
                # add closing paren if query was flagged to be closed
                if (isset($CloseQuery[$QueryIndex])) {  $PartQuery .= " ) ";  }

                # perform query and retrieve IDs
                if ($this->DebugLevel > 5) {  print("SE: "
                        ." performing comparison query (<i>".$PartQuery
                        ."</i>)<br>\n");  }
                $this->DB->Query($PartQuery);

                # (merged with array_merge() rather than "+", which would
                #       drop IDs from later parts that share a numeric key
                #       with an ID from an earlier part)
                $ResourceIds = array_values(array_unique(array_merge(
                        $ResourceIds, $this->DB->FetchColumn("ResourceId"))));
                if ($this->DebugLevel > 5) {  print("SE: "
                        ." comparison query produced <i>"
                        .count($ResourceIds)."</i> results<br>\n");  }
            }

            if ($Results === NULL)
            {
                # set results to values returned from first query
                $Results = $ResourceIds;
            }
            elseif ($this->DefaultSearchLogic == SEARCHLOGIC_AND)
            {
                # remove anything from results that was not returned from query
                $Results = array_intersect($Results, $ResourceIds);
            }
            else
            {
                # add values returned from query to results
                $Results = array_unique(array_merge($Results, $ResourceIds));
            }
        }

        # return results to caller (empty list if no queries were assembled)
        return ($Results === NULL) ? array() : $Results;
    }

    # Retrieve item IDs sorted by the specified field.
    #   $FieldName - name of field to sort by
    #   $SortDescending - TRUE to sort in descending order
    # Returns the value of ResourceFactory::GetResourceIdsSortedBy(), which
    # is called with the inverse of $SortDescending as its second argument.
    public function GetItemIdsSortedByField($FieldName, $SortDescending)
    {
        $RFactory = new ResourceFactory();
        return $RFactory->GetResourceIdsSortedBy($FieldName, !$SortDescending);
    }

    # Queue a background task to update the search data for an item.
    #   $ItemId - ID of item to update (converted to an integer)
    #   $Priority - task priority (OPTIONAL, defaults to low priority)
    public function QueueUpdateForItem($ItemId,
            $Priority = ApplicationFramework::PRIORITY_LOW)
    {
        global $AF;
        $AF->QueueUniqueTask(array(__CLASS__, "RunUpdateForItem"),
                array(intval($ItemId)), $Priority);
    }

    # Update the search data for an item (callback for the task queued by
    # QueueUpdateForItem()).  Does nothing if the resource no longer exists.
    #   $ItemId - ID of item to update
    public static function RunUpdateForItem($ItemId)
    {
        # check that resource still exists
        $RFactory = new ResourceFactory();
        if (!$RFactory->ItemExists($ItemId)) {  return;  }

        # update search data for resource
        $SearchEngine = new SPTSearchEngine();
        $SearchEngine->UpdateForItem($ItemId);
    }

    # functions for backward compatibility w/ old SPT code
    public function UpdateForResource($ItemId) {  $this->UpdateForItem($ItemId);  }

    # ---- PRIVATE INTERFACE -------------------------------------------------

    # metadata schema used to look up fields by name
    private $Schema;

    # Run a phrase search query and return the resource IDs it produced,
    # printing the query (debug level above 7) and the time taken if it ran
    # for more than a tenth of a second (debug level above 9).
    #   $QueryString - SQL query that selects a ResourceId column
    #   $Description - label for the query in debugging output
    # Returns the list of values from the ResourceId column.
    private function FetchIdsForPhraseQuery($QueryString, $Description)
    {
        if ($this->DebugLevel > 7) {  print("SE:  performing ".$Description
                ." (<i>".$QueryString."</i>)<br>\n");  }

        $TimeQuery = ($this->DebugLevel > 9);
        if ($TimeQuery) {  $StartTime = microtime(TRUE);  }
        $this->DB->Query($QueryString);
        if ($TimeQuery)
        {
            # (elapsed time is end minus start; the reverse is never positive)
            $ElapsedTime = microtime(TRUE) - $StartTime;
            if ($ElapsedTime > 0.1)
            {
                printf("SE:  query took %.2f seconds<br>\n", $ElapsedTime);
            }
        }

        return $this->DB->FetchColumn("ResourceId");
    }

    # Append a condition to the query against the Resources table, starting
    # the query if this is its first condition or joining the condition to
    # the existing ones with the supplied combining keyword otherwise.
    #   $Queries - (REFERENCE) queries assembled so far, indexed by query name
    #   $CombineWord - SQL keyword (with surrounding spaces) to join conditions
    #   $Condition - SQL condition to append
    private function AddResourcesCondition(&$Queries, $CombineWord, $Condition)
    {
        $QueryStart = isset($Queries["Resources"])
                ? $Queries["Resources"].$CombineWord
                : "SELECT DISTINCT ResourceId FROM Resources WHERE ";
        $Queries["Resources"] = $QueryStart.$Condition;
    }
}