CWIS Developer Documentation
SearchEngine.php
Go to the documentation of this file.
1 <?PHP
2 #
3 # FILE: SearchEngine.php
4 #
5 # Open Source Metadata Archive Search Engine (OSMASE)
6 # Copyright 2002-2016 Edward Almasy and Internet Scout Research Group
7 # http://scout.wisc.edu
8 #
9 
14 {
15 
16  # ---- PUBLIC INTERFACE --------------------------------------------------
17 
18  # possible types of logical operators
 # NOTE(review): CombineScores() and ParseSearchStringForPhrases() compare
 # their $Logic argument against the strings "OR" / "AND" rather than these
 # integer values -- confirm which representation callers actually pass
19  const LOGIC_AND = 1;
20  const LOGIC_OR = 2;
21 
22  # flags used for indicating field types (stored per field by AddField()
 # and compared against in SearchAcrossFields())
23  const FIELDTYPE_TEXT = 1;
24  const FIELDTYPE_NUMERIC = 2;
25  const FIELDTYPE_DATE = 3;
27 
28  # flags used for indicating word states (distinct bits, combined with
 # "|=" and tested with "&" by the word and phrase search methods)
29  const WORD_PRESENT = 1;
30  const WORD_EXCLUDED = 2;
31  const WORD_REQUIRED = 4;
32 
 # Object constructor: creates the database handle used by this object,
 # records how to locate items (table name, ID column, type column), and
 # turns debugging output off.
 # NOTE(review): the line holding the parameter list is missing from this
 # listing; the body reads $ItemTableName, $ItemIdFieldName and
 # $ItemTypeFieldName, so those are presumably the parameters -- confirm
 # against the complete file.
41  public function __construct(
43  {
44  # create database object for our use
45  $this->DB = new Database();
46 
47  # save item access parameters (used when building the item query in
 # UpdateForItems())
48  $this->ItemTableName = $ItemTableName;
49  $this->ItemIdFieldName = $ItemIdFieldName;
50  $this->ItemTypeFieldName = $ItemTypeFieldName;
51 
52  # set default debug state
53  $this->DebugLevel = 0;
54  }
55 
66  public function AddField($FieldId, $FieldType, $ItemTypes,
67  $Weight, $UsedInKeywordSearch)
68  {
69  # save values
70  $this->FieldInfo[$FieldId]["FieldType"] = $FieldType;
71  $this->FieldInfo[$FieldId]["Weight"] = $Weight;
72  $this->FieldInfo[$FieldId]["InKeywordSearch"] =
73  $UsedInKeywordSearch ? TRUE : FALSE;
74  $this->FieldInfo[$FieldId]["ItemTypes"] = is_array($ItemTypes)
75  ? $ItemTypes : array($ItemTypes);
76  }
77 
83  public function FieldType($FieldId)
84  {
85  return $this->FieldInfo[$FieldId]["FieldType"];
86  }
87 
93  public function FieldWeight($FieldId)
94  {
95  return $this->FieldInfo[$FieldId]["Weight"];
96  }
97 
103  public function FieldInKeywordSearch($FieldId)
104  {
105  return $this->FieldInfo[$FieldId]["InKeywordSearch"];
106  }
107 
112  public function DebugLevel($NewValue)
113  {
114  $this->DebugLevel = $NewValue;
115  }
116 
117 
118  # ---- search functions
119 
138  public function Search(
139  $SearchParams, $StartingResult = 0, $NumberOfResults = PHP_INT_MAX,
140  $SortByField = NULL, $SortDescending = TRUE)
141  {
142  # if keyword search string was passed in
143  if (is_string($SearchParams))
144  {
145  # convert string to search parameter set
146  $SearchString = $SearchParams;
147  $SearchParams = new SearchParameterSet();
148  $SearchParams->AddParameter($SearchString);
149  }
150 
151  # interpret and filter out magic debugging keyword (if any)
152  $KeywordStrings = $SearchParams->GetKeywordSearchStrings();
153  foreach ($KeywordStrings as $String)
154  {
155  $FilteredString = $this->ExtractDebugLevel($String);
156  if ($FilteredString != $String)
157  {
158  $SearchParams->RemoveParameter($String);
159  $SearchParams->AddParameter($FilteredString);
160  }
161  }
162  $this->DMsg(0, "Description: ".$SearchParams->TextDescription());
163 
164  # save start time to use in calculating search time
165  $StartTime = microtime(TRUE);
166 
167  # clear parsed search term list
168  $this->SearchTermList = array();
169 
170  # perform search
171  $Scores = $this->RawSearch($SearchParams);
172 
173  # count, sort, and trim search result scores list
174  $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
175  $SortByField, $SortDescending);
176 
177  # record search time
178  $this->LastSearchTime = microtime(TRUE) - $StartTime;
179 
180  # return search results to caller
181  $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
182  return $Scores;
183  }
184 
204  public function FieldedSearch(
205  $SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
206  $SortByField = NULL, $SortDescending = TRUE)
207  {
208  # pass off the request to grouped search (for now) if appropriate
209  if ($SearchStrings instanceof SearchParameterSet)
210  {
211  return $this->GroupedSearch($SearchStrings, $StartingResult,
212  $NumberOfResults, $SortByField, $SortDescending);
213  }
214 
215  # interpret and filter out magic debugging keyword (if any)
216  $SearchStrings = $this->SetDebugLevel($SearchStrings);
217  $this->DMsg(0, "In FieldedSearch() with "
218  .count($SearchStrings)." search strings");
219 
220  # save start time to use in calculating search time
221  $StartTime = microtime(TRUE);
222 
223  # perform search
224  $Scores = $this->SearchAcrossFields($SearchStrings);
225  $Scores = ($Scores === NULL) ? array() : $Scores;
226 
227  # count, sort, and trim search result scores list
228  $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
229  $SortByField, $SortDescending);
230 
231  # record search time
232  $this->LastSearchTime = microtime(TRUE) - $StartTime;
233 
234  # return list of items to caller
235  $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
236  return $Scores;
237  }
238 
243  public function AddResultFilterFunction($FunctionName)
244  {
245  # save filter function name
246  $this->FilterFuncs[] = $FunctionName;
247  }
248 
255  public function NumberOfResults($ItemType = NULL)
256  {
257  return ($ItemType === NULL) ? $this->NumberOfResultsAvailable
258  : (isset($this->NumberOfResultsPerItemType[$ItemType])
259  ? $this->NumberOfResultsPerItemType[$ItemType] : 0);
260  }
261 
266  public function SearchTerms()
267  {
268  return $this->SearchTermList;
269  }
270 
275  public function SearchTime()
276  {
277  return $this->LastSearchTime;
278  }
279 
286  public function FieldedSearchWeightScale($SearchParams)
287  {
288  $Weight = 0;
289  $FieldIds = $SearchParams->GetFields();
290  foreach ($FieldIds as $FieldId)
291  {
292  if (array_key_exists($FieldId, $this->FieldInfo))
293  {
294  $Weight += $this->FieldInfo[$FieldId]["Weight"];
295  }
296  }
297  if (count($SearchParams->GetKeywordSearchStrings()))
298  {
299  foreach ($this->FieldInfo as $FieldId => $Info)
300  {
301  if ($Info["InKeywordSearch"])
302  {
303  $Weight += $Info["Weight"];
304  }
305  }
306  }
307  return $Weight;
308  }
309 
310 
311  # ---- search database update functions
312 
318  public function UpdateForItem($ItemId, $ItemType)
319  {
320  # clear word count added flags for this item
321  unset($this->WordCountAdded);
322 
323  # delete any existing info for this item
324  $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);
325  $this->DB->Query("DELETE FROM SearchItemTypes WHERE ItemId = ".$ItemId);
326 
327  # save item type
328  $this->DB->Query("INSERT INTO SearchItemTypes (ItemId, ItemType)"
329  ." VALUES (".intval($ItemId).", ".intval($ItemType).")");
330 
331  # for each metadata field
332  foreach ($this->FieldInfo as $FieldId => $Info)
333  {
334  # if valid search weight for field and field applies to this item
335  if (($Info["Weight"] > 0)
336  && in_array($ItemType, $Info["ItemTypes"]))
337  {
338  # retrieve text for field
339  $Text = $this->GetFieldContent($ItemId, $FieldId);
340 
341  # if text is array
342  if (is_array($Text))
343  {
344  # for each text string in array
345  foreach ($Text as $String)
346  {
347  # record search info for text
348  $this->RecordSearchInfoForText($ItemId, $FieldId,
349  $Info["Weight"], $String,
350  $Info["InKeywordSearch"]);
351  }
352  }
353  else
354  {
355  # record search info for text
356  $this->RecordSearchInfoForText($ItemId, $FieldId,
357  $Info["Weight"], $Text,
358  $Info["InKeywordSearch"]);
359  }
360  }
361  }
362  }
363 
370  public function UpdateForItems($StartingItemId, $NumberOfItems)
371  {
372  # retrieve IDs for specified number of items starting at specified ID
373  $this->DB->Query("SELECT ".$this->ItemIdFieldName.", ".$this->ItemTypeFieldName
374  ." FROM ".$this->ItemTableName
375  ." WHERE ".$this->ItemIdFieldName." >= ".$StartingItemId
376  ." ORDER BY ".$this->ItemIdFieldName." LIMIT ".$NumberOfItems);
377  $ItemIds = $this->DB->FetchColumn(
378  $this->ItemTypeFieldName, $this->ItemIdFieldName);
379 
380  # for each retrieved item ID
381  foreach ($ItemIds as $ItemId => $ItemType)
382  {
383  # update search info for item
384  $this->UpdateForItem($ItemId, $ItemType);
385  }
386 
387  # return ID of last item updated to caller
388  return $ItemId;
389  }
390 
395  public function DropItem($ItemId)
396  {
397  # drop all entries pertaining to item from word count table
398  $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);
399  $this->DB->Query("DELETE FROM SearchItemTypes WHERE ItemId = ".$ItemId);
400  }
401 
406  public function DropField($FieldId)
407  {
408  # drop all entries pertaining to field from word counts table
409  $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = \'".$FieldId."\'");
410  }
411 
416  public function SearchTermCount()
417  {
418  return $this->DB->Query("SELECT COUNT(*) AS TermCount"
419  ." FROM SearchWords", "TermCount");
420  }
421 
426  public function ItemCount()
427  {
428  return $this->DB->Query("SELECT COUNT(DISTINCT ItemId) AS ItemCount"
429  ." FROM SearchWordCounts", "ItemCount");
430  }
431 
439  public function AddSynonyms($Word, $Synonyms)
440  {
441  # asssume no synonyms will be added
442  $AddCount = 0;
443 
444  # get ID for word
445  $WordId = $this->GetWordId($Word, TRUE);
446 
447  # for each synonym passed in
448  foreach ($Synonyms as $Synonym)
449  {
450  # get ID for synonym
451  $SynonymId = $this->GetWordId($Synonym, TRUE);
452 
453  # if synonym is not already in database
454  $this->DB->Query("SELECT * FROM SearchWordSynonyms"
455  ." WHERE (WordIdA = ".$WordId
456  ." AND WordIdB = ".$SynonymId.")"
457  ." OR (WordIdB = ".$WordId
458  ." AND WordIdA = ".$SynonymId.")");
459  if ($this->DB->NumRowsSelected() == 0)
460  {
461  # add synonym entry to database
462  $this->DB->Query("INSERT INTO SearchWordSynonyms"
463  ." (WordIdA, WordIdB)"
464  ." VALUES (".$WordId.", ".$SynonymId.")");
465  $AddCount++;
466  }
467  }
468 
469  # report to caller number of new synonyms added
470  return $AddCount;
471  }
472 
479  public function RemoveSynonyms($Word, $Synonyms = NULL)
480  {
481  # find ID for word
482  $WordId = $this->GetWordId($Word);
483 
484  # if ID found
485  if ($WordId !== NULL)
486  {
487  # if no specific synonyms provided
488  if ($Synonyms === NULL)
489  {
490  # remove all synonyms for word
491  $this->DB->Query("DELETE FROM SearchWordSynonyms"
492  ." WHERE WordIdA = '".$WordId."'"
493  ." OR WordIdB = '".$WordId."'");
494  }
495  else
496  {
497  # for each specified synonym
498  foreach ($Synonyms as $Synonym)
499  {
500  # look up ID for synonym
501  $SynonymId = $this->GetWordId($Synonym);
502 
503  # if synonym ID was found
504  if ($SynonymId !== NULL)
505  {
506  # delete synonym entry
507  $this->DB->Query("DELETE FROM SearchWordSynonyms"
508  ." WHERE (WordIdA = '".$WordId."'"
509  ." AND WordIdB = '".$SynonymId."')"
510  ." OR (WordIdB = '".$WordId."'"
511  ." AND WordIdA = '".$SynonymId."')");
512  }
513  }
514  }
515  }
516  }
517 
521  public function RemoveAllSynonyms()
522  {
523  $this->DB->Query("DELETE FROM SearchWordSynonyms");
524  }
525 
531  public function GetSynonyms($Word)
532  {
533  # assume no synonyms will be found
534  $Synonyms = array();
535 
536  # look up ID for word
537  $WordId = $this->GetWordId($Word);
538 
539  # if word ID was found
540  if ($WordId !== NULL)
541  {
542  # look up IDs of all synonyms for this word
543  $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
544  ." WHERE WordIdA = ".$WordId
545  ." OR WordIdB = ".$WordId);
546  $SynonymIds = array();
547  while ($Record = $this->DB->FetchRow)
548  {
549  $SynonymIds[] = ($Record["WordIdA"] == $WordId)
550  ? $Record["WordIdB"] : $Record["WordIdA"];
551  }
552 
553  # for each synonym ID
554  foreach ($SynonymIds as $SynonymId)
555  {
556  # look up synonym word and add to synonym list
557  $Synonyms[] = $this->GetWord($SynonymId);
558  }
559  }
560 
561  # return synonyms to caller
562  return $Synonyms;
563  }
564 
569  public function GetAllSynonyms()
570  {
571  # assume no synonyms will be found
572  $SynonymList = array();
573 
574  # for each synonym ID pair
575  $OurDB = new Database();
576  $OurDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
577  while ($Record = $OurDB->FetchRow())
578  {
579  # look up words
580  $Word = $this->GetWord($Record["WordIdA"]);
581  $Synonym = $this->GetWord($Record["WordIdB"]);
582 
583  # if we do not already have an entry for the word
584  # or synonym is not listed for this word
585  if (!isset($SynonymList[$Word])
586  || !in_array($Synonym, $SynonymList[$Word]))
587  {
588  # add entry for synonym
589  $SynonymList[$Word][] = $Synonym;
590  }
591 
592  # if we do not already have an entry for the synonym
593  # or word is not listed for this synonym
594  if (!isset($SynonymList[$Synonym])
595  || !in_array($Word, $SynonymList[$Synonym]))
596  {
597  # add entry for word
598  $SynonymList[$Synonym][] = $Word;
599  }
600  }
601 
602  # for each word
603  # (this loop removes reciprocal duplicates)
604  foreach ($SynonymList as $Word => $Synonyms)
605  {
606  # for each synonym for that word
607  foreach ($Synonyms as $Synonym)
608  {
609  # if synonym has synonyms and word is one of them
610  if (isset($SynonymList[$Synonym])
611  && isset($SynonymList[$Word])
612  && in_array($Word, $SynonymList[$Synonym])
613  && in_array($Synonym, $SynonymList[$Word]))
614  {
615  # if word has less synonyms than synonym
616  if (count($SynonymList[$Word])
617  < count($SynonymList[$Synonym]))
618  {
619  # remove synonym from synonym list for word
620  $SynonymList[$Word] = array_diff(
621  $SynonymList[$Word], array($Synonym));
622 
623  # if no synonyms left for word
624  if (!count($SynonymList[$Word]))
625  {
626  # remove empty synonym list for word
627  unset($SynonymList[$Word]);
628  }
629  }
630  else
631  {
632  # remove word from synonym list for synonym
633  $SynonymList[$Synonym] = array_diff(
634  $SynonymList[$Synonym], array($Word));
635 
636  # if no synonyms left for word
637  if (!count($SynonymList[$Synonym]))
638  {
639  # remove empty synonym list for word
640  unset($SynonymList[$Synonym]);
641  }
642  }
643  }
644  }
645  }
646 
647  # sort array alphabetically (just for convenience)
648  foreach ($SynonymList as $Word => $Synonyms)
649  {
650  asort($SynonymList[$Word]);
651  }
652  ksort($SynonymList);
653 
654  # return 2D array of synonyms to caller
655  return $SynonymList;
656  }
657 
663  public function SetAllSynonyms($SynonymList)
664  {
665  # remove all existing synonyms
666  $this->RemoveAllSynonyms();
667 
668  # for each synonym entry passed in
669  foreach ($SynonymList as $Word => $Synonyms)
670  {
671  # add synonyms for word
672  $this->AddSynonyms($Word, $Synonyms);
673  }
674  }
675 
684  public function LoadSynonymsFromFile($FileName)
685  {
686  # asssume no synonyms will be added
687  $AddCount = 0;
688 
689  # read in contents of file
690  $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
691 
692  # if file contained lines
693  if (count($Lines))
694  {
695  # for each line of file
696  foreach ($Lines as $Line)
697  {
698  # if line is not a comment
699  if (!preg_match("/[\s]*#/", $Line))
700  {
701  # split line into words
702  $Words = preg_split("/[\s,]+/", $Line);
703 
704  # if synonyms found
705  if (count($Words) > 1)
706  {
707  # separate out word and synonyms
708  $Word = array_shift($Words);
709 
710  # add synonyms
711  $AddCount += $this->AddSynonyms($Word, $Words);
712  }
713  }
714  }
715  }
716 
717  # return count of synonyms added to caller
718  return $AddCount;
719  }
720 
728  public static function FlattenMultiTypeResults($Results)
729  {
730  $FlatScores = [];
731  foreach ($Results as $ItemType => $ItemScores)
732  {
733  $FlatScores += $ItemScores;
734  }
735 
736  return $FlatScores;
737  }
738 
745  public static function BuildMultiTypeResults($Results)
746  {
747  $DB = new Database();
748  $DB->Query("SELECT * FROM SearchItemTypes");
749  $ItemTypes = $DB->FetchColumn("ItemType", "ItemId");
750 
751  $SplitScores = [];
752  foreach ($Results as $ItemId => $ItemScore)
753  {
754  $ItemType = $ItemTypes[$ItemId];
755  $SplitScores[$ItemType][$ItemId] = $ItemScore;
756  }
757 
758  return $SplitScores;
759  }
760 
761  # ---- PRIVATE INTERFACE -------------------------------------------------
762 
 # database object created by the constructor
763  protected $DB;
 # current debug level (0 by default, changed via DebugLevel())
764  protected $DebugLevel;
 # filter function names added via AddResultFilterFunction()
765  protected $FilterFuncs;
 # names of the item ID column and item table, saved by the constructor
 # and used to build the item query in UpdateForItems()
766  protected $ItemIdFieldName;
767  protected $ItemTableName;
 # duration of most recent search, in seconds
769  protected $LastSearchTime;
 # whether SearchForWords() also adds in counts for word stems / synonyms
771  protected $StemmingEnabled = TRUE;
772  protected $SynonymsEnabled = TRUE;
773 
 # term counters, reset at the start of SearchAcrossFields()
774  private $ExcludedTermCount;
 # NOTE(review): $FieldIds is not referenced in the visible code
775  private $FieldIds;
 # per-field settings ("FieldType", "Weight", "InKeywordSearch",
 # "ItemTypes"), indexed by field ID and filled in by AddField()
776  private $FieldInfo;
777  private $InclusiveTermCount;
778  private $RequiredTermCount;
 # number of required terms matched so far, indexed by item ID
779  private $RequiredTermCounts;
 # terms parsed out of search strings (returned by SearchTerms())
780  private $SearchTermList;
 # cleared at the start of UpdateForItem()
781  private $WordCountAdded;
782 
 # field ID under which keyword search strings are filed in RawSearch()
783  const KEYWORD_FIELD_ID = -100;
 # NOTE(review): not referenced in the visible code; presumably the offset
 # separating stem IDs from word IDs -- confirm
784  const STEM_ID_OFFSET = 1000000;
785 
786 
787  # ---- private methods (searching)
788 
796  private function RawSearch($SearchParams)
797  {
798  # retrieve search strings
799  $SearchStrings = $SearchParams->GetSearchStrings();
800  $KeywordSearchStrings = $SearchParams->GetKeywordSearchStrings();
801 
802  # add keyword searches (if any) to fielded searches
803  if (count($KeywordSearchStrings))
804  {
805  $SearchStrings[self::KEYWORD_FIELD_ID] = $KeywordSearchStrings;
806  }
807 
808  # normalize search strings
809  $NormalizedSearchStrings = array();
810  foreach ($SearchStrings as $FieldId => $SearchStringArray)
811  {
812  if (!is_array($SearchStringArray))
813  {
814  $SearchStringArray = array($SearchStringArray);
815  }
816  foreach ($SearchStringArray as $String)
817  {
818  $String = trim($String);
819  if (strlen($String))
820  {
821  $NormalizedSearchStrings[$FieldId][] = $String;
822  }
823  }
824  }
825  $SearchStrings = $NormalizedSearchStrings;
826 
827  # if we have strings to search for
828  if (count($SearchStrings))
829  {
830  # perform search
831  $Scores = $this->SearchAcrossFields(
832  $SearchStrings, $SearchParams->Logic());
833  }
834 
835  # for each subgroup
836  foreach ($SearchParams->GetSubgroups() as $Subgroup)
837  {
838  # perform subgroup search
839  $NewScores = $this->RawSearch($Subgroup);
840 
841  # added subgroup search scores to previous scores as appropriate
842  if (isset($Scores))
843  {
844  $Scores = $this->CombineScores(
845  $Scores, $NewScores, $SearchParams->Logic());
846  }
847  else
848  {
849  $Scores = $NewScores;
850  }
851  }
852  if (isset($NewScores))
853  {
854  $this->DMsg(2, "Have ".count($Scores)
855  ." results after subgroup processing");
856  }
857 
858  # pare down results to just allowed item types (if specified)
859  if ($SearchParams->ItemTypes())
860  {
861  $AllowedItemTypes = $SearchParams->ItemTypes();
862  foreach ($Scores as $ItemId => $Score)
863  {
864  if (!in_array($this->GetItemType($ItemId), $AllowedItemTypes))
865  {
866  unset($Scores[$ItemId]);
867  }
868  }
869  $this->DMsg(3, "Have ".count($Scores)
870  ." results after paring to allowed item types");
871  }
872 
873  # return search results to caller
874  return isset($Scores) ? $Scores : array();
875  }
876 
884  private function CombineScores($ScoresA, $ScoresB, $Logic)
885  {
886  if ($Logic == "OR")
887  {
888  $Scores = $ScoresA;
889  foreach ($ScoresB as $ItemId => $Score)
890  {
891  if (isset($Scores[$ItemId]))
892  {
893  $Scores[$ItemId] += $Score;
894  }
895  else
896  {
897  $Scores[$ItemId] = $Score;
898  }
899  }
900  }
901  else
902  {
903  $Scores = array();
904  foreach ($ScoresA as $ItemId => $Score)
905  {
906  if (isset($ScoresB[$ItemId]))
907  {
908  $Scores[$ItemId] = $Score + $ScoresB[$ItemId];
909  }
910  }
911  }
912  return $Scores;
913  }
914 
924  private function SearchAcrossFields($SearchStrings, $Logic)
925  {
926  # start by assuming no search will be done
927  $Scores = array();
928 
929  # clear word counts
930  $this->ExcludedTermCount = 0;
931  $this->InclusiveTermCount = 0;
932  $this->RequiredTermCount = 0;
933  $this->RequiredTermCounts = array();
934 
935  # for each field
936  $NeedComparisonSearch = FALSE;
937  foreach ($SearchStrings as $FieldId => $SearchStringArray)
938  {
939  # for each search string for this field
940  foreach ($SearchStringArray as $SearchString)
941  {
942  # if field is keyword or field is text and does not look
943  # like comparison match
944  $NotComparisonSearch = !preg_match(
945  self::COMPARISON_OPERATOR_PATTERN, $SearchString);
946  if (($FieldId == self::KEYWORD_FIELD_ID)
947  || (isset($this->FieldInfo[$FieldId])
948  && ($this->FieldInfo[$FieldId]["FieldType"]
949  == self::FIELDTYPE_TEXT)
950  && $NotComparisonSearch))
951  {
952  if ($FieldId == self::KEYWORD_FIELD_ID)
953  {
954  $this->DMsg(0, "Performing keyword search for string \""
955  .$SearchString."\"");
956  }
957  else
958  {
959  $this->DMsg(0, "Searching text field "
960  .$FieldId." for string \"".$SearchString."\"");
961  }
962 
963  # normalize text and split into words
964  $Words[$FieldId] =
965  $this->ParseSearchStringForWords($SearchString, $Logic);
966 
967  # calculate scores for matching items
968  if (count($Words[$FieldId]))
969  {
970  $Scores = $this->SearchForWords(
971  $Words[$FieldId], $FieldId, $Scores);
972  $this->DMsg(3, "Have "
973  .count($Scores)." results after word search");
974  }
975 
976  # split into phrases
977  $Phrases[$FieldId] = $this->ParseSearchStringForPhrases(
978  $SearchString, $Logic);
979 
980  # handle any phrases
981  if (count($Phrases[$FieldId]))
982  {
983  $Scores = $this->SearchForPhrases(
984  $Phrases[$FieldId], $Scores, $FieldId, TRUE, FALSE);
985  $this->DMsg(3, "Have "
986  .count($Scores)." results after phrase search");
987  }
988  }
989  else
990  {
991  # set flag to indicate possible comparison search candidate found
992  $NeedComparisonSearch = TRUE;
993  }
994  }
995  }
996 
997  # perform comparison searches
998  if ($NeedComparisonSearch)
999  {
1000  $Scores = $this->SearchForComparisonMatches(
1001  $SearchStrings, $Logic, $Scores);
1002  $this->DMsg(3, "Have ".count($Scores)." results after comparison search");
1003  }
1004 
1005  # if no results found, no required terms, and exclusions specified
1006  if ((count($Scores) == 0) &&
1007  ($this->RequiredTermCount == 0) &&
1008  ($this->ExcludedTermCount > 0) )
1009  {
1010  # determine which item types are implicated for keyword searches
1011  $KeywordItemTypes = [];
1012  foreach ($this->FieldInfo as $FieldId => $Info)
1013  {
1014  if ($Info["InKeywordSearch"])
1015  {
1016  $KeywordItemTypes = array_merge(
1017  $KeywordItemTypes,
1018  $Info["ItemTypes"]);
1019  }
1020  }
1021  $KeywordItemTypes = array_unique($KeywordItemTypes);
1022 
1023  # determine what item types were in use for the fields we
1024  # are searching
1025  $FieldTypes = [];
1026  foreach ($SearchStrings as $FieldId => $Info)
1027  {
1028  $MyTypes = ($FieldId == self::KEYWORD_FIELD_ID) ?
1029  $KeywordItemTypes :
1030  $this->FieldInfo[$FieldId]["ItemTypes"];
1031 
1032  $FieldTypes = array_merge(
1033  $FieldTypes, $MyTypes);
1034  }
1035  $FieldTypes = array_unique($FieldTypes);
1036 
1037  # load all records for these field types
1038  $Scores = $this->LoadScoresForAllRecords($FieldTypes);
1039  }
1040 
1041  # if search results found
1042  if (count($Scores))
1043  {
1044  # for each search text string
1045  foreach ($SearchStrings as $FieldId => $SearchStringArray)
1046  {
1047  # for each search string for this field
1048  foreach ($SearchStringArray as $SearchString)
1049  {
1050  # if field is text
1051  if (($FieldId == self::KEYWORD_FIELD_ID)
1052  || (isset($this->FieldInfo[$FieldId])
1053  && ($this->FieldInfo[$FieldId]["FieldType"]
1054  == self::FIELDTYPE_TEXT)))
1055  {
1056  # if there are words in search text
1057  if (isset($Words[$FieldId]))
1058  {
1059  # handle any excluded words
1060  $Scores = $this->FilterOnExcludedWords(
1061  $Words[$FieldId], $Scores, $FieldId);
1062  }
1063 
1064  # handle any excluded phrases
1065  if (isset($Phrases[$FieldId]))
1066  {
1067  $Scores = $this->SearchForPhrases(
1068  $Phrases[$FieldId], $Scores,
1069  $FieldId, FALSE, TRUE);
1070  }
1071  }
1072  }
1073  $this->DMsg(3, "Have ".count($Scores)
1074  ." results after processing exclusions");
1075  }
1076 
1077  # strip off any results that don't contain required words
1078  $Scores = $this->FilterOnRequiredWords($Scores);
1079  }
1080 
1081  # return search result scores to caller
1082  return $Scores;
1083  }
1084 
1094  private function SearchForWords($Words, $FieldId, $Scores = NULL)
1095  {
1096  $DB = $this->DB;
1097 
1098  # start with empty search result scores list if none passed in
1099  if ($Scores == NULL)
1100  {
1101  $Scores = array();
1102  }
1103 
1104  # for each word
1105  foreach ($Words as $Word => $Flags)
1106  {
1107  unset($Counts);
1108  if ($FieldId == self::KEYWORD_FIELD_ID)
1109  {
1110  $this->DMsg(2, "Performing keyword search for word \"".$Word."\"");
1111  }
1112  else
1113  {
1114  $this->DMsg(2, "Searching for word \"".$Word."\" in field ".$FieldId);
1115  }
1116 
1117  # if word is not excluded
1118  if (!($Flags & self::WORD_EXCLUDED))
1119  {
1120  # look up record ID for word
1121  $this->DMsg(2, "Looking up word \"".$Word."\"");
1122  $WordId = $this->GetWordId($Word);
1123 
1124  # if word is in DB
1125  if ($WordId !== NULL)
1126  {
1127  # look up counts for word
1128  $DB->Query("SELECT ItemId,Count FROM SearchWordCounts "
1129  ."WHERE WordId = ".$WordId
1130  ." AND FieldId = ".$FieldId);
1131  $Counts = $DB->FetchColumn("Count", "ItemId");
1132 
1133  # if synonym support is enabled
1134  if ($this->SynonymsEnabled)
1135  {
1136  # look for any synonyms
1137  $DB->Query("SELECT WordIdA, WordIdB"
1138  ." FROM SearchWordSynonyms"
1139  ." WHERE WordIdA = ".$WordId
1140  ." OR WordIdB = ".$WordId);
1141 
1142  # if synonyms were found
1143  if ($DB->NumRowsSelected())
1144  {
1145  # retrieve synonym IDs
1146  $SynonymIds = array();
1147  while ($Record = $DB->FetchRow())
1148  {
1149  $SynonymIds[] = ($Record["WordIdA"] == $WordId)
1150  ? $Record["WordIdB"]
1151  : $Record["WordIdA"];
1152  }
1153 
1154  # for each synonym
1155  foreach ($SynonymIds as $SynonymId)
1156  {
1157  # retrieve counts for synonym
1158  $DB->Query("SELECT ItemId,Count"
1159  ." FROM SearchWordCounts"
1160  ." WHERE WordId = ".$SynonymId
1161  ." AND FieldId = ".$FieldId);
1162  $SynonymCounts = $DB->FetchColumn("Count", "ItemId");
1163 
1164  # for each count
1165  foreach ($SynonymCounts as $ItemId => $Count)
1166  {
1167  # adjust count because it's a synonym
1168  $AdjustedCount = ceil($Count / 2);
1169 
1170  # add count to existing counts
1171  if (isset($Counts[$ItemId]))
1172  {
1173  $Counts[$ItemId] += $AdjustedCount;
1174  }
1175  else
1176  {
1177  $Counts[$ItemId] = $AdjustedCount;
1178  }
1179  }
1180  }
1181  }
1182  }
1183  }
1184 
1185  # if stemming is enabled
1186  if ($this->StemmingEnabled)
1187  {
1188  # retrieve word stem
1189  $Stem = PorterStemmer::Stem($Word);
1190 
1191  # if stem was different from word
1192  if ($Stem != $Word)
1193  {
1194  # retrieve stem ID
1195  $this->DMsg(2, "Looking up stem \"".$Stem."\"");
1196  $StemId = $this->GetStemId($Stem);
1197 
1198  # if ID found for stem
1199  if ($StemId !== NULL)
1200  {
1201  # retrieve counts for stem
1202  $DB->Query("SELECT ItemId,Count"
1203  ." FROM SearchWordCounts"
1204  ." WHERE WordId = ".$StemId
1205  ." AND FieldId = ".$FieldId);
1206  $StemCounts = $DB->FetchColumn("Count", "ItemId");
1207 
1208  # for each count
1209  foreach ($StemCounts as $ItemId => $Count)
1210  {
1211  # adjust count because it's a stem
1212  $AdjustedCount = ceil($Count / 2);
1213 
1214  # add count to existing counts
1215  if (isset($Counts[$ItemId]))
1216  {
1217  $Counts[$ItemId] += $AdjustedCount;
1218  }
1219  else
1220  {
1221  $Counts[$ItemId] = $AdjustedCount;
1222  }
1223  }
1224  }
1225  }
1226  }
1227 
1228  # if counts were found
1229  if (isset($Counts))
1230  {
1231  # for each count
1232  foreach ($Counts as $ItemId => $Count)
1233  {
1234  # if word flagged as required
1235  if ($Flags & self::WORD_REQUIRED)
1236  {
1237  # increment required word count for record
1238  if (isset($this->RequiredTermCounts[$ItemId]))
1239  {
1240  $this->RequiredTermCounts[$ItemId]++;
1241  }
1242  else
1243  {
1244  $this->RequiredTermCounts[$ItemId] = 1;
1245  }
1246  }
1247 
1248  # add to item record score
1249  if (isset($Scores[$ItemId]))
1250  {
1251  $Scores[$ItemId] += $Count;
1252  }
1253  else
1254  {
1255  $Scores[$ItemId] = $Count;
1256  }
1257  }
1258  }
1259  }
1260  }
1261 
1262  # return basic scores to caller
1263  return $Scores;
1264  }
1265 
1273  private function ParseSearchStringForPhrases($SearchString, $Logic)
1274  {
1275  # split into chunks delimited by double quote marks
1276  $Pieces = explode("\"", $SearchString); # "
1277 
1278  # for each pair of chunks
1279  $Index = 2;
1280  $Phrases = array();
1281  while ($Index < count($Pieces))
1282  {
1283  # grab phrase from chunk
1284  $Phrase = trim(addslashes($Pieces[$Index - 1]));
1285  $Flags = self::WORD_PRESENT;
1286 
1287  # grab first character of phrase
1288  $FirstChar = substr($Pieces[$Index - 2], -1);
1289 
1290  # set flags to reflect any option characters
1291  if ($FirstChar == "-")
1292  {
1293  $Flags |= self::WORD_EXCLUDED;
1294  if (!isset($Phrases[$Phrase]))
1295  {
1296  $this->ExcludedTermCount++;
1297  }
1298  }
1299  else
1300  {
1301  if ((($Logic == "AND")
1302  && ($FirstChar != "~"))
1303  || ($FirstChar == "+"))
1304  {
1305  $Flags |= self::WORD_REQUIRED;
1306  if (!isset($Phrases[$Phrase]))
1307  {
1308  $this->RequiredTermCount++;
1309  }
1310  }
1311  if (!isset($Phrases[$Phrase]))
1312  {
1313  $this->InclusiveTermCount++;
1314  $this->SearchTermList[] = $Phrase;
1315  }
1316  }
1317  $Phrases[$Phrase] = $Flags;
1318 
1319  # move to next pair of chunks
1320  $Index += 2;
1321  }
1322 
1323  # return phrases to caller
1324  return $Phrases;
1325  }
1326 
1332  protected function SearchFieldForPhrases($FieldId, $Phrase)
1333  {
1334  # error out
1335  exit("<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\n");
1336  }
1337 
/**
 * Look for phrases in a field and fold the matches into the score list.
 * Items containing an excluded phrase are removed from the list; items
 * containing any other phrase have their score raised by the number of
 * words in the phrase multiplied by the field weight.  A keyword search
 * is run against every field flagged for inclusion in keyword searches.
 * @param array $Phrases Phrases to search for, with the phrase as index
 *      and WORD_* flags as value.
 * @param array|null $Scores Current scores, with item IDs for the index
 *      (NULL is replaced by an empty list before the first lookup).
 * @param mixed $FieldId ID of field to search, or KEYWORD_FIELD_ID.
 * @param bool $ProcessNonExcluded If TRUE, phrases not flagged as
 *      excluded are processed.  (OPTIONAL, defaults to TRUE)
 * @param bool $ProcessExcluded If TRUE, phrases flagged as excluded
 *      are processed.  (OPTIONAL, defaults to TRUE)
 * @return array|null Updated scores.
 */
private function SearchForPhrases($Phrases, $Scores, $FieldId,
        $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
{
    # nothing to do when no phrases were supplied
    if (count($Phrases) < 1)
    {
        return $Scores;
    }

    # a keyword search fans out to each field included in keyword searches
    if ($FieldId == self::KEYWORD_FIELD_ID)
    {
        foreach ($this->FieldInfo as $KeywordFieldId => $FieldSettings)
        {
            if (!$FieldSettings["InKeywordSearch"])
            {
                continue;
            }
            $Scores = $this->SearchForPhrases(
                    $Phrases, $Scores, $KeywordFieldId,
                    $ProcessNonExcluded, $ProcessExcluded);
        }
        return $Scores;
    }

    # otherwise search the one specified field for each phrase
    foreach ($Phrases as $Phrase => $Flags)
    {
        $this->DMsg(2, "Searching for phrase '".$Phrase
                ."' in field ".$FieldId);

        # skip phrase unless its kind (excluded or not) is being processed
        $IsExcluded = ($Flags & self::WORD_EXCLUDED) ? TRUE : FALSE;
        $ShouldProcess = $IsExcluded ? $ProcessExcluded : $ProcessNonExcluded;
        if (!$ShouldProcess)
        {
            continue;
        }

        # make sure we have a score list to work with
        if ($Scores === NULL) { $Scores = array(); }

        # walk through the items that contain the phrase
        $ItemIds = $this->SearchFieldForPhrases($FieldId, $Phrase);
        foreach ($ItemIds as $ItemId)
        {
            # an excluded phrase knocks the item off of the list
            if ($IsExcluded)
            {
                unset($Scores[$ItemId]);
                continue;
            }

            # phrase value is number of words times field weight
            $PhraseScore = count(preg_split("/[\s]+/",
                    $Phrase, -1, PREG_SPLIT_NO_EMPTY))
                    * $this->FieldInfo[$FieldId]["Weight"];
            $this->DMsg(2, "Phrase score is ".$PhraseScore);

            # add phrase value to item score
            $Scores[$ItemId] = isset($Scores[$ItemId])
                    ? ($Scores[$ItemId] + $PhraseScore) : $PhraseScore;

            # note required phrases so the required-term filter can count them
            if ($Flags & self::WORD_REQUIRED)
            {
                $this->RequiredTermCounts[$ItemId] =
                        isset($this->RequiredTermCounts[$ItemId])
                        ? ($this->RequiredTermCounts[$ItemId] + 1) : 1;
            }
        }
    }

    # return updated scores to caller
    return $Scores;
}
1444 
/**
 * Remove from the score list every item that has a word count recorded
 * for an excluded word in the specified field.
 * @param array $Words Search words, with the word as index and WORD_*
 *      flags as value.
 * @param array $Scores Current scores, with item IDs for the index.
 * @param int $FieldId ID of field whose word counts are checked (cast to
 *      an integer before being placed in the query).
 * @return array Score list with matching items removed.
 */
private function FilterOnExcludedWords($Words, $Scores, $FieldId)
{
    # nothing can be removed from an empty list, so skip the DB queries
    if (empty($Scores))
    {
        return $Scores;
    }

    $DB = $this->DB;

    # for each word
    foreach ($Words as $Word => $Flags)
    {
        # only words flagged as excluded are of interest
        if (!($Flags & self::WORD_EXCLUDED))
        {
            continue;
        }

        # look up record ID for word (NULL means word is not in DB)
        $WordId = $this->GetWordId($Word);
        if ($WordId === NULL)
        {
            continue;
        }

        # look up items with counts for word (values are concatenated
        #       rather than interpolated with "${...}", which is
        #       deprecated as of PHP 8.2)
        $DB->Query("SELECT ItemId FROM SearchWordCounts "
                ."WHERE WordId=".intval($WordId)
                ." AND FieldId=".intval($FieldId));

        # for each item that contains the word
        while ($Record = $DB->FetchRow())
        {
            # remove item from score list if present
            $ItemId = $Record["ItemId"];
            if (isset($Scores[$ItemId]))
            {
                $this->DMsg(3, "Filtering out item ".$ItemId
                        ." because it contained word \"".$Word."\"");
                unset($Scores[$ItemId]);
            }
        }
    }

    # return filtered score list to caller
    return $Scores;
}
1493 
/**
 * Remove from the score list every item whose tally of required terms
 * (RequiredTermCounts) is missing or below the number of required terms
 * in the search (RequiredTermCount).
 * @param array $Scores Current scores, with item IDs for the index.
 * @return array Filtered score list.
 */
private function FilterOnRequiredWords($Scores)
{
    # no filtering is needed if the search had no required terms
    $RequiredCount = $this->RequiredTermCount;
    if (!($RequiredCount > 0))
    {
        return $Scores;
    }

    # check each item against the required count
    foreach ($Scores as $ItemId => $Score)
    {
        # keep item if it met the required count
        $HasTally = isset($this->RequiredTermCounts[$ItemId]);
        if ($HasTally
                && !($this->RequiredTermCounts[$ItemId] < $RequiredCount))
        {
            continue;
        }

        # otherwise report why and drop the item
        $FoundNote = $HasTally
                ? " (only had ".$this->RequiredTermCounts[$ItemId]
                : " (had none";
        $this->DMsg(4, "Filtering out item ".$ItemId
                ." because it didn't have required word count of "
                .$RequiredCount.$FoundNote.")");
        unset($Scores[$ItemId]);
    }

    # return filtered list to caller
    return $Scores;
}
1529 
/**
 * Run the filter callbacks over the raw scores, split the survivors into
 * per-item-type lists, sort each list, and trim each list to the
 * requested window.  Also records the total number of results (counted
 * after filtering) and the number of results per item type.
 * @param array $Scores Current scores, with item IDs for the index.
 * @param int $StartingResult Offset of first result to return per type.
 * @param int $NumberOfResults Maximum number of results to return per
 *      type (PHP_INT_MAX for no limit).
 * @param mixed $SortByField Field to sort by, either a single value used
 *      for all item types or an array indexed by item type (NULL or a
 *      missing entry means sort by score).
 * @param mixed $SortDescending Sort direction, either a single value or
 *      an array indexed by item type (missing entries default to TRUE).
 * @return array Scores as array(ItemType => array(ItemId => Score)).
 */
private function CleanScores($Scores, $StartingResult, $NumberOfResults,
        $SortByField, $SortDescending)
{
    # apply any filter callbacks
    $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
    $Scores = $this->FilterOnSuppliedFunctions($Scores);

    # note how many results survived filtering
    $this->NumberOfResultsAvailable = count($Scores);

    # bin scores by item type (items with no known type are dropped)
    $ScoresByType = array();
    foreach ($Scores as $Id => $Score)
    {
        $ItemType = $this->GetItemType($Id);
        if ($ItemType === NULL)
        {
            continue;
        }
        $ScoresByType[$ItemType][$Id] = $Score;
    }

    # we only trim when a window smaller than "everything" was requested
    $WantSubset = ($StartingResult > 0) || ($NumberOfResults < PHP_INT_MAX);

    # sort and trim the results for each item type
    foreach ($ScoresByType as $ItemType => $TypeScores)
    {
        # work out sort field for this type
        if (is_array($SortByField))
        {
            $SortField = isset($SortByField[$ItemType])
                    ? $SortByField[$ItemType] : NULL;
        }
        else
        {
            $SortField = $SortByField;
        }

        # work out sort direction for this type
        if (is_array($SortDescending))
        {
            $Descending = isset($SortDescending[$ItemType])
                    ? $SortDescending[$ItemType] : TRUE;
        }
        else
        {
            $Descending = $SortDescending;
        }

        # save number of results for this type
        $this->NumberOfResultsPerItemType[$ItemType] = count($TypeScores);

        if ($SortField === NULL)
        {
            # no sort field, so order by score in requested direction
            if ($Descending)
            {
                arsort($TypeScores, SORT_NUMERIC);
            }
            else
            {
                asort($TypeScores, SORT_NUMERIC);
            }
        }
        else
        {
            # get item IDs ordered by the sort field
            $SortedIds = $this->GetItemIdsSortedByField(
                    $ItemType, $SortField, $Descending);

            if (count($SortedIds) && count($TypeScores))
            {
                # keep only IDs that are in our results, in sorted order
                $SortedIds = array_intersect($SortedIds,
                        array_keys($TypeScores));
                $Reordered = array();
                foreach ($SortedIds as $Id)
                {
                    $Reordered[$Id] = $TypeScores[$Id];
                }
                $TypeScores = $Reordered;
            }
            else
            {
                # no usable ordering, so fall back to highest score first
                arsort($TypeScores, SORT_NUMERIC);
            }
        }

        # trim down to requested window (keeping item IDs as the index)
        if ($WantSubset)
        {
            $TypeScores = array_slice($TypeScores,
                    $StartingResult, $NumberOfResults, TRUE);
        }

        $ScoresByType[$ItemType] = $TypeScores;
    }

    # return cleaned search result scores to caller
    return $ScoresByType;
}
1647 
/**
 * Filter search scores through any supplied functions.  Each callback in
 * FilterFuncs is called with the item ID, and the item is discarded as
 * soon as one of them returns a true value.
 * @param array $Scores Current scores, with item IDs for the index.
 * @return array Filtered score list.
 */
protected function FilterOnSuppliedFunctions($Scores)
{
    # pass scores through untouched if no filter functions have been set
    if (!isset($this->FilterFuncs))
    {
        return $Scores;
    }

    # offer each result to the filter functions in turn
    foreach ($Scores as $ItemId => $Score)
    {
        foreach ($this->FilterFuncs as $FuncName)
        {
            # move on to next function if this one accepts the item
            if (!call_user_func($FuncName, $ItemId))
            {
                continue;
            }

            # discard result and stop consulting filters for this item
            $this->DMsg(2, "Filter callback <i>".$FuncName
                    ."</i> rejected item ".$ItemId);
            unset($Scores[$ItemId]);
            break;
        }
    }

    # return filtered list to caller
    return $Scores;
}
1682 
/**
 * Run comparison searches (search strings that begin with a comparison
 * operator, plus every search string for a non-text field) and combine
 * the matching items with the existing scores.
 *
 * With AND logic: if there are no prior scores and no inclusive search
 * terms, each matching item is added with a score of 1; otherwise existing
 * scores are restricted to the matching items.  If nothing matched, the
 * scores are emptied.  With any other logic, every matching item gets 1
 * added to its score.  If no comparisons are present, $Scores is returned
 * untouched.
 * @param array $SearchStrings Arrays of search strings, indexed by
 *      field ID.
 * @param mixed $Logic Search logic (compared against "AND").
 * @param array|null $Scores Current scores, with item IDs for the index.
 * @return array|null Updated scores.
 */
private function SearchForComparisonMatches($SearchStrings, $Logic, $Scores)
{
    # gather the comparisons to perform (parallel lists, one entry each)
    $FieldIds = array();
    $Operators = array();
    $Values = array();
    foreach ($SearchStrings as $SearchFieldId => $SearchStringArray)
    {
        # keyword search strings are never treated as comparisons
        if ($SearchFieldId == self::KEYWORD_FIELD_ID)
        {
            continue;
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # look for comparison operators
            $FoundOperator = preg_match(
                    self::COMPARISON_OPERATOR_PATTERN,
                    $SearchString, $Matches);

            # skip strings with no operator unless the field type is one
            #       that is always a comparison search (i.e. not text)
            if (!$FoundOperator
                    && ($this->FieldInfo[$SearchFieldId]["FieldType"]
                            == self::FIELDTYPE_TEXT))
            {
                continue;
            }

            # determine value to compare against (operator stripped off)
            $Value = trim(preg_replace(
                    self::COMPARISON_OPERATOR_PATTERN, '\2',
                    $SearchString));

            # use operator from match, or assume equality if none found
            $Operator = $FoundOperator ? $Matches[1] : "=";

            # save comparison
            $FieldIds[] = $SearchFieldId;
            $Operators[] = $Operator;
            $Values[] = $Value;
            $this->DMsg(3, "Added comparison (field = <i>"
                    .$SearchFieldId."</i> op = <i>"
                    .$Operator."</i> val = <i>"
                    .$Value."</i>)");
        }
    }

    # if there are no comparisons then scores are left as they were
    if (count($Operators) == 0)
    {
        return $Scores;
    }

    # perform comparisons on fields and gather results
    $Results = $this->SearchFieldsForComparisonMatches(
            $FieldIds, $Operators, $Values, $Logic);

    if ($Logic == "AND")
    {
        if (!count($Results))
        {
            # nothing matched, so nothing can satisfy the search
            $Scores = array();
        }
        elseif (empty($Scores) && ($this->InclusiveTermCount == 0))
        {
            # no prior results and no terms for keyword search, so the
            #       comparison results are the results (empty() is used
            #       because $Scores may be NULL, which count() rejects
            #       as of PHP 8)
            $Scores = array();
            foreach ($Results as $ItemId)
            {
                $Scores[$ItemId] = 1;
            }
        }
        elseif (!empty($Scores))
        {
            # keep only scores for items that are part of results (keyed
            #       lookup instead of an in_array() scan per item)
            $Scores = array_intersect_key($Scores, array_flip($Results));
        }
    }
    else
    {
        # add result items to scores
        if ($Scores === NULL) { $Scores = array(); }
        foreach ($Results as $ItemId)
        {
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += 1;
            }
            else
            {
                $Scores[$ItemId] = 1;
            }
        }
    }

    # return results to caller
    return $Scores;
}
1814 
/**
 * Pass every search string through ExtractDebugLevel(), which may set
 * the debug level and strip the debug level indicator from the string.
 * Accepts a single string, an array of strings, or an array of arrays
 * of strings.
 * @param mixed $SearchStrings Search string(s) to examine.
 * @return mixed Search string(s) in the same structure, as returned by
 *      ExtractDebugLevel().
 */
private function SetDebugLevel($SearchStrings)
{
    # a lone search string is handled directly
    if (!is_array($SearchStrings))
    {
        return $this->ExtractDebugLevel($SearchStrings);
    }

    # otherwise handle each entry, which may itself be a list of strings
    foreach ($SearchStrings as $FieldId => $FieldStrings)
    {
        if (!is_array($FieldStrings))
        {
            $SearchStrings[$FieldId] =
                    $this->ExtractDebugLevel($FieldStrings);
            continue;
        }

        foreach ($FieldStrings as $Index => $SearchString)
        {
            $SearchStrings[$FieldId][$Index] =
                    $this->ExtractDebugLevel($SearchString);
        }
    }

    # return new search info to caller
    return $SearchStrings;
}
1858 
/**
 * Set the debug level from a "DBUGLVL=N" indicator at the start of a
 * search string (N being one or two digits in the range 1-9) and remove
 * the indicator from the string.  Strings without a well-formed leading
 * indicator are returned unchanged and leave the debug level alone.
 * @param string $SearchString Search string to examine.
 * @return string Search string with any debug level indicator removed.
 */
private function ExtractDebugLevel($SearchString)
{
    # capture the level directly, so that a string that merely contains
    #       "DBUGLVL=" somewhere (or has it in a malformed form) can never
    #       end up being used as the level itself
    if (preg_match("/^\\s*DBUGLVL=([1-9]{1,2})/", $SearchString, $Matches))
    {
        # set debug level
        $Level = (int)$Matches[1];
        $this->DebugLevel = $Level;
        $this->DMsg(0, "Setting debug level to ".$Level);

        # remove indicator (and surrounding whitespace) from search string
        $SearchString = preg_replace(
                "/\\s*DBUGLVL=".$Level."\\s*/", "", $SearchString);
    }

    # return (possibly) modified search string to caller
    return $SearchString;
}
1884 
/**
 * Build a score list containing every item of the given types, each with
 * a score of 1.
 * @param array $ItemTypes Item types to include (placed directly into
 *      the SQL IN() list).
 * @return array Scores with item IDs for the index, or an empty array if
 *      no item types were given.
 */
private function LoadScoresForAllRecords($ItemTypes)
{
    # without item types there is nothing to load
    if (count($ItemTypes) == 0)
    {
        return [];
    }

    # retrieve IDs of all items that have one of the given types
    $TypeList = implode(",", $ItemTypes);
    $Query = "SELECT ".$this->ItemIdFieldName." AS ItemId"
            ." FROM ".$this->ItemTableName
            ." WHERE ".$this->ItemTypeFieldName." IN(".$TypeList.")";
    $this->DB->Query($Query);
    $ItemIds = $this->DB->FetchColumn("ItemId");

    # give every item the same score
    return array_fill_keys($ItemIds, 1);
}
1906 
1907  # ---- private methods (search DB building)
1908 
/**
 * Add to the stored count for a word in a field of an item and, when
 * stemming is enabled and the word is not numeric, for the stem of the
 * word as well (if the stem differs from the word).  The word gets the
 * full weight; the stem gets the weight halved and rounded up.
 * @param string $Word Word to record.
 * @param int $ItemId ID of item the word appeared in.
 * @param int $FieldId ID of field the word appeared in.
 * @param int $Weight Amount to add to count.  (OPTIONAL, defaults to 1)
 */
private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
{
    # start with ID for the word itself (added to DB if not there)
    $WordIds = array($this->GetWordId($Word, TRUE));

    # add ID for stem of word if stemming applies and stem is different
    if ($this->StemmingEnabled && !is_numeric($Word))
    {
        $Stem = PorterStemmer::Stem($Word, TRUE);
        if ($Stem != $Word)
        {
            $WordIds[] = $this->GetStemId($Stem, TRUE);
        }
    }

    # record count for word and then for stem of word
    foreach ($WordIds as $WordId)
    {
        # a row exists if we already added a count for this word and field
        $AlreadyAdded = isset($this->WordCountAdded[$WordId][$FieldId]);

        if ($AlreadyAdded)
        {
            # bump existing count
            $Query = "UPDATE SearchWordCounts SET Count=Count+".$Weight
                    ." WHERE WordId=".$WordId
                    ." AND ItemId=".$ItemId
                    ." AND FieldId=".$FieldId;
        }
        else
        {
            # create new count
            $Query = "INSERT INTO SearchWordCounts"
                    ." (WordId, ItemId, FieldId, Count) VALUES"
                    ." (".$WordId.", ".$ItemId.", ".$FieldId.", ".$Weight.")";
        }
        $this->DB->Query($Query);

        # remember that a count now exists for this word and field
        if (!$AlreadyAdded)
        {
            $this->WordCountAdded[$WordId][$FieldId] = TRUE;
        }

        # stem (processed next) counts for half as much, rounded up
        $Weight = ceil($Weight / 2);
    }
}
1962 
/**
 * Retrieve content for specified field for specified item.  This
 * implementation is only a placeholder and always throws.
 * (Presumably meant to be overridden by a child class -- confirm.)
 * @param int $ItemId ID of item.
 * @param mixed $FieldId ID of field.
 * @throws Exception Always, because no implementation is provided here.
 */
protected function GetFieldContent($ItemId, $FieldId)
{
    # error out ("new" is required: without it PHP looks for a function
    #       named Exception() and dies with a fatal error instead)
    throw new Exception("GetFieldContent() not implemented.");
}
1973 
/**
 * Break text into normalized words and record a word count for each one
 * against the given field (with the default weight) and, if requested,
 * against the keyword pseudo-field (with the supplied weight).
 * @param int $ItemId ID of item the text belongs to.
 * @param int $FieldId ID of field the text came from.
 * @param int $Weight Weight used for the keyword field counts.
 * @param string $Text Text to index.
 * @param bool $IncludeInKeyword TRUE to also record the words for
 *      keyword searches.
 */
private function RecordSearchInfoForText(
        $ItemId, $FieldId, $Weight, $Text, $IncludeInKeyword)
{
    # split text into normalized words (quotes and parens are ignored)
    $Words = $this->ParseSearchStringForWords($Text, "OR", TRUE);

    # record each word that was found
    foreach (array_keys($Words) as $Word)
    {
        # update count for word in this field
        $this->UpdateWordCount($Word, $ItemId, $FieldId);

        # update count for word in keyword field if appropriate
        if ($IncludeInKeyword)
        {
            $this->UpdateWordCount(
                    $Word, $ItemId, self::KEYWORD_FIELD_ID, $Weight);
        }
    }
}
2008 
2009  # ---- common private methods (used in both searching and DB build)
2010 
/**
 * Normalize a search string and split it into individual words.
 *
 * A leading "-" marks a word as excluded, a leading "+" marks it as
 * required, and a leading "~" keeps it from being required even under
 * AND logic (where all other non-excluded words are required).  As a
 * side effect this updates ExcludedTermCount, RequiredTermCount,
 * InclusiveTermCount, and SearchTermList for each word not seen earlier
 * in the same string.
 * @param string $SearchString Text to parse.
 * @param mixed $Logic Search logic (compared against "AND").
 * @param bool $IgnorePhrases If TRUE, words inside double quotes or
 *      parentheses are kept (only the quote and paren characters are
 *      removed); if FALSE, quoted phrases and parenthesized groups are
 *      removed entirely.  (OPTIONAL, defaults to FALSE)
 * @return array WORD_* flags, with normalized (lower case) words for
 *      the index.
 */
private function ParseSearchStringForWords(
        $SearchString, $Logic, $IgnorePhrases = FALSE)
{
    # strip off any surrounding whitespace
    $Text = trim($SearchString);

    # either remove whole phrases and groups, or (when ignoring phrasing
    #       and grouping) remove just the quote and paren characters
    $PhraseSearchPattern = $IgnorePhrases ? "/\"/" : "/\"[^\"]*\"/";
    $GroupSearchPattern = $IgnorePhrases ? "/[\(\)]+/" : "/\\([^)]*\\)/";

    # set up search string normalization replacement strings (NOTE: these
    #       are performed in sequence, so the order IS SIGNIFICANT)
    $ReplacementPatterns = array(
            # get rid of possessive plurals
            "/'s[^a-z0-9\\-+~]+/i" => " ",
            # get rid of single quotes / apostrophes
            "/'/" => "",
            # get rid of phrases (or just double quotes)
            $PhraseSearchPattern => " ",
            # get rid of groups (or just parens)
            $GroupSearchPattern => " ",
            # convert everything but alphanumerics and minus/plus/tilde
            #       to a space (pattern has no capture group, so the
            #       replacement must not reference one)
            "/[^a-z0-9\\-+~]+/i" => " ",
            # truncate any runs of minus/plus/tilde to just the first char
            "/([~+-])[~+-]+/" => "\\1",
            # convert two alphanumerics segments separated by a minus into
            #       both separate words and a single combined word
            "/([~+-]?)([a-z0-9]+)-([a-z0-9]+)/i" => "\\1\\2 \\1\\3 \\1\\2\\3",
            # convert minus/plus/tilde preceded by anything but
            #       whitespace to a space
            "/([^\\s])[~+-]+/i" => "\\1 ",
            # convert minus/plus/tilde followed by whitespace to a space
            "/[~+-]+\\s/i" => " ",
            # convert multiple spaces to one space
            "/[ ]+/" => " ",
            );

    # remove punctuation from text and normalize whitespace
    $Text = preg_replace(array_keys($ReplacementPatterns),
            array_values($ReplacementPatterns), $Text);
    $this->DMsg(2, "Normalized search string is \"".$Text."\"");

    # convert text to lower case and strip off any extraneous whitespace
    $Text = trim(strtolower($Text));

    # start with an empty array
    $Words = array();

    # if nothing is left after normalization then there are no words
    if (strlen($Text) == 0)
    {
        return $Words;
    }

    # for each word
    foreach (explode(" ", $Text) as $Word)
    {
        # work out what the option character (if any) asks for
        $FirstChar = substr($Word, 0, 1);
        $IsExcluded = ($FirstChar == "-");
        $IsOptional = ($FirstChar == "~");
        $IsRequired = !$IsExcluded && !$IsOptional
                && (($Logic == "AND") || ($FirstChar == "+"));

        # strip off option character
        if ($IsExcluded || $IsOptional || ($FirstChar == "+"))
        {
            $Word = (string)substr($Word, 1);
        }

        # skip option characters that had no word attached (e.g. the "+"
        #       in "foo +"), so that an empty word is never counted as a
        #       required or excluded term
        if ($Word === "")
        {
            continue;
        }

        # set flags and update term counts (each distinct word is only
        #       counted the first time it is seen)
        $IsNewWord = !isset($Words[$Word]);
        $Flags = self::WORD_PRESENT;
        if ($IsExcluded)
        {
            $Flags |= self::WORD_EXCLUDED;
            if ($IsNewWord)
            {
                $this->ExcludedTermCount++;
            }
        }
        else
        {
            if ($IsRequired)
            {
                $Flags |= self::WORD_REQUIRED;
                if ($IsNewWord)
                {
                    $this->RequiredTermCount++;
                }
            }
            if ($IsNewWord)
            {
                $this->InclusiveTermCount++;
                $this->SearchTermList[] = $Word;
            }
        }

        # store flags to indicate word found
        $Words[$Word] = $Flags;
        $this->DMsg(3, "Word identified (".$Word.")");
    }

    # return normalized words to caller
    return $Words;
}
2148 
/**
 * Get the ID for a word from the SearchWords table, optionally adding
 * the word (in lower case) if it is not there.  IDs that were found or
 * added are remembered in a static cache indexed by the word.
 * @param string $Word Word to look up.
 * @param bool $AddIfNotFound TRUE to add the word to the database if it
 *      is not found.  (OPTIONAL, defaults to FALSE)
 * @return mixed ID for word, or NULL if the word was not found and was
 *      not added.
 */
private function GetWordId($Word, $AddIfNotFound = FALSE)
{
    static $WordIdCache;

    # use ID from cache if available
    if (isset($WordIdCache[$Word]))
    {
        return $WordIdCache[$Word];
    }

    # look up ID in database
    $LookupQuery = "SELECT WordId FROM SearchWords"
            ." WHERE WordText='".addslashes($Word)."'";
    $WordId = $this->DB->Query($LookupQuery, "WordId");

    # add word to database if caller asked for that and it was not found
    if ($AddIfNotFound && ($WordId === NULL))
    {
        $this->DB->Query("INSERT INTO SearchWords (WordText)"
                ." VALUES ('".addslashes(strtolower($Word))."')");
        $WordId = $this->DB->LastInsertId();
    }

    # save ID to cache and return it to caller
    $WordIdCache[$Word] = $WordId;
    return $WordId;
}
2192 
/**
 * Get the ID for a word stem, optionally adding the stem (in lower case)
 * to the SearchStems table if it is not there.  The value returned is the
 * database ID plus STEM_ID_OFFSET.  IDs that were found or added are
 * remembered in a static cache indexed by the stem.
 * @param string $Stem Word stem to look up.
 * @param bool $AddIfNotFound TRUE to add the stem to the database if it
 *      is not found.  (OPTIONAL, defaults to FALSE)
 * @return mixed ID for stem, or NULL if the stem was not found and was
 *      not added (matching GetWordId()).
 */
private function GetStemId($Stem, $AddIfNotFound = FALSE)
{
    static $StemIdCache;

    # use ID from cache if available
    if (isset($StemIdCache[$Stem]))
    {
        return $StemIdCache[$Stem];
    }

    # look up ID in database
    $DbId = $this->DB->Query("SELECT WordId"
            ." FROM SearchStems"
            ." WHERE WordText='".addslashes($Stem)."'",
            "WordId");

    # if ID was not found and caller requested it be added
    if (($DbId === NULL) && $AddIfNotFound)
    {
        # add stem to database
        $this->DB->Query("INSERT INTO SearchStems (WordText)"
                ." VALUES ('".addslashes(strtolower($Stem))."')");

        # get ID for newly added stem
        $DbId = $this->DB->LastInsertId();
    }

    # report a missing stem as NULL and do not cache it (adding the offset
    #       to NULL would yield a bogus ID equal to the offset, which a
    #       later add-if-not-found call would then get back from the cache
    #       without the stem ever being added)
    if ($DbId === NULL)
    {
        return NULL;
    }

    # adjust from DB ID value to stem ID value
    $StemId = $DbId + self::STEM_ID_OFFSET;

    # save ID to cache and return it to caller
    $StemIdCache[$Stem] = $StemId;
    return $StemId;
}
2239 
/**
 * Get the text for a word ID or stem ID.  IDs at or above STEM_ID_OFFSET
 * are looked up in SearchStems (after subtracting the offset); all other
 * IDs are looked up in SearchWords.  Results are remembered in a static
 * cache indexed by the ID as passed in.
 * @param int $WordId Word ID or stem ID.
 * @return mixed Text for the word or stem, as returned by the database
 *      lookup.
 */
private function GetWord($WordId)
{
    static $WordCache;

    # use word from cache if available
    if (isset($WordCache[$WordId]))
    {
        return $WordCache[$WordId];
    }

    # adjust search location and DB ID if word is stem (the caller's ID
    #       is left untouched so that it can serve as the cache index --
    #       caching under the adjusted ID would make a stem masquerade as
    #       the word with the same DB ID)
    $TableName = "SearchWords";
    $DbId = $WordId;
    if ($WordId >= self::STEM_ID_OFFSET)
    {
        $TableName = "SearchStems";
        $DbId = $WordId - self::STEM_ID_OFFSET;
    }

    # look up word in database
    $Word = $this->DB->Query("SELECT WordText"
            ." FROM ".$TableName
            ." WHERE WordId='".$DbId."'",
            "WordText");

    # save word to cache and return it to caller
    $WordCache[$WordId] = $Word;
    return $Word;
}
2278 
/**
 * Get the type of an item.  The contents of the SearchItemTypes table
 * are loaded on the first call and kept in a static cache after that.
 * @param int $ItemId ID of item.
 * @return int|null Item type, or NULL if no type is recorded for item.
 */
private function GetItemType($ItemId)
{
    static $ItemTypeCache;

    # load item types the first time through
    if ($ItemTypeCache === NULL)
    {
        $this->DB->Query("SELECT * FROM SearchItemTypes");
        $ItemTypeCache = $this->DB->FetchColumn("ItemType", "ItemId");
    }

    # report items without a recorded type as NULL
    if (!isset($ItemTypeCache[$ItemId]))
    {
        return NULL;
    }
    return (int)$ItemTypeCache[$ItemId];
}
2295 
/**
 * Print debug message if level set high enough.  The message is printed
 * (prefixed with "SE: " and followed by an HTML line break) only when
 * the current debug level is greater than the specified level.
 * @param int $Level Level that debug level must exceed.
 * @param string $Msg Message to print.
 */
protected function DMsg($Level, $Msg)
{
    # stay quiet unless debug level is above level for message
    $ShouldPrint = ($this->DebugLevel > $Level);
    if (!$ShouldPrint)
    {
        return;
    }

    print "SE: ".$Msg."<br>\n";
}
2308 
2309  # ---- BACKWARD COMPATIBILITY --------------------------------------------
2310 
2311  # older names for the logical operator types (same values as LOGIC_AND and LOGIC_OR)
2312  const SEARCHLOGIC_AND = 1;
2313  const SEARCHLOGIC_OR = 2;
2314 
2315  # pattern to detect search strings that are explicit comparisons (group 1 is the operator: a run of > < = ^ $ @ characters or "!=", group 2 is the first character after the operator)
2316  const COMPARISON_OPERATOR_PATTERN = '/^([><=^$@]+|!=)([^><=^$@])/';
2317 }
SearchTermCount()
Get total number of search terms indexed by search engine.
SetAllSynonyms($SynonymList)
Set all synonyms.
DropItem($ItemId)
Drop all data pertaining to item from search database.
AddField($FieldId, $FieldType, $ItemTypes, $Weight, $UsedInKeywordSearch)
Add field to include in searching.
RemoveSynonyms($Word, $Synonyms=NULL)
Remove synonym(s).
const KEYWORD_FIELD_ID
LoadSynonymsFromFile($FileName)
Load synonyms from a file.
Set of parameters used to perform a search.
SQL database abstraction object with smart query caching.
Definition: Database.php:22
SearchFieldForPhrases($FieldId, $Phrase)
Search for phrase in specified field.
GetAllSynonyms()
Get all synonyms.
const SEARCHLOGIC_OR
const FIELDTYPE_NUMERIC
FilterOnSuppliedFunctions($Scores)
Filter search scores through any supplied functions.
UpdateForItem($ItemId, $ItemType)
Update search database for the specified item.
AddSynonyms($Word, $Synonyms)
Add synonyms.
const FIELDTYPE_DATERANGE
const FIELDTYPE_DATE
const SEARCHLOGIC_AND
SearchTerms()
Get normalized list of search terms.
const WORD_EXCLUDED
NumberOfResults($ItemType=NULL)
Get number of results found by most recent search.
FieldWeight($FieldId)
Get search weight for specified field.
FieldType($FieldId)
Get type of specified field (text/numeric/date/daterange).
ItemCount()
Get total number of items indexed by search engine.
static BuildMultiTypeResults($Results)
Expand a one-dimensional array(ItemId => ItemScore) into a two-dimensional array(ItemType => array(It...
FieldedSearch($SearchStrings, $StartingResult=0, $NumberOfResults=10, $SortByField=NULL, $SortDescending=TRUE)
Perform search across multiple fields, with different values or comparisons specified for each field...
__construct($ItemTableName, $ItemIdFieldName, $ItemTypeFieldName)
Object constructor.
const STEM_ID_OFFSET
Search($SearchParams, $StartingResult=0, $NumberOfResults=PHP_INT_MAX, $SortByField=NULL, $SortDescending=TRUE)
Perform search with specified parameters.
RemoveAllSynonyms()
Remove all synonyms.
DMsg($Level, $Msg)
Print debug message if level set high enough.
const WORD_PRESENT
DropField($FieldId)
Drop all data pertaining to field from search database.
GetFieldContent($ItemId, $FieldId)
Retrieve content for specified field for specified item.
Core metadata archive search engine class.
const COMPARISON_OPERATOR_PATTERN
const FIELDTYPE_TEXT
const WORD_REQUIRED
DebugLevel($NewValue)
Set debug output level.
UpdateForItems($StartingItemId, $NumberOfItems)
Update search database for the specified range of items.
FieldedSearchWeightScale($SearchParams)
Get total of weights for all fields involved in search, useful for assessing scale of scores in searc...
static FlattenMultiTypeResults($Results)
Flatten a two-dimensional array keyed by ItemType with results for each type as the outer values into...
FieldInKeywordSearch($FieldId)
Get whether specified field is included in keyword searches.
AddResultFilterFunction($FunctionName)
Add function that will be called to filter search results.
SearchTime()
Get time that last search took, in seconds.
GetSynonyms($Word)
Get synonyms for word.