CWIS Developer Documentation
SearchEngine.php
Go to the documentation of this file.
1 <?PHP
2 
3 #
4 # FILE: SearchEngine.php
5 #
6 # Open Source Metadata Archive Search Engine (OSMASE)
7 # Copyright 2002-2011 Edward Almasy and Internet Scout
8 # http://scout.wisc.edu
9 #
10 
11 class SearchEngine {
12 
13  # ---- PUBLIC INTERFACE --------------------------------------------------
14 
15  # possible types of logical operators
16  const LOGIC_AND = 1;
17  const LOGIC_OR = 2;
18 
19  # flags used for indicating field types
20  const FIELDTYPE_TEXT = 1;
21  const FIELDTYPE_NUMERIC = 2;
22  const FIELDTYPE_DATE = 3;
24 
25  # object constructor
27  {
28  # save database object for our use
29  $this->DB = $DB;
30 
31  # save item access parameters
34 
35  # define flags used for indicating word states
36  if (!defined("WORD_PRESENT")) { define("WORD_PRESENT", 1); }
37  if (!defined("WORD_EXCLUDED")) { define("WORD_EXCLUDED", 2); }
38  if (!defined("WORD_REQUIRED")) { define("WORD_REQUIRED", 4); }
39 
40  # set default debug state
41  $this->DebugLevel = 0;
42  }
43 
# Register a field with the search engine by recording its settings
# in $this->FieldInfo under the given field name.
#   $FieldName - name used to refer to the field (key into FieldInfo)
#   $DBFieldName - saved as the field's "DBFieldName" setting
#   $FieldType - saved as "FieldType" (compared against the FIELDTYPE_
#       constants when searching across fields)
#   $Weight - saved as "Weight" (only fields with a positive weight
#       are indexed by UpdateForItem())
#   $UsedInKeywordSearch - saved as "InKeywordSearch"
function AddField(
        $FieldName, $DBFieldName, $FieldType, $Weight, $UsedInKeywordSearch)
{
    $Settings = array(
            "DBFieldName" => $DBFieldName,
            "FieldType" => $FieldType,
            "Weight" => $Weight,
            "InKeywordSearch" => $UsedInKeywordSearch,
            );

    # copy each setting into place individually so that any other
    # entries already stored for this field are left alone
    foreach ($Settings as $Key => $Value)
    {
        $this->FieldInfo[$FieldName][$Key] = $Value;
    }
}
54 
55  # retrieve info about tables and fields (useful for child objects)
# report the item table name held by this object
function ItemTableName()
{
    return $this->ItemTableName;
}
# report the item ID field name held by this object
function ItemIdFieldName()
{
    return $this->ItemIdFieldName;
}
# report the "DBFieldName" setting saved by AddField() for a field
function DBFieldName($FieldName)
{
    $Settings = $this->FieldInfo[$FieldName];
    return $Settings["DBFieldName"];
}
# report the "FieldType" setting saved by AddField() for a field
function FieldType($FieldName)
{
    $Settings = $this->FieldInfo[$FieldName];
    return $Settings["FieldType"];
}
# report the "Weight" setting saved by AddField() for a field
function FieldWeight($FieldName)
{
    $Settings = $this->FieldInfo[$FieldName];
    return $Settings["Weight"];
}
# report the "InKeywordSearch" setting saved by AddField() for a field
function FieldInKeywordSearch($FieldName)
{
    $Settings = $this->FieldInfo[$FieldName];
    return $Settings["InKeywordSearch"];
}
66 
# Set the debug level.
#   $Setting - new value for $this->DebugLevel (the constructor
#       starts it at 0)
function DebugLevel($Setting)
{
    $this->DebugLevel = $Setting;
}
72 
73 
74  # ---- search functions
75 
# Perform a keyword search.
#   $SearchString - text to search for (first passed through
#       SetDebugLevel(), whose return value is what actually gets searched)
#   $StartingResult, $NumberOfResults, $SortByField, $SortDescending -
#       handed unchanged to CleanScores() to count, sort, and trim results
# Returns the score list produced by CleanScores().  Elapsed time is
# saved for SearchTime().
function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    $SearchString = $this->SetDebugLevel($SearchString);
    $this->DMsg(0, "In Search() with search string \"".$SearchString."\"");

    # note when we began so that elapsed time can be reported
    $Began = microtime(TRUE);

    # zero the term tallies (word parsing below increments them)
    $this->ExcludedTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->InclusiveTermCount = 0;

    # pull individual words out of the search string
    $Words = $this->ParseSearchStringForWords($SearchString);
    $this->DMsg(1, "Found ".count($Words)." words");

    # pull phrases out of the search string
    $Phrases = $this->ParseSearchStringForPhrases($SearchString);
    $this->DMsg(1, "Found ".count($Phrases)." phrases");

    if (!$this->ExcludedTermCount || $this->InclusiveTermCount)
    {
        # normal case: score items by words, then by phrases
        $Scores = $this->SearchForWords($Words);
        $this->DMsg(1, "Found ".count($Scores)." results after word search");
        $Scores = $this->SearchForPhrases($Phrases, $Scores);
        $this->DMsg(1, "Found ".count($Scores)." results after phrase search");
    }
    else
    {
        # nothing but exclusions was given, so start from every record
        $this->DMsg(1, "Loading all records");
        $Scores = $this->LoadScoresForAllRecords();
    }

    # when there is anything to filter
    if (count($Scores) > 0)
    {
        # drop items containing excluded words, then items lacking
        # required words
        $Scores = $this->FilterOnExcludedWords($Words, $Scores);
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # count, sort, and trim the result list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # remember how long the search took
    $this->LastSearchTime = microtime(TRUE) - $Began;

    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
136 
# Perform a search across multiple fields and return trimmed results.
#   $SearchStrings - search strings indexed by field name (first passed
#       through SetDebugLevel())
#   $StartingResult, $NumberOfResults, $SortByField, $SortDescending -
#       handed unchanged to CleanScores()
# Returns the score list produced by CleanScores().  Elapsed time is
# saved for SearchTime().
function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    $SearchStrings = $this->SetDebugLevel($SearchStrings);
    $this->DMsg(0, "In FieldedSearch() with "
            .count($SearchStrings)." search strings");

    # note when we began so that elapsed time can be reported
    $Began = microtime(TRUE);

    # run the search (NULL means that no search was conducted)
    $Scores = $this->SearchAcrossFields($SearchStrings);
    if ($Scores === NULL)
    {
        $Scores = array();
    }

    # count, sort, and trim the result list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # remember how long the search took
    $this->LastSearchTime = microtime(TRUE) - $Began;

    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
163 
# Perform a search made up of logical groups of fielded searches.
#   $SearchGroups - array of groups; each group may carry a
#       "SearchStrings" entry (passed to SearchAcrossFields()) and a
#       "Logic" entry (LOGIC_AND / LOGIC_OR used while searching that group)
#   $StartingResult, $NumberOfResults, $SortByField, $SortDescending -
#       handed unchanged to CleanScores()
# Group results are combined using the default search logic that was in
# effect on entry: OR adds group scores together, AND keeps only items
# found by every group that was actually searched.
# Returns the score list produced by CleanScores().
function GroupedSearch($SearchGroups, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # run each group's search strings through SetDebugLevel()
    foreach (array_keys($SearchGroups) as $Key)
    {
        if (isset($SearchGroups[$Key]["SearchStrings"]))
        {
            $SearchGroups[$Key]["SearchStrings"] =
                    $this->SetDebugLevel($SearchGroups[$Key]["SearchStrings"]);
        }
    }
    $this->DMsg(0, "In GroupedSearch() with "
            .count($SearchGroups)." search groups");

    # note when we began so that elapsed time can be reported
    $Began = microtime(TRUE);

    # remember the AND/OR setting so it can be put back afterward
    $SavedSearchLogic = $this->DefaultSearchLogic;

    $Scores = array();
    $FirstSearch = TRUE;
    foreach ($SearchGroups as $Group)
    {
        $this->DMsg(0, "----- GROUP ---------------------------");

        # group-specific logic wins, otherwise fall back to saved logic
        $this->DefaultSearchLogic = isset($Group["Logic"])
                ? $Group["Logic"] : $SavedSearchLogic;
        $this->DMsg(2, "Logic is "
                .(($this->DefaultSearchLogic == self::LOGIC_AND) ? "AND" : "OR"));

        # nothing to do for groups without search strings
        if (!isset($Group["SearchStrings"]))
        {
            continue;
        }

        # run the group search (NULL means that no search was conducted)
        $GroupScores = $this->SearchAcrossFields($Group["SearchStrings"]);
        if ($GroupScores === NULL)
        {
            continue;
        }

        if ($FirstSearch || ($SavedSearchLogic == self::LOGIC_OR))
        {
            # OR (or first conducted search): fold group scores into results
            foreach ($GroupScores as $ItemId => $Score)
            {
                $Scores[$ItemId] = isset($Scores[$ItemId])
                        ? ($Scores[$ItemId] + $Score) : $Score;
            }
            $FirstSearch = FALSE;
        }
        else
        {
            # AND: keep only items present in both earlier and group results
            $Earlier = $Scores;
            $Scores = array();
            foreach ($GroupScores as $ItemId => $Score)
            {
                if (isset($Earlier[$ItemId]))
                {
                    $Scores[$ItemId] = $Earlier[$ItemId] + $Score;
                }
            }
        }
    }

    # put back the AND/OR setting
    $this->DefaultSearchLogic = $SavedSearchLogic;

    # count, sort, and trim the result list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # remember how long the search took
    $this->LastSearchTime = microtime(TRUE) - $Began;

    $this->DMsg(0, "Ended up with ".$this->NumberOfResultsAvailable." results");
    return $Scores;
}
267 
# Register a function to be used for filtering search results.
#   $FunctionName - name of the function; appended to $this->FilterFuncs
function AddResultFilterFunction($FunctionName)
{
    # append to the list of registered filter functions
    $this->FilterFuncs[] = $FunctionName;
}
274 
# Get or set the default search logic (AND or OR).
#   $NewSetting - self::LOGIC_AND or self::LOGIC_OR to change the
#       setting; omit (or pass NULL) to leave it unchanged
# Returns the default search logic in effect after any change.
function DefaultSearchLogic($NewSetting = NULL)
{
    # (loose comparison kept on purpose so that empty values such as 0
    #  continue to be treated as "no change", exactly as before)
    if ($NewSetting != NULL)
    {
        $this->DefaultSearchLogic = $NewSetting;
    }

    # report current setting so the method also works as a getter
    return $this->DefaultSearchLogic;
}
284 
# Choose whether search terms are required by default.
#   $NewSetting - TRUE (the default) selects LOGIC_AND as the default
#       search logic, anything false selects LOGIC_OR
function SearchTermsRequiredByDefault($NewSetting = TRUE)
{
    $this->DefaultSearchLogic = $NewSetting
            ? self::LOGIC_AND : self::LOGIC_OR;
}
296 
# Report the number of results available from the most recent search.
# This is the same value the search methods report in their
# "Ended up with ... results" debug message.
# NOTE(review): $this->NumberOfResultsAvailable is presumably set by
# CleanScores(), which is not visible from here -- confirm.
function NumberOfResults()
{
    return $this->NumberOfResultsAvailable;
}
301 
# report the list of search terms collected while parsing search strings
# (ParseSearchStringForWords() appends each new non-excluded word to it)
function SearchTerms()
{
    $Terms = $this->SearchTermList;
    return $Terms;
}
306 
# report how long the most recent search took (difference between two
# microtime(TRUE) readings, i.e. seconds as a float)
function SearchTime()
{
    $Elapsed = $this->LastSearchTime;
    return $Elapsed;
}
311 
# Report the total weight of all fields involved in a fielded search.
#   $SearchStrings - search strings indexed by field name; only the
#       field names are examined
# Returns the sum of the weights of every named field known to
# AddField(), plus (when the special "XXXKeywordXXX" field is present)
# the weights of all fields flagged for keyword searching.
function FieldedSearchWeightScale($SearchStrings)
{
    $Total = 0;
    $SawKeywordSearch = FALSE;

    # tally weights for fields named explicitly
    foreach ($SearchStrings as $FieldName => $Unused)
    {
        if ($FieldName == "XXXKeywordXXX")
        {
            $SawKeywordSearch = TRUE;
            continue;
        }
        if (array_key_exists($FieldName, $this->FieldInfo))
        {
            $Total += $this->FieldInfo[$FieldName]["Weight"];
        }
    }

    # without a keyword search there is nothing more to add
    if (!$SawKeywordSearch)
    {
        return $Total;
    }

    # tally weights for every field that takes part in keyword searches
    foreach ($this->FieldInfo as $Info)
    {
        if ($Info["InKeywordSearch"])
        {
            $Total += $Info["Weight"];
        }
    }
    return $Total;
}
343 
344 
345  # ---- search database update functions
346 
# Rebuild the search database entries for one item.
#   $ItemId - ID of item to index; negative IDs (temporary records)
#       are ignored
# Existing word counts for the item are deleted, then the content of
# every field with a positive weight is fetched via GetFieldContent()
# and recorded via RecordSearchInfoForText().
function UpdateForItem($ItemId)
{
    # temporary records (negative IDs) are never indexed
    if ($ItemId < 0) { return; }

    # forget which word counts have been added for the previous item
    unset($this->WordCountAdded);

    # wipe whatever is currently stored for this item
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);

    foreach ($this->FieldInfo as $FieldName => $Info)
    {
        # only fields with a positive search weight get indexed
        if (!($Info["Weight"] > 0))
        {
            continue;
        }

        # field content may be one value or an array of values,
        # so treat both cases as a list
        $Content = $this->GetFieldContent($ItemId, $FieldName);
        $Strings = is_array($Content) ? $Content : array($Content);

        foreach ($Strings as $String)
        {
            $this->RecordSearchInfoForText($ItemId, $FieldName,
                    $Info["Weight"], $String,
                    $Info["InKeywordSearch"]);
        }
    }
}
390 
# Rebuild the search database entries for a range of items.
#   $StartingItemId - lowest item ID to include
#   $NumberOfItems - maximum number of items to update
# Returns the ID of the last item updated, or NULL when no item with
# an ID at or above $StartingItemId was found.
function UpdateForItems($StartingItemId, $NumberOfItems)
{
    # retrieve IDs for specified number of items starting at specified ID
    # (both values are forced to integers because they are placed
    #  directly into the query text)
    $this->DB->Query("SELECT ".$this->ItemIdFieldName." FROM ".$this->ItemTableName
            ." WHERE ".$this->ItemIdFieldName." >= ".intval($StartingItemId)
            ." ORDER BY ".$this->ItemIdFieldName." LIMIT ".intval($NumberOfItems));
    $ItemIds = $this->DB->FetchColumn($this->ItemIdFieldName);

    # update search info for each item, keeping track of the last one
    # (starts as NULL so an empty result no longer reads an undefined
    #  variable)
    $LastItemId = NULL;
    foreach ($ItemIds as $ItemId)
    {
        $this->UpdateForItem($ItemId);
        $LastItemId = $ItemId;
    }

    # return ID of last item updated to caller
    return $LastItemId;
}
410 
# Remove everything stored about an item from the search database.
#   $ItemId - ID of item whose word counts should be deleted
function DropItem($ItemId)
{
    # word counts are the only per-item data removed here
    $Query = "DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId;
    $this->DB->Query($Query);
}
417 
# Remove everything stored about a field from the search database.
#   $FieldName - name of field to drop
# Does nothing when the field has no entry in SearchFields.
function DropField($FieldName)
{
    # retrieve our ID for field
    $FieldId = $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".addslashes($FieldName)."'", "FieldId");

    # nothing to drop if the search DB has never seen this field
    if ($FieldId === NULL)
    {
        return;
    }

    # (the ID is placed into the queries as a plain integer -- the
    #  previous \' sequences are not escapes inside double-quoted PHP
    #  strings and put literal backslashes into the SQL)
    $FieldId = intval($FieldId);

    # drop all entries pertaining to field from word counts table
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = ".$FieldId);

    # drop field from our fields table
    $this->DB->Query("DELETE FROM SearchFields WHERE FieldId = ".$FieldId);

    # forget cached ID so GetFieldId() does not hand out a dead value
    unset($this->FieldIds[$FieldName]);
}
431 
# report the total number of terms indexed by the search engine
# (number of rows in the SearchWords table)
function SearchTermCount()
{
    $Query = "SELECT COUNT(*) AS TermCount FROM SearchWords";
    return $this->DB->Query($Query, "TermCount");
}
438 
# report the total number of items indexed by the search engine
# (number of distinct item IDs in the SearchWordCounts table)
function ItemCount()
{
    $Query = "SELECT COUNT(DISTINCT ItemId) AS ItemCount FROM SearchWordCounts";
    return $this->DB->Query($Query, "ItemCount");
}
445 
# Add synonyms for a word.
#   $Word - word to which the synonyms apply (added to the word table
#       if not already there)
#   $Synonyms - array of synonym words (each added to the word table
#       if not already there)
# Returns the number of new synonym pairs added.  Pairs that already
# exist (in either order) are not added again, and a word is never
# recorded as a synonym of itself.
function AddSynonyms($Word, $Synonyms)
{
    # assume no synonyms will be added
    $AddCount = 0;

    # get ID for word
    $WordId = $this->GetWordId($Word, TRUE);

    # for each synonym passed in
    foreach ($Synonyms as $Synonym)
    {
        # get ID for synonym
        $SynonymId = $this->GetWordId($Synonym, TRUE);

        # skip self-references (a self-pair would make the word show up
        # as its own synonym wherever synonyms are looked up)
        if ($SynonymId == $WordId)
        {
            continue;
        }

        # if synonym is not already in database
        $this->DB->Query("SELECT * FROM SearchWordSynonyms"
                ." WHERE (WordIdA = ".$WordId
                    ." AND WordIdB = ".$SynonymId.")"
                ." OR (WordIdB = ".$WordId
                    ." AND WordIdA = ".$SynonymId.")");
        if ($this->DB->NumRowsSelected() == 0)
        {
            # add synonym entry to database
            $this->DB->Query("INSERT INTO SearchWordSynonyms"
                    ." (WordIdA, WordIdB)"
                    ." VALUES (".$WordId.", ".$SynonymId.")");
            $AddCount++;
        }
    }

    # report to caller number of new synonyms added
    return $AddCount;
}
485 
# Remove synonym(s) for a word.
#   $Word - word whose synonyms should be removed
#   $Synonyms - array of specific synonyms to remove, or NULL (the
#       default) to remove every synonym recorded for the word
# Words and synonyms that have no ID in the word table are ignored.
function RemoveSynonyms($Word, $Synonyms = NULL)
{
    # unknown words have no synonyms to remove
    $WordId = $this->GetWordId($Word);
    if ($WordId === NULL)
    {
        return;
    }

    # with no specific synonyms given, remove every pairing for the word
    if ($Synonyms === NULL)
    {
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE WordIdA = '".$WordId."'"
                ." OR WordIdB = '".$WordId."'");
        return;
    }

    # otherwise remove just the pairings with the listed synonyms
    foreach ($Synonyms as $Synonym)
    {
        $SynonymId = $this->GetWordId($Synonym);
        if ($SynonymId === NULL)
        {
            continue;
        }

        # pairs may be stored in either order, so match both
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE (WordIdA = '".$WordId."'"
                    ." AND WordIdB = '".$SynonymId."')"
                ." OR (WordIdB = '".$WordId."'"
                    ." AND WordIdA = '".$SynonymId."')");
    }
}
525 
# remove every synonym pairing (empties the SearchWordSynonyms table)
function RemoveAllSynonyms()
{
    $Query = "DELETE FROM SearchWordSynonyms";
    $this->DB->Query($Query);
}
531 
# Get synonyms for a word.
#   $Word - word to look up
# Returns an array of synonym words (empty when the word is unknown
# or has no synonyms).
function GetSynonyms($Word)
{
    # assume no synonyms will be found
    $Synonyms = array();

    # look up ID for word
    $WordId = $this->GetWordId($Word);

    # if word ID was found
    if ($WordId !== NULL)
    {
        # look up IDs of all synonyms for this word
        $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
                ." WHERE WordIdA = ".$WordId
                ." OR WordIdB = ".$WordId);

        # collect all IDs before translating any of them, because
        # GetWord() runs its own queries on the same DB object
        # (FetchRow must be called as a method -- without the
        #  parentheses it is a property read and no rows are fetched)
        $SynonymIds = array();
        while ($Record = $this->DB->FetchRow())
        {
            # the synonym is whichever side of the pair is not our word
            $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                    ? $Record["WordIdB"] : $Record["WordIdA"];
        }

        # for each synonym ID
        foreach ($SynonymIds as $SynonymId)
        {
            # look up synonym word and add to synonym list
            $Synonyms[] = $this->GetWord($SynonymId);
        }
    }

    # return synonyms to caller
    return $Synonyms;
}
566 
# Get all synonyms.
# Returns a two-dimensional array with words as the first index and
# lists of synonyms as values.  Each stored pair ends up listed under
# only one of its two words, and both levels are sorted alphabetically
# (synonym lists keep their original keys).
function GetAllSynonyms()
{
    $SynonymList = array();

    # walk the stored pairs using a separate DB object, since GetWord()
    # issues queries of its own on $this->DB
    $OurDB = new Database();
    $OurDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
    while ($Record = $OurDB->FetchRow())
    {
        # translate both IDs into words
        $TermA = $this->GetWord($Record["WordIdA"]);
        $TermB = $this->GetWord($Record["WordIdB"]);

        # list each term under the other (A's list first, then B's),
        # skipping entries that are already present
        foreach (array(array($TermA, $TermB), array($TermB, $TermA)) as $Pair)
        {
            list($Key, $Value) = $Pair;
            $AlreadyListed = isset($SynonymList[$Key])
                    && in_array($Value, $SynonymList[$Key]);
            if (!$AlreadyListed)
            {
                $SynonymList[$Key][] = $Value;
            }
        }
    }

    # remove reciprocal duplicates
    foreach ($SynonymList as $Word => $Synonyms)
    {
        foreach ($Synonyms as $Synonym)
        {
            # only pairs still listed in both directions need trimming
            $Reciprocal = isset($SynonymList[$Synonym])
                    && isset($SynonymList[$Word])
                    && in_array($Word, $SynonymList[$Synonym])
                    && in_array($Synonym, $SynonymList[$Word]);
            if (!$Reciprocal)
            {
                continue;
            }

            # trim the entry from whichever side has the shorter list
            # (from the synonym's side when the lists are equal in length)
            if (count($SynonymList[$Word]) < count($SynonymList[$Synonym]))
            {
                $Owner = $Word;
                $Dropped = $Synonym;
            }
            else
            {
                $Owner = $Synonym;
                $Dropped = $Word;
            }
            $SynonymList[$Owner] = array_diff(
                    $SynonymList[$Owner], array($Dropped));

            # discard lists that have become empty
            if (!count($SynonymList[$Owner]))
            {
                unset($SynonymList[$Owner]);
            }
        }
    }

    # sort alphabetically (just for convenience)
    foreach (array_keys($SynonymList) as $Key)
    {
        asort($SynonymList[$Key]);
    }
    ksort($SynonymList);

    return $SynonymList;
}
656 
# Replace all synonyms.
#   $SynonymList - two-dimensional array with words as the first index
#       and arrays of synonyms as values
# Every existing synonym pairing is removed before the new ones are added.
function SetAllSynonyms($SynonymList)
{
    # start from a clean slate
    $this->RemoveAllSynonyms();

    # add the synonyms supplied for each word
    foreach ($SynonymList as $Word => $WordSynonyms)
    {
        $this->AddSynonyms($Word, $WordSynonyms);
    }
}
670 
# Load synonyms from a file.
#   $FileName - name of file to read; each line holds a word followed
#       by its synonyms, separated by whitespace and/or commas, and
#       lines whose first non-blank character is "#" are comments
# Returns the number of new synonym pairs added (0 if the file could
# not be read).
function LoadSynonymsFromFile($FileName)
{
    # assume no synonyms will be added
    $AddCount = 0;

    # read in contents of file
    $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);

    # bail out if file could not be read (file() returns FALSE)
    if (!is_array($Lines))
    {
        return $AddCount;
    }

    # for each line of file
    foreach ($Lines as $Line)
    {
        # skip comment lines (pattern is anchored so that a "#" later
        # in a line does not cause the whole line to be discarded)
        if (preg_match("/^\s*#/", $Line))
        {
            continue;
        }

        # split line into words, ignoring leading/trailing separators
        # so that an indented line does not yield an empty first word
        $Words = preg_split("/[\s,]+/", trim($Line), -1, PREG_SPLIT_NO_EMPTY);

        # if synonyms found
        if (count($Words) > 1)
        {
            # separate out word and synonyms
            $Word = array_shift($Words);

            # add synonyms
            $AddCount += $this->AddSynonyms($Word, $Words);
        }
    }

    # return count of synonyms added to caller
    return $AddCount;
}
715 
# Suggest alternate searches.
#   $SearchString - search string for which alternatives are wanted
# Currently a placeholder: it does nothing and returns no value.
function SuggestAlternateSearches($SearchString)
{
    # (not implemented)
    return;
}
721 
722 
723  # ---- PRIVATE INTERFACE -------------------------------------------------
724 
725  protected $DB;
726  protected $DebugLevel;
727  protected $ItemTableName;
728  protected $ItemIdFieldName;
730  protected $LastSearchTime;
731  protected $FilterFuncs;
732  protected $DefaultSearchLogic = self::LOGIC_AND;
733  protected $StemmingEnabled = TRUE;
734  protected $SynonymsEnabled = TRUE;
735 
736  private $WordCountAdded;
737  private $FieldIds;
738  private $FieldInfo;
739  private $RequiredTermCount;
740  private $RequiredTermCounts;
741  private $InclusiveTermCount;
742  private $ExcludedTermCount;
743  private $SearchTermList;
744 
745  const STEM_ID_OFFSET = 1000000;
746 
747 
748  # ---- common private functions (used in both searching and DB build)
749 
# Normalize a search string and parse it into a list of search terms.
#   $SearchString - raw search string
#   $IgnorePhrases - when TRUE, the contents of quoted phrases and
#       parenthesized groups are treated as ordinary words (only the
#       quote/paren characters are dropped) instead of being removed
# Returns an array indexed by (lower-case) word, with a combination of
# the WORD_PRESENT / WORD_EXCLUDED / WORD_REQUIRED flags as each value.
# A leading "-" excludes a word, a leading "+" requires it, and a
# leading "~" makes it optional even when the default logic is AND.
# Side effects: increments the excluded/required/inclusive term counts
# and appends new non-excluded words to the search term list.
private function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE)
{
    # strip off any surrounding whitespace
    $Text = trim($SearchString);

    # phrases and groups are normally removed whole; when they are to
    # be ignored, only their delimiters are removed
    $PhrasePattern = $IgnorePhrases ? "/\"/" : "/\"[^\"]*\"/";
    $GroupPattern = $IgnorePhrases ? "/[\(\)]+/" : "/\\([^)]*\\)/";

    # set up normalization rules as (pattern, replacement) pairs,
    # applied in order (keeping each pattern next to its replacement
    # prevents the two lists from drifting out of step)
    $Rules = array(
            # get rid of possessive plurals
            array("/'s[^a-z0-9\\-+~]+/i", " "),
            # get rid of single quotes / apostrophes
            array("/'/", ""),
            # get rid of phrases (or just the double quotes)
            array($PhrasePattern, " "),
            # get rid of groups (or just the parentheses)
            array($GroupPattern, " "),
            # convert non-alphanumerics / non-minus/plus/tilde to a space
            array("/[^a-z0-9\\-+~]+/i", " "),
            # convert minus preceded by anything but whitespace to a space
            # (keeping the preceding character)
            array("/([^\\s])-+/i", "\\1 "),
            # convert plus preceded by anything but whitespace to a space
            # (keeping the preceding character)
            array("/([^\\s])\\++/i", "\\1 "),
            # convert minus followed by whitespace to a space
            array("/-\\s/i", " "),
            # convert plus followed by whitespace to a space
            array("/\\+\\s/i", " "),
            # convert tilde followed by whitespace to a space
            array("/~\\s/i", " "),
            # convert multiple spaces to one space
            array("/[ ]+/", " "),
            );
    $Patterns = array();
    $Replacements = array();
    foreach ($Rules as $Rule)
    {
        $Patterns[] = $Rule[0];
        $Replacements[] = $Rule[1];
    }

    # remove punctuation from text and normalize whitespace
    $Text = preg_replace($Patterns, $Replacements, $Text);
    $this->DMsg(2, "Normalized search string is '".$Text."'");

    # convert text to lower case
    $Text = strtolower($Text);

    # strip off any extraneous whitespace
    $Text = trim($Text);

    # start with an empty array
    $Words = array();

    # if we have any words left after normalization
    if (strlen($Text) != 0)
    {
        # for each word
        foreach (explode(" ", $Text) as $Word)
        {
            # grab first character of word and strip it off if it is
            # one of the option characters
            $FirstChar = substr($Word, 0, 1);
            if (($FirstChar == "-") || ($FirstChar == "+")
                    || ($FirstChar == "~"))
            {
                $Word = (string)substr($Word, 1);
            }

            # skip tokens that were nothing but an option character
            if ($Word === "")
            {
                continue;
            }

            # counts are only bumped the first time a word is seen
            $IsNewWord = !isset($Words[$Word]);

            # set flags according to option character
            $Flags = WORD_PRESENT;
            if ($FirstChar == "-")
            {
                $Flags |= WORD_EXCLUDED;
                if ($IsNewWord)
                {
                    $this->ExcludedTermCount++;
                }
            }
            else
            {
                # word is required if explicitly marked with "+", or
                # if default logic is AND and it is not marked with "~"
                if (($FirstChar != "~")
                        && (($this->DefaultSearchLogic == self::LOGIC_AND)
                                || ($FirstChar == "+")))
                {
                    $Flags |= WORD_REQUIRED;
                    if ($IsNewWord)
                    {
                        $this->RequiredTermCount++;
                    }
                }
                if ($IsNewWord)
                {
                    $this->InclusiveTermCount++;
                    $this->SearchTermList[] = $Word;
                }
            }

            # store flags to indicate word found
            # (a repeated word takes the flags of its last occurrence)
            $Words[$Word] = $Flags;
            $this->DMsg(3, "Word identified (".$Word.")");
        }
    }

    # return normalized words to caller
    return $Words;
}
862 
# Retrieve the search DB ID for a field, adding the field to the
# SearchFields table if it is not already there.
#   $FieldName - name of field
# Returns the field ID.  IDs are cached in $this->FieldIds after the
# first lookup.
protected function GetFieldId($FieldName)
{
    # answer from cache when possible
    if (isset($this->FieldIds[$FieldName]))
    {
        return $this->FieldIds[$FieldName];
    }

    # look for the field in the database
    $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".addslashes($FieldName)."'");
    $Record = $this->DB->FetchRow();

    if ($Record)
    {
        # field is already known
        $FieldId = $Record["FieldId"];
    }
    else
    {
        # field is new, so add it and use the freshly assigned ID
        $this->DB->Query("INSERT INTO SearchFields (FieldName) "
                ."VALUES ('".addslashes($FieldName)."')");
        $FieldId = $this->DB->LastInsertId("SearchFields");
    }

    # remember ID for next time
    $this->FieldIds[$FieldName] = $FieldId;
    return $FieldId;
}
895 
# Retrieve the ID for a word.
#   $Word - word to look up
#   $AddIfNotFound - when TRUE, a word missing from the SearchWords
#       table is added (in lower case) and its new ID is returned
# Returns the word ID, or NULL if no ID was found.  IDs are kept in a
# static cache.
private function GetWordId($Word, $AddIfNotFound = FALSE)
{
    static $WordIdCache;

    # answer from cache when possible
    if (isset($WordIdCache[$Word]))
    {
        return $WordIdCache[$Word];
    }

    # look for the word in the database
    $WordId = $this->DB->Query("SELECT WordId"
            ." FROM SearchWords"
            ." WHERE WordText='".addslashes($Word)."'",
            "WordId");

    # add the word if it is missing and the caller asked for that
    $NeedToAdd = $AddIfNotFound && ($WordId === NULL);
    if ($NeedToAdd)
    {
        $this->DB->Query("INSERT INTO SearchWords (WordText)"
                ." VALUES ('".addslashes(strtolower($Word))."')");
        $WordId = $this->DB->LastInsertId("SearchWords");
    }

    # remember the result for next time
    $WordIdCache[$Word] = $WordId;
    return $WordId;
}
933 
# Retrieve the ID for a word stem.
#   $Stem - stem to look up
#   $AddIfNotFound - when TRUE, a stem missing from the SearchStems
#       table is added (in lower case) and its new ID is returned
# Returns the stem ID (database ID plus STEM_ID_OFFSET, which keeps
# stem IDs distinct from word IDs), or NULL if no ID was found.
private function GetStemId($Stem, $AddIfNotFound = FALSE)
{
    static $StemIdCache;

    # if stem was in ID cache
    if (isset($StemIdCache[$Stem]))
    {
        # use ID from cache
        return $StemIdCache[$Stem];
    }

    # look up ID in database
    $StemId = $this->DB->Query("SELECT WordId"
            ." FROM SearchStems"
            ." WHERE WordText='".addslashes($Stem)."'",
            "WordId");

    # if ID was not found and caller requested it be added
    if (($StemId === NULL) && $AddIfNotFound)
    {
        # add stem to database
        $this->DB->Query("INSERT INTO SearchStems (WordText)"
                ." VALUES ('".addslashes(strtolower($Stem))."')");

        # get ID for newly added stem
        $StemId = $this->DB->LastInsertId("SearchStems");
    }

    # report failure as NULL (applying the offset to a missing ID would
    # turn it into a bogus ID equal to STEM_ID_OFFSET)
    if ($StemId === NULL)
    {
        return NULL;
    }

    # adjust from DB ID value to stem ID value
    $StemId += self::STEM_ID_OFFSET;

    # save ID to cache
    $StemIdCache[$Stem] = $StemId;

    # return ID to caller
    return $StemId;
}
974 
# Retrieve the word (or stem) for an ID.
#   $WordId - word ID, or stem ID (a value at or above STEM_ID_OFFSET)
# Returns the text of the word, or whatever the DB lookup yields when
# there is no matching row.
# NOTE(review): other lookups in this class treat that "not found"
# value as NULL -- confirm against Database::Query().
private function GetWord($WordId)
{
    static $WordCache;

    # if word was in cache
    if (isset($WordCache[$WordId]))
    {
        # use word from cache
        return $WordCache[$WordId];
    }

    # adjust search location and DB ID if word is stem
    # (the ID passed in is left untouched so that it can serve as the
    #  cache key -- stem and word IDs overlap once the offset is removed)
    $TableName = "SearchWords";
    $DBWordId = $WordId;
    if ($WordId >= self::STEM_ID_OFFSET)
    {
        $TableName = "SearchStems";
        $DBWordId = $WordId - self::STEM_ID_OFFSET;
    }

    # look up word in database
    $Word = $this->DB->Query("SELECT WordText"
            ." FROM ".$TableName
            ." WHERE WordId='".$DBWordId."'",
            "WordText");

    # save word to cache
    $WordCache[$WordId] = $Word;

    # return word to caller
    return $Word;
}
1009 
1010 
1011  # ---- private functions used in searching
1012 
# Perform a search across multiple fields and return raw results.
#   $SearchStrings - array indexed by field name ("XXXKeywordXXX" for
#       the keyword search), with a search string or an array of search
#       strings as each value
# Returns the search result scores, or NULL if no search was conducted.
# Text searches are run for the keyword field and for text fields whose
# search string does not start with a comparison operator; everything
# else is handed to SearchForComparisonMatches().
private function SearchAcrossFields($SearchStrings)
{
    # start by assuming no search will be done
    $Scores = NULL;

    # start with no parsed words or phrases
    $Words = array();
    $Phrases = array();

    # clear word counts
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->ExcludedTermCount = 0;

    # for each field
    $NeedComparisonSearch = FALSE;
    foreach ($SearchStrings as $FieldName => $SearchStringArray)
    {
        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # if field is keyword or field is text and does not look like comparison match
            if (($FieldName == "XXXKeywordXXX")
                    || (isset($this->FieldInfo[$FieldName])
                        && ($this->FieldInfo[$FieldName]["FieldType"] == self::FIELDTYPE_TEXT)
                        && !preg_match("/^[><!]=./", $SearchString)
                        && !preg_match("/^[><=]./", $SearchString)))
            {
                $this->DMsg(0, "Searching text field \""
                        .$FieldName."\" for string \"$SearchString\"");

                # normalize text and split into words
                $Words[$FieldName] =
                        $this->ParseSearchStringForWords($SearchString);

                # calculate scores for matching items
                if (count($Words[$FieldName]))
                {
                    $Scores = $this->SearchForWords(
                            $Words[$FieldName], $FieldName, $Scores);
                    $this->DMsg(3, "Have "
                            .(is_array($Scores) ? count($Scores) : 0)
                            ." results after word search");
                }

                # split into phrases
                $Phrases[$FieldName] =
                        $this->ParseSearchStringForPhrases($SearchString);

                # handle any phrases
                if (count($Phrases[$FieldName]))
                {
                    $Scores = $this->SearchForPhrases(
                            $Phrases[$FieldName], $Scores, $FieldName, TRUE, FALSE);
                    $this->DMsg(3, "Have "
                            .(is_array($Scores) ? count($Scores) : 0)
                            ." results after phrase search");
                }
            }
            else
            {
                # set flag to indicate possible comparison search candidate found
                $NeedComparisonSearch = TRUE;
            }
        }
    }

    # perform comparison searches
    if ($NeedComparisonSearch)
    {
        $Scores = $this->SearchForComparisonMatches($SearchStrings, $Scores);
        $this->DMsg(3, "Have ".(is_array($Scores) ? count($Scores) : 0)
                ." results after comparison search");
    }

    # if no results found and exclusions specified
    # ($Scores is still NULL when no search was run, so it is tested
    #  with empty() rather than count(), which rejects NULL in PHP 8)
    if (empty($Scores) && $this->ExcludedTermCount)
    {
        # load all records
        $Scores = $this->LoadScoresForAllRecords();
    }

    # if search results found
    if (!empty($Scores))
    {
        # for each search text string
        foreach ($SearchStrings as $FieldName => $SearchStringArray)
        {
            # convert search string to array if needed
            if (!is_array($SearchStringArray))
            {
                $SearchStringArray = array($SearchStringArray);
            }

            # for each search string for this field
            foreach ($SearchStringArray as $SearchString)
            {
                # if field is text
                if (($FieldName == "XXXKeywordXXX")
                        || (isset($this->FieldInfo[$FieldName])
                            && ($this->FieldInfo[$FieldName]["FieldType"]
                                == self::FIELDTYPE_TEXT)))
                {
                    # if there are words in search text
                    if (isset($Words[$FieldName]))
                    {
                        # handle any excluded words
                        $Scores = $this->FilterOnExcludedWords(
                                $Words[$FieldName], $Scores, $FieldName);
                    }

                    # handle any excluded phrases
                    if (isset($Phrases[$FieldName]))
                    {
                        $Scores = $this->SearchForPhrases(
                                $Phrases[$FieldName], $Scores, $FieldName, FALSE, TRUE);
                    }
                }
            }
        }

        # strip off any results that don't contain required words
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # return search result scores to caller
    return $Scores;
}
1140 
1141  # search for words in specified field
/**
 * Search for individual words in the specified field and add the word
 * counts that are found to the supplied scores.  Words flagged as excluded
 * are skipped.  When synonym support and/or stemming are enabled, counts
 * for synonyms and for the stem of the word are added at half value
 * (rounded up).  Each item that matches a word flagged as required also has
 * its entry in $this->RequiredTermCounts incremented.
 * @param array $Words Flags (WORD_* bit values) indexed by word.
 * @param string $FieldName Name of field to search.  (OPTIONAL, defaults
 *       to the keyword pseudo-field)
 * @param array $Scores Existing scores indexed by item ID.  (OPTIONAL)
 * @return array Updated scores indexed by item ID.
 */
private function SearchForWords(
        $Words, $FieldName = "XXXKeywordXXX", $Scores = NULL)
{
    $DB = $this->DB;

    # start with empty search result scores list if none passed in
    if ($Scores == NULL)
    {
        $Scores = array();
    }

    # grab field ID
    $FieldId = $this->GetFieldId($FieldName);

    # for each word
    foreach ($Words as $Word => $Flags)
    {
        $this->DMsg(2, "Searching for word '".$Word."' in field ".$FieldName);

        # excluded words contribute nothing to the scores built here
        if ($Flags & WORD_EXCLUDED)
        {
            continue;
        }

        # start every word with an empty count list, so that a word that
        #       is not in the database cannot pick up the counts that
        #       were gathered for the previous word
        $Counts = array();

        # look up record ID for word
        $this->DMsg(2, "Looking up word \"".$Word."\"");
        $WordId = $this->GetWordId($Word);

        # if word is in DB
        if ($WordId !== NULL)
        {
            # look up counts for word
            $DB->Query("SELECT ItemId,Count FROM SearchWordCounts "
                    ."WHERE WordId = ".$WordId
                    ." AND FieldId = ".$FieldId);
            $Counts = $DB->FetchColumn("Count", "ItemId");

            # if synonym support is enabled
            if ($this->SynonymsEnabled)
            {
                # look for any synonyms (word may be on either side of pair)
                $DB->Query("SELECT WordIdA, WordIdB"
                        ." FROM SearchWordSynonyms"
                        ." WHERE WordIdA = ".$WordId
                        ." OR WordIdB = ".$WordId);

                # if synonyms were found
                if ($DB->NumRowsSelected())
                {
                    # retrieve synonym IDs (all rows are read before the
                    #       next query is issued on the same DB object)
                    $SynonymIds = array();
                    while ($Record = $DB->FetchRow())
                    {
                        $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                                ? $Record["WordIdB"]
                                : $Record["WordIdA"];
                    }

                    # for each synonym
                    foreach ($SynonymIds as $SynonymId)
                    {
                        # retrieve counts for synonym
                        $DB->Query("SELECT ItemId,Count"
                                ." FROM SearchWordCounts"
                                ." WHERE WordId = ".$SynonymId
                                ." AND FieldId = ".$FieldId);
                        $SynonymCounts = $DB->FetchColumn("Count", "ItemId");

                        # add synonym counts at half value
                        foreach ($SynonymCounts as $ItemId => $Count)
                        {
                            $AdjustedCount = ceil($Count / 2);
                            if (isset($Counts[$ItemId]))
                            {
                                $Counts[$ItemId] += $AdjustedCount;
                            }
                            else
                            {
                                $Counts[$ItemId] = $AdjustedCount;
                            }
                        }
                    }
                }
            }
        }

        # if stemming is enabled
        if ($this->StemmingEnabled)
        {
            # retrieve stem ID
            $Stem = PorterStemmer::Stem($Word);
            $this->DMsg(2, "Looking up stem \"".$Stem."\"");
            $StemId = $this->GetStemId($Stem);

            # if ID found for stem
            if ($StemId !== NULL)
            {
                # retrieve counts for stem
                $DB->Query("SELECT ItemId,Count"
                        ." FROM SearchWordCounts"
                        ." WHERE WordId = ".$StemId
                        ." AND FieldId = ".$FieldId);
                $StemCounts = $DB->FetchColumn("Count", "ItemId");

                # add stem counts at half value
                foreach ($StemCounts as $ItemId => $Count)
                {
                    $AdjustedCount = ceil($Count / 2);
                    if (isset($Counts[$ItemId]))
                    {
                        $Counts[$ItemId] += $AdjustedCount;
                    }
                    else
                    {
                        $Counts[$ItemId] = $AdjustedCount;
                    }
                }
            }
        }

        # if counts were found for this word
        if (isset($Counts))
        {
            # for each count
            foreach ($Counts as $ItemId => $Count)
            {
                # if word flagged as required
                if ($Flags & WORD_REQUIRED)
                {
                    # increment required word count for record
                    if (isset($this->RequiredTermCounts[$ItemId]))
                    {
                        $this->RequiredTermCounts[$ItemId]++;
                    }
                    else
                    {
                        $this->RequiredTermCounts[$ItemId] = 1;
                    }
                }

                # add to item record score
                if (isset($Scores[$ItemId]))
                {
                    $Scores[$ItemId] += $Count;
                }
                else
                {
                    $Scores[$ItemId] = $Count;
                }
            }
        }
    }

    # return basic scores to caller
    return $Scores;
}
1303 
1304  # extract phrases (terms surrounded by quotes) from search string
/**
 * Pull quoted phrases out of a search string.  The character immediately
 * before the opening quote acts as an option character: "-" marks the
 * phrase as excluded, "+" marks it as required, and "~" keeps it from being
 * required when the default search logic is AND.  The excluded, required,
 * and inclusive term counters and the search term list on this object are
 * updated the first time each distinct phrase is seen.
 * @param string $SearchString Search string to parse.
 * @return array Flags (WORD_* bit values) indexed by phrase.
 */
private function ParseSearchStringForPhrases($SearchString)
{
    # after splitting on double quotes, the odd-numbered chunks are the
    #       quoted text and the even-numbered chunks are what surrounds it
    $Chunks = explode("\"", $SearchString);
    $ChunkCount = count($Chunks);

    $Phrases = array();
    for ($Pos = 2;  $Pos < $ChunkCount;  $Pos += 2)
    {
        # quoted text, plus the character just before its opening quote
        $Phrase = trim(addslashes($Chunks[$Pos - 1]));
        $OptionChar = substr($Chunks[$Pos - 2], -1);

        # counters are only bumped the first time a phrase is encountered
        $IsNewPhrase = !isset($Phrases[$Phrase]);

        $Flags = WORD_PRESENT;
        if ($OptionChar == "-")
        {
            # excluded phrase
            $Flags |= WORD_EXCLUDED;
            if ($IsNewPhrase)
            {
                $this->ExcludedTermCount++;
            }
        }
        else
        {
            # phrase is required if AND logic is in effect (and not
            #       overridden with "~") or if explicitly marked with "+"
            $RequiredByDefault =
                    ($this->DefaultSearchLogic == self::LOGIC_AND)
                    && ($OptionChar != "~");
            if ($RequiredByDefault || ($OptionChar == "+"))
            {
                $Flags |= WORD_REQUIRED;
                if ($IsNewPhrase)
                {
                    $this->RequiredTermCount++;
                }
            }

            # every non-excluded phrase counts as an inclusive term
            if ($IsNewPhrase)
            {
                $this->InclusiveTermCount++;
                $this->SearchTermList[] = $Phrase;
            }
        }

        # (a repeated phrase keeps only the flags from its last occurrence)
        $Phrases[$Phrase] = $Flags;
    }

    # return phrases to caller
    return $Phrases;
}
1357 
1358  # extract groups (terms surrounded by parens) from search string
1359  # (NOTE: NOT YET IMPLEMENTED!!!)
/**
 * Extract groups from a search string.  (NOTE: NOT YET IMPLEMENTED --
 * the string is only split on open parens and every second chunk is
 * returned as-is, so close parens are not stripped.)
 * @param string $SearchString Search string to parse.
 * @return array List of group strings (empty array if none were found).
 */
private function ParseSearchStringForGroups($SearchString)
{
    # split into chunks delimited by open paren
    $Pieces = explode("(", $SearchString);

    # start with an empty list so that a defined value is always returned
    $Groups = array();

    # for each pair of chunks
    $PieceCount = count($Pieces);
    for ($Index = 2;  $Index < $PieceCount;  $Index += 2)
    {
        # grab group from chunk
        $Groups[] = trim(addslashes($Pieces[$Index - 1]));
    }

    # return groups to caller
    return $Groups;
}
1380 
/**
 * Search a field for a phrase.  This base version only terminates the
 * script with an error message (presumably child classes are expected to
 * override it -- confirm).  SearchForPhrases() iterates over the return
 * value as a list of item IDs.
 * @param string $FieldName Name of field to search.
 * @param string $Phrase Phrase to look for.
 */
protected function SearchFieldForPhrases($FieldName, $Phrase)
{
    # error out
    $ErrorMessage = "<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\n";
    exit($ErrorMessage);
}
1386 
/**
 * Apply phrases to a score list.  Items containing an excluded phrase are
 * removed from the scores;  items containing a non-excluded phrase have
 * their score raised by (number of words in phrase) * (field weight), and
 * their required term count incremented if the phrase is required.  For
 * the keyword pseudo-field, the method calls itself once for every field
 * that is flagged for inclusion in keyword searches.
 * @param array $Phrases Flags (WORD_* bit values) indexed by phrase.
 * @param array $Scores Existing scores indexed by item ID (may be NULL).
 * @param string $FieldName Name of field to search.  (OPTIONAL, defaults
 *       to the keyword pseudo-field)
 * @param bool $ProcessNonExcluded Whether to handle phrases that are not
 *       flagged as excluded.  (OPTIONAL, defaults to TRUE)
 * @param bool $ProcessExcluded Whether to handle phrases that are flagged
 *       as excluded.  (OPTIONAL, defaults to TRUE)
 * @return array Updated scores indexed by item ID.
 */
private function SearchForPhrases($Phrases, $Scores, $FieldName = "XXXKeywordXXX",
        $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
{
    # nothing to do if there are no phrases
    if (!(count($Phrases) > 0))
    {
        return $Scores;
    }

    # a keyword search is spread over all fields marked for keyword searching
    if ($FieldName == "XXXKeywordXXX")
    {
        foreach ($this->FieldInfo as $KFieldName => $Info)
        {
            if ($Info["InKeywordSearch"])
            {
                $Scores = $this->SearchForPhrases($Phrases, $Scores, $KFieldName,
                        $ProcessNonExcluded, $ProcessExcluded);
            }
        }
        return $Scores;
    }

    # otherwise look for each phrase in the one specified field
    foreach ($Phrases as $Phrase => $Flags)
    {
        $this->DMsg(2, "Searching for phrase '".$Phrase
                ."' in field ".$FieldName);

        # work out what (if anything) we are to do with this phrase
        $PhraseIsExcluded = ($Flags & WORD_EXCLUDED) ? TRUE : FALSE;
        $RemoveMatches = $ProcessExcluded && $PhraseIsExcluded;
        $ScoreMatches = $ProcessNonExcluded && !$PhraseIsExcluded;
        if (!$RemoveMatches && !$ScoreMatches)
        {
            continue;
        }

        # initialize score list if necessary
        if ($Scores === NULL)
        {
            $Scores = array();
        }

        # retrieve list of items that contain phrase
        $ItemIds = $this->SearchFieldForPhrases($FieldName, $Phrase);

        foreach ($ItemIds as $ItemId)
        {
            # excluded phrase:  knock item off of list and move on
            if ($RemoveMatches)
            {
                unset($Scores[$ItemId]);
                continue;
            }

            # phrase value is based on number of words and field weight
            $WordsInPhrase = count(
                    preg_split("/[\s]+/", $Phrase, -1, PREG_SPLIT_NO_EMPTY));
            $PhraseScore = $WordsInPhrase
                    * $this->FieldInfo[$FieldName]["Weight"];
            $this->DMsg(2, "Phrase score is ".$PhraseScore);

            # bump up item record score
            if (isset($Scores[$ItemId]))
            {
                $Scores[$ItemId] += $PhraseScore;
            }
            else
            {
                $Scores[$ItemId] = $PhraseScore;
            }

            # track matches against required phrases
            if ($Flags & WORD_REQUIRED)
            {
                if (isset($this->RequiredTermCounts[$ItemId]))
                {
                    $this->RequiredTermCounts[$ItemId]++;
                }
                else
                {
                    $this->RequiredTermCounts[$ItemId] = 1;
                }
            }
        }
    }

    # return updated scores to caller
    return $Scores;
}
1477 
/**
 * Remove from a score list every item that contains (in the specified
 * field) a word flagged as excluded.
 * @param array $Words Flags (WORD_* bit values) indexed by word.
 * @param array $Scores Scores indexed by item ID.
 * @param string $FieldName Name of field to check.  (OPTIONAL, defaults
 *       to the keyword pseudo-field)
 * @return array Filtered scores indexed by item ID.
 */
private function FilterOnExcludedWords($Words, $Scores, $FieldName = "XXXKeywordXXX")
{
    $DB = $this->DB;

    # grab field ID
    $FieldId = $this->GetFieldId($FieldName);

    # for each word
    foreach ($Words as $Word => $Flags)
    {
        # only words flagged as excluded are of interest
        if (!($Flags & WORD_EXCLUDED))
        {
            continue;
        }

        # look up record ID for word (words not in DB cannot match any item)
        $WordId = $this->GetWordId($Word);
        if ($WordId === NULL)
        {
            continue;
        }

        # look up items that contain word
        # (built by concatenation because "${...}" string interpolation
        #       is deprecated as of PHP 8.2)
        $DB->Query("SELECT ItemId FROM SearchWordCounts "
                ."WHERE WordId=".$WordId." AND FieldId=".$FieldId);

        # for each item found
        while ($Record = $DB->FetchRow())
        {
            # if item record is in score list
            $ItemId = $Record["ItemId"];
            if (isset($Scores[$ItemId]))
            {
                # remove item record from score list
                $this->DMsg(3, "Filtering out item ".$ItemId
                        ." because it contained word \"".$Word."\"");
                unset($Scores[$ItemId]);
            }
        }
    }

    # return filtered score list to caller
    return $Scores;
}
1521 
/**
 * Remove from a score list every item that did not match all of the
 * required terms, i.e. whose entry in $this->RequiredTermCounts is missing
 * or lower than $this->RequiredTermCount.  The list is returned unchanged
 * when there are no required terms.
 * @param array $Scores Scores indexed by item ID.
 * @return array Filtered scores indexed by item ID.
 */
private function FilterOnRequiredWords($Scores)
{
    # nothing to filter on if there were no required words
    $NeededCount = $this->RequiredTermCount;
    if (!($NeededCount > 0))
    {
        return $Scores;
    }

    foreach (array_keys($Scores) as $ItemId)
    {
        # look up how many required terms this item matched (if any)
        $MatchedCount = isset($this->RequiredTermCounts[$ItemId])
                ? $this->RequiredTermCounts[$ItemId] : NULL;

        # keep item if it meets the required word count
        if (($MatchedCount !== NULL) && !($MatchedCount < $NeededCount))
        {
            continue;
        }

        # otherwise filter out item
        $Shortfall = ($MatchedCount !== NULL)
                ? " (only had ".$MatchedCount
                : " (had none";
        $this->DMsg(4, "Filtering out item ".$ItemId
                ." because it didn't have required word count of "
                .$NeededCount.$Shortfall.")");
        unset($Scores[$ItemId]);
    }

    # return filtered list to caller
    return $Scores;
}
1551 
1552  # count, sort, and trim search result scores list
/**
 * Filter, count, sort, and trim a search result score list.  The number
 * of results left after the filter callbacks have run is saved in
 * $this->NumberOfResultsAvailable before the list is trimmed.
 * @param array $Scores Scores indexed by item ID.
 * @param int $StartingResult Zero-based offset of first result to return.
 * @param int $NumberOfResults Maximum number of results to return.
 * @param mixed $SortByField Field to sort by, or NULL to sort by score.
 * @param bool $SortDescending TRUE to sort in descending order.
 * @return array Requested slice of scores, indexed by item ID.
 */
private function CleanScores($Scores, $StartingResult, $NumberOfResults,
        $SortByField, $SortDescending)
{
    # perform any requested filtering
    $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
    $Scores = $this->FilterOnSuppliedFunctions($Scores);

    # save total number of results available
    $this->NumberOfResultsAvailable = count($Scores);

    # if no sorting field specified
    if ($SortByField === NULL)
    {
        # sort result list by score
        if ($SortDescending)
        {
            arsort($Scores, SORT_NUMERIC);
        }
        else
        {
            asort($Scores, SORT_NUMERIC);
        }
    }
    else
    {
        # get list of item IDs in sorted order
        $SortedIds = $this->GetItemIdsSortedByField(
                $SortByField, $SortDescending);

        # if we have sorted item IDs
        if (count($SortedIds) && count($Scores))
        {
            # strip sorted ID list down to those that appear in search results
            $SortedIds = array_intersect($SortedIds, array_keys($Scores));

            # rebuild score list in sorted order (list is started empty so
            #       that the result is still an array when none of the
            #       sorted IDs appear in the search results)
            $NewScores = array();
            foreach ($SortedIds as $Id)
            {
                $NewScores[$Id] = $Scores[$Id];
            }
            $Scores = $NewScores;
        }
        else
        {
            # fall back to sorting result list by score
            # NOTE(review): this fallback always sorts descending,
            #       regardless of $SortDescending -- confirm that is intended
            arsort($Scores, SORT_NUMERIC);
        }
    }

    # trim result list to match range requested by caller
    #       (fourth argument preserves the item ID keys)
    $TrimmedScores = array_slice(
            $Scores, $StartingResult, $NumberOfResults, TRUE);

    # return cleaned search result scores list to caller
    return $TrimmedScores;
}
1607 
/**
 * Run every result through the filter callbacks listed in
 * $this->FilterFuncs (if any have been set).  A result is discarded as
 * soon as one callback returns a true value for its item ID;  remaining
 * callbacks are not called for that item.
 * @param array $Scores Scores indexed by item ID.
 * @return array Filtered scores indexed by item ID.
 */
protected function FilterOnSuppliedFunctions($Scores)
{
    # pass list through untouched if no filter functions have been set
    if (!isset($this->FilterFuncs))
    {
        return $Scores;
    }

    # check each result against each filter function
    foreach (array_keys($Scores) as $ItemId)
    {
        foreach ($this->FilterFuncs as $FuncName)
        {
            # filter functions return TRUE for items that should be dropped
            if (!call_user_func($FuncName, $ItemId))
            {
                continue;
            }

            # discard result
            $this->DMsg(2, "Filter callback <i>".$FuncName
                    ."</i> rejected item ".$ItemId);
            unset($Scores[$ItemId]);

            # no need to consult the remaining filter functions
            break;
        }
    }

    # return filtered list to caller
    return $Scores;
}
1637 
/**
 * Find comparison searches among the supplied search strings, run them,
 * and merge the matching items into the scores.  A search string for a
 * non-keyword field is treated as a comparison if it begins with one of
 * the operators >=, <=, !=, >, <, or = followed by at least one more
 * character, or if the field has a known type other than text (in which
 * case the operator defaults to "=").  With AND logic the comparison
 * results restrict the existing scores (or seed them, if there were no
 * prior results and no inclusive terms);  otherwise each matching item has
 * 1 added to its score.
 * @param array $SearchStrings Search string(s) indexed by field name;
 *       each entry may be a single string or an array of strings.
 * @param array $Scores Existing scores indexed by item ID.
 * @return array Updated scores indexed by item ID.
 */
private function SearchForComparisonMatches($SearchStrings, $Scores)
{
    # comparisons are collected in three parallel lists that share an index
    $FieldNames = array();
    $Operators = array();
    $Values = array();
    $Index = 0;

    # for each field
    foreach ($SearchStrings as $SearchFieldName => $SearchStringArray)
    {
        # the keyword pseudo-field is not eligible for comparisons
        if ($SearchFieldName == "XXXKeywordXXX")
        {
            continue;
        }

        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # if search string does not look like comparison search
            $FoundOperator = preg_match("/^[><!]=./", $SearchString)
                    || preg_match("/^[><=]./", $SearchString);
            $IsNonTextField =
                    isset($this->FieldInfo[$SearchFieldName]["FieldType"])
                    && ($this->FieldInfo[$SearchFieldName]["FieldType"]
                            != self::FIELDTYPE_TEXT);
            if (!$FoundOperator && !$IsNonTextField)
            {
                continue;
            }

            # determine value (search string minus any leading operator)
            $Patterns = array("/^[><!]=/", "/^[><=]/");
            $Replacements = array("", "");
            $Value = trim(preg_replace($Patterns, $Replacements, $SearchString));

            # determine operator
            $Operator = NULL;
            if (!$FoundOperator)
            {
                $Operator = "=";
            }
            else
            {
                # (substr() is used rather than string offsets because
                #       the "{}" offset syntax was removed in PHP 8.0 and
                #       the trimmed term may be only one character long)
                $Term = trim($SearchString);
                $FirstChar = substr($Term, 0, 1);
                $FirstTwoChars = substr($Term, 0, 2);
                if (in_array($FirstTwoChars, array(">=", "<=", "!="), TRUE))
                {
                    $Operator = $FirstTwoChars;
                }
                elseif (in_array($FirstChar, array(">", "<", "="), TRUE))
                {
                    $Operator = $FirstChar;
                }
            }

            # if operator was found
            if ($Operator !== NULL)
            {
                # save operator, value, and field name
                $Operators[$Index] = $Operator;
                $Values[$Index] = $Value;
                $FieldNames[$Index] = $SearchFieldName;
                $this->DMsg(3, "Added comparison (field = <i>"
                        .$FieldNames[$Index]."</i> op = <i>"
                        .$Operators[$Index]."</i> val = <i>"
                        .$Values[$Index]."</i>)");

                # move to next comparison array entry
                $Index++;
            }
        }
    }

    # if comparisons found
    if (count($Operators) > 0)
    {
        # make sure we have a score list to work with
        if ($Scores === NULL) { $Scores = array(); }

        # perform comparisons on fields and gather results
        $Results = $this->SearchFieldsForComparisonMatches(
                $FieldNames, $Operators, $Values);

        # if search logic is set to AND
        if ($this->DefaultSearchLogic == self::LOGIC_AND)
        {
            # if results were found
            if (count($Results))
            {
                # if there were no prior results and no terms for keyword search
                if ((count($Scores) == 0) && ($this->InclusiveTermCount == 0))
                {
                    # add all results to scores
                    foreach ($Results as $ItemId)
                    {
                        $Scores[$ItemId] = 1;
                    }
                }
                else
                {
                    # remove anything from scores that is not part of results
                    foreach ($Scores as $ItemId => $Score)
                    {
                        if (!in_array($ItemId, $Results))
                        {
                            unset($Scores[$ItemId]);
                        }
                    }
                }
            }
            else
            {
                # nothing matched the comparisons, so nothing can match overall
                $Scores = array();
            }
        }
        else
        {
            # add result items to scores
            foreach ($Results as $ItemId)
            {
                if (isset($Scores[$ItemId]))
                {
                    $Scores[$ItemId] += 1;
                }
                else
                {
                    $Scores[$ItemId] = 1;
                }
            }
        }
    }

    # return results to caller
    return $Scores;
}
1768 
/**
 * Run every search string through ExtractDebugLevel(), which sets the
 * debug level from (and strips out) any debug level indicator it finds.
 * @param mixed $SearchStrings A single search string, or an array (indexed
 *       by field name) whose entries are each a search string or an array
 *       of search strings.
 * @return mixed Search string(s) in the same structure, as returned by
 *       ExtractDebugLevel().
 */
private function SetDebugLevel($SearchStrings)
{
    # a lone search string can be handled directly
    if (!is_array($SearchStrings))
    {
        return $this->ExtractDebugLevel($SearchStrings);
    }

    # otherwise rebuild the array with each search string processed
    $Processed = array();
    foreach ($SearchStrings as $FieldName => $FieldStrings)
    {
        if (is_array($FieldStrings))
        {
            # field has a list of search strings
            $Processed[$FieldName] = array();
            foreach ($FieldStrings as $Index => $SearchString)
            {
                $Processed[$FieldName][$Index] =
                        $this->ExtractDebugLevel($SearchString);
            }
        }
        else
        {
            # field has a single search string
            $Processed[$FieldName] = $this->ExtractDebugLevel($FieldStrings);
        }
    }

    # return new search info to caller
    return $Processed;
}
1803 
/**
 * Look for a debug level indicator ("DBUGLVL=" followed by a one- or
 * two-digit level with no leading zero) at the start of a search string,
 * optionally preceded by whitespace.  If one is found, the debug level for
 * this object is set and the indicator is removed from the string.
 * Strings without a valid leading indicator are returned untouched and
 * leave the debug level unchanged.
 * @param string $SearchString Search string to examine.
 * @return string Search string with any debug level indicator removed.
 */
private function ExtractDebugLevel($SearchString)
{
    # if search string starts with a debug level indicator
    # (the level is captured directly, rather than derived by rewriting
    #       the whole string, so that text that merely contains
    #       "DBUGLVL=" somewhere can never be mistaken for a level)
    if (preg_match("/^\\s*DBUGLVL=([1-9][0-9]?)/", $SearchString, $Matches))
    {
        # set debug level
        $Level = intval($Matches[1]);
        $this->DebugLevel = $Level;
        $this->DMsg(0, "Setting debug level to ".$Level);

        # remove indicator from search string
        $SearchString = preg_replace(
                "/DBUGLVL=".$Level."/", "", $SearchString);
    }

    # return (possibly) modified search string to caller
    return $SearchString;
}
1822 
1823  # load and return search result scores array containing all possible records
/**
 * Build a score list that contains every item in the item table, each
 * with a score of 1.
 * @return array Scores indexed by item ID.
 */
private function LoadScoresForAllRecords()
{
    # ask the database for the ID of every item
    $IdFieldName = $this->ItemIdFieldName;
    $this->DB->Query("SELECT ".$IdFieldName
            ." FROM ".$this->ItemTableName);

    # give each item found a score of 1
    $AllScores = array();
    while ($Row = $this->DB->FetchRow())
    {
        $AllScores[$Row[$IdFieldName]] = 1;
    }

    # return array with all scores to caller
    return $AllScores;
}
1841 
1842 
1843  # ---- private functions used in building search database
1844 
/**
 * Add to the stored count for a word (and, if stemming is enabled, for
 * the stem of the word) for the specified item and field.  The stem is
 * recorded with half the weight of the word (rounded up).
 * NOTE(review): $this->WordCountAdded is indexed by word ID and field ID
 * only, not by item ID, so presumably it is cleared elsewhere before each
 * new item is processed -- confirm.
 * @param string $Word Word to record.
 * @param int $ItemId ID of item that contains the word.
 * @param int $FieldId ID of field that contains the word.
 * @param int $Weight Amount to add to the count.  (OPTIONAL, defaults to 1)
 */
private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
{
    # start list of IDs to update with ID for word
    $IdsToUpdate = array($this->GetWordId($Word, TRUE));

    # add ID for stem of word if stemming is enabled
    if ($this->StemmingEnabled)
    {
        $Stem = PorterStemmer::Stem($Word, TRUE);
        $IdsToUpdate[] = $this->GetStemId($Stem, TRUE);
    }

    # for word and stem of word
    foreach ($IdsToUpdate as $WordId)
    {
        if (isset($this->WordCountAdded[$WordId][$FieldId]))
        {
            # count already in database, so add to it
            $Query = "UPDATE SearchWordCounts SET Count=Count+".$Weight
                    ." WHERE WordId=".$WordId
                    ." AND ItemId=".$ItemId
                    ." AND FieldId=".$FieldId;
            $this->DB->Query($Query);
        }
        else
        {
            # count not yet in database, so create it
            $Query = "INSERT INTO SearchWordCounts"
                    ." (WordId, ItemId, FieldId, Count) VALUES"
                    ." (".$WordId.", ".$ItemId.", ".$FieldId.", ".$Weight.")";
            $this->DB->Query($Query);

            # remember that we added count for this word
            $this->WordCountAdded[$WordId][$FieldId] = TRUE;
        }

        # decrease weight for stem
        $Weight = ceil($Weight / 2);
    }
}
1892 
/**
 * Retrieve the content of a field for an item.  This base version only
 * terminates the script with an error message (presumably child classes
 * are expected to override it -- confirm).
 * @param int $ItemId ID of item.
 * @param string $FieldName Name of field.
 */
protected function GetFieldContent($ItemId, $FieldName)
{
    # error out
    $ErrorMessage = "<br>SE - ERROR: GetFieldContent() not implemented<br>\n";
    exit($ErrorMessage);
}
1898 
/**
 * Record word counts in the search database for a piece of text from an
 * item.  Each word is counted against the named field with the default
 * weight and, if requested, against the keyword pseudo-field with the
 * supplied weight.
 * @param int $ItemId ID of item that the text belongs to.
 * @param string $FieldName Name of field that the text came from.
 * @param int $Weight Weight to use for keyword pseudo-field counts.
 * @param string $Text Text to record.
 * @param bool $IncludeInKeyword TRUE to also record the words against the
 *       keyword pseudo-field.
 */
private function RecordSearchInfoForText(
        $ItemId, $FieldName, $Weight, $Text, $IncludeInKeyword)
{
    # break text down into normalized words
    $ParsedWords = $this->ParseSearchStringForWords($Text, TRUE);

    # bail out if there was no text left after parsing
    if (!(count($ParsedWords) > 0))
    {
        return;
    }

    # get ID for field, and for keyword field if we will be needing it
    $FieldId = $this->GetFieldId($FieldName);
    if ($IncludeInKeyword)
    {
        $KeywordFieldId = $this->GetFieldId("XXXKeywordXXX");
    }

    # the words are the keys of the parsed list
    foreach (array_keys($ParsedWords) as $Word)
    {
        # update count for word in field
        $this->UpdateWordCount($Word, $ItemId, $FieldId);

        # update weighted count for word in keyword field if requested
        if ($IncludeInKeyword)
        {
            $this->UpdateWordCount(
                    $Word, $ItemId, $KeywordFieldId, $Weight);
        }
    }
}
1934 
1935  # print debug message if level set high enough
/**
 * Print a debug message, prefixed with "SE: ", if the current debug level
 * is greater than the level given for the message.
 * @param int $Level Message is printed only when the debug level exceeds
 *       this value.
 * @param string $Msg Message text.
 */
protected function DMsg($Level, $Msg)
{
    # stay quiet unless debug level is set high enough
    if (!($this->DebugLevel > $Level))
    {
        return;
    }

    echo "SE: ".$Msg."<br>\n";
}
1943 
1944  # ---- BACKWARD COMPATIBILITY --------------------------------------------
1945 
1946  # possible types of logical operators
1947  const SEARCHLOGIC_AND = 1;
1948  const SEARCHLOGIC_OR = 2;
1949 }
1950 
1951 ?>