4 # FILE: SearchEngine.php
6 # Open Source Metadata Archive Search Engine (OSMASE)
7 # Copyright 2002-2011 Edward Almasy and Internet Scout
8 # http://scout.wisc.edu
13 # ---- PUBLIC INTERFACE --------------------------------------------------
15 # possible types of logical operators
19 # flags used for indicating field types
28 # save database object for our use
31 # save item access parameters
35 # define flags used for indicating word states
# register the word-state bit flags, leaving any earlier definition untouched
# (values are distinct powers of two so they can be combined with | and
#  tested with &)
defined("WORD_PRESENT") || define("WORD_PRESENT", 1);
defined("WORD_EXCLUDED") || define("WORD_EXCLUDED", 2);
defined("WORD_REQUIRED") || define("WORD_REQUIRED", 4);
40 # set default debug state
44 # add field to be searched
46 $FieldName, $DBFieldName, $FieldType, $Weight, $UsedInKeywordSearch)
49 $this->FieldInfo[$FieldName][
"DBFieldName"] = $DBFieldName;
50 $this->FieldInfo[$FieldName][
"FieldType"] = $FieldType;
51 $this->FieldInfo[$FieldName][
"Weight"] = $Weight;
52 $this->FieldInfo[$FieldName][
"InKeywordSearch"] = $UsedInKeywordSearch;
55 # retrieve info about tables and fields (useful for child objects)
59 {
return $this->FieldInfo[$FieldName][
"DBFieldName"]; }
61 {
return $this->FieldInfo[$FieldName][
"FieldType"]; }
63 {
return $this->FieldInfo[$FieldName][
"Weight"]; }
65 {
return $this->FieldInfo[$FieldName][
"InKeywordSearch"]; }
74 # ---- search functions
76 # perform keyword search
77 function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10,
78 $SortByField = NULL, $SortDescending = TRUE)
80 $SearchString = $this->SetDebugLevel($SearchString);
81 $this->
DMsg(0,
"In Search() with search string \"".$SearchString.
"\"");
83 # save start time to use in calculating search time
84 $StartTime = microtime(TRUE);
87 $this->InclusiveTermCount = 0;
88 $this->RequiredTermCount = 0;
89 $this->ExcludedTermCount = 0;
91 # parse search string into terms
92 $Words = $this->ParseSearchStringForWords($SearchString);
93 $this->
DMsg(1,
"Found ".count($Words).
" words");
95 # parse search string for phrases
96 $Phrases = $this->ParseSearchStringForPhrases($SearchString);
97 $this->
DMsg(1,
"Found ".count($Phrases).
" phrases");
99 # if only excluded terms specified
100 if ($this->ExcludedTermCount && !$this->InclusiveTermCount)
103 $this->
DMsg(1,
"Loading all records");
104 $Scores = $this->LoadScoresForAllRecords();
109 $Scores = $this->SearchForWords($Words);
110 $this->
DMsg(1,
"Found ".count($Scores).
" results after word search");
111 $Scores = $this->SearchForPhrases($Phrases, $Scores);
112 $this->
DMsg(1,
"Found ".count($Scores).
" results after phrase search");
115 # if search results found
116 if (count($Scores) > 0)
118 # handle any excluded words
119 $Scores = $this->FilterOnExcludedWords($Words, $Scores);
121 # strip off any results that don't contain required words
122 $Scores = $this->FilterOnRequiredWords($Scores);
125 # count, sort, and trim search result scores list
126 $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
127 $SortByField, $SortDescending);
130 $this->LastSearchTime = microtime(TRUE) - $StartTime;
132 # return list of items to caller
133 $this->
DMsg(0,
"Ended up with ".$this->NumberOfResultsAvailable.
" results");
137 # perform search across multiple fields and return trimmed results to caller
138 function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
139 $SortByField = NULL, $SortDescending = TRUE)
141 $SearchStrings = $this->SetDebugLevel($SearchStrings);
142 $this->
DMsg(0,
"In FieldedSearch() with "
143 .count($SearchStrings).
" search strings");
145 # save start time to use in calculating search time
146 $StartTime = microtime(TRUE);
149 $Scores = $this->SearchAcrossFields($SearchStrings);
150 $Scores = ($Scores === NULL) ? array() : $Scores;
152 # count, sort, and trim search result scores list
153 $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
154 $SortByField, $SortDescending);
157 $this->LastSearchTime = microtime(TRUE) - $StartTime;
159 # return list of items to caller
160 $this->
DMsg(0,
"Ended up with ".$this->NumberOfResultsAvailable.
" results");
164 # perform search with logical groups of fielded searches
165 function GroupedSearch($SearchGroups, $StartingResult = 0, $NumberOfResults = 10,
166 $SortByField = NULL, $SortDescending = TRUE)
168 foreach ($SearchGroups as $Index => $Groups)
170 if (isset($SearchGroups[$Index][
"SearchStrings"]))
172 $SearchGroups[$Index][
"SearchStrings"] =
173 $this->SetDebugLevel($SearchGroups[$Index][
"SearchStrings"]);
176 $this->
DMsg(0,
"In GroupedSearch() with "
177 .count($SearchGroups).
" search groups");
179 # save start time to use in calculating search time
180 $StartTime = microtime(TRUE);
182 # start with no results
185 # save AND/OR search setting
188 # for each search group
190 foreach ($SearchGroups as $Group)
192 $this->
DMsg(0,
"----- GROUP ---------------------------");
194 # if group has AND/OR setting specified
195 if (isset($Group[
"Logic"]))
197 # use specified AND/OR setting
202 # use saved AND/OR setting
205 $this->
DMsg(2,
"Logic is "
208 # if we have search strings for this group
209 if (isset($Group[
"SearchStrings"]))
212 $GroupScores = $this->SearchAcrossFields($Group[
"SearchStrings"]);
214 # if search was conducted
215 if ($GroupScores !== NULL)
217 # if saved AND/OR setting is OR or this is first search
218 if (($SavedSearchLogic == self::LOGIC_OR) || $FirstSearch)
220 # add search results to result list
221 foreach ($GroupScores as $ItemId => $Score)
223 if (isset($Scores[$ItemId]))
225 $Scores[$ItemId] += $Score;
229 $Scores[$ItemId] = $Score;
233 # (reset flag indicating first search)
234 $FirstSearch = FALSE;
238 # AND search results with previous results
239 $OldScores = $Scores;
241 foreach ($GroupScores as $ItemId => $Score)
243 if (isset($OldScores[$ItemId]))
245 $Scores[$ItemId] = $OldScores[$ItemId] + $Score;
253 # restore AND/OR search setting
256 # count, sort, and trim search result scores list
257 $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
258 $SortByField, $SortDescending);
261 $this->LastSearchTime = microtime(TRUE) - $StartTime;
263 # return search results to caller
264 $this->
DMsg(0,
"Ended up with ".$this->NumberOfResultsAvailable.
" results");
268 # add function that will be called to filter search results
271 # save filter function name
272 $this->FilterFuncs[] = $FunctionName;
275 # get or set default search logic (AND or OR)
278 if ($NewSetting != NULL)
304 return $this->SearchTermList;
312 # report total weight for all fields involved in search
316 $IncludedKeywordSearch = FALSE;
317 foreach ($SearchStrings as $FieldName => $SearchStringArray)
319 if ($FieldName ==
"XXXKeywordXXX")
321 $IncludedKeywordSearch = TRUE;
325 if (array_key_exists($FieldName, $this->FieldInfo))
327 $Weight += $this->FieldInfo[$FieldName][
"Weight"];
331 if ($IncludedKeywordSearch)
333 foreach ($this->FieldInfo as $FieldName => $Info)
335 if ($Info[
"InKeywordSearch"])
337 $Weight += $Info[
"Weight"];
345 # ---- search database update functions
347 # update search DB for the specified item
350 # bail out if item ID is negative (indicating a temporary record)
351 if ($ItemId < 0) {
return; }
353 # clear word count added flags for this item
354 unset($this->WordCountAdded);
356 # delete any existing info for this item
357 $this->DB->Query(
"DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);
359 # for each metadata field
360 foreach ($this->FieldInfo as $FieldName => $Info)
362 # if search weight for field is positive
363 if ($Info[
"Weight"] > 0)
365 # retrieve text for field
371 # for each text string in array
372 foreach ($Text as $String)
374 # record search info for text
375 $this->RecordSearchInfoForText($ItemId, $FieldName,
376 $Info[
"Weight"], $String,
377 $Info[
"InKeywordSearch"]);
382 # record search info for text
383 $this->RecordSearchInfoForText($ItemId, $FieldName,
384 $Info[
"Weight"], $Text,
385 $Info[
"InKeywordSearch"]);
391 # update search DB for the specified range of items
394 # retrieve IDs for specified number of items starting at specified ID
395 $this->DB->Query(
"SELECT ".$this->
ItemIdFieldName.
" FROM ".$this->ItemTableName
396 .
" WHERE ".$this->ItemIdFieldName.
" >= ".$StartingItemId
397 .
" ORDER BY ".$this->ItemIdFieldName.
" LIMIT ".$NumberOfItems);
400 # for each retrieved item ID
401 foreach ($ItemIds as $ItemId)
403 # update search info for item
407 # return ID of last item updated to caller
411 # drop all data pertaining to item from search DB
414 # drop all entries pertaining to item from word count table
415 $this->DB->Query(
"DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);
418 # drop all data pertaining to field from search DB
# retrieve our ID for field
$FieldId = $this->DB->Query("SELECT FieldId FROM SearchFields "
        ."WHERE FieldName = '".addslashes($FieldName)."'", "FieldId");

# only issue the deletes when the lookup produced an ID
# (NOTE(review): assumes Query() with a field name yields NULL when no row
#  matches, as the "=== NULL" checks on similar lookups in this file
#  suggest -- confirm against the Database class)
if ($FieldId !== NULL)
{
    # drop all entries pertaining to field from word counts table
    # (plain single quotes here: inside a double-quoted PHP string \' is
    #  not an escape sequence, so the backslash would be sent to the
    #  database as part of the SQL text)
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = '".$FieldId."'");

    # drop field from our fields table
    $this->DB->Query("DELETE FROM SearchFields WHERE FieldId = '".$FieldId."'");
}
432 # return total number of terms indexed by search engine
435 return $this->DB->Query(
"SELECT COUNT(*) AS TermCount"
436 .
" FROM SearchWords",
"TermCount");
439 # return total number of items indexed by search engine
442 return $this->DB->Query(
"SELECT COUNT(DISTINCT ItemId) AS ItemCount"
443 .
" FROM SearchWordCounts",
"ItemCount");
454 # assume no synonyms will be added
458 $WordId = $this->GetWordId($Word, TRUE);
460 # for each synonym passed in
461 foreach ($Synonyms as $Synonym)
464 $SynonymId = $this->GetWordId($Synonym, TRUE);
466 # if synonym is not already in database
467 $this->DB->Query(
"SELECT * FROM SearchWordSynonyms"
468 .
" WHERE (WordIdA = ".$WordId
469 .
" AND WordIdB = ".$SynonymId.
")"
470 .
" OR (WordIdB = ".$WordId
471 .
" AND WordIdA = ".$SynonymId.
")");
472 if ($this->DB->NumRowsSelected() == 0)
474 # add synonym entry to database
475 $this->DB->Query(
"INSERT INTO SearchWordSynonyms"
476 .
" (WordIdA, WordIdB)"
477 .
" VALUES (".$WordId.
", ".$SynonymId.
")");
482 # report to caller number of new synonyms added
490 $WordId = $this->GetWordId($Word);
493 if ($WordId !== NULL)
495 # if no specific synonyms provided
496 if ($Synonyms === NULL)
498 # remove all synonyms for word
499 $this->DB->Query(
"DELETE FROM SearchWordSynonyms"
500 .
" WHERE WordIdA = '".$WordId.
"'"
501 .
" OR WordIdB = '".$WordId.
"'");
505 # for each specified synonym
506 foreach ($Synonyms as $Synonym)
508 # look up ID for synonym
509 $SynonymId = $this->GetWordId($Synonym);
511 # if synonym ID was found
512 if ($SynonymId !== NULL)
514 # delete synonym entry
515 $this->DB->Query(
"DELETE FROM SearchWordSynonyms"
516 .
" WHERE (WordIdA = '".$WordId.
"'"
517 .
" AND WordIdB = '".$SynonymId.
"')"
518 .
" OR (WordIdB = '".$WordId.
"'"
519 .
" AND WordIdA = '".$SynonymId.
"')");
526 # remove all synonyms
529 $this->DB->Query(
"DELETE FROM SearchWordSynonyms");
532 # get synonyms for word (returns array of synonyms)
535 # assume no synonyms will be found
538 # look up ID for word
539 $WordId = $this->GetWordId($Word);
# if word ID was found
if ($WordId !== NULL)
{
    # look up IDs of all synonyms for this word
    $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
            ." WHERE WordIdA = ".$WordId
            ." OR WordIdB = ".$WordId);

    # each row pairs this word with one synonym, in either column order,
    # so keep whichever side is not the word itself
    # (FetchRow has to be invoked as a method; without the parentheses it
    #  is a property read rather than a call and no row is fetched)
    $SynonymIds = array();
    while ($Record = $this->DB->FetchRow())
    {
        $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                ? $Record["WordIdB"] : $Record["WordIdA"];
    }

    # for each synonym ID
    foreach ($SynonymIds as $SynonymId)
    {
        # look up synonym word and add to synonym list
        $Synonyms[] = $this->GetWord($SynonymId);
    }
}
563 # return synonyms to caller
567 # get all synonyms (returns 2D array w/ words as first index)
570 # assume no synonyms will be found
571 $SynonymList = array();
573 # for each synonym ID pair
575 $OurDB->Query(
"SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
576 while ($Record = $OurDB->FetchRow())
579 $Word = $this->GetWord($Record[
"WordIdA"]);
580 $Synonym = $this->GetWord($Record[
"WordIdB"]);
582 # if we do not already have an entry for the word
583 # or synonym is not listed for this word
584 if (!isset($SynonymList[$Word])
585 || !in_array($Synonym, $SynonymList[$Word]))
587 # add entry for synonym
588 $SynonymList[$Word][] = $Synonym;
591 # if we do not already have an entry for the synonym
592 # or word is not listed for this synonym
593 if (!isset($SynonymList[$Synonym])
594 || !in_array($Word, $SynonymList[$Synonym]))
597 $SynonymList[$Synonym][] = $Word;
602 # (this loop removes reciprocal duplicates)
603 foreach ($SynonymList as $Word => $Synonyms)
605 # for each synonym for that word
606 foreach ($Synonyms as $Synonym)
608 # if synonym has synonyms and word is one of them
609 if (isset($SynonymList[$Synonym])
610 && isset($SynonymList[$Word])
611 && in_array($Word, $SynonymList[$Synonym])
612 && in_array($Synonym, $SynonymList[$Word]))
614 # if word has fewer synonyms than synonym
615 if (count($SynonymList[$Word])
616 < count($SynonymList[$Synonym]))
618 # remove synonym from synonym list for word
619 $SynonymList[$Word] = array_diff(
620 $SynonymList[$Word], array($Synonym));
622 # if no synonyms left for word
623 if (!count($SynonymList[$Word]))
625 # remove empty synonym list for word
626 unset($SynonymList[$Word]);
631 # remove word from synonym list for synonym
632 $SynonymList[$Synonym] = array_diff(
633 $SynonymList[$Synonym], array($Word));
635 # if no synonyms left for synonym
636 if (!count($SynonymList[$Synonym]))
638 # remove empty synonym list for synonym
639 unset($SynonymList[$Synonym]);
646 # sort array alphabetically (just for convenience)
647 foreach ($SynonymList as $Word => $Synonyms)
649 asort($SynonymList[$Word]);
653 # return 2D array of synonyms to caller
657 # set all synonyms (accepts 2D array w/ words as first index)
660 # remove all existing synonyms
663 # for each synonym entry passed in
664 foreach ($SynonymList as $Word => $Synonyms)
666 # add synonyms for word
681 # assume no synonyms will be added
684 # read in contents of file
685 $Lines = file($FileName, FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
687 # if file contained lines
690 # for each line of file
691 foreach ($Lines as $Line)
693 # if line is not a comment
694 if (!preg_match(
"/[\s]*#/", $Line))
696 # split line into words
697 $Words = preg_split(
"/[\s,]+/", $Line);
700 if (count($Words) > 1)
702 # separate out word and synonyms
703 $Word = array_shift($Words);
712 # return count of synonyms added to caller
716 # suggest alternatives
723 # ---- PRIVATE INTERFACE -------------------------------------------------
736 private $WordCountAdded;
739 private $RequiredTermCount;
740 private $RequiredTermCounts;
741 private $InclusiveTermCount;
742 private $ExcludedTermCount;
743 private $SearchTermList;
748 # ---- common private functions (used in both searching and DB build)
750 # normalize and parse search string into list of search terms
751 private function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE)
753 # strip off any surrounding whitespace
754 $Text = trim($SearchString);
756 # set up normalization replacement strings
758 "/'s[^a-z0-9\\-+~]+/i", #
get rid of possessive plurals
759 "/'/", #
get rid of single quotes / apostrophes
760 "/\"[^\"]*\"/", #
get rid of phrases (NOTE: HARD-CODED INDEX BELOW!!!)
"
761 "/\\([^)]*\\)/
", # get rid of groups (NOTE: HARD-CODED INDEX BELOW!!!)
762 "/[^a-z0-9\\-+~]+/i
", # convert non-alphanumerics / non-minus/plus to a space
763 "/([^\\s])-+/i
", # convert minus preceded by anything but whitespace to a space
764 "/([^\\s])\\++/i
", # convert plus preceded by anything but whitespace to a space
765 "/-\\s/i
", # convert minus followed by whitespace to a space
766 "/\\+\\s/i
", # convert plus followed by whitespace to a space
767 "/~\\s/i
", # convert tilde followed by whitespace to a space
768 "/[ ]+/
" # convert multiple spaces to one space
770 $Replacements = array(
784 # if we are supposed to ignore phrases and groups (series of words in quotes or surrounded by parens)
787 # switch phrase removal to double quote removal (HARD-CODED INDEX INTO PATTERN LIST!!)
788 $Patterns[2] = "/\
"/";
790 # switch group removal to paren removal (HARD-CODED INDEX INTO PATTERN LIST!!)
791 $Patterns[3] =
"/[\(\)]+/";
794 # remove punctuation from text and normalize whitespace
795 $Text = preg_replace($Patterns, $Replacements, $Text);
796 $this->
DMsg(2,
"Normalized search string is '".$Text.
"'");
798 # convert text to lower case
799 $Text = strtolower($Text);
801 # strip off any extraneous whitespace
804 # start with an empty array
807 # if we have words left after parsing
808 if (strlen($Text) != 0)
811 foreach (explode(
" ", $Text) as $Word)
813 # grab first character of word
814 $FirstChar = substr($Word, 0, 1);
816 # strip off option characters and set flags appropriately
817 $Flags = WORD_PRESENT;
818 if ($FirstChar ==
"-")
820 $Word = substr($Word, 1);
821 $Flags |= WORD_EXCLUDED;
822 if (!isset($Words[$Word]))
824 $this->ExcludedTermCount++;
829 if ($FirstChar ==
"~")
831 $Word = substr($Word, 1);
834 || ($FirstChar ==
"+"))
836 if ($FirstChar ==
"+")
838 $Word = substr($Word, 1);
840 $Flags |= WORD_REQUIRED;
841 if (!isset($Words[$Word]))
843 $this->RequiredTermCount++;
846 if (!isset($Words[$Word]))
848 $this->InclusiveTermCount++;
849 $this->SearchTermList[] = $Word;
853 # store flags to indicate word found
854 $Words[$Word] = $Flags;
855 $this->
DMsg(3,
"Word identified (".$Word.
")");
859 # return normalized words to caller
# map a field name to its numeric ID in the SearchFields table, creating
# the table entry on first use and remembering the answer in $this->FieldIds
protected function GetFieldId($FieldName)
{
    # answer repeat lookups straight from the cache
    if (isset($this->FieldIds[$FieldName]))
    {
        return $this->FieldIds[$FieldName];
    }

    # escape the name once for use in both queries below
    $EscapedName = addslashes($FieldName);

    # look for an existing entry for this field
    $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".$EscapedName."'");
    $Row = $this->DB->FetchRow();

    if ($Row)
    {
        # field is already known, so use its stored ID
        $Id = $Row["FieldId"];
    }
    else
    {
        # field has not been seen before, so add it and pick up the new ID
        $this->DB->Query("INSERT INTO SearchFields (FieldName) "
                ."VALUES ('".$EscapedName."')");
        $Id = $this->DB->LastInsertId("SearchFields");
    }

    # remember the ID for subsequent calls
    $this->FieldIds[$FieldName] = $Id;

    # return cached ID to caller
    return $this->FieldIds[$FieldName];
}
896 # retrieve ID for specified word (returns NULL if no ID found)
897 private function GetWordId($Word, $AddIfNotFound = FALSE)
901 # if word was in ID cache
902 if (isset($WordIdCache[$Word]))
905 $WordId = $WordIdCache[$Word];
909 # look up ID in database
910 $WordId = $this->DB->Query(
"SELECT WordId"
912 .
" WHERE WordText='".addslashes($Word).
"'",
915 # if ID was not found and caller requested it be added
916 if (($WordId === NULL) && $AddIfNotFound)
918 # add word to database
919 $this->DB->Query(
"INSERT INTO SearchWords (WordText)"
920 .
" VALUES ('".addslashes(strtolower($Word)).
"')");
922 # get ID for newly added word
923 $WordId = $this->DB->LastInsertId(
"SearchWords");
927 $WordIdCache[$Word] = $WordId;
930 # return ID to caller
934 # retrieve ID for specified word stem (returns NULL if no ID found)
935 private function GetStemId($Stem, $AddIfNotFound = FALSE)
939 # if stem was in ID cache
940 if (isset($StemIdCache[$Stem]))
943 $StemId = $StemIdCache[$Stem];
947 # look up ID in database
948 $StemId = $this->DB->Query(
"SELECT WordId"
950 .
" WHERE WordText='".addslashes($Stem).
"'",
953 # if ID was not found and caller requested it be added
954 if (($StemId === NULL) && $AddIfNotFound)
956 # add stem to database
957 $this->DB->Query(
"INSERT INTO SearchStems (WordText)"
958 .
" VALUES ('".addslashes(strtolower($Stem)).
"')");
960 # get ID for newly added stem
961 $StemId = $this->DB->LastInsertId(
"SearchStems");
964 # adjust from DB ID value to stem ID value
965 $StemId += self::STEM_ID_OFFSET;
968 $StemIdCache[$Stem] = $StemId;
971 # return ID to caller
975 # retrieve word for specified word ID (returns FALSE if no word found)
976 private function GetWord($WordId)
980 # if word was in cache
981 if (isset($WordCache[$WordId]))
983 # use word from cache
984 $Word = $WordCache[$WordId];
988 # adjust search location and word ID if word is stem
989 $TableName =
"SearchWords";
990 if ($WordId >= self::STEM_ID_OFFSET)
992 $TableName =
"SearchStems";
993 $WordId -= self::STEM_ID_OFFSET;
996 # look up word in database
997 $Word = $this->DB->Query(
"SELECT WordText"
999 .
" WHERE WordId='".$WordId.
"'",
1002 # save word to cache
1003 $WordCache[$WordId] = $Word;
1006 # return word to caller
1011 # ---- private functions used in searching
1013 # perform search across multiple fields and return raw results to caller
1014 private function SearchAcrossFields($SearchStrings)
1016 # start by assuming no search will be done
1020 $this->InclusiveTermCount = 0;
1021 $this->RequiredTermCount = 0;
1022 $this->ExcludedTermCount = 0;
1025 $NeedComparisonSearch = FALSE;
1026 foreach ($SearchStrings as $FieldName => $SearchStringArray)
1028 # convert search string to array if needed
1029 if (!is_array($SearchStringArray))
1031 $SearchStringArray = array($SearchStringArray);
1034 # for each search string for this field
1035 foreach ($SearchStringArray as $SearchString)
1037 # if field is keyword or field is text and does not look like comparison match
1038 if (($FieldName ==
"XXXKeywordXXX")
1039 || (isset($this->FieldInfo[$FieldName])
1040 && ($this->FieldInfo[$FieldName][
"FieldType"] == self::FIELDTYPE_TEXT)
1041 && !preg_match(
"/^[><!]=./", $SearchString)
1042 && !preg_match(
"/^[><=]./", $SearchString)))
1044 $this->DMsg(0,
"Searching text field \""
1045 .$FieldName.
"\" for string \"$SearchString\"");
1047 # normalize text and split into words
1048 $Words[$FieldName] =
1049 $this->ParseSearchStringForWords($SearchString);
1051 # calculate scores for matching items
1052 if (count($Words[$FieldName]))
1054 $Scores = $this->SearchForWords(
1055 $Words[$FieldName], $FieldName, $Scores);
1056 $this->DMsg(3,
"Have "
1057 .count($Scores).
" results after word search");
1060 # split into phrases
1061 $Phrases[$FieldName] =
1062 $this->ParseSearchStringForPhrases($SearchString);
1064 # handle any phrases
1065 if (count($Phrases[$FieldName]))
1067 $Scores = $this->SearchForPhrases(
1068 $Phrases[$FieldName], $Scores, $FieldName, TRUE, FALSE);
1069 $this->DMsg(3,
"Have "
1070 .count($Scores).
" results after phrase search");
1075 # set flag to indicate possible comparison search candidate found
1076 $NeedComparisonSearch = TRUE;
1081 # perform comparison searches
1082 if ($NeedComparisonSearch)
1084 $Scores = $this->SearchForComparisonMatches($SearchStrings, $Scores);
1085 $this->DMsg(3,
"Have ".count($Scores).
" results after comparison search");
1088 # if no results found and exclusions specified
1089 if (!count($Scores) && $this->ExcludedTermCount)
1092 $Scores = $this->LoadScoresForAllRecords();
1095 # if search results found
1098 # for each search text string
1099 foreach ($SearchStrings as $FieldName => $SearchStringArray)
1101 # convert search string to array if needed
1102 if (!is_array($SearchStringArray))
1104 $SearchStringArray = array($SearchStringArray);
1107 # for each search string for this field
1108 foreach ($SearchStringArray as $SearchString)
1111 if (($FieldName ==
"XXXKeywordXXX")
1112 || (isset($this->FieldInfo[$FieldName])
1113 && ($this->FieldInfo[$FieldName][
"FieldType"]
1114 == self::FIELDTYPE_TEXT)))
1116 # if there are words in search text
1117 if (isset($Words[$FieldName]))
1119 # handle any excluded words
1120 $Scores = $this->FilterOnExcludedWords($Words[$FieldName], $Scores, $FieldName);
1123 # handle any excluded phrases
1124 if (isset($Phrases[$FieldName]))
1126 $Scores = $this->SearchForPhrases(
1127 $Phrases[$FieldName], $Scores, $FieldName, FALSE, TRUE);
1133 # strip off any results that don't contain required words
1134 $Scores = $this->FilterOnRequiredWords($Scores);
1137 # return search result scores to caller
1141 # search for words in specified field
1142 private function SearchForWords(
1143 $Words, $FieldName =
"XXXKeywordXXX", $Scores = NULL)
1147 # start with empty search result scores list if none passed in
1148 if ($Scores == NULL)
1154 $FieldId = $this->GetFieldId($FieldName);
1157 foreach ($Words as $Word => $Flags)
1159 $this->DMsg(2,
"Searching for word '${Word}' in field ".$FieldName);
1161 # if word is not excluded
1162 if (!($Flags & WORD_EXCLUDED))
1164 # look up record ID for word
1165 $this->DMsg(2,
"Looking up word \"".$Word.
"\"");
1166 $WordId = $this->GetWordId($Word);
1169 if ($WordId !== NULL)
1171 # look up counts for word
1172 $DB->Query(
"SELECT ItemId,Count FROM SearchWordCounts "
1173 .
"WHERE WordId = ".$WordId
1174 .
" AND FieldId = ".$FieldId);
1175 $Counts = $DB->FetchColumn(
"Count",
"ItemId");
1177 # if synonym support is enabled
1178 if ($this->SynonymsEnabled)
1180 # look for any synonyms
1181 $DB->Query(
"SELECT WordIdA, WordIdB"
1182 .
" FROM SearchWordSynonyms"
1183 .
" WHERE WordIdA = ".$WordId
1184 .
" OR WordIdB = ".$WordId);
1186 # if synonyms were found
1187 if ($DB->NumRowsSelected())
1189 # retrieve synonym IDs
1190 $SynonymIds = array();
1191 while ($Record = $DB->FetchRow())
1193 $SynonymIds[] = ($Record[
"WordIdA"] == $WordId)
1194 ? $Record[
"WordIdB"]
1195 : $Record[
"WordIdA"];
1199 foreach ($SynonymIds as $SynonymId)
1201 # retrieve counts for synonym
1202 $DB->Query(
"SELECT ItemId,Count"
1203 .
" FROM SearchWordCounts"
1204 .
" WHERE WordId = ".$SynonymId
1205 .
" AND FieldId = ".$FieldId);
1206 $SynonymCounts = $DB->FetchColumn(
"Count",
"ItemId");
1209 foreach ($SynonymCounts as $ItemId => $Count)
1211 # adjust count because it's a synonym
1212 $AdjustedCount = ceil($Count / 2);
1214 # add count to existing counts
1215 if (isset($Counts[$ItemId]))
1217 $Counts[$ItemId] += $AdjustedCount;
1221 $Counts[$ItemId] = $AdjustedCount;
1229 # if stemming is enabled
1230 if ($this->StemmingEnabled)
1234 $this->DMsg(2,
"Looking up stem \"".$Stem.
"\"");
1235 $StemId = $this->GetStemId($Stem);
1237 # if ID found for stem
1238 if ($StemId !== NULL)
1240 # retrieve counts for stem
1241 $DB->Query(
"SELECT ItemId,Count"
1242 .
" FROM SearchWordCounts"
1243 .
" WHERE WordId = ".$StemId
1244 .
" AND FieldId = ".$FieldId);
1245 $StemCounts = $DB->FetchColumn(
"Count",
"ItemId");
1248 foreach ($StemCounts as $ItemId => $Count)
1250 # adjust count because it's a stem
1251 $AdjustedCount = ceil($Count / 2);
1253 # add count to existing counts
1254 if (isset($Counts[$ItemId]))
1256 $Counts[$ItemId] += $AdjustedCount;
1260 $Counts[$ItemId] = $AdjustedCount;
1266 # if counts were found
1270 foreach ($Counts as $ItemId => $Count)
1272 # if word flagged as required
1273 if ($Flags & WORD_REQUIRED)
1275 # increment required word count for record
1276 if (isset($this->RequiredTermCounts[$ItemId]))
1278 $this->RequiredTermCounts[$ItemId]++;
1282 $this->RequiredTermCounts[$ItemId] = 1;
1286 # add to item record score
1287 if (isset($Scores[$ItemId]))
1289 $Scores[$ItemId] += $Count;
1293 $Scores[$ItemId] = $Count;
1300 # return basic scores to caller
1304 # extract phrases (terms surrounded by quotes) from search string
1305 private function ParseSearchStringForPhrases($SearchString)
1307 # split into chunks delimited by double quote marks
1308 $Pieces = explode(
"\"", $SearchString); #
"
1310 # for each pair of chunks
1313 while ($Index < count($Pieces))
1315 # grab phrase from chunk
1316 $Phrase = trim(addslashes($Pieces[$Index - 1]));
1317 $Flags = WORD_PRESENT;
1319 # grab first character of phrase
1320 $FirstChar = substr($Pieces[$Index - 2], -1);
1322 # set flags to reflect any option characters
1323 if ($FirstChar == "-
")
1325 $Flags |= WORD_EXCLUDED;
1326 if (!isset($Phrases[$Phrase]))
1328 $this->ExcludedTermCount++;
1333 if ((($this->DefaultSearchLogic == self::LOGIC_AND) && ($FirstChar != "~
"))
1334 || ($FirstChar == "+
"))
1336 $Flags |= WORD_REQUIRED;
1337 if (!isset($Phrases[$Phrase]))
1339 $this->RequiredTermCount++;
1342 if (!isset($Phrases[$Phrase]))
1344 $this->InclusiveTermCount++;
1345 $this->SearchTermList[] = $Phrase;
1348 $Phrases[$Phrase] = $Flags;
1350 # move to next pair of chunks
1354 # return phrases to caller
1358 # extract groups (terms surrounded by parens) from search string
1359 # (NOTE: NOT YET IMPLEMENTED!!!)
1360 private function ParseSearchStringForGroups($SearchString)
1362 # split into chunks delimited by open paren
1363 $Pieces = explode("(
", $SearchString);
1367 while ($Index < count($Pieces))
1369 # grab group from chunk
1370 $Group = trim(addslashes($Pieces[$Index - 1]));
1373 # move to next pair of chunks
1377 # return phrases to caller
1381 protected function SearchFieldForPhrases($FieldName, $Phrase)
1384 exit("<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\
n");
1387 private function SearchForPhrases($Phrases, $Scores, $FieldName = "XXXKeywordXXX",
1388 $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
1390 # if phrases are found
1391 if (count($Phrases) > 0)
1393 # if this is a keyword search
1394 if ($FieldName ==
"XXXKeywordXXX")
1397 foreach ($this->FieldInfo as $KFieldName => $Info)
1399 # if field is marked to be included in keyword searches
1400 if ($Info[
"InKeywordSearch"])
1402 # call ourself with that field
1403 $Scores = $this->SearchForPhrases($Phrases, $Scores, $KFieldName,
1404 $ProcessNonExcluded, $ProcessExcluded);
1411 foreach ($Phrases as $Phrase => $Flags)
1413 $this->DMsg(2,
"Searching for phrase '".$Phrase
1414 .
"' in field ".$FieldName);
1416 # if phrase flagged as excluded and we are doing excluded phrases
1417 # or phrase flagged as non-excluded and we are doing non-excluded phrases
1418 if (($ProcessExcluded && ($Flags & WORD_EXCLUDED))
1419 || ($ProcessNonExcluded && !($Flags & WORD_EXCLUDED)))
1421 # initialize score list if necessary
1422 if ($Scores === NULL) { $Scores = array(); }
1424 # retrieve list of items that contain phrase
1425 $ItemIds = $this->SearchFieldForPhrases(
1426 $FieldName, $Phrase);
1428 # for each item that contains phrase
1429 foreach ($ItemIds as $ItemId)
1431 # if we are doing excluded phrases and phrase flagged as excluded
1432 if ($ProcessExcluded && ($Flags & WORD_EXCLUDED))
1434 # knock item off of list
1435 unset($Scores[$ItemId]);
1437 elseif ($ProcessNonExcluded)
1439 # calculate phrase value based on number of words and field weight
1440 $PhraseScore = count(preg_split(
"/[\s]+/", $Phrase, -1, PREG_SPLIT_NO_EMPTY))
1441 * $this->FieldInfo[$FieldName][
"Weight"];
1442 $this->DMsg(2,
"Phrase score is ".$PhraseScore);
1444 # bump up item record score
1445 if (isset($Scores[$ItemId]))
1447 $Scores[$ItemId] += $PhraseScore;
1451 $Scores[$ItemId] = $PhraseScore;
1454 # if phrase flagged as required
1455 if ($Flags & WORD_REQUIRED)
1457 # increment required word count for record
1458 if (isset($this->RequiredTermCounts[$ItemId]))
1460 $this->RequiredTermCounts[$ItemId]++;
1464 $this->RequiredTermCounts[$ItemId] = 1;
1474 # return updated scores to caller
1478 private function FilterOnExcludedWords($Words, $Scores, $FieldName =
"XXXKeywordXXX")
1483 $FieldId = $this->GetFieldId($FieldName);
1486 foreach ($Words as $Word => $Flags)
1488 # if word flagged as excluded
1489 if ($Flags & WORD_EXCLUDED)
1491 # look up record ID for word
1492 $WordId = $this->GetWordId($Word);
1495 if ($WordId !== NULL)
1497 # look up counts for word
1498 $DB->Query(
"SELECT ItemId FROM SearchWordCounts "
1499 .
"WHERE WordId=${WordId} AND FieldId=${FieldId}");
1502 while ($Record = $DB->FetchRow())
1504 # if item record is in score list
1505 $ItemId = $Record[
"ItemId"];
1506 if (isset($Scores[$ItemId]))
1508 # remove item record from score list
1509 $this->DMsg(3,
"Filtering out item ".$ItemId
1510 .
" because it contained word \"".$Word.
"\"");
1511 unset($Scores[$ItemId]);
1518 # returned filtered score list to caller
# Remove from the score list every item that did not match all of the
# required search terms.
#   $Scores - score list, keyed by item ID
# Compares the per-item tallies in $this->RequiredTermCounts against the
# overall number of required terms in $this->RequiredTermCount.
1522 private function FilterOnRequiredWords($Scores)
1524 # if there were required words (otherwise nothing is filtered)
1525 if ($this->RequiredTermCount > 0)
# check every item currently in the score list
1528 foreach ($Scores as $ItemId => $Score)
1530 # if item does not meet required word count
# (either no tally was recorded for the item at all, or the tally is
#       lower than the number of required terms)
1531 if (!isset($this->RequiredTermCounts[$ItemId])
1532 || ($this->RequiredTermCounts[$ItemId] < $this->RequiredTermCount))
# report why the item is being dropped (message is only printed when the
#       debug level is above 4) and then drop it
1535 $this->DMsg(4,
"Filtering out item ".$ItemId
1536 .
" because it didn't have required word count of "
1537 .$this->RequiredTermCount
1538 .(isset($this->RequiredTermCounts[$ItemId])
1540 .$this->RequiredTermCounts[$ItemId]
1543 unset($Scores[$ItemId]);
1548 # return filtered list to caller
/**
 * Count, sort, and trim search result scores list.
 *
 * Runs the registered filter callbacks, records the total number of results
 * that survived filtering in $this->NumberOfResultsAvailable, orders the
 * results, and returns only the requested window.
 *
 * @param array $Scores Scores, keyed by item ID.
 * @param int $StartingResult Zero-based offset of first result to return.
 * @param int $NumberOfResults Maximum number of results to return.
 * @param mixed $SortByField Field to sort by, or NULL to sort by score.
 * @param bool $SortDescending TRUE to sort in descending order.
 * @return array Requested slice of the sorted scores, keyed by item ID.
 */
private function CleanScores($Scores, $StartingResult, $NumberOfResults,
        $SortByField, $SortDescending)
{
    # perform any requested filtering
    $this->DMsg(0, "Have ".count($Scores)." results before filter callbacks");
    $Scores = $this->FilterOnSuppliedFunctions($Scores);

    # save total number of results available
    $this->NumberOfResultsAvailable = count($Scores);

    # if no sorting field specified
    if ($SortByField === NULL)
    {
        # sort result list by score
        if ($SortDescending)
        {
            arsort($Scores, SORT_NUMERIC);
        }
        else
        {
            asort($Scores, SORT_NUMERIC);
        }
    }
    else
    {
        # get list of item IDs in sorted order
        $SortedIds = $this->GetItemIdsSortedByField(
                $SortByField, $SortDescending);

        # if we have sorted item IDs
        if (count($SortedIds) && count($Scores))
        {
            # strip sorted ID list down to those that appear in search results
            $SortedIds = array_intersect($SortedIds, array_keys($Scores));

            # rebuild score list in sorted order (results that are missing
            #       from the sorted ID list are dropped) -- the new list must
            #       start out as an array so that an empty intersection
            #       yields an empty result rather than an undefined variable
            $NewScores = array();
            foreach ($SortedIds as $Id)
            {
                $NewScores[$Id] = $Scores[$Id];
            }
            $Scores = $NewScores;
        }
        else
        {
            # sort result list by score
            arsort($Scores, SORT_NUMERIC);
        }
    }

    # trim result list to match range requested by caller
    # (keys are preserved because they are the item IDs)
    $TrimmedScores = array_slice(
            $Scores, $StartingResult, $NumberOfResults, TRUE);

    # return cleaned search result scores list to caller
    return $TrimmedScores;
}
/**
 * Run the registered filter callbacks against each item in a score list,
 * discarding any item for which a callback returns a true value.
 *
 * @param array $Scores Scores, keyed by item ID.
 * @return array Scores with rejected items removed.
 */
protected function FilterOnSuppliedFunctions($Scores)
{
    # nothing to filter with if no filter functions have been set
    if (!isset($this->FilterFuncs))
    {
        return $Scores;
    }

    # check each result against the filter functions
    foreach (array_keys($Scores) as $ItemId)
    {
        foreach ($this->FilterFuncs as $FuncName)
        {
            # move on to next filter if this one does not reject the item
            if (!call_user_func($FuncName, $ItemId))
            {
                continue;
            }

            # discard result
            $this->DMsg(2, "Filter callback <i>".$FuncName
                    ."</i> rejected item ".$ItemId);
            unset($Scores[$ItemId]);

            # remaining filters need not be consulted for a discarded item
            break;
        }
    }

    # return filtered list to caller
    return $Scores;
}
/**
 * Find comparison-style search terms (e.g. ">=5", "!=foo") among the
 * per-field search strings, run them, and merge the matching items into
 * the score list.
 *
 * A search string is treated as a comparison when it begins with one of
 * the operators >=, <=, !=, >, <, = followed by at least one character, or
 * when its field has a known type other than FIELDTYPE_TEXT (in which case
 * the operator defaults to "=").  Strings for the keyword pseudo-field are
 * ignored.
 *
 * With AND search logic, the score list is narrowed to the comparison
 * matches (or seeded with them when there are no prior scores and no
 * inclusive keyword terms).  Otherwise each matching item has 1 added to
 * its score.
 *
 * @param array $SearchStrings Search strings, keyed by field name, where
 *       each value is either a single string or an array of strings.
 * @param array|null $Scores Current scores, keyed by item ID.
 * @return array|null Updated scores (returned untouched when no comparison
 *       terms were found).
 */
private function SearchForComparisonMatches($SearchStrings, $Scores)
{
    $FieldNames = array();
    $Operators = array();
    $Values = array();
    $Index = 0;

    # for each field
    foreach ($SearchStrings as $SearchFieldName => $SearchStringArray)
    {
        # keyword field is not subject to comparison searches
        if ($SearchFieldName == "XXXKeywordXXX") {  continue;  }

        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # skip string unless it looks like a comparison search or is
            #       for a non-text field
            $FoundOperator = preg_match("/^[><!]=./", $SearchString)
                    || preg_match("/^[><=]./", $SearchString);
            $IsNonTextField =
                    isset($this->FieldInfo[$SearchFieldName]["FieldType"])
                    && ($this->FieldInfo[$SearchFieldName]["FieldType"]
                            != self::FIELDTYPE_TEXT);
            if (!$FoundOperator && !$IsNonTextField) {  continue;  }

            # determine value by stripping off any leading operator
            $Patterns = array("/^[><!]=/", "/^[><=]/");
            $Replacements = array("", "");
            $Value = trim(preg_replace(
                    $Patterns, $Replacements, $SearchString));

            # determine operator (two-character operators take precedence)
            # (substr() is used rather than {} string offsets, which are no
            #       longer supported as of PHP 8, and so that a term made up
            #       of a single character cannot be read past its end)
            $Operator = NULL;
            if (!$FoundOperator)
            {
                $Operator = "=";
            }
            else
            {
                $Term = trim($SearchString);
                $FirstTwoChars = substr($Term, 0, 2);
                $FirstChar = substr($Term, 0, 1);
                if (in_array($FirstTwoChars, array(">=", "<=", "!="), TRUE))
                {
                    $Operator = $FirstTwoChars;
                }
                elseif (in_array($FirstChar, array(">", "<", "="), TRUE))
                {
                    $Operator = $FirstChar;
                }
            }

            # if operator was found
            if ($Operator !== NULL)
            {
                # save operator, value, and field name
                $Operators[$Index] = $Operator;
                $Values[$Index] = $Value;
                $FieldNames[$Index] = $SearchFieldName;
                $this->DMsg(3, "Added comparison (field = <i>"
                        .$FieldNames[$Index]."</i> op = <i>"
                        .$Operators[$Index]."</i> val = <i>"
                        .$Values[$Index]."</i>)");

                # move to next comparison array entry
                $Index++;
            }
        }
    }

    # if comparisons found
    if (count($Operators))
    {
        # perform comparisons on fields and gather results
        $Results = $this->SearchFieldsForComparisonMatches(
                $FieldNames, $Operators, $Values);

        # make sure we have a score list to work with
        if ($Scores === NULL) {  $Scores = array();  }

        # if search logic is set to AND
        if ($this->DefaultSearchLogic == self::LOGIC_AND)
        {
            if (!count($Results))
            {
                # nothing satisfied the comparisons, so nothing can match
                $Scores = array();
            }
            elseif ((count($Scores) == 0) && ($this->InclusiveTermCount == 0))
            {
                # there were no prior results and no terms for keyword
                #       search, so add all results to scores
                foreach ($Results as $ItemId)
                {
                    $Scores[$ItemId] = 1;
                }
            }
            else
            {
                # remove anything from scores that is not part of results
                # (keyed lookup avoids rescanning the result list for
                #       every scored item)
                $Scores = array_intersect_key($Scores, array_flip($Results));
            }
        }
        else
        {
            # add result items to scores
            foreach ($Results as $ItemId)
            {
                if (isset($Scores[$ItemId]))
                {
                    $Scores[$ItemId] += 1;
                }
                else
                {
                    $Scores[$ItemId] = 1;
                }
            }
        }
    }

    # return results to caller
    return $Scores;
}
/**
 * Pass every search string in the supplied search info through
 * ExtractDebugLevel() and hand back the (possibly modified) result.
 *
 * @param mixed $SearchStrings Either a single search string, or an array
 *       keyed by field name whose values are search strings or arrays of
 *       search strings.
 * @return mixed Search info in the same shape as it was supplied.
 */
private function SetDebugLevel($SearchStrings)
{
    # a lone search string can be handled directly
    if (!is_array($SearchStrings))
    {
        return $this->ExtractDebugLevel($SearchStrings);
    }

    # otherwise process the entry for each field
    foreach ($SearchStrings as $FieldName => $FieldEntry)
    {
        # an entry may hold several search strings or just one
        $SearchStrings[$FieldName] = is_array($FieldEntry)
                ? array_map(array($this, "ExtractDebugLevel"), $FieldEntry)
                : $this->ExtractDebugLevel($FieldEntry);
    }

    # return new search info to caller
    return $SearchStrings;
}
/**
 * Look for a debug level indicator ("DBUGLVL=" followed by one or two
 * digits in the range 1-9) at the start of a search string.  When one is
 * found, the debug level is set from it and the indicator is removed from
 * the search string.
 *
 * The level is captured with preg_match() so that a string which merely
 * contains "DBUGLVL=" somewhere other than at the start (or with an
 * invalid level) is left alone, rather than the whole search string ending
 * up as the debug level and being spliced into a regular expression.
 *
 * @param string $SearchString Search string to examine.
 * @return string Search string with any debug level indicator removed.
 */
private function ExtractDebugLevel($SearchString)
{
    # if search string starts with debug level indicator
    if (preg_match("/^\\s*DBUGLVL=([1-9]{1,2})/", $SearchString, $Matches))
    {
        # set debug level
        $Level = (int)$Matches[1];
        $this->DebugLevel = $Level;
        $this->DMsg(0, "Setting debug level to ".$Level);

        # remove indicator from search string
        $SearchString = preg_replace(
                "/DBUGLVL=".$Level."/", "", $SearchString);
    }

    # return (possibly) modified search string to caller
    return $SearchString;
}
/**
 * Build a search result scores array that contains every item in the item
 * table, each with a score of 1.
 *
 * @return array Scores, keyed by item ID.
 */
private function LoadScoresForAllRecords()
{
    $IdFieldName = $this->ItemIdFieldName;
    $AllScores = array();

    # pull the ID of every item and give each one a score of 1
    $this->DB->Query("SELECT ".$IdFieldName
            ." FROM ".$this->ItemTableName);
    while ($Row = $this->DB->FetchRow())
    {
        $AllScores[$Row[$IdFieldName]] = 1;
    }

    # hand complete list back to caller
    return $AllScores;
}
1843 # ---- private functions used in building search database
# Add to the stored count for a word (and, when stemming is enabled, for
# its stem) for the specified item and field.
#   $Word - word whose count is to be updated
#   $ItemId - ID of item in which the word appeared
#   $FieldId - ID of field in which the word appeared
#   $Weight - amount to add to the count (defaults to 1)
# NOTE(review): all four values are concatenated directly into the SQL
#       below -- presumably they are always numeric; confirm against callers
1852 private function UpdateWordCount($Word, $ItemId, $FieldId, $Weight = 1)
1854 # retrieve ID for word
# NOTE(review): the TRUE argument presumably asks for the word to be added
#       if it is not already known -- confirm against GetWordId()
1855 $WordIds[] = $this->GetWordId($Word, TRUE);
1857 # if stemming is enabled
1858 if ($this->StemmingEnabled)
1860 # retrieve ID for stem of word
# NOTE(review): $Stem is expected to hold the stemmed form of $Word by
#       this point -- confirm
1862 $WordIds[] = $this->GetStemId($Stem, TRUE);
1865 # for word and stem of word
# (the word is always first in the list, with the stem second if present)
1866 foreach ($WordIds as $WordId)
1868 # if word count already added to database
# NOTE(review): this lookup is keyed by word and field but not by item,
#       so it presumably is reset by the caller for each item -- confirm
1869 if (isset($this->WordCountAdded[$WordId][$FieldId]))
# add weight to the existing count
1872 $this->DB->Query(
"UPDATE SearchWordCounts SET Count=Count+".$Weight
1873 .
" WHERE WordId=".$WordId
1874 .
" AND ItemId=".$ItemId
1875 .
" AND FieldId=".$FieldId);
1879 # add word count to DB
# (new record starts out with the weight as its count)
1880 $this->DB->Query(
"INSERT INTO SearchWordCounts"
1881 .
" (WordId, ItemId, FieldId, Count) VALUES"
1882 .
" (".$WordId.
", ".$ItemId.
", ".$FieldId.
", ".$Weight.
")");
1884 # remember that we added count for this word
1885 $this->WordCountAdded[$WordId][$FieldId] = TRUE;
1888 # decrease weight for stem
# (runs at the end of each pass, so the stem, which is processed second,
#       gets half of the weight, rounded up)
1889 $Weight = ceil($Weight / 2);
/**
 * Placeholder for retrieving the content of a field for an item.  This
 * version does nothing but halt the script with an error message
 * (presumably child classes are expected to supply a working version).
 *
 * @param mixed $ItemId ID of item (unused here).
 * @param string $FieldName Name of field (unused here).
 */
protected function GetFieldContent($ItemId, $FieldName)
{
    # complain and stop, since there is no usable implementation here
    $ErrorMessage = "<br>SE - ERROR: GetFieldContent() not implemented<br>\n";
    exit($ErrorMessage);
}
/**
 * Break text into words and record a count for each word against the
 * specified item and field, and also against the keyword pseudo-field
 * if requested.
 *
 * @param mixed $ItemId ID of item the text belongs to.
 * @param string $FieldName Name of field the text came from.
 * @param int $Weight Weight used for the keyword field counts (the counts
 *       for the field itself use the default weight).
 * @param string $Text Text to be recorded.
 * @param bool $IncludeInKeyword TRUE if the text should also be counted
 *       toward keyword searches.
 */
private function RecordSearchInfoForText(
        $ItemId, $FieldName, $Weight, $Text, $IncludeInKeyword)
{
    # split text up into individual terms
    $Terms = $this->ParseSearchStringForWords($Text, TRUE);

    # nothing to record if no text was left after parsing
    if (count($Terms) == 0)
    {
        return;
    }

    # look up ID for field, and for keyword field if it will be needed
    $FieldId = $this->GetFieldId($FieldName);
    if ($IncludeInKeyword)
    {
        $KeywordFieldId = $this->GetFieldId("XXXKeywordXXX");
    }

    # record each term
    foreach (array_keys($Terms) as $Term)
    {
        # count term toward the field
        $this->UpdateWordCount($Term, $ItemId, $FieldId);

        # count term toward keyword searches as well if requested
        if ($IncludeInKeyword)
        {
            $this->UpdateWordCount(
                    $Term, $ItemId, $KeywordFieldId, $Weight);
        }
    }
}
/**
 * Print a debugging message, provided that the current debug level is
 * higher than the level assigned to the message.
 *
 * @param int $Level Level that the current debug level must exceed for
 *       the message to be printed.
 * @param string $Msg Text of message.
 */
protected function DMsg($Level, $Msg)
{
    # stay quiet unless debug level is set high enough
    if (!($this->DebugLevel > $Level))
    {
        return;
    }

    $Line = "SE: ".$Msg."<br>\n";
    print($Line);
}
1944 # ---- BACKWARD COMPATIBILITY --------------------------------------------
1946 # possible types of logical operators
# (older names for the search logic settings)
# NOTE(review): presumably these are meant to mirror the LOGIC_AND and
#       LOGIC_OR constants used elsewhere in this class -- confirm that
#       the values match
1947 const SEARCHLOGIC_AND = 1;
1948 const SEARCHLOGIC_OR = 2;