00001 <?PHP
00002
00003 #
00004 # FILE: Scout--SearchEngine.php
00005 #
00006 # FUNCTIONS PROVIDED:
00007 # SearchEngine->SearchEngine()
00008 # - constructor
00009 # SearchEngine->Search($SearchString,
00010 # $StartingResult = 0, $NumberOfResults = 10)
00011 # - search for text and return list of matching item IDs
00012 # SearchEngine->FieldedSearch($SearchStrings,
00013 # $StartingResult = 0, $NumberOfResults = 10)
00014 # - search for text in specific fields and return item ID list
00015 # SearchEngine->NumberOfResults()
00016 # - return number of results found in last search
00017 # SearchEngine->SearchTime()
00018 # - return time in seconds that last search took
00019 # SearchEngine->AddResultFilterFunction($FunctionName)
00020 # - add function that will be used to filter search results
00021 # SearchEngine->UpdateForItem($ItemId)
00022 # - update search info for specified item
00023 # SearchEngine->UpdateForItems($StartingItemId, $NumberOfItems)
00024 # - update search info for all items in specified range (returns
00025 # ID of last item updated)
00026 #
00027 # AUTHOR: Edward Almasy
00028 #
00029 # Open Source Metadata Archive Search Engine (OSMASE)
00030 # Copyright 2002-2006 Internet Scout Project
00031 # http://scout.wisc.edu
00032 #
00033
00034 class SearchEngine {
00035
00036 # ---- PUBLIC INTERFACE --------------------------------------------------
00037
00038 # possible types of logical operators
00039 const SEARCHLOGIC_AND = 1;
00040 const SEARCHLOGIC_OR = 2;
00041
00042 # flags used for indicating field types
00043 const SEARCHFIELD_TEXT = 1;
00044 const SEARCHFIELD_NUMERIC = 2;
00045 const SEARCHFIELD_DATE = 3;
00046 const SEARCHFIELD_DATERANGE = 4;
00047
# object constructor (legacy PHP 4 style name)
#   kept as a regular method so that code which calls SearchEngine()
#   explicitly (e.g. a child class constructor) continues to work
#   (defined ahead of __construct() to avoid the "redefining constructor"
#   strict-standards notice that PHP 5 raises for the opposite order)
function SearchEngine(&$DB, $ItemTableName, $ItemIdFieldName)
{
    # (self:: is used so that a constructor in a child class is never re-entered)
    self::__construct($DB, $ItemTableName, $ItemIdFieldName);
}

# object constructor
#   $DB              - database object used for all search queries
#   $ItemTableName   - name of table queried by UpdateForItems() for item IDs
#   $ItemIdFieldName - name of item ID column in that table
# (PHP 8 no longer treats a method named after the class as the constructor,
#   so initialization lives here)
function __construct(&$DB, $ItemTableName, $ItemIdFieldName)
{
    # save database object for our use
    $this->DB = $DB;

    # save item access parameters
    $this->ItemTableName = $ItemTableName;
    $this->ItemIdFieldName = $ItemIdFieldName;

    # methods of this class refer to the search logic, field type, and word
    #   state flags by their bare (global) names, so make sure that all of
    #   those names are defined (values mirror the class constants)
    $GlobalFlags = array(
            "SEARCHLOGIC_AND" => self::SEARCHLOGIC_AND,
            "SEARCHLOGIC_OR" => self::SEARCHLOGIC_OR,
            "SEARCHFIELD_TEXT" => self::SEARCHFIELD_TEXT,
            "SEARCHFIELD_NUMERIC" => self::SEARCHFIELD_NUMERIC,
            "SEARCHFIELD_DATE" => self::SEARCHFIELD_DATE,
            "SEARCHFIELD_DATERANGE" => self::SEARCHFIELD_DATERANGE,
            "WORD_PRESENT" => 1,
            "WORD_EXCLUDED" => 2,
            "WORD_REQUIRED" => 4,
            );
    foreach ($GlobalFlags as $FlagName => $FlagValue)
    {
        if (!defined($FlagName)) {  define($FlagName, $FlagValue);  }
    }

    # initialize internal values
    $this->DefaultSearchLogic = SEARCHLOGIC_AND;

    # set default debug state
    $this->DebugLevel = 0;
}
00069
# register a field with the search engine
#   $FieldName           - name under which the field settings are stored
#   $DBFieldName         - saved as the field's "DBFieldName" setting
#   $FieldType           - saved as the field's "FieldType" setting
#                           (presumably one of the SEARCHFIELD_ flags)
#   $Weight              - saved as the field's "Weight" setting (fields with
#                           a weight of zero or less are skipped by UpdateForItem())
#   $UsedInKeywordSearch - saved as the field's "InKeywordSearch" setting
function AddField($FieldName, $DBFieldName, $FieldType, $Weight, $UsedInKeywordSearch)
{
    $Settings = array(
            "DBFieldName" => $DBFieldName,
            "FieldType" => $FieldType,
            "Weight" => $Weight,
            "InKeywordSearch" => $UsedInKeywordSearch,
            );

    # (settings are stored one at a time so that any other values already
    #   saved for the field are left alone)
    foreach ($Settings as $SettingName => $SettingValue)
    {
        $this->FieldInfo[$FieldName][$SettingName] = $SettingValue;
    }
}
00079
# accessors for table and field settings (useful for child objects)

# return item table name supplied to constructor
function ItemTableName()
{
    return $this->ItemTableName;
}

# return item ID field name supplied to constructor
function ItemIdFieldName()
{
    return $this->ItemIdFieldName;
}

# return "DBFieldName" setting saved by AddField() for specified field
function DBFieldName($FieldName)
{
    return $this->FieldInfo[$FieldName]["DBFieldName"];
}

# return "FieldType" setting saved by AddField() for specified field
function FieldType($FieldName)
{
    return $this->FieldInfo[$FieldName]["FieldType"];
}

# return "Weight" setting saved by AddField() for specified field
function FieldWeight($FieldName)
{
    return $this->FieldInfo[$FieldName]["Weight"];
}

# return "InKeywordSearch" setting saved by AddField() for specified field
function FieldInKeywordSearch($FieldName)
{
    return $this->FieldInfo[$FieldName]["InKeywordSearch"];
}
00087
# set debug level
#   $Setting - new level (search methods print "SE:" progress messages when
#       the level is above zero, with more detail at higher levels)
function DebugLevel($Setting)
{
    $this->DebugLevel = $Setting;
}
00093
00094
00095 # ---- search functions
00096
# perform keyword search
#   $SearchString - text to search for (words may carry the "-", "+", and "~"
#       prefixes that ParseSearchStringForWords() interprets)
#   $StartingResult, $NumberOfResults, $SortByField, $SortDescending
#       - handed to CleanScores(), which counts, sorts, and trims the results
# returns the trimmed result list produced by CleanScores()
function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # let SetDebugLevel() process the search string before it is used
    $SearchString = $this->SetDebugLevel($SearchString);
    if ($this->DebugLevel > 0)
    {
        print("SE: In Search() with search string \"$SearchString\"<br>\n");
    }

    # note when we started so that search time can be reported
    $TimeAtStart = $this->GetMicrotime();

    # start term tallies from zero (ParseSearchStringForWords() adds to them)
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->ExcludedTermCount = 0;

    # split search string into individual terms
    $Words = $this->ParseSearchStringForWords($SearchString);
    if ($this->DebugLevel > 1)
    {
        print("SE: Found ".count($Words)." words<br>\n");
    }

    # pull phrases out of search string
    $Phrases = $this->ParseSearchStringForPhrases($SearchString);
    if ($this->DebugLevel > 1)
    {
        print("SE: Found ".count($Phrases)." phrases<br>\n");
    }

    # exclusions on their own have nothing to match against, so in that
    #   case every record is loaded and the exclusions are applied to them
    $OnlyExclusionsGiven =
            ($this->ExcludedTermCount && !$this->InclusiveTermCount);
    if ($OnlyExclusionsGiven)
    {
        if ($this->DebugLevel > 1) {  print("SE: Loading all records<br>\n");  }
        $Scores = $this->LoadScoresForAllRecords();
    }
    else
    {
        # search for words first and then for phrases
        $Scores = $this->SearchForWords($Words);
        if ($this->DebugLevel > 1)
        {
            print("SE: Found ".count($Scores)." results after word search<br>\n");
        }
        $Scores = $this->SearchForPhrases($Phrases, $Scores);
        if ($this->DebugLevel > 1)
        {
            print("SE: Found ".count($Scores)." results after phrase search<br>\n");
        }
    }

    # filtering is only needed when there is something to filter
    if (count($Scores) > 0)
    {
        # drop results that match excluded words
        $Scores = $this->FilterOnExcludedWords($Words, $Scores);

        # drop results that lack required words
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # save how long the search took
    $this->LastSearchTime = $this->GetMicrotime() - $TimeAtStart;

    # hand results back to caller
    if ($this->DebugLevel > 0)
    {
        print("SE: Ended up with ".$this->NumberOfResultsAvailable." results<br>\n");
    }
    return $Scores;
}
00157
# perform search across multiple fields and return trimmed results to caller
#   $SearchStrings - search strings indexed by field name (passed to
#       SearchAcrossFields() after SetDebugLevel() has processed them)
#   $StartingResult, $NumberOfResults, $SortByField, $SortDescending
#       - handed to CleanScores(), which counts, sorts, and trims the results
# returns the trimmed result list produced by CleanScores()
function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    $SearchStrings = $this->SetDebugLevel($SearchStrings);
    if ($this->DebugLevel > 0)
    {
        print("SE: In FieldedSearch() with "
                .count($SearchStrings)." search strings<br>\n");
    }

    # note when we started so that search time can be reported
    $TimeAtStart = $this->GetMicrotime();

    # run the search (NULL comes back when no search was conducted, which
    #   is treated the same as an empty result list)
    $Scores = $this->SearchAcrossFields($SearchStrings);
    if ($Scores === NULL)
    {
        $Scores = array();
    }

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # save how long the search took
    $this->LastSearchTime = $this->GetMicrotime() - $TimeAtStart;

    # hand results back to caller
    if ($this->DebugLevel > 0)
    {
        print("SE: Ended up with "
                .$this->NumberOfResultsAvailable." results<br>\n");
    }
    return $Scores;
}
00185
# perform search with logical groups of fielded searches
#   $SearchGroups - list of groups, where each group is an array that may
#       contain "SearchStrings" (search strings indexed by field name) and
#       "Logic" (AND/OR setting to use within that group)
#   $StartingResult, $NumberOfResults, $SortByField, $SortDescending
#       - handed to CleanScores(), which counts, sorts, and trims the results
# returns the trimmed result list produced by CleanScores()
# (groups are combined with each other using the default search logic that
#   was in effect when this method was called)
function GroupedSearch($SearchGroups, $StartingResult = 0, $NumberOfResults = 10,
        $SortByField = NULL, $SortDescending = TRUE)
{
    # let SetDebugLevel() process the search strings of every group
    foreach ($SearchGroups as $GroupIndex => $Unused)
    {
        if (isset($SearchGroups[$GroupIndex]["SearchStrings"]))
        {
            $SearchGroups[$GroupIndex]["SearchStrings"] = $this->SetDebugLevel(
                    $SearchGroups[$GroupIndex]["SearchStrings"]);
        }
    }
    if ($this->DebugLevel > 0)
    {
        print("SE: In GroupedSearch() with "
                .count($SearchGroups)." search groups<br>\n");
    }

    # note when we started so that search time can be reported
    $TimeAtStart = $this->GetMicrotime();

    # remember AND/OR setting so it can be put back after the groups are done
    $SavedSearchLogic = $this->DefaultSearchLogic;

    # work through groups, building up combined results as we go
    $Scores = array();
    $FirstSearch = TRUE;
    foreach ($SearchGroups as $Group)
    {
        if ($this->DebugLevel > 0)
        {
            print("SE: ----- GROUP "
                    ."---------------------------<br>\n");
        }

        # group's own AND/OR setting wins over the saved one
        $this->DefaultSearchLogic = isset($Group["Logic"])
                ? $Group["Logic"] : $SavedSearchLogic;
        if ($this->DebugLevel > 2)
        {
            print("SE: Logic is "
                    .(($this->DefaultSearchLogic == SEARCHLOGIC_AND) ? "AND" : "OR")
                    ."<br>\n");
        }

        # nothing to do for groups without search strings
        if (!isset($Group["SearchStrings"])) {  continue;  }

        # run search for group (NULL means that no search was conducted)
        $GroupScores = $this->SearchAcrossFields($Group["SearchStrings"]);
        if ($GroupScores === NULL) {  continue;  }

        if (($SavedSearchLogic == SEARCHLOGIC_OR) || $FirstSearch)
        {
            # OR (or very first search):  merge group results into results
            foreach ($GroupScores as $ItemId => $Score)
            {
                $Scores[$ItemId] = isset($Scores[$ItemId])
                        ? ($Scores[$ItemId] + $Score) : $Score;
            }

            # (any later search is no longer the first one)
            $FirstSearch = FALSE;
        }
        else
        {
            # AND:  keep only items found both previously and in this group
            $PreviousScores = $Scores;
            $Scores = array();
            foreach ($GroupScores as $ItemId => $Score)
            {
                if (isset($PreviousScores[$ItemId]))
                {
                    $Scores[$ItemId] = $PreviousScores[$ItemId] + $Score;
                }
            }
        }
    }

    # put AND/OR setting back the way we found it
    $this->DefaultSearchLogic = $SavedSearchLogic;

    # count, sort, and trim search result scores list
    $Scores = $this->CleanScores($Scores, $StartingResult, $NumberOfResults,
            $SortByField, $SortDescending);

    # save how long the search took
    $this->LastSearchTime = $this->GetMicrotime() - $TimeAtStart;

    # hand results back to caller
    if ($this->DebugLevel > 0)
    {
        print("SE: Ended up with "
                .$this->NumberOfResultsAvailable." results<br>\n");
    }
    return $Scores;
}
00292
# register a function to be used for filtering search results
#   $FunctionName - name of function (appended to the list of filter
#       functions; the code that invokes them is not part of this method)
function AddResultFilterFunction($FunctionName)
{
    $this->FilterFuncs[] = $FunctionName;
}
00299
# get or set default search logic (AND or OR)
#   $NewSetting - new logic setting (omit to just retrieve current setting)
# returns the setting in effect after the call
function DefaultSearchLogic($NewSetting = NULL)
{
    # (comparison is deliberately loose, so values equivalent to NULL such
    #   as 0 or an empty string also leave the current setting untouched)
    $HaveNewSetting = ($NewSetting != NULL);
    if ($HaveNewSetting)
    {
        $this->DefaultSearchLogic = $NewSetting;
    }

    return $this->DefaultSearchLogic;
}
00309
# choose whether search terms are required by default
#   $NewSetting - TRUE selects AND as the default search logic (terms without
#       a prefix are then flagged as required by ParseSearchStringForWords()),
#       FALSE selects OR
function SearchTermsRequiredByDefault($NewSetting = TRUE)
{
    $this->DefaultSearchLogic = $NewSetting ? SEARCHLOGIC_AND : SEARCHLOGIC_OR;
}
00321
# return number of results available from last search
#   (the value is stored outside this method -- presumably by CleanScores())
function NumberOfResults()
{
    return $this->NumberOfResultsAvailable;
}

# return list of search terms collected by ParseSearchStringForWords()
#   (terms are appended as they are parsed; nothing shown here clears the list)
function SearchTerms()
{
    return $this->SearchTermList;
}

# return how long the last search took (difference between two
#   GetMicrotime() readings, recorded by the search methods)
function SearchTime()
{
    return $this->LastSearchTime;
}
00336
# report total weight for all fields involved in search
#   $SearchStrings - search strings indexed by field name (only the field
#       names are used; "XXXKeywordXXX" stands for a keyword search)
# returns sum of the weights of the named fields, plus the weights of all
#   fields flagged for keyword searching if a keyword search was included
# (field names that were never registered via AddField() contribute nothing)
function FieldedSearchWeightScale($SearchStrings)
{
    $Weight = 0;
    $IncludedKeywordSearch = FALSE;
    foreach ($SearchStrings as $FieldName => $SearchStringArray)
    {
        # (compared as strings so that a numeric array index can never be
        #   mistaken for the keyword marker)
        if ((string)$FieldName === "XXXKeywordXXX")
        {
            $IncludedKeywordSearch = TRUE;
        }
        elseif (isset($this->FieldInfo[$FieldName]["Weight"]))
        {
            $Weight += $this->FieldInfo[$FieldName]["Weight"];
        }
    }

    # keyword searches span every field flagged for keyword searching
    if ($IncludedKeywordSearch && is_array($this->FieldInfo))
    {
        foreach ($this->FieldInfo as $FieldName => $Info)
        {
            if ($Info["InKeywordSearch"])
            {
                $Weight += $Info["Weight"];
            }
        }
    }

    return $Weight;
}
00365
00366
00367 # ---- search database update functions
00368
# update search DB for the specified item
#   $ItemId - ID of item to (re)index (negative IDs indicate temporary
#       records, which are not indexed)
# existing word counts for the item are deleted and then rebuilt from the
#   content of every field that has a positive search weight
function UpdateForItem($ItemId)
{
    # temporary records are not indexed
    if ($ItemId < 0) {  return;  }

    # forget which word counts have already been added
    unset($this->WordCountAdded);

    # clear out whatever is currently stored for this item
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId);

    foreach ($this->FieldInfo as $FieldName => $Info)
    {
        # only fields with a positive search weight are indexed
        if (!($Info["Weight"] > 0)) {  continue;  }

        # field content may be a single string or a list of strings, so
        #   treat a single value as a list with one entry
        $Content = $this->GetFieldContent($ItemId, $FieldName);
        $Strings = is_array($Content) ? $Content : array($Content);

        # record search info for each string
        foreach ($Strings as $String)
        {
            $this->RecordSearchInfoForText($ItemId, $FieldName,
                    $Info["Weight"], $String,
                    $Info["InKeywordSearch"]);
        }
    }
}
00412
# update search DB for the specified range of items
#   $StartingItemId - lowest item ID to update
#   $NumberOfItems  - maximum number of items to update
# returns ID of last item updated, or NULL if no items were found at or
#   above the starting ID
function UpdateForItems($StartingItemId, $NumberOfItems)
{
    # retrieve IDs for specified number of items starting at specified ID
    $this->DB->Query("SELECT ".$this->ItemIdFieldName." FROM ".$this->ItemTableName
            ." WHERE ".$this->ItemIdFieldName." >= ".$StartingItemId
            ." ORDER BY ".$this->ItemIdFieldName." LIMIT ".$NumberOfItems);
    $ItemIds = $this->DB->FetchColumn($this->ItemIdFieldName);

    # (set up front so that an empty ID list gives a well-defined return
    #   value rather than a reference to an undefined variable)
    $LastItemId = NULL;

    # update search info for each retrieved item
    if (is_array($ItemIds))
    {
        foreach ($ItemIds as $ItemId)
        {
            $this->UpdateForItem($ItemId);
            $LastItemId = $ItemId;
        }
    }

    # return ID of last item updated to caller
    return $LastItemId;
}
00432
# drop all data pertaining to item from search DB
#   $ItemId - ID of item whose word counts are to be deleted
function DropItem($ItemId)
{
    # word count table is the only place where per-item data is removed here
    $Query = "DELETE FROM SearchWordCounts WHERE ItemId = ".$ItemId;
    $this->DB->Query($Query);
}
00439
# drop all data pertaining to field from search DB
#   $FieldName - name of field to drop
# removes the word counts stored for the field and the field's entry in the
#   SearchFields table (nothing is done if the field is not in that table)
function DropField($FieldName)
{
    # retrieve our ID for field
    $FieldId = $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".addslashes($FieldName)."'", "FieldId");

    # nothing to drop if field is unknown to the search DB
    if (($FieldId === NULL) || ($FieldId === FALSE)) {  return;  }

    # drop all entries pertaining to field from word counts table
    # (quotes must not be backslash-escaped here:  within a double-quoted
    #   PHP string the backslash would end up in the SQL statement)
    $this->DB->Query("DELETE FROM SearchWordCounts WHERE FieldId = '".$FieldId."'");

    # drop field from our fields table
    $this->DB->Query("DELETE FROM SearchFields WHERE FieldId = '".$FieldId."'");

    # forget cached ID so that GetFieldId() does not hand out the dropped one
    unset($this->FieldIds[$FieldName]);
}
00453
# return total number of terms indexed by search engine
#   (i.e. the number of rows in the SearchWords table)
function SearchTermCount()
{
    $Query = "SELECT COUNT(*) AS TermCount FROM SearchWords";
    return $this->DB->Query($Query, "TermCount");
}
00460
# return total number of items indexed by search engine
#   (i.e. the number of distinct item IDs in the SearchWordCounts table)
function ItemCount()
{
    $Query = "SELECT COUNT(DISTINCT ItemId) AS ItemCount FROM SearchWordCounts";
    return $this->DB->Query($Query, "ItemCount");
}
00467
# add synonym(s)
#   $Word     - word to add synonyms for (added to word table if not there)
#   $Synonyms - list of synonyms for the word (each is also added to the
#       word table if not there)
# a word/synonym pair is only stored if it is not already recorded in
#   either order
function AddSynonyms($Word, $Synonyms)
{
    # find (or create) ID for word
    $WordId = $this->GetWordId($Word, TRUE);

    foreach ($Synonyms as $Synonym)
    {
        # find (or create) ID for synonym
        $SynonymId = $this->GetWordId($Synonym, TRUE);

        # look for pair in database, in either order
        $LookupQuery = "SELECT * FROM SearchWordSynonyms"
                ." WHERE (WordIdA = ".$WordId
                ." AND WordIdB = ".$SynonymId.")"
                ." OR (WordIdB = ".$WordId
                ." AND WordIdA = ".$SynonymId.")";
        $this->DB->Query($LookupQuery);

        # move on to next synonym if pair is already recorded
        if ($this->DB->NumRowsSelected() != 0) {  continue;  }

        # record pair
        $InsertQuery = "INSERT INTO SearchWordSynonyms"
                ." (WordIdA, WordIdB)"
                ." VALUES (".$WordId.", ".$SynonymId.")";
        $this->DB->Query($InsertQuery);
    }
}
00495
# remove synonym(s)
#   $Word     - word to remove synonyms for
#   $Synonyms - list of synonyms to remove (omit to remove every synonym
#       recorded for the word)
# words and synonyms that are not in the word table are silently skipped
function RemoveSynonyms($Word, $Synonyms = NULL)
{
    # nothing to remove if word is not known
    $WordId = $this->GetWordId($Word);
    if ($WordId === NULL) {  return;  }

    # with no specific synonyms given, remove every pairing that involves word
    if ($Synonyms === NULL)
    {
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE WordIdA = '".$WordId."'"
                ." OR WordIdB = '".$WordId."'");
        return;
    }

    # otherwise remove just the pairings with the specified synonyms
    foreach ($Synonyms as $Synonym)
    {
        # skip synonyms that are not known
        $SynonymId = $this->GetWordId($Synonym);
        if ($SynonymId === NULL) {  continue;  }

        # delete pairing, whichever way around it was recorded
        $this->DB->Query("DELETE FROM SearchWordSynonyms"
                ." WHERE (WordIdA = '".$WordId."'"
                ." AND WordIdB = '".$SynonymId."')"
                ." OR (WordIdB = '".$WordId."'"
                ." AND WordIdA = '".$SynonymId."')");
    }
}
00535
# remove all synonyms
#   (empties the SearchWordSynonyms table; the words themselves are kept)
function RemoveAllSynonyms()
{
    $Query = "DELETE FROM SearchWordSynonyms";
    $this->DB->Query($Query);
}
00541
# get synonyms for word
#   $Word - word to look up synonyms for
# returns array of synonyms (empty if the word is unknown or has no synonyms)
function GetSynonyms($Word)
{
    # assume no synonyms will be found
    $Synonyms = array();

    # look up ID for word
    $WordId = $this->GetWordId($Word);

    # if word ID was found
    if ($WordId !== NULL)
    {
        # look up IDs of all synonyms for this word
        # (word may be on either side of a synonym pair)
        $this->DB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms"
                ." WHERE WordIdA = ".$WordId
                ." OR WordIdB = ".$WordId);
        $SynonymIds = array();
        while ($Record = $this->DB->FetchRow())
        {
            $SynonymIds[] = ($Record["WordIdA"] == $WordId)
                    ? $Record["WordIdB"] : $Record["WordIdA"];
        }

        # look up word for each synonym ID and add to synonym list
        # (done only after all rows have been fetched because GetWord()
        #   issues its own queries through the same database object)
        foreach ($SynonymIds as $SynonymId)
        {
            $Synonyms[] = $this->GetWord($SynonymId);
        }
    }

    # return synonyms to caller
    return $Synonyms;
}
00576
# get all synonyms
# returns 2D array with words as the first index and lists of synonyms as
#   the values, sorted alphabetically;  where two words list each other,
#   one of the two entries is removed
function GetAllSynonyms()
{
    # assume no synonyms will be found
    $SynonymList = array();

    # walk through all synonym ID pairs
    # (a separate database object is used for this -- presumably because
    #   GetWord() runs queries on our usual one while we are still fetching)
    $OurDB = new SPTDatabase();
    $OurDB->Query("SELECT WordIdA, WordIdB FROM SearchWordSynonyms");
    while ($Record = $OurDB->FetchRow())
    {
        # translate IDs into words
        $Word = $this->GetWord($Record["WordIdA"]);
        $Synonym = $this->GetWord($Record["WordIdB"]);

        # list each of the two under the other, unless already listed there
        $Pairings = array(
                array($Word, $Synonym),
                array($Synonym, $Word),
                );
        foreach ($Pairings as $Pairing)
        {
            list($Entry, $Partner) = $Pairing;
            if (!isset($SynonymList[$Entry])
                    || !in_array($Partner, $SynonymList[$Entry]))
            {
                $SynonymList[$Entry][] = $Partner;
            }
        }
    }

    # remove reciprocal duplicates
    # (loops run over the lists as they were before any removals, so current
    #   state of the list is rechecked before each removal)
    foreach ($SynonymList as $Word => $Synonyms)
    {
        foreach ($Synonyms as $Synonym)
        {
            # skip pair unless word and synonym still list each other
            $StillReciprocal = isset($SynonymList[$Synonym])
                    && isset($SynonymList[$Word])
                    && in_array($Word, $SynonymList[$Synonym])
                    && in_array($Synonym, $SynonymList[$Word]);
            if (!$StillReciprocal) {  continue;  }

            # entry is removed from the list of whichever has fewer synonyms
            #   (from the synonym's list if both have the same number)
            if (count($SynonymList[$Word]) < count($SynonymList[$Synonym]))
            {
                $ListToTrim = $Word;
                $EntryToRemove = $Synonym;
            }
            else
            {
                $ListToTrim = $Synonym;
                $EntryToRemove = $Word;
            }
            $SynonymList[$ListToTrim] = array_diff(
                    $SynonymList[$ListToTrim], array($EntryToRemove));

            # discard list altogether if that was its last entry
            if (!count($SynonymList[$ListToTrim]))
            {
                unset($SynonymList[$ListToTrim]);
            }
        }
    }

    # sort synonyms for each word and then the words themselves
    #   alphabetically (just for convenience)
    foreach ($SynonymList as $Word => $Unused)
    {
        asort($SynonymList[$Word]);
    }
    ksort($SynonymList);

    # return 2D array of synonyms to caller
    return $SynonymList;
}
00666
# set all synonyms
#   $SynonymList - 2D array with words as the first index and lists of
#       synonyms as the values (i.e. the format returned by GetAllSynonyms())
# any synonyms recorded before the call are discarded
function SetAllSynonyms($SynonymList)
{
    # start from a clean slate
    $this->RemoveAllSynonyms();

    # record synonyms for each word passed in
    foreach ($SynonymList as $Word => $SynonymsForWord)
    {
        $this->AddSynonyms($Word, $SynonymsForWord);
    }
}
00680
# suggest alternatives
#   $SearchString - search string to suggest alternatives for
# (placeholder:  not implemented, so nothing is done with the search string
#   and no value is returned)
function SuggestAlternateSearches($SearchString)
{
}
00686
00687
00688 # ---- PRIVATE INTERFACE -------------------------------------------------
00689
00690 var $DB;
00691 var $DebugLevel;
00692 var $WordCountAdded;
00693 var $NumberOfResultsAvailable;
00694 var $LastSearchTime;
00695 var $FilterFuncs;
00696 var $FieldIds;
00697 var $DefaultSearchLogic;
00698 var $FieldInfo;
00699 var $RequiredTermCount;
00700 var $RequiredTermCounts;
00701 var $InclusiveTermCount;
00702 var $ExcludedTermCount;
00703 var $ItemTableName;
00704 var $ItemIdFieldName;
00705 var $SearchTermList;
00706
00707
00708 # ---- common private functions (used in both searching and DB build)
00709
# normalize and parse search string into list of search terms
#   $SearchString  - text to parse
#   $IgnorePhrases - if TRUE, words within quoted phrases and parenthesized
#       groups are treated as ordinary words (only the quotes and parentheses
#       are removed);  otherwise phrases and groups are removed entirely
# returns array of word state flags (WORD_PRESENT, optionally combined with
#   WORD_EXCLUDED or WORD_REQUIRED) indexed by lower case word
# a word may be prefixed with "-" (excluded), "+" (required), or "~" (not
#   required, even when the default search logic is AND)
# side effects:  adds to the inclusive/required/excluded term counts and
#   appends each new non-excluded word to the search term list
function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE)
{
    # strip off any surrounding whitespace
    $Text = trim($SearchString);

    # when phrases and groups are to be ignored only their delimiters are
    #   removed, otherwise the whole phrase or group is removed
    if ($IgnorePhrases)
    {
        $PhrasePattern = "/\"/";
        $GroupPattern = "/[\(\)]+/";
    }
    else
    {
        $PhrasePattern = "/\"[^\"]*\"/";
        $GroupPattern = "/\\([^)]*\\)/";
    }

    # set up normalization rules (pattern => replacement), applied in order
    # (kept as pairs so that a pattern and its replacement cannot drift apart)
    $Rules = array(
            # get rid of possessive plurals
            "/'s[^a-z0-9\-+~]+/i" => " ",
            # get rid of single quotes / apostrophes
            "/'/" => "",
            # get rid of phrases (or just their quotes)
            $PhrasePattern => " ",
            # get rid of groups (or just their parentheses)
            $GroupPattern => " ",
            # convert non-alphanumerics / non-minus/plus/tilde to a space
            "/[^a-z0-9\-+~]+/i" => " ",
            # convert minus preceded by anything but whitespace to a space
            #   (preceding character is kept)
            "/([^\\s])-/i" => "\\1 ",
            # convert plus preceded by anything but whitespace to a space
            #   (preceding character is kept)
            "/([^\\s])\\+/i" => "\\1 ",
            # convert minus followed by whitespace to a space
            "/-\\s/i" => " ",
            # convert plus followed by whitespace to a space
            "/\\+\\s/i" => " ",
            # convert tilde followed by whitespace to a space
            "/~\\s/i" => " ",
            # convert multiple spaces to one space
            "/[ ]+/" => " ",
            );

    # remove punctuation from text and normalize whitespace
    $Text = preg_replace(array_keys($Rules), array_values($Rules), $Text);
    if ($this->DebugLevel > 2) {  print("SE: Normalized search string is '{$Text}'<br>\n");  }

    # convert text to lower case
    $Text = strtolower($Text);

    # strip off any extraneous whitespace
    $Text = trim($Text);

    # start with an empty array
    $Words = array();

    # if we have words left after parsing
    if (strlen($Text) != 0)
    {
        # for each word
        foreach (explode(" ", $Text) as $Word)
        {
            # strip off option character (if any)
            $FirstChar = substr($Word, 0, 1);
            if (($FirstChar === "-") || ($FirstChar === "+") || ($FirstChar === "~"))
            {
                $Word = substr($Word, 1);
            }

            # skip words with nothing left (e.g. a stray "+" or "-"), as an
            #   empty term cannot match anything
            if (strlen($Word) == 0) {  continue;  }

            # set flags and update term counts appropriately
            # (each distinct word is only counted once)
            $IsNewWord = !isset($Words[$Word]);
            $Flags = WORD_PRESENT;
            if ($FirstChar === "-")
            {
                $Flags |= WORD_EXCLUDED;
                if ($IsNewWord) {  $this->ExcludedTermCount++;  }
            }
            else
            {
                # word is required if explicitly marked as such, or if terms
                #   are required by default and word is not marked optional
                if (($FirstChar !== "~")
                        && (($this->DefaultSearchLogic == SEARCHLOGIC_AND)
                                || ($FirstChar === "+")))
                {
                    $Flags |= WORD_REQUIRED;
                    if ($IsNewWord) {  $this->RequiredTermCount++;  }
                }
                if ($IsNewWord)
                {
                    $this->InclusiveTermCount++;
                    $this->SearchTermList[] = $Word;
                }
            }

            # store flags to indicate word found
            $Words[$Word] = $Flags;
            if ($this->DebugLevel > 3) {  print("SE: Word identified ({$Word})<br>\n");  }
        }
    }

    # return normalized words to caller
    return $Words;
}
00822
# retrieve search DB ID for specified field
#   $FieldName - name of field
# returns ID of field in SearchFields table (field is added to the table if
#   it is not already there);  IDs are cached per object once looked up
function GetFieldId($FieldName)
{
    # use cached ID if we have one
    if (isset($this->FieldIds[$FieldName]))
    {
        return $this->FieldIds[$FieldName];
    }

    # look for field in database
    $EscapedName = addslashes($FieldName);
    $this->DB->Query("SELECT FieldId FROM SearchFields "
            ."WHERE FieldName = '".$EscapedName."'");
    $Record = $this->DB->FetchRow();
    if ($Record)
    {
        # field is already in database, so use its ID
        $FieldId = $Record["FieldId"];
    }
    else
    {
        # field is new to database, so add it and use ID it was given
        $this->DB->Query("INSERT INTO SearchFields (FieldName) "
                ."VALUES ('".$EscapedName."')");
        $FieldId = $this->DB->LastInsertId("SearchFields");
    }

    # remember ID for next time and return it to caller
    $this->FieldIds[$FieldName] = $FieldId;
    return $this->FieldIds[$FieldName];
}
00855
# retrieve ID for specified word
#   $Word          - word to look up
#   $AddIfNotFound - if TRUE, word is added to the word table when it is not
#       already there (stored in lower case)
# returns ID of word, or NULL if word was not found (and not added)
# (IDs are cached in a function-level static, so the cache is shared by all
#   calls to this method)
# NOTE(review): the lookup uses the word as given while the insert stores it
#   in lower case -- this relies on the database comparing WordText without
#   regard to case;  confirm against the table's collation
function GetWordId($Word, $AddIfNotFound = FALSE)
{
    static $WordIdCache;

    # use cached ID if we have one
    if (isset($WordIdCache[$Word]))
    {
        return $WordIdCache[$Word];
    }

    # look for word in database
    $WordId = $this->DB->Query("SELECT WordId FROM SearchWords "
            ."WHERE WordText='".addslashes($Word)."'", "WordId");

    # add word to database if it is not there and caller wants it added
    if ($AddIfNotFound && ($WordId === NULL))
    {
        $this->DB->Query("INSERT INTO SearchWords (WordText)"
                ." VALUES ('".addslashes(strtolower($Word))."')");
        $WordId = $this->DB->LastInsertId("SearchWords");
    }

    # remember ID for next time and return it to caller
    $WordIdCache[$Word] = $WordId;
    return $WordId;
}
00891
# retrieve word for specified word ID
#   $WordId - ID of word to look up
# returns the WordText value that the database lookup yields for the ID
# NOTE(review): earlier documentation said FALSE is returned when no word is
#   found, while GetWordId() treats NULL as "not found" for the same kind of
#   lookup -- confirm what the database object returns in that case
# (words are cached in a function-level static, so the cache is shared by
#   all calls to this method)
function GetWord($WordId)
{
    static $WordCache;

    # use cached word if we have one
    if (isset($WordCache[$WordId]))
    {
        return $WordCache[$WordId];
    }

    # look up word in database
    $Query = "SELECT WordText FROM SearchWords "
            ."WHERE WordId='".$WordId."'";
    $Word = $this->DB->Query($Query, "WordText");

    # remember word for next time and return it to caller
    $WordCache[$WordId] = $Word;
    return $Word;
}
00916
00917
00918 # ---- private functions used in searching
00919
# perform search across multiple fields and return raw results to caller
#   $SearchStrings - search strings indexed by field name ("XXXKeywordXXX"
#       stands for a keyword search);  each value may be a single search
#       string or an array of search strings
# returns array of scores indexed by item ID, or NULL if no search was
#   conducted (callers distinguish NULL from an empty result list)
# side effects:  resets and then updates the inclusive/required/excluded
#   term counts
# NOTE(review): when a field has several search strings, only the words and
#   phrases of the last string are kept for the exclusion pass further down
#   -- confirm whether that is intended
function SearchAcrossFields($SearchStrings)
{
    # start by assuming no search will be done
    $Scores = NULL;

    # start with no words or phrases saved for any field
    $Words = array();
    $Phrases = array();

    # clear word counts
    $this->InclusiveTermCount = 0;
    $this->RequiredTermCount = 0;
    $this->ExcludedTermCount = 0;

    # for each field
    $NeedComparisonSearch = FALSE;
    foreach ($SearchStrings as $FieldName => $SearchStringArray)
    {
        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # if field is keyword or field is text and does not look like comparison match
            if (($FieldName == "XXXKeywordXXX")
                    || (isset($this->FieldInfo[$FieldName])
                            && ($this->FieldInfo[$FieldName]["FieldType"] == SEARCHFIELD_TEXT)
                            && !preg_match("/^[><!]=./", $SearchString)
                            && !preg_match("/^[><=]./", $SearchString)))
            {
                if ($this->DebugLevel > 0) {  print("SE: Searching text field \""
                        .$FieldName."\" for string \"$SearchString\"<br>\n");  }

                # normalize text and split into words
                $Words[$FieldName] =
                        $this->ParseSearchStringForWords($SearchString);

                # calculate scores for matching items
                if (count($Words[$FieldName]))
                {
                    $Scores = $this->SearchForWords(
                            $Words[$FieldName], $FieldName, $Scores);
                    if ($this->DebugLevel > 3) {  print("SE: Have "
                            .(is_array($Scores) ? count($Scores) : 0)
                            ." results after word search<br>\n");  }
                }

                # split into phrases
                $Phrases[$FieldName] =
                        $this->ParseSearchStringForPhrases($SearchString);

                # handle any phrases
                if (count($Phrases[$FieldName]))
                {
                    $Scores = $this->SearchForPhrases(
                            $Phrases[$FieldName], $Scores, $FieldName, TRUE, FALSE);
                    if ($this->DebugLevel > 3) {  print("SE: Have "
                            .(is_array($Scores) ? count($Scores) : 0)
                            ." results after phrase search<br>\n");  }
                }
            }
            else
            {
                # set flag to indicate possible comparison search candidate found
                $NeedComparisonSearch = TRUE;
            }
        }
    }

    # perform comparison searches
    if ($NeedComparisonSearch)
    {
        $Scores = $this->SearchForComparisonMatches($SearchStrings, $Scores);
        if ($this->DebugLevel > 3) {  print("SE: Have "
                .(is_array($Scores) ? count($Scores) : 0)
                ." results after comparison search<br>\n");  }
    }

    # if no results found and exclusions specified
    # (results may still be NULL at this point, which must not be passed to
    #   count(), so emptiness is tested instead)
    if (empty($Scores) && $this->ExcludedTermCount)
    {
        # load all records
        $Scores = $this->LoadScoresForAllRecords();
    }

    # if search results found
    if (!empty($Scores))
    {
        # for each search text string
        foreach ($SearchStrings as $FieldName => $SearchStringArray)
        {
            # convert search string to array if needed
            if (!is_array($SearchStringArray))
            {
                $SearchStringArray = array($SearchStringArray);
            }

            # for each search string for this field
            foreach ($SearchStringArray as $SearchString)
            {
                # if field is text
                if (($FieldName == "XXXKeywordXXX")
                        || (isset($this->FieldInfo[$FieldName])
                                && ($this->FieldInfo[$FieldName]["FieldType"] == SEARCHFIELD_TEXT)))
                {
                    # if there are words in search text
                    if (isset($Words[$FieldName]))
                    {
                        # handle any excluded words
                        $Scores = $this->FilterOnExcludedWords(
                                $Words[$FieldName], $Scores, $FieldName);
                    }

                    # handle any excluded phrases
                    if (isset($Phrases[$FieldName]))
                    {
                        $Scores = $this->SearchForPhrases(
                                $Phrases[$FieldName], $Scores, $FieldName, FALSE, TRUE);
                    }
                }
            }
        }

        # strip off any results that don't contain required words
        $Scores = $this->FilterOnRequiredWords($Scores);
    }

    # return search result scores to caller
    return $Scores;
}
01046
# search for words in specified field, adding to the score of every item
# that contains a non-excluded word (or a synonym of that word)
#   $Words - array with words for the index and flag bitmasks for the values
#   $FieldName - name of field to search (defaults to the keyword field)
#   $Scores - existing scores (item ID => score) to add to, or NULL
#   returns array of scores with item IDs for the index
function SearchForWords(
        $Words, $FieldName = "XXXKeywordXXX", $Scores = NULL)
{
    $DB = $this->DB;

    # begin with an empty score list if caller did not supply one
    if ($Scores == NULL) { $Scores = array(); }

    # every lookup below is limited to this field
    $FieldId = $this->GetFieldId($FieldName);

    foreach ($Words as $Word => $Flags)
    {
        if ($this->DebugLevel > 2) { print("SE: Searching for word '${Word}' in field ${FieldName}<br>\n"); }

        # excluded words contribute nothing to the scores
        if ($Flags & WORD_EXCLUDED) { continue; }

        # find out whether word is known to the search database
        if ($this->DebugLevel > 2) { print("SE: Looking up word \"${Word}\"<br>\n"); }
        $WordId = $this->GetWordId($Word);
        if ($WordId === NULL) { continue; }

        # load per-item counts for the word itself
        $DB->Query("SELECT ItemId,Count FROM SearchWordCounts "
                ."WHERE WordId = ".$WordId
                ." AND FieldId = ".$FieldId);
        $Counts = $DB->FetchColumn("Count", "ItemId");

        # look for synonyms (word may be on either side of the pairing)
        $DB->Query("SELECT WordIdA, WordIdB"
                ." FROM SearchWordSynonyms"
                ." WHERE WordIdA = ".$WordId
                ." OR WordIdB = ".$WordId);
        if ($DB->NumRowsSelected())
        {
            # collect the ID on the other side of each pairing
            $SynonymIds = array();
            while ($Record = $DB->FetchRow())
            {
                if ($Record["WordIdA"] == $WordId)
                {
                    $SynonymIds[] = $Record["WordIdB"];
                }
                else
                {
                    $SynonymIds[] = $Record["WordIdA"];
                }
            }

            # fold the counts for each synonym into the counts for the word
            foreach ($SynonymIds as $SynonymId)
            {
                $DB->Query("SELECT ItemId,Count"
                        ." FROM SearchWordCounts"
                        ." WHERE WordId = ".$SynonymId
                        ." AND FieldId = ".$FieldId);
                $SynonymCounts = $DB->FetchColumn("Count", "ItemId");

                foreach ($SynonymCounts as $ItemId => $Count)
                {
                    # a synonym match is worth half (rounded up) of a direct match
                    $AdjustedCount = ceil($Count / 2);
                    $Counts[$ItemId] = isset($Counts[$ItemId])
                            ? ($Counts[$ItemId] + $AdjustedCount)
                            : $AdjustedCount;
                }
            }
        }

        # apply the counts to the required term tallies and the scores
        $IsRequired = ($Flags & WORD_REQUIRED);
        foreach ($Counts as $ItemId => $Count)
        {
            if ($IsRequired)
            {
                # note that item satisfied one more required term
                $this->RequiredTermCounts[$ItemId] =
                        isset($this->RequiredTermCounts[$ItemId])
                        ? ($this->RequiredTermCounts[$ItemId] + 1)
                        : 1;
            }

            $Scores[$ItemId] = isset($Scores[$ItemId])
                    ? ($Scores[$ItemId] + $Count)
                    : $Count;
        }
    }

    # return basic scores to caller
    return $Scores;
}
01168
# extract phrases (terms surrounded by double quotes) from search string
#   $SearchString - raw search text
#   returns array with (slash-escaped, trimmed) phrases for the index and
#       flag bitmasks for the values
# (also updates the excluded/required/inclusive term counts and the search
# term list the first time each phrase is seen within this call)
function ParseSearchStringForPhrases($SearchString)
{
    # when split on double quotes, properly closed phrases land in the
    #       odd-numbered chunks
    $Pieces = explode("\"", $SearchString);
    $PieceCount = count($Pieces);

    $Phrases = array();
    for ($Index = 2;  $Index < $PieceCount;  $Index += 2)
    {
        $Phrase = trim(addslashes($Pieces[$Index - 1]));
        $IsNewPhrase = !isset($Phrases[$Phrase]);

        # option character (if any) is the character immediately before
        #       the opening quote, i.e. the last one of the preceding chunk
        $OptionChar = substr($Pieces[$Index - 2], -1);

        $Flags = WORD_PRESENT;
        if ($OptionChar == "-")
        {
            # phrase is to be excluded from results
            $Flags |= WORD_EXCLUDED;
            if ($IsNewPhrase) { $this->ExcludedTermCount++; }
        }
        else
        {
            # phrase is required if explicitly marked with "+" or if we are
            #       in AND mode and it is not marked optional with "~"
            $IsRequired = ($OptionChar == "+")
                    || (($this->DefaultSearchLogic == SEARCHLOGIC_AND)
                            && ($OptionChar != "~"));
            if ($IsRequired)
            {
                $Flags |= WORD_REQUIRED;
                if ($IsNewPhrase) { $this->RequiredTermCount++; }
            }

            if ($IsNewPhrase)
            {
                $this->InclusiveTermCount++;
                $this->SearchTermList[] = $Phrase;
            }
        }

        # (a repeated phrase takes the flags from its last occurrence)
        $Phrases[$Phrase] = $Flags;
    }

    # return phrases to caller
    return $Phrases;
}
01222
# extract groups (terms surrounded by parens) from search string
# (NOTE: NOT YET IMPLEMENTED!!!  the current logic only splits on open
# parens and returns every second chunk, trimmed and slash-escaped)
#   $SearchString - raw search text
#   returns array of group strings (empty array if none were found)
function ParseSearchStringForGroups($SearchString)
{
    # split into chunks delimited by open paren
    $Pieces = explode("(", $SearchString);
    $PieceCount = count($Pieces);

    # start with an empty list so that callers always get an array back,
    #       even when the string contains no groups
    $Groups = array();

    # for each pair of chunks
    for ($Index = 2;  $Index < $PieceCount;  $Index += 2)
    {
        # grab group from chunk
        $Groups[] = trim(addslashes($Pieces[$Index - 1]));
    }

    # return groups to caller
    return $Groups;
}
01245
# search specified field for phrase (not implemented in this class)
#   $FieldName - name of field to search
#   $Phrase - phrase to look for
# SearchForPhrases() iterates over the return value as a list of item IDs
# NOTE(review): presumably meant to be supplied by a subclass -- confirm
function SearchFieldForPhrases($FieldName, $Phrase)
{
    # report the missing implementation and stop the script
    print("<br>SE - ERROR: SearchFieldForPhrases() not implemented<br>\n");
    exit();
}
01251
# search for phrases in specified field and adjust scores accordingly
#   $Phrases - array with phrases for the index and flag bitmasks for the values
#   $Scores - existing scores (item ID => score), or NULL
#   $FieldName - name of field to search (keyword field means every field
#       that is marked for inclusion in keyword searches)
#   $ProcessNonExcluded - TRUE to add to scores for non-excluded phrases
#   $ProcessExcluded - TRUE to remove items that match excluded phrases
#   returns updated scores
function SearchForPhrases($Phrases, $Scores, $FieldName = "XXXKeywordXXX",
        $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
{
    # nothing to do if there are no phrases
    if (count($Phrases) == 0) { return $Scores; }

    # a keyword search is just a search of each keyword-enabled field
    if ($FieldName == "XXXKeywordXXX")
    {
        foreach ($this->FieldInfo as $KFieldName => $Info)
        {
            if (!$Info["InKeywordSearch"]) { continue; }

            # call ourself with that field
            $Scores = $this->SearchForPhrases($Phrases, $Scores, $KFieldName,
                    $ProcessNonExcluded, $ProcessExcluded);
        }
        return $Scores;
    }

    foreach ($Phrases as $Phrase => $Flags)
    {
        if ($this->DebugLevel > 2) { print("SE: searching for phrase '${Phrase}' in field ${FieldName}<br>\n"); }

        # work out what (if anything) we are to do with this phrase
        $IsExcluded = ($Flags & WORD_EXCLUDED);
        $RemoveMatches = ($ProcessExcluded && $IsExcluded);
        $ScoreMatches = ($ProcessNonExcluded && !$IsExcluded);
        if (!$RemoveMatches && !$ScoreMatches) { continue; }

        # initialize score list if necessary
        if ($Scores === NULL) { $Scores = array(); }

        # retrieve list of items that contain phrase
        $ItemIds = $this->SearchFieldForPhrases($FieldName, $Phrase);

        foreach ($ItemIds as $ItemId)
        {
            # items matching an excluded phrase are knocked off of the list
            if ($RemoveMatches)
            {
                unset($Scores[$ItemId]);
                continue;
            }

            # phrase is worth its number of words times the field weight
            $PhraseScore = count(preg_split("/[\s]+/", $Phrase, -1, PREG_SPLIT_NO_EMPTY))
                    * $this->FieldInfo[$FieldName]["Weight"];
            if ($this->DebugLevel > 2) { print("SE: phrase score is ${PhraseScore}<br>\n"); }

            # bump up item record score
            $Scores[$ItemId] = isset($Scores[$ItemId])
                    ? ($Scores[$ItemId] + $PhraseScore)
                    : $PhraseScore;

            # note that item satisfied one more required term
            if ($Flags & WORD_REQUIRED)
            {
                $this->RequiredTermCounts[$ItemId] =
                        isset($this->RequiredTermCounts[$ItemId])
                        ? ($this->RequiredTermCounts[$ItemId] + 1)
                        : 1;
            }
        }
    }

    # return updated scores to caller
    return $Scores;
}
01341
# remove from scores any items that contain an excluded word in specified field
#   $Words - array with words for the index and flag bitmasks for the values
#   $Scores - scores (item ID => score) to be filtered
#   $FieldName - name of field to check (defaults to the keyword field)
#   returns filtered scores
function FilterOnExcludedWords($Words, $Scores, $FieldName = "XXXKeywordXXX")
{
    $DB = $this->DB;

    # every lookup below is limited to this field
    $FieldId = $this->GetFieldId($FieldName);

    foreach ($Words as $Word => $Flags)
    {
        # only excluded words are of interest here
        if (!($Flags & WORD_EXCLUDED)) { continue; }

        # words that are not in the DB cannot appear in any item
        $WordId = $this->GetWordId($Word);
        if ($WordId === NULL) { continue; }

        # find every item that has the word in this field
        $DB->Query("SELECT ItemId FROM SearchWordCounts "
                ."WHERE WordId=${WordId} AND FieldId=${FieldId}");
        while ($Record = $DB->FetchRow())
        {
            $ItemId = $Record["ItemId"];
            if (!isset($Scores[$ItemId])) { continue; }

            # drop item from score list
            if ($this->DebugLevel > 3) { print("SE: filtering out item $ItemId because it contained word \"".$Word."\"<br>\n"); }
            unset($Scores[$ItemId]);
        }
    }

    # return filtered score list to caller
    return $Scores;
}
01384
# remove from scores any items that did not match every required term
#   $Scores - scores (item ID => score) to be filtered
#   returns filtered scores (unchanged if there were no required terms)
function FilterOnRequiredWords($Scores)
{
    # nothing to filter on if there were no required terms
    if ($this->RequiredTermCount > 0)
    {
        foreach ($Scores as $ItemId => $Score)
        {
            # items that matched no required term have no tally at all,
            #       so treat a missing tally as zero
            $MatchedCount = isset($this->RequiredTermCounts[$ItemId])
                    ? $this->RequiredTermCounts[$ItemId] : 0;

            # if item does not meet required term count
            if ($MatchedCount < $this->RequiredTermCount)
            {
                # filter out item
                if ($this->DebugLevel > 4) { print("SE: filtering out item $ItemId because it didn't have required word count of ".$this->RequiredTermCount." (only had ".$MatchedCount.")<br>\n"); }
                unset($Scores[$ItemId]);
            }
        }
    }

    # return filtered list to caller
    return $Scores;
}
01407
# count, sort, and trim search result scores list
#   $Scores - scores (item ID => score) from search
#   $StartingResult - offset of first result to return
#   $NumberOfResults - maximum number of results to return
#   $SortByField - name of field to sort by (NULL to sort by score)
#   $SortDescending - TRUE to sort in descending order
#   returns requested slice of scores, in sorted order, keyed by item ID
# (the number of results available before trimming is saved in
# $this->NumberOfResultsAvailable)
function CleanScores($Scores, $StartingResult, $NumberOfResults,
        $SortByField, $SortDescending)
{
    # perform any requested filtering
    if ($this->DebugLevel > 0) { print("SE: Have "
            .count($Scores)." results before filter callbacks<br>\n"); }
    $Scores = $this->FilterOnSuppliedFunctions($Scores);

    # save total number of results available
    $this->NumberOfResultsAvailable = count($Scores);

    # if no sorting field specified
    if ($SortByField === NULL)
    {
        # sort result list by score
        if ($SortDescending)
        {
            arsort($Scores, SORT_NUMERIC);
        }
        else
        {
            asort($Scores, SORT_NUMERIC);
        }
    }
    else
    {
        # get list of item IDs in sorted order
        $SortedIds = $this->GetItemIdsSortedByField(
                $SortByField, $SortDescending);

        # if we have sorted item IDs
        if (count($SortedIds) && count($Scores))
        {
            # strip sorted ID list down to those that appear in search results
            $SortedIds = array_intersect($SortedIds, array_keys($Scores));

            # rebuild score list in sorted order (starting from an empty
            #       list, so that we still have an array if none of the
            #       results appear in the sorted ID list)
            $NewScores = array();
            foreach ($SortedIds as $Id)
            {
                $NewScores[$Id] = $Scores[$Id];
            }
            $Scores = $NewScores;
        }
        else
        {
            # fall back to sorting by score (always descending)
            arsort($Scores, SORT_NUMERIC);
        }
    }

    # trim result list to match range requested by caller
    #       (preserving keys, because the keys are the item IDs)
    $TrimmedScores = array_slice(
            $Scores, $StartingResult, $NumberOfResults, TRUE);

    # return cleaned search result scores list to caller
    return $TrimmedScores;
}
01464
# remove from scores any items rejected by a registered filter function
#   $Scores - scores (item ID => score) to be filtered
#   returns filtered scores
# (filter functions are called with an item ID, and reject the item by
# returning a value that evaluates to TRUE)
function FilterOnSuppliedFunctions($Scores)
{
    # nothing to do if no filter functions have been set
    if (!isset($this->FilterFuncs)) { return $Scores; }

    foreach ($Scores as $ItemId => $Score)
    {
        foreach ($this->FilterFuncs as $FuncName)
        {
            # move on to the next filter if this one accepts the item
            if (!$FuncName($ItemId)) { continue; }

            # discard result
            if ($this->DebugLevel > 2) { print("SE: filter callback <i>$FuncName</i> rejected item ${ItemId}<br>\n"); }
            unset($Scores[$ItemId]);

            # no need to consult remaining filters for this item
            break;
        }
    }

    # return filtered list to caller
    return $Scores;
}
01493
# perform comparison searches (e.g. ">=2005") and merge results into scores
#   $SearchStrings - array with field names for the index and search strings
#       (or arrays of search strings) for the values
#   $Scores - existing scores (item ID => score), or NULL
#   returns updated scores
# A search string is treated as a comparison if it begins with one of the
# operators >= <= != > < = or if its field has a known type other than text
# (in which case the operator defaults to "=").  The keyword field is skipped.
function SearchForComparisonMatches($SearchStrings, $Scores)
{
    # comparisons found (parallel lists with matching indexes)
    $FieldNames = array();
    $Operators = array();
    $Values = array();

    # for each field
    foreach ($SearchStrings as $SearchFieldName => $SearchStringArray)
    {
        # comparisons do not apply to keyword searches
        if ($SearchFieldName == "XXXKeywordXXX") { continue; }

        # convert search string to array if needed
        if (!is_array($SearchStringArray))
        {
            $SearchStringArray = array($SearchStringArray);
        }

        # for each search string for this field
        foreach ($SearchStringArray as $SearchString)
        {
            # skip search string if it does not look like a comparison search
            $FoundOperator = preg_match("/^[><!]=./", $SearchString)
                    || preg_match("/^[><=]./", $SearchString);
            $IsNonTextField =
                    isset($this->FieldInfo[$SearchFieldName]["FieldType"])
                    && ($this->FieldInfo[$SearchFieldName]["FieldType"]
                            != SEARCHFIELD_TEXT);
            if (!$FoundOperator && !$IsNonTextField) { continue; }

            # determine value (search string with any leading operator removed)
            $Patterns = array("/^[><!]=/", "/^[><=]/");
            $Replacements = array("", "");
            $Value = trim(preg_replace($Patterns, $Replacements, $SearchString));

            # determine operator
            $Operator = NULL;
            if (!$FoundOperator)
            {
                $Operator = "=";
            }
            else
            {
                # (substr() is used rather than string offsets so that a
                #       term consisting of a lone operator character does
                #       not read past the end of the string)
                $Term = trim($SearchString);
                $FirstChar = substr($Term, 0, 1);
                $FirstTwoChars = substr($Term, 0, 2);
                if (in_array($FirstTwoChars, array(">=", "<=", "!="), TRUE))
                {
                    $Operator = $FirstTwoChars;
                }
                elseif (in_array($FirstChar, array(">", "<", "="), TRUE))
                {
                    $Operator = $FirstChar;
                }
            }

            # if operator was found
            if ($Operator !== NULL)
            {
                # save comparison
                $FieldNames[] = $SearchFieldName;
                $Operators[] = $Operator;
                $Values[] = $Value;
                if ($this->DebugLevel > 3) { print("SE: added comparison (field = <i>".$SearchFieldName."</i> op = <i>".$Operator."</i> val = <i>".$Value."</i>)<br>\n"); }
            }
        }
    }

    # if comparisons found
    if (count($Operators) > 0)
    {
        # perform comparisons on fields and gather results
        $Results = $this->SearchFieldsForComparisonMatches(
                $FieldNames, $Operators, $Values);

        # make sure we have a score list to work with
        if ($Scores === NULL) { $Scores = array(); }

        # if search logic is set to AND
        if ($this->DefaultSearchLogic == SEARCHLOGIC_AND)
        {
            if (!count($Results))
            {
                # nothing satisfied the comparisons, so nothing can match
                $Scores = array();
            }
            elseif ((count($Scores) == 0) && ($this->InclusiveTermCount == 0))
            {
                # there were no prior results and no terms for keyword
                #       search, so the comparison results are the results
                foreach ($Results as $ItemId)
                {
                    $Scores[$ItemId] = 1;
                }
            }
            else
            {
                # remove anything from scores that is not part of results
                #       (using a lookup keyed by item ID to avoid scanning
                #       the whole result list for every score)
                $IsInResults = array();
                foreach ($Results as $ItemId)
                {
                    $IsInResults[$ItemId] = TRUE;
                }
                foreach (array_keys($Scores) as $ItemId)
                {
                    if (!isset($IsInResults[$ItemId]))
                    {
                        unset($Scores[$ItemId]);
                    }
                }
            }
        }
        else
        {
            # add result items to scores
            foreach ($Results as $ItemId)
            {
                $Scores[$ItemId] = isset($Scores[$ItemId])
                        ? ($Scores[$ItemId] + 1)
                        : 1;
            }
        }
    }

    # return results to caller
    return $Scores;
}
01617
# pull any debug level indicators out of search string(s)
#   $SearchStrings - single search string, or array (indexed by field name)
#       of search strings or arrays of search strings
#   returns search info in same form, as processed by ExtractDebugLevel()
function SetDebugLevel($SearchStrings)
{
    # a lone search string can be handled directly
    if (!is_array($SearchStrings))
    {
        return $this->ExtractDebugLevel($SearchStrings);
    }

    # otherwise process the search string(s) for each field
    $Extractor = array($this, "ExtractDebugLevel");
    foreach ($SearchStrings as $FieldName => $FieldStrings)
    {
        $SearchStrings[$FieldName] = is_array($FieldStrings)
                ? array_map($Extractor, $FieldStrings)
                : $this->ExtractDebugLevel($FieldStrings);
    }

    # return new search info to caller
    return $SearchStrings;
}
01652
# set debug level from indicator in search string (if present)
#   $SearchString - search string to check
#   returns search string with indicator removed (or unmodified search
#       string if no indicator was found)
# The indicator is "DBUGLVL=" followed by one or two digits in the range
# 1-9, and is only recognized at the start of the string (after optional
# whitespace).
function ExtractDebugLevel($SearchString)
{
    # if search string begins with debug level indicator
    # (the level is captured directly, rather than derived by rewriting
    #       the whole string, so that text that merely contains "DBUGLVL="
    #       somewhere else can never end up being used as the level)
    if (preg_match("/^\\s*DBUGLVL=([1-9]{1,2})/", $SearchString, $Matches))
    {
        # set debug level
        $Level = (int)$Matches[1];
        print("SE: setting debug level to $Level<br>\n");
        $this->DebugLevel = $Level;

        # remove indicator from search string
        $SearchString = str_replace("DBUGLVL=".$Level, "", $SearchString);
    }

    # return (possibly) modified search string to caller
    return $SearchString;
}
01671
# build search result scores array that contains every item in the item
# table, with each item given a score of 1
#   returns array of scores with item IDs for the index
function LoadScoresForAllRecords()
{
    $DB = $this->DB;
    $IdFieldName = $this->ItemIdFieldName;

    # retrieve IDs of all items
    $DB->Query("SELECT ".$IdFieldName
            ." FROM ".$this->ItemTableName);

    # give every item the same score
    $Scores = array();
    while ($Record = $DB->FetchRow())
    {
        $Scores[$Record[$IdFieldName]] = 1;
    }

    # return array with all scores to caller
    return $Scores;
}
01690
01691
01692 # ---- private functions used in building search database
01693
# add weight to stored count for word in field of item
#   $WordId - ID of word
#   $ItemId - ID of item
#   $FieldId - ID of field
#   $Weight - amount to add to count
# NOTE(review): the "already added" flag is keyed by word and field but not
# by item, so it presumably gets cleared elsewhere before each new item is
# processed -- confirm against callers
function UpdateWordCount($WordId, $ItemId, $FieldId, $Weight)
{
    $DB = $this->DB;

    # if this is the first count we have seen for this word and field
    if (!isset($this->WordCountAdded[$WordId][$FieldId]))
    {
        # create count record with weight as the initial value
        $DB->Query("INSERT INTO SearchWordCounts"
                ." (WordId, ItemId, FieldId, Count) VALUES"
                ." (${WordId}, ${ItemId}, ${FieldId}, ${Weight})");

        # remember that count record now exists
        $this->WordCountAdded[$WordId][$FieldId] = TRUE;
        return;
    }

    # otherwise add weight to existing count record
    $DB->Query("UPDATE SearchWordCounts SET Count=Count+${Weight} "
            ."WHERE WordId=${WordId} "
            ."AND ItemId=${ItemId} "
            ."AND FieldId=${FieldId}");
}
01718
# retrieve content of specified field for item (not implemented in this class)
#   $ItemId - ID of item
#   $FieldName - name of field
# NOTE(review): presumably meant to be supplied by a subclass -- confirm
function GetFieldContent($ItemId, $FieldName)
{
    # report the missing implementation and stop the script
    print("<br>SE - ERROR: GetFieldContent() not implemented<br>\n");
    exit();
}
01724
# record word counts for text from specified field of item
#   $ItemId - ID of item that text came from
#   $FieldName - name of field that text came from
#   $Weight - amount added to keyword field count for each word
#   $Text - text to be recorded
#   $IncludeInKeyword - TRUE if words should also be counted for keyword field
# (each word adds 1 to the count for the field itself)
function RecordSearchInfoForText($ItemId, $FieldName, $Weight, $Text, $IncludeInKeyword)
{
    # break text up into words
    $Words = $this->ParseSearchStringForWords($Text, TRUE);

    # nothing to record if no words were left after parsing
    if (count($Words) == 0) { return; }

    # get IDs for field and (if needed) for keyword field
    $FieldId = $this->GetFieldId($FieldName);
    if ($IncludeInKeyword)
    {
        $KeywordFieldId = $this->GetFieldId("XXXKeywordXXX");
    }

    foreach (array_keys($Words) as $Word)
    {
        # look up ID for word (second argument TRUE, unlike search lookups)
        $WordId = $this->GetWordId($Word, TRUE);

        # update field count for word
        $this->UpdateWordCount($WordId, $ItemId, $FieldId, 1);

        # update keyword field count for word
        if ($IncludeInKeyword)
        {
            $this->UpdateWordCount(
                    $WordId, $ItemId, $KeywordFieldId, $Weight);
        }
    }
}
01764
# convenience function for getting current time, with microseconds
#   returns current Unix timestamp as float (seconds, with fractional part)
function GetMicrotime()
{
    # have PHP hand us the value as a float, rather than parsing and
    #       summing the "usec sec" string form ourselves
    return microtime(TRUE);
}
01771 }
01772
# global search logic modes
# (same names and values as the SearchEngine class constants)
define('SEARCHLOGIC_AND', 1);
define('SEARCHLOGIC_OR', 2);

# global flags used for indicating field types
# (same names and values as the SearchEngine class constants)
define('SEARCHFIELD_TEXT', 1);
define('SEARCHFIELD_NUMERIC', 2);
define('SEARCHFIELD_DATE', 3);
define('SEARCHFIELD_DATERANGE', 4);
01782
01783
01784 ?>