<?PHP

#
#   FILE:  SPT--SearchEngine.php
#
#   FUNCTIONS PROVIDED:
#       SearchEngine->SearchEngine()
#           - constructor
#       SearchEngine->Search($SearchString, 
#               $StartingResult = 0, $NumberOfResults = 10)
#           - search for text and return list of matching resource IDs
#       SearchEngine->FieldedSearch($SearchStrings, 
#               $StartingResult = 0, $NumberOfResults = 10)
#           - search for text in specific fields and return resource ID list
#       SearchEngine->NumberOfResults()
#           - return number of results found in last search
#       SearchEngine->SearchTime()
#           - return time in seconds that last search took
#       SearchEngine->AddResultFilterFunction($FunctionName)
#           - add function that will be used to filter search results
#       SearchEngine->UpdateForResource($ResourceId)
#           - update search info for specified resource
#       SearchEngine->UpdateForResources($StartingResourceId, $NumberOfResources)
#           - update search info for all resources in specified range (returns
#                   ID of last resource updated)
#
#   Part of the Scout Portal Toolkit
#   Copyright 2002 Internet Scout Project
#   http://scout.cs.wisc.edu
#

require_once(dirname(__FILE__)."/../Axis--Database.php");
require_once(dirname(__FILE__)."/SPT--SPTDate.php");


class SearchEngine {

    # ---- PUBLIC INTERFACE --------------------------------------------------

    # object constructor
    #   $DB - database object used for search index queries (stored by
    #       reference, so the caller's object is shared rather than copied)
    #   $FieldInfo - array of per-field settings indexed by field name (the
    #       code in this class reads the "FieldType", "Weight",
    #       "InKeywordSearch", and "DBFieldName" entries)
    #   $TextRetrievalFunc - name of function used to retrieve field text
    #       (optional)
    #   $ResourcePhraseMatchFunc - name of function that returns the IDs of
    #       resources containing a phrase in a field (optional, but needed
    #       for any search that contains a quoted phrase)
    # NOTE(review): this is a PHP 4-style (class-named) constructor; PHP 8 no
    #   longer invokes such methods automatically on "new"
    function SearchEngine(&$DB, $FieldInfo, 
                          $TextRetrievalFunc = NULL, 
                          $ResourcePhraseMatchFunc = NULL)
    {
        # save database object for our use
        $this->DB =& $DB;

        # save field info
        $this->FieldInfo = $FieldInfo;

        # save retrieval function names (if supplied)
        $this->TextRetrievalFunc = $TextRetrievalFunc;
        $this->ResourcePhraseMatchFunc = $ResourcePhraseMatchFunc;

        # initialize internal values (arrays and counters start out empty so
        #       that later code never has to count or index a NULL)
        $this->AllTermsRequiredByDefault = FALSE;
        $this->FilterFuncs = array();
        $this->FieldIds = array();
        $this->RequiredTermCounts = array();
        $this->RequiredTermCount = 0;
        $this->InclusiveTermCount = 0;
        $this->ExcludedTermCount = 0;

        # define flags used for indicating word states (guarded because
        #       constants are global and more than one engine may be created)
        if (!defined("WORD_PRESENT")) {  define("WORD_PRESENT", 1);  }
        if (!defined("WORD_EXCLUDED")) {  define("WORD_EXCLUDED", 2);  }
        if (!defined("WORD_REQUIRED")) {  define("WORD_REQUIRED", 4);  }

        # set default debug state
        $this->DebugLevel = 0;
    }

    # set amount of debugging output printed while searching
    #   $Setting - new debug level (0 prints nothing; the search code prints
    #       progressively more detail at levels above 0, 1, 2, and 3)
    function DebugLevel($Setting)
    {
        $NewLevel = $Setting;
        $this->DebugLevel = $NewLevel;
    }


    # ---- search functions

    # run keyword search and return scores for requested slice of results
    #   $SearchString - search text (bare words, "quoted phrases", and a
    #       leading "-" or "+" to exclude or require a term)
    #   $StartingResult - zero-based offset of first result to return
    #   $NumberOfResults - maximum number of results to return
    # returns array of scores indexed by resource ID, highest score first
    #   (total number of matches is available afterward via NumberOfResults())
    function Search($SearchString, $StartingResult = 0, $NumberOfResults = 10)
    {
        # NOTE(review): SetDebugLevel() is defined outside the part of the file
        #   reviewed; it appears to hand back the search string to be used
        $SearchString = $this->SetDebugLevel($SearchString);
        if ($this->DebugLevel > 0) {  print("<table border='1'><tr><td>\n");  }
        # (search string is caller-supplied, so escape it before echoing it into HTML)
        if ($this->DebugLevel > 0) {  print("SE:  In Search() with search string \"".htmlspecialchars($SearchString)."\"<br>\n");  }

        # save start time to use in calculating search time
        $StartTime = getmicrotime();

        # start by assuming no results will be found
        $TrimmedScores = array();
        $this->NumberOfResultsAvailable = 0;

        # clear word counts (including per-resource required term tallies, so
        #       that a previous search on this object cannot affect this one)
        $this->InclusiveTermCount = 0;
        $this->RequiredTermCount = 0;
        $this->ExcludedTermCount = 0;
        $this->RequiredTermCounts = array();

        # parse search string
        $Words = $this->ParseSearchStringForWords($SearchString);
        if (!is_array($Words)) {  $Words = array();  }
        if ($this->DebugLevel > 1) {  print("SE:  Found ".count($Words)." words<br>\n");  }
        $Phrases = $this->ParseSearchStringForPhrases($SearchString);
        if (!is_array($Phrases)) {  $Phrases = array();  }
        if ($this->DebugLevel > 1) {  print("SE:  Found ".count($Phrases)." phrases<br>\n");  }

        # if only excluded terms specified
        if ($this->ExcludedTermCount && !$this->InclusiveTermCount)
        {
            # load all records
            if ($this->DebugLevel > 1) {  print("SE:  Loading all records<br>\n");  }
            $Scores = $this->LoadScoresForAllRecords();
            if (!is_array($Scores)) {  $Scores = array();  }
        }
        else
        {
            # perform searches
            $Scores = $this->SearchForWords($Words);
            if (!is_array($Scores)) {  $Scores = array();  }
            if ($this->DebugLevel > 1) {  print("SE:  Found ".count($Scores)." results after word search<br>\n");  }
            $Scores = $this->SearchForPhrases($Phrases, $Scores);
            if (!is_array($Scores)) {  $Scores = array();  }
            if ($this->DebugLevel > 1) {  print("SE:  Found ".count($Scores)." results after phrase search<br>\n");  }
        }

        # if search results found
        if (count($Scores) > 0)
        {
            # handle any excluded words
            $Scores = $this->FilterOnExcludedWords($Words, $Scores);

            # strip off any results that don't contain required words
            $Scores = $this->FilterOnRequiredWords($Scores);

            # perform any requested filtering
            if ($this->DebugLevel > 0) {  print("SE:    Have ".count($Scores)." results before filter callbacks<br>\n");  }
            $Scores = $this->FilterOnSuppliedFunctions($Scores);
            if (!is_array($Scores)) {  $Scores = array();  }

            # save total number of results available
            $this->NumberOfResultsAvailable = count($Scores);

            # sort result list by score (highest first, resource ID keys preserved)
            arsort($Scores, SORT_NUMERIC);

            # trim result list to match range requested by caller
            #       (slice the key list rather than the score list, because
            #       the keys are resource IDs and must not be renumbered)
            $ScoresKeys = array_slice(
                    array_keys($Scores), $StartingResult, $NumberOfResults);
            foreach ($ScoresKeys as $Key) {  $TrimmedScores[$Key] = $Scores[$Key];  }
        }

        # record search time
        $this->LastSearchTime = getmicrotime() - $StartTime;

        # return list of resources to caller
        if ($this->DebugLevel > 0) {  print("SE:  Ended up with ".$this->NumberOfResultsAvailable." results<br>\n");  }
        if ($this->DebugLevel > 0) {  print("</td></tr></table>\n");  }
        return $TrimmedScores;
    }

    # run search restricted to specific fields and return scores for the
    #       requested slice of results
    #   $SearchStrings - array indexed by field name, where each value is
    #       either one search string or an array of search strings for that
    #       field (the field name "Keyword" is always treated as a text field)
    #   $StartingResult - zero-based offset of first result to return
    #   $NumberOfResults - maximum number of results to return
    # returns array of scores indexed by resource ID, highest score first
    #   (total number of matches is available afterward via NumberOfResults())
    function FieldedSearch($SearchStrings, $StartingResult = 0, $NumberOfResults = 10)
    {
        # NOTE(review): SetDebugLevel() is defined outside the part of the file
        #   reviewed; it appears to hand back the search strings to be used
        $SearchStrings = $this->SetDebugLevel($SearchStrings);
        if ($this->DebugLevel > 0) {  print("<table border='1'><tr><td>\n");  }
        if ($this->DebugLevel > 0) {  print("SE:  In FieldedSearch() with ".count($SearchStrings)." search strings<br>\n");  }

        # save start time to use in calculating search time
        $StartTime = getmicrotime();

        # start by assuming no results will be found
        $TrimmedScores = array();
        $this->NumberOfResultsAvailable = 0;

        # clear word counts (including per-resource required term tallies, so
        #       that a previous search on this object cannot affect this one)
        $this->InclusiveTermCount = 0;
        $this->RequiredTermCount = 0;
        $this->ExcludedTermCount = 0;
        $this->RequiredTermCounts = array();

        # start with no scores, no parsed terms, and no non-text fields seen
        $Scores = array();
        $Words = array();
        $Phrases = array();
        $NonTextFieldPresent = FALSE;

        # for each field
        foreach ($SearchStrings as $FieldName => $SearchStringArray)
        {
            # convert search string to array if needed
            if (!is_array($SearchStringArray))
            {
                $SearchStringArray = array($SearchStringArray);
            }

            # determine whether field is searched as text
            $IsTextField = (($FieldName == "Keyword")
                    || ($this->FieldInfo[$FieldName]["FieldType"] == SEARCHFIELD_TEXT));

            # for each search string for this field
            foreach ($SearchStringArray as $StringKey => $SearchString)
            {
                # if field is text
                if ($IsTextField)
                {
                    if ($this->DebugLevel > 0) {  print("SE:    Searching text field \"".htmlspecialchars($FieldName)."\" for string \"".htmlspecialchars($SearchString)."\"<br>\n");  }

                    # normalize text and split into words (parsed terms are
                    #       kept per search string so that the exclusion pass
                    #       below sees the terms from every string, not just
                    #       the last one for the field)
                    $StringWords = $this->ParseSearchStringForWords($SearchString);
                    if (!is_array($StringWords)) {  $StringWords = array();  }
                    $Words[$FieldName][$StringKey] = $StringWords;

                    # calculate scores for matching resources
                    $Scores = $this->SearchForWords(
                            $StringWords, $FieldName, $Scores);
                    if (!is_array($Scores)) {  $Scores = array();  }
                    if ($this->DebugLevel > 3) {  print("SE:  Have ".count($Scores)." results after word search<br>\n");  }

                    # split into phrases
                    $StringPhrases = $this->ParseSearchStringForPhrases($SearchString);
                    if (!is_array($StringPhrases)) {  $StringPhrases = array();  }
                    $Phrases[$FieldName][$StringKey] = $StringPhrases;

                    # handle any (non-excluded) phrases
                    $Scores = $this->SearchForPhrases(
                            $StringPhrases, $Scores, $FieldName, TRUE, FALSE);
                    if (!is_array($Scores)) {  $Scores = array();  }
                    if ($this->DebugLevel > 3) {  print("SE:  Have ".count($Scores)." results after phrase search<br>\n");  }
                }
                else
                {
                    if ($this->DebugLevel > 0) {  print("SE:    Searching non-text field \"".htmlspecialchars($FieldName)."\" for string \"".htmlspecialchars($SearchString)."\"<br>\n");  }

                    # set flag indicating non-text field present
                    $NonTextFieldPresent = TRUE;
                }
            }
        }

        # if search results found 
        #         or no inclusive terms and exclusions or non-text field specified
        if (count($Scores) || (($this->InclusiveTermCount == 0)
                && ($this->ExcludedTermCount || $NonTextFieldPresent)))
        {
            # if non-text fields specified
            if ($NonTextFieldPresent)
            {
                # load list of records matching non-text fields
                $NonTextScores = $this->SearchForNonTextMatches($SearchStrings);
                if (!is_array($NonTextScores)) {  $NonTextScores = array();  }

                # if no inclusive text terms specified
                if ($this->InclusiveTermCount == 0)
                {
                    # use non-text field match list as result list
                    $Scores = $NonTextScores;
                }
                else
                {
                    # filter out results that are not on non-text field match list
                    foreach (array_keys($Scores) as $ResourceId)
                    {
                        if (isset($NonTextScores[$ResourceId]))
                        {
                            $Scores[$ResourceId] += $NonTextScores[$ResourceId];
                        }
                        else
                        {
                            unset($Scores[$ResourceId]);
                        }
                    }
                }
            }
            # else if no results found and exclusions specified
            elseif (!count($Scores) && $this->ExcludedTermCount)
            {
                # load all records
                $Scores = $this->LoadScoresForAllRecords();
                if (!is_array($Scores)) {  $Scores = array();  }
            }

            # for each text field searched (including "Keyword")
            foreach ($Words as $FieldName => $FieldWordLists)
            {
                # for each search string parsed for this field
                foreach ($FieldWordLists as $StringKey => $StringWords)
                {
                    # if there are words in search text
                    if (count($StringWords) > 0)
                    {
                        # handle any excluded words (checked against the
                        #       field that was actually searched)
                        $Scores = $this->FilterOnExcludedWords(
                                $StringWords, $Scores, $FieldName);
                    }

                    # handle any excluded phrases
                    $Scores = $this->SearchForPhrases(
                            $Phrases[$FieldName][$StringKey], $Scores, $FieldName, FALSE, TRUE);
                }
            }

            # strip off any results that don't contain required words
            $Scores = $this->FilterOnRequiredWords($Scores);

            # perform any requested filtering
            if ($this->DebugLevel > 1) {  print("SE:    Have ".count($Scores)." results before filter callbacks<br>\n");  }
            $Scores = $this->FilterOnSuppliedFunctions($Scores);
            if (!is_array($Scores)) {  $Scores = array();  }

            # save total number of results available
            $this->NumberOfResultsAvailable = count($Scores);

            # sort result list by score (highest first, resource ID keys preserved)
            arsort($Scores, SORT_NUMERIC);

            # trim result list to match range requested by caller
            #       (slice the key list rather than the score list, because
            #       the keys are resource IDs and must not be renumbered)
            $ScoresKeys = array_slice(
                    array_keys($Scores), $StartingResult, $NumberOfResults);
            foreach ($ScoresKeys as $Key) {  $TrimmedScores[$Key] = $Scores[$Key];  }
        }

        # record search time
        $this->LastSearchTime = getmicrotime() - $StartTime;

        # return list of resources to caller
        if ($this->DebugLevel > 0) {  print("SE:  Ended up with ".$this->NumberOfResultsAvailable." results<br>\n");  }
        if ($this->DebugLevel > 0) {  print("</td></tr></table>\n");  }
        return $TrimmedScores;
    }

    # register a callback that is consulted for every search result
    #   $FunctionName - name of function that takes a resource ID and returns
    #       TRUE when that resource should be dropped from the results
    function AddResultFilterFunction($FunctionName)
    {
        # append callback to list of filters applied after each search
        $Filters = $this->FilterFuncs;
        $Filters[] = $FunctionName;
        $this->FilterFuncs = $Filters;
    }

    # choose whether every inclusive search term must match (TRUE) or
    #       whether only terms prefixed with "+" are required (FALSE)
    #   $NewSetting - new value for setting (defaults to TRUE)
    function SearchTermsRequiredByDefault($NewSetting = TRUE)
    {
        $RequireAllTerms = $NewSetting;
        $this->AllTermsRequiredByDefault = $RequireAllTerms;
    }

    # report how many results the most recent search found in total
    #       (counted before the list was trimmed to the requested range)
    function NumberOfResults()
    {
        $TotalFound = $this->NumberOfResultsAvailable;
        return $TotalFound;
    }

    # report how long the most recent search took (difference between the
    #       getmicrotime() values taken at the start and end of the search)
    function SearchTime()
    {
        $Elapsed = $this->LastSearchTime;
        return $Elapsed;
    }


    # ---- database update functions

    # rebuild search index entries for one resource
    #   $ResourceId - ID of resource to update (negative IDs indicate
    #       temporary records and are ignored)
    # NOTE(review): GetResourceFieldText() and RecordSearchInfoForText() are
    #   defined outside the part of the file reviewed
    function UpdateForResource($ResourceId)
    {
        # force ID to integer so that it is safe to place in SQL below
        $ResourceId = intval($ResourceId);

        # bail out if resource ID is negative (indicating a temporary record)
        if ($ResourceId < 0) {  return;  }

        # clear word count added flags for this resource
        unset($this->WordCountAdded);

        # delete any existing info for this resource
        $this->DB->Query("DELETE FROM SearchWordCounts WHERE ResourceId = ".$ResourceId);

        # for each metadata field
        foreach ($this->FieldInfo as $FieldName => $Info)
        {
            # skip field unless search weight for field is positive
            if (!isset($Info["Weight"]) || !($Info["Weight"] > 0)) {  continue;  }

            # retrieve text for field (may be one string or array of strings)
            $Text = $this->GetResourceFieldText($ResourceId, $FieldName);
            $Strings = is_array($Text) ? $Text : array($Text);

            # for each text string
            foreach ($Strings as $String)
            {
                # record search info for text
                $this->RecordSearchInfoForText($ResourceId, $FieldName,
                                               $Info["Weight"], $String,
                                               $Info["InKeywordSearch"]);
            }
        }
    }

    # rebuild search index entries for a run of resources
    #   $StartingResourceId - lowest resource ID to update
    #   $NumberOfResources - maximum number of resources to update
    # returns ID of last resource updated (NULL if no resource had an ID at
    #       or above the starting ID)
    function UpdateForResources($StartingResourceId, $NumberOfResources)
    {
        # use a separate database object for walking the resource list
        #       (UpdateForResource() runs its own queries via $this->DB while
        #       this result set is still being read)
        $DB = new SPTDatabase;

        # force values to integers so that they are safe to place in SQL
        $StartingResourceId = intval($StartingResourceId);
        $NumberOfResources = intval($NumberOfResources);

        # assume no resources will be updated
        $LastResourceId = NULL;

        # for specified number of resources starting at specified ID
        $DB->Query("SELECT ResourceId FROM Resources"
                ." WHERE ResourceId>=".$StartingResourceId
                ." ORDER BY ResourceId LIMIT ".$NumberOfResources);
        while ($Resource = $DB->FetchRow())
        {
            # update search info for resource
            $LastResourceId = $Resource["ResourceId"];
            $this->UpdateForResource($LastResourceId);
        }

        # return ID of last resource updated to caller
        return $LastResourceId;
    }

    # remove all search index entries for a resource
    #   $ResourceId - ID of resource to drop from the word count table
    function DropResource($ResourceId)
    {
        # drop all entries pertaining to resource from word count table
        #       (ID is forced to integer so that it is safe to place in SQL)
        $this->DB->Query("DELETE FROM SearchWordCounts WHERE ResourceId = ".intval($ResourceId));
    }


    # ---- PRIVATE INTERFACE -------------------------------------------------

    # database object handed to constructor (used for search index queries)
    var $DB;
    # debug output level (0 = no output; higher values print more detail)
    var $DebugLevel;
    # NOTE(review): not referenced in the part of the file reviewed;
    #   presumably a cache of word IDs - confirm
    var $WordIdCache;
    # cleared at the start of UpdateForResource(); the code that sets it is
    #   outside the part of the file reviewed
    var $WordCountAdded;
    # total number of results found by last search (before trimming to range)
    var $NumberOfResultsAvailable;
    # elapsed time of last search (difference of two getmicrotime() values)
    var $LastSearchTime;
    # names of callbacks registered via AddResultFilterFunction()
    var $FilterFuncs;
    # cache of SearchFields IDs, indexed by field name (see GetFieldId())
    var $FieldIds;
    # TRUE if every inclusive search term must match, rather than only
    #   those terms prefixed with "+"
    var $AllTermsRequiredByDefault;
    # per-field settings handed to constructor, indexed by field name
    var $FieldInfo;
    # number of required terms found while parsing current search
    var $RequiredTermCount;
    # number of required terms matched so far, indexed by resource ID
    var $RequiredTermCounts;
    # number of non-excluded terms found while parsing current search
    var $InclusiveTermCount;
    # number of excluded ("-") terms found while parsing current search
    var $ExcludedTermCount;
    # name of text retrieval function handed to constructor (may be NULL)
    var $TextRetrievalFunc;
    # name of resource/phrase match function handed to constructor (may be NULL)
    var $ResourcePhraseMatchFunc;


    # ---- common private functions (used in both searching and DB build)

    # normalize text and split it into individual words
    #   $SearchString - text to parse
    #   $IgnorePhrases - if TRUE, words inside double quotes are kept (only
    #       the quote marks are removed); if FALSE, quoted phrases are
    #       removed from the text before it is split into words
    # returns array of flags (WORD_PRESENT, optionally combined with
    #       WORD_EXCLUDED or WORD_REQUIRED) indexed by lower-case word, with
    #       any leading "-" or "+" option character stripped from the word
    # also adds to the inclusive, excluded, and required term counts for the
    #       current search (a word that is required more than once in the
    #       same string is counted as required only once, because it can
    #       only be matched once per resource)
    function ParseSearchStringForWords($SearchString, $IgnorePhrases = FALSE)
    {
        # strip off any surrounding whitespace
        $Text = trim($SearchString);
        
        # set up normalization replacement strings
        # NOTE(review): the possessive pattern needs a character after the
        #   "'s", so a possessive at the very end of the text becomes a
        #   plural ("dog's" -> "dogs"); left unchanged here because this
        #   function is also used to build the index
        $Patterns = array(
                "/'s[^a-z0-9\-+]+/i", # get rid of possessive plurals
                "/'/",               # get rid of single quotes / apostrophes
                "/\"[^\"]*\"/",      # " get rid of phrases  (NOTE HARD-CODED INDEX BELOW!!!)
                "/[^a-z0-9\-+]+/i",  # convert non-alphanumerics / non-minus/plus to a space
                "/([^\\s])-/i",      # convert minus preceded by anything but whitespace to a space
                "/([^\\s])\\+/i",    # convert plus preceded by anything but whitespace to a space
                "/-\\s/i",           # convert minus followed by whitespace to a space
                "/\\+\\s/i",         # convert plus followed by whitespace to a space
                "/[ ]+/"             # convert multiple spaces to one space
                );
        $Replacements = array(
                " ",
                "",
                " ",
                " ",
                "\\1 ",
                "\\1 ",
                " ",
                " ",
                " "
                );
        
        # if we are supposed to ignore phrases (series of words in quotes)
        if ($IgnorePhrases)
        {
            # switch phrase removal to double quote removal
            $Patterns[2] = "/\"/";
        }

        # remove punctuation from text and normalize whitespace
        $Text = preg_replace($Patterns, $Replacements, $Text);

        # convert text to lower case
        $Text = strtolower($Text);

        # strip off any extraneous whitespace
        $Text = trim($Text);

        # start with no words found
        $Words = array();

        # if we have words left after parsing
        if (strlen($Text) > 0)
        {
            # for each word
            foreach (explode(" ", $Text) as $Word)
            {
                # strip off option character and note what it asked for
                #       (character must come off BEFORE word is used as an
                #       index, so that flags land on the word actually searched)
                $Flags = WORD_PRESENT;
                $OptionChar = substr($Word, 0, 1);
                if ($OptionChar == "-")
                {
                    $Word = substr($Word, 1);
                    $Flags |= WORD_EXCLUDED;
                }
                elseif ($OptionChar == "+")
                {
                    $Word = substr($Word, 1);
                    $Flags |= WORD_REQUIRED;
                }
                elseif ($this->AllTermsRequiredByDefault)
                {
                    $Flags |= WORD_REQUIRED;
                }

                # skip word if nothing is left (lone "-" or "+")
                if (($Word === FALSE) || ($Word === "")) {  continue;  }

                # retrieve flags from any earlier occurrence of word
                $PriorFlags = isset($Words[$Word]) ? $Words[$Word] : 0;

                # update term counts
                if ($Flags & WORD_EXCLUDED)
                {
                    $this->ExcludedTermCount++;
                }
                else
                {
                    $this->InclusiveTermCount++;
                    if (($Flags & WORD_REQUIRED) && !($PriorFlags & WORD_REQUIRED))
                    {
                        $this->RequiredTermCount++;
                    }
                }

                # save flags for word
                $Words[$Word] = $PriorFlags | $Flags;
            }
        }

        # return normalized words to caller
        return $Words;
    }

    # look up the SearchFields ID for a field name, adding the field to the
    #       SearchFields table if it is not already there
    #   $FieldName - name of field
    # returns ID for field (IDs are cached, so each name is only looked up
    #       in the database once per object)
    function GetFieldId($FieldName)
    {
        # answer from cache if this field has been looked up before
        if (isset($this->FieldIds[$FieldName]))
        {
            return $this->FieldIds[$FieldName];
        }

        # escape field name for use in SQL
        $EscapedName = addslashes($FieldName);

        # look for existing entry for field
        $this->DB->Query("SELECT FieldId FROM SearchFields "
                ."WHERE FieldName = '".$EscapedName."'");
        $Row = $this->DB->FetchRow();

        if ($Row)
        {
            # use ID from existing entry
            $Id = $Row["FieldId"];
        }
        else
        {
            # no entry found, so add one and use its newly assigned ID
            $this->DB->Query("INSERT INTO SearchFields (FieldName) "
                    ."VALUES ('".$EscapedName."')");
            $Id = $this->DB->LastInsertId("SearchFields");
        }

        # remember ID for next time and return it to caller
        $this->FieldIds[$FieldName] = $Id;
        return $this->FieldIds[$FieldName];
    }


    # ---- private functions used in searching

    # add word match scores for one field to a score list
    #   $Words - array of word flags indexed by word (as returned by
    #       ParseSearchStringForWords())
    #   $FieldName - name of field to search (defaults to "Keyword")
    #   $Scores - existing scores indexed by resource ID (optional)
    # returns array of scores indexed by resource ID, where each matching
    #       word adds its stored count for the field to the resource's score
    #       (always an array, empty if nothing has matched)
    # also tallies, per resource, how many required words were matched
    function SearchForWords(
            $Words, $FieldName = "Keyword", $Scores = NULL)
    {
        $DB =& $this->DB;

        # make sure we have arrays to add to
        if (!is_array($Scores)) {  $Scores = array();  }
        if (!is_array($this->RequiredTermCounts)) {  $this->RequiredTermCounts = array();  }

        # grab field ID
        $FieldId = $this->GetFieldId($FieldName);

        # for each word
        foreach ($Words as $Word => $Flags)
        {
            if ($this->DebugLevel > 2) {  print("SE: Searching for word '".$Word."' in field ".$FieldName."<br>\n");  }

            # skip word if it is excluded (exclusions are applied separately)
            if ($Flags & WORD_EXCLUDED) {  continue;  }

            # look up record ID for word
            if ($this->DebugLevel > 2) {  print("SE:  Looking up word \"".$Word."\"<br>\n");  }
            $WordId = $DB->Query("SELECT WordId FROM SearchWords WHERE WordText='"
                    .addslashes($Word)."'", "WordId");

            # skip word if it is not in DB
            if ($WordId == FALSE) {  continue;  }

            # determine whether matches for this word count toward required total
            $IsRequired = ($this->AllTermsRequiredByDefault || ($Flags & WORD_REQUIRED));

            # look up counts for word
            $DB->Query("SELECT ResourceId,Count FROM SearchWordCounts "
                    ."WHERE WordId=".$WordId." AND FieldId=".$FieldId);

            # for each count
            while ($Record = $DB->FetchRow())
            {
                $ResourceId = $Record["ResourceId"];

                # if all words required or word flagged as required
                if ($IsRequired)
                {
                    # increment required word count for record
                    if (!isset($this->RequiredTermCounts[$ResourceId]))
                    {
                        $this->RequiredTermCounts[$ResourceId] = 0;
                    }
                    $this->RequiredTermCounts[$ResourceId]++;
                }

                # add to resource record score
                if (!isset($Scores[$ResourceId])) {  $Scores[$ResourceId] = 0;  }
                $Scores[$ResourceId] += $Record["Count"];
            }
        }

        # return basic scores to caller
        return $Scores;
    }

    # pull quoted phrases out of a search string
    #   $SearchString - text to parse
    # returns array of flags (WORD_PRESENT, optionally combined with
    #       WORD_EXCLUDED or WORD_REQUIRED) indexed by phrase, where each
    #       phrase has been trimmed and run through addslashes() (always an
    #       array, empty if the string contains no complete quoted phrase)
    # also adds to the inclusive, excluded, and required term counts for the
    #       current search (a phrase that is required more than once in the
    #       same string is counted as required only once)
    function ParseSearchStringForPhrases($SearchString)
    {
        # start with no phrases found
        $Phrases = array();

        # split into chunks delimited by double quote marks
        #       (odd-numbered chunks are the text inside quotes, and each
        #       even-numbered chunk is the text leading up to the next quote)
        $Pieces = explode("\"", $SearchString);   # "
        $PieceCount = count($Pieces);

        # for each pair of chunks (an unterminated quote is never treated as a phrase)
        for ($Index = 2;  $Index < $PieceCount;  $Index += 2)
        {
            # grab phrase from chunk
            $Phrase = trim(addslashes($Pieces[$Index - 1]));

            # skip empty phrases
            if ($Phrase === "") {  continue;  }

            # grab option character (if any) that immediately preceded phrase
            $OptionChar = substr($Pieces[$Index - 2], -1);

            # retrieve flags from any earlier occurrence of phrase
            $PriorFlags = isset($Phrases[$Phrase]) ? $Phrases[$Phrase] : 0;

            # set flags to reflect any option characters
            $Flags = WORD_PRESENT;
            if ($OptionChar == "-")
            {
                $Flags |= WORD_EXCLUDED;
                $this->ExcludedTermCount++;
            }
            else
            {
                $this->InclusiveTermCount++;
                if ($this->AllTermsRequiredByDefault || ($OptionChar == "+"))
                {
                    $Flags |= WORD_REQUIRED;
                    if (!($PriorFlags & WORD_REQUIRED))
                    {
                        $this->RequiredTermCount++;
                    }
                }
            }

            # save flags for phrase
            $Phrases[$Phrase] = $PriorFlags | $Flags;
        }

        # return phrases to caller
        return $Phrases;
    }

    # retrieve list of resources that contain a phrase in a field
    #   $FieldName - name of field to look in
    #   $Phrase - phrase to look for
    # returns whatever the resource/phrase match function handed to the
    #       constructor returns (callers iterate over it as a list of
    #       resource IDs)
    # terminates the script with an error message if no usable match
    #       function was supplied
    function GetResourcePhraseMatchList($FieldName, $Phrase)
    {
        # copy callback into local variable so that it can be invoked as a
        #       variable function (wrapping the property in ${...} would
        #       instead look up a variable named after the function)
        $MatchFunc = $this->ResourcePhraseMatchFunc;

        # if field resource/phrase match function specified and available
        if (($MatchFunc != NULL) && is_callable($MatchFunc))
        {
            # call resource/phrase match function
            $MatchingResources = $MatchFunc($FieldName, $Phrase);

            # return matches (if any) to caller
            return $MatchingResources;
        }
        else
        {
            # error out
            exit("<br>SE - ERROR:  no resource/phrase match function found<br>\n");
        }
    }

    # apply phrase matches to a score list
    #   $Phrases - array of phrase flags indexed by phrase (as returned by
    #       ParseSearchStringForPhrases())
    #   $Scores - existing scores indexed by resource ID
    #   $FieldName - name of field to search (the default of "Keyword"
    #       searches every field marked "InKeywordSearch" in the field info)
    #   $ProcessNonExcluded - TRUE to add to scores for non-excluded phrases
    #   $ProcessExcluded - TRUE to remove resources that contain excluded phrases
    # returns updated scores indexed by resource ID (always an array)
    # also tallies, per resource, how many required phrases were matched
    function SearchForPhrases($Phrases, $Scores, $FieldName = "Keyword", 
            $ProcessNonExcluded = TRUE, $ProcessExcluded = TRUE)
    {
        # make sure we have arrays to work with
        if (!is_array($Scores)) {  $Scores = array();  }
        if (!is_array($this->RequiredTermCounts)) {  $this->RequiredTermCounts = array();  }

        # bail out if there are no phrases to process
        if (!is_array($Phrases) || (count($Phrases) == 0)) {  return $Scores;  }

        # if this is a keyword search
        if ($FieldName == "Keyword")
        {
            # for each field
            foreach ($this->FieldInfo as $KFieldName => $Info)
            {
                # if field is marked to be included in keyword searches
                if ($Info["InKeywordSearch"])
                {
                    # call ourself with that field
                    $Scores = $this->SearchForPhrases($Phrases, $Scores, $KFieldName, 
                                                      $ProcessNonExcluded, $ProcessExcluded);
                }
            }
            return $Scores;
        }

        # determine whether phrase matches count toward required total
        $AllRequired = $this->AllTermsRequiredByDefault;

        # for each phrase
        foreach ($Phrases as $Phrase => $Flags)
        {
            if ($this->DebugLevel > 2) {  print("SE: searching for phrase '".$Phrase."' in field ".$FieldName."<br>\n");  }

            # skip phrase unless we were asked to process phrases of its kind
            $IsExcluded = ($Flags & WORD_EXCLUDED) ? TRUE : FALSE;
            if ($IsExcluded && !$ProcessExcluded) {  continue;  }
            if (!$IsExcluded && !$ProcessNonExcluded) {  continue;  }

            # retrieve list of resources that contain phrase
            $ResourceIds = $this->GetResourcePhraseMatchList(
                    $FieldName, $Phrase);
            if (!is_array($ResourceIds)) {  continue;  }

            # for each resource that contains phrase
            foreach ($ResourceIds as $ResourceId)
            {
                if ($IsExcluded)
                {
                    # knock resource off of list
                    unset($Scores[$ResourceId]);
                }
                else
                {
                    # bump up resource record score (each phrase match is
                    #       worth a fixed 25 points)
                    if (!isset($Scores[$ResourceId])) {  $Scores[$ResourceId] = 0;  }
                    $Scores[$ResourceId] += 25;

                    # if all words required or phrase flagged as required
                    if ($AllRequired || ($Flags & WORD_REQUIRED))
                    {
                        # increment required word count for record
                        if (!isset($this->RequiredTermCounts[$ResourceId]))
                        {
                            $this->RequiredTermCounts[$ResourceId] = 0;
                        }
                        $this->RequiredTermCounts[$ResourceId]++;
                    }
                }
            }
        }

        # return updated scores to caller
        return $Scores;
    }

    # remove resources that contain any excluded word from a score list
    #   $Words - array of word flags indexed by word
    #   $Scores - scores indexed by resource ID
    #   $FieldName - name of field whose word counts are checked (defaults
    #       to "Keyword")
    # returns score list with offending resources removed
    function FilterOnExcludedWords($Words, $Scores, $FieldName = "Keyword")
    {
        $Database =& $this->DB;

        # look up ID of field being checked
        $SearchFieldId = $this->GetFieldId($FieldName);

        foreach ($Words as $Term => $TermFlags)
        {
            # only excluded words are of interest here
            if (!($TermFlags & WORD_EXCLUDED)) {  continue;  }

            # find ID for word (no ID means no resource contains the word)
            $TermId = $Database->Query("SELECT WordId FROM SearchWords WHERE WordText='"
                    .$Term."'", "WordId");
            if ($TermId == FALSE) {  continue;  }

            # find every resource that has the word in this field
            $Database->Query("SELECT ResourceId FROM SearchWordCounts "
                    ."WHERE WordId=".$TermId." AND FieldId=".$SearchFieldId);
            while ($Row = $Database->FetchRow())
            {
                # nothing to do unless resource is currently in score list
                $MatchId = $Row["ResourceId"];
                if (!isset($Scores[$MatchId])) {  continue;  }

                # take resource out of score list
                if ($this->DebugLevel > 3) {  print("SE: filtering out resource ".$MatchId." because it contained word \"".$Term."\"<br>\n");  }
                unset($Scores[$MatchId]);
            }
        }

        # hand filtered score list back to caller
        return $Scores;
    }

    # remove resources that did not match every required term
    #   $Scores - scores indexed by resource ID
    # returns score list containing only those resources whose tally of
    #       matched required terms reaches the number of required terms in
    #       the current search (list is returned unchanged, apart from being
    #       forced to an array, when the search had no required terms)
    function FilterOnRequiredWords($Scores)
    {
        # make sure we have an array to work with
        if (!is_array($Scores)) {  $Scores = array();  }

        # if there were required words
        if ($this->RequiredTermCount > 0)
        {
            # for each resource
            foreach (array_keys($Scores) as $ResourceId)
            {
                # retrieve number of required terms matched by resource
                #       (resources with no tally matched none)
                $MatchedCount = isset($this->RequiredTermCounts[$ResourceId])
                        ? $this->RequiredTermCounts[$ResourceId] : 0;

                # if resource does not meet required word count
                if ($MatchedCount < $this->RequiredTermCount)
                {
                    # filter out resource
                    if ($this->DebugLevel > 3) {  print("SE: filtering out resource $ResourceId because it didn't have required word count of ".$this->RequiredTermCount." (only had ".$MatchedCount.")<br>\n");  }
                    unset($Scores[$ResourceId]);
                }
            }
        }

        # return filtered list to caller
        return $Scores;
    }

    # run results past the callbacks registered via AddResultFilterFunction()
    #   $Scores - scores indexed by resource ID
    # returns score list with every resource removed for which at least one
    #       callback returned TRUE (list is returned untouched when no
    #       callbacks have been registered)
    function FilterOnSuppliedFunctions($Scores)
    {
        # bail out if no filter functions have been set or there is nothing to filter
        #       (filter list is NULL until the first callback is added)
        if (!is_array($this->FilterFuncs) || (count($this->FilterFuncs) == 0)
                || !is_array($Scores))
        {
            return $Scores;
        }

        # for each result
        foreach (array_keys($Scores) as $ResourceId)
        {
            # for each filter function
            foreach ($this->FilterFuncs as $FuncName)
            {
                # if filter function return TRUE for result resource
                if ($FuncName($ResourceId))
                {
                    if ($this->DebugLevel > 2) {  print("SE:      filter callback rejected resource ".$ResourceId."<br>\n");  }
                    # discard result
                    unset($Scores[$ResourceId]);

                    # bail out of filter func loop (remaining callbacks
                    #       cannot change the outcome for this resource)
                    break;
                }
            }
        }

        # return filtered list to caller
        return $Scores;
    }

    # Find resources that satisfy the non-text (numeric, date, and date
    # range) conditions in a set of fielded search strings.
    #   $SearchStrings - search strings indexed by field name; each entry may
    #       be a single string or an array of strings, and each string may
    #       start with an operator (>=, <=, !=, is, >, <, or =), with "="
    #       assumed when no operator is given
    #   returns array indexed by resource ID, containing only the resources
    #       that met the conditions (empty when no usable non-text condition
    #       was supplied, or when a numeric field was given a value that is
    #       not a number or NULL test)
    function SearchForNonTextMatches($SearchStrings)
    {
        $DB =& $this->DB;

        # start with no results, no conditions, and no required match count
        $Results = array();
        $SimpleQueryCondition = "";
        $ControlledNameQueries = array();
        $ResultCountNeeded = 0;

        # for each field
        foreach ($SearchStrings as $FieldName => $SearchStringArray)
        {
            # convert search string to array if needed
            if (!is_array($SearchStringArray))
            {
                $SearchStringArray = array($SearchStringArray);
            }

            # look up type and database column for field (NULL if not known)
            $FieldType = isset($this->FieldInfo[$FieldName]["FieldType"])
                    ? $this->FieldInfo[$FieldName]["FieldType"] : NULL;
            $DBFieldName = isset($this->FieldInfo[$FieldName]["DBFieldName"])
                    ? $this->FieldInfo[$FieldName]["DBFieldName"] : NULL;

            # text fields are not handled by this method
            if ($FieldType == SEARCHFIELD_TEXT) {  continue;  }

            # for each search string for this field
            foreach ($SearchStringArray as $SearchString)
            {
                # determine operator (and how much of the term it occupies)
                $Term = trim($SearchString);
                $FirstChar = substr($Term, 0, 1);
                $FirstTwoChars = substr($Term, 0, 2);
                if (in_array($FirstTwoChars, array(">=", "<=", "!=", "is"), TRUE))
                {
                    $Operator = $FirstTwoChars;
                    $OperatorLength = 2;
                }
                elseif (($FirstChar === ">") || ($FirstChar === "<"))
                {
                    $Operator = $FirstChar;
                    $OperatorLength = 1;
                }
                else
                {
                    $Operator = "=";
                    $OperatorLength = ($FirstChar === "=") ? 1 : 0;
                }

                # determine value (only the leading operator is removed, so
                #       that characters inside the value are left intact)
                $RawValue = trim((string)substr($Term, $OperatorLength));

                # skip terms that have no value to compare against
                if ($RawValue === "") {  continue;  }

                # cast to float if value is numeric
                $Value = is_numeric($RawValue) ? (float)$RawValue : $RawValue;

                # build query based on field type
                switch ($FieldType)
                {
                    case SEARCHFIELD_NUMERIC:
                        # only a number (or a NULL test for the "is" operator)
                        #       may be placed in the query, because the value
                        #       is not quoted in the SQL
                        if ($Operator == "is")
                        {
                            $SqlValue = strtoupper(preg_replace("/\\s+/", " ", $RawValue));
                            $ValueIsUsable = ($SqlValue === "NULL")
                                    || ($SqlValue === "NOT NULL");
                        }
                        else
                        {
                            $SqlValue = $Value;
                            $ValueIsUsable = is_numeric($RawValue)
                                    && is_finite((float)$RawValue);
                        }

                        # a numeric field cannot match anything else, so the
                        #       search as a whole has no matches
                        if (!$ValueIsUsable)
                        {
                            if ($this->DebugLevel > 0) {  print("SE:  Rejected unusable value for numeric field (<i>".htmlspecialchars($FieldName." ".$Operator." ".$RawValue)."</i>)<br>\n");  }
                            return array();
                        }

                        # add comparison to simple query condition
                        $SimpleQueryCondition .=
                                " AND ".$DBFieldName." $Operator ".$SqlValue;
                        if ($this->DebugLevel > 2) {  print("SE:  Performing non-text search for numeric field (<i>$FieldName $Operator $SqlValue</i>)<br>\n");  }
                        break;

                    case SEARCHFIELD_DATE:
                        # build date query and add to simple query condition
                        $Date = new SPTDate($Value);
                        $SimpleQueryCondition .= " AND ".$Date->SqlCondition(
                                $DBFieldName, NULL, $Operator);
                        if ($this->DebugLevel > 2) {  print("SE:  Performing non-text search for date field (<i>$FieldName $Operator $Value</i>)<br>\n");  }
                        break;

                    case SEARCHFIELD_DATERANGE:
                        # build date range query and add to simple query condition
                        $Date = new SPTDate($Value);
                        $SimpleQueryCondition .= " AND ".$Date->SqlCondition(
                                $DBFieldName."Begin", NULL, $Operator);
                        if ($this->DebugLevel > 2) {  print("SE:  Performing non-text search for date range field (<i>$FieldName $Operator $Value</i>)<br>\n");  }
                        break;

                    default:
                        if ($this->DebugLevel > 0) {  print("SE:  Attempted to perform non-text search on field with UNKNOWN FIELD TYPE \"".$FieldType."\" (Search: <i>$FieldName $Operator $Value</i>)<br>\n");  }
                        break;
                }
            }
        }

        # if simple query condition found
        if (strlen($SimpleQueryCondition))
        {
            # clean up simple query condition (drop the leading " AND ")
            $SimpleQueryCondition = substr($SimpleQueryCondition, 5);

            # perform query
            if ($this->DebugLevel > 3) {  print("SE:  Simple query condition for non-text search is <i>$SimpleQueryCondition</i><br>\n");  }
            $DB->Query("SELECT ResourceId FROM Resources "
                    ."WHERE ".$SimpleQueryCondition);

            # add result of query to results
            while ($Record = $DB->FetchRow())
            {
                $Results[$Record["ResourceId"]] = 1;
            }
            $ResultCountNeeded = 1;
        }

        # if controlled name queries were found
        # (NOTE: nothing in this method currently adds entries to this list,
        #       so this section only runs if that is changed)
        if (count($ControlledNameQueries))
        {
            # for each controlled name query
            foreach ($ControlledNameQueries as $FieldName => $QueryList)
            {
                # build query string
                $Query = "SELECT Resources.ResourceId "
                        ."FROM Resources, ResourceNameInts "
                        ."WHERE Resources.ResourceId = ResourceNameInts.ResourceId";
                $Conjunction = "AND (";
                foreach ($QueryList as $Value)
                {
                    $Query .= " ".$Conjunction." ControlledNameId = ".$Value;
                    $Conjunction = "OR";
                }
                $Query .= " )";

                # perform query
                $DB->Query($Query);

                # update result counts with query result
                while ($Record = $DB->FetchRow())
                {
                    $Id = $Record["ResourceId"];
                    $Results[$Id] = isset($Results[$Id]) ? ($Results[$Id] + 1) : 1;
                }

                # increment count of results needed to be valid
                $ResultCountNeeded++;
            }

            # pare down results based on result count
            foreach ($Results as $ResourceId => $Count)
            {
                if ($Count < $ResultCountNeeded)
                {
                    unset($Results[$ResourceId]);
                }
            }
        }

        # return results to caller
        return $Results;
    }
    
    # Pass every search string in the supplied search info through
    # ExtractDebugLevel() and hand back the search info with the results
    # substituted in place.
    #   $SearchStrings - single search string, or array indexed by field name
    #       where each entry is a search string or an array of search strings
    #   returns search info in the same shape as it was supplied
    function SetDebugLevel($SearchStrings)
    {
        # a lone search string needs no traversal
        if (!is_array($SearchStrings))
        {
            return $this->ExtractDebugLevel($SearchStrings);
        }

        # walk through the entry for each field
        foreach (array_keys($SearchStrings) as $Field)
        {
            if (is_array($SearchStrings[$Field]))
            {
                # process each of the strings listed for the field
                foreach (array_keys($SearchStrings[$Field]) as $Position)
                {
                    $Processed = $this->ExtractDebugLevel(
                            $SearchStrings[$Field][$Position]);
                    $SearchStrings[$Field][$Position] = $Processed;
                }
            }
            else
            {
                # process the one string given for the field
                $Processed = $this->ExtractDebugLevel($SearchStrings[$Field]);
                $SearchStrings[$Field] = $Processed;
            }
        }

        # give processed search info back to caller
        return $SearchStrings;
    }
    
    # Look for a debug level indicator ("DBUGLVL=" followed by a digit from
    # 1 to 9) at the start of a search string and, if one is present, set
    # the debug level from it and strip the indicator from the string.
    #   $SearchString - search string to examine
    #   returns search string with indicator removed, or the unmodified
    #       string when it does not begin with an indicator
    function ExtractDebugLevel($SearchString)
    {
        # if search string begins with debug level indicator
        # (the level is captured directly, so that text which merely
        #       contains the indicator elsewhere is never mistaken for a level)
        if (preg_match("/^\\s*DBUGLVL=([1-9])/", $SearchString, $Matches))
        {
            # set debug level
            $Level = (int)$Matches[1];
            print("SE: setting debug level to $Level<br>\n");
            $this->DebugLevel = $Level;

            # remove indicator from search string
            $SearchString = str_replace("DBUGLVL=".$Level, "", $SearchString);
        }

        # return (possibly) modified search string to caller
        return $SearchString;
    }

    # Build a score list that contains every resource, each with a score
    # of 1.
    #   returns array of scores indexed by resource ID (empty array if the
    #       query returns no resources)
    function LoadScoresForAllRecords()
    {
        # start with empty list so that caller always gets an array back
        $Scores = array();

        # give every resource found the same score
        $this->DB->Query("SELECT ResourceId FROM Resources");
        while ($Record = $this->DB->FetchRow())
        {
            $Scores[$Record["ResourceId"]] = 1;
        }

        # return scores to caller
        return $Scores;
    }


    # ---- private functions used in building search database

    # Add to the stored count for a word within a field of a resource,
    # creating the count record the first time the word/field pair is seen.
    #   $WordId - ID of word
    #   $ResourceId - ID of resource the word was found in
    #   $FieldId - ID of field the word was found in
    #   $Weight - amount to add to count (or initial count for new record)
    function UpdateWordCount($WordId, $ResourceId, $FieldId, $Weight)
    {
        $DB =& $this->DB;

        # if word count already added to database
        # (checked with empty() because there is no entry at all for a
        #       word/field pair that has not yet been seen)
        if (!empty($this->WordCountAdded[$WordId][$FieldId]))
        {
            # update word count
            $DB->Query("UPDATE SearchWordCounts SET Count=Count+".$Weight." "
                    ."WHERE WordId=".$WordId." "
                            ."AND ResourceId=".$ResourceId." "
                            ."AND FieldId=".$FieldId);
        }
        else
        {
            # add word count to DB
            $DB->Query("INSERT INTO SearchWordCounts"
                    ." (WordId, ResourceId, FieldId, Count) VALUES"
                    ." (".$WordId.", ".$ResourceId.", ".$FieldId.", ".$Weight.")");

            # remember that we added count for this word
            $this->WordCountAdded[$WordId][$FieldId] = TRUE;
        }
    }

    # Retrieve the text for a field of a resource by calling the text
    # retrieval function that was supplied to the constructor.  Execution
    # is halted with an error message if no such function is available.
    #   $ResourceId - ID of resource to retrieve text for
    #   $FieldName - name of field to retrieve text for
    #   returns whatever the text retrieval function returns
    function GetResourceFieldText($ResourceId, $FieldName)
    {
        $RetrievalFunc = $this->TextRetrievalFunc;

        # if field text retrieval function specified and available
        if (($RetrievalFunc != NULL) && function_exists($RetrievalFunc))
        {
            # call text retrieval function by name and return text (if any)
            #       to caller
            return call_user_func($RetrievalFunc, $ResourceId, $FieldName);
        }

        # error out
        exit("<br>SE - ERROR:  no text retrieval function found<br>\n");
    }

    # Record search information (word counts) for the text of one field of
    # a resource.
    #   $ResourceId - ID of resource the text belongs to
    #   $FieldName - name of field the text came from
    #   $Weight - amount added to the keyword field count for each word
    #   $Text - text to record search information for
    #   $IncludeInKeyword - TRUE if words should also be counted for the
    #       "Keyword" field
    function RecordSearchInfoForText($ResourceId, $FieldName, $Weight, $Text, $IncludeInKeyword)
    {
        $DB =& $this->DB;

        # normalize text
        $Words = $this->ParseSearchStringForWords($Text, TRUE);

        # if there was text left after parsing
        if (count($Words) > 0)
        {
            # get ID for field
            $FieldId = $this->GetFieldId($FieldName);

            # if text should be included in keyword searches
            if ($IncludeInKeyword)
            {
                # get ID for keyword field
                $KeywordFieldId = $this->GetFieldId("Keyword");
            }

            # for each word
            foreach ($Words as $Word => $Flags)
            {
                # look up ID for word
                if (isset($this->WordIdCache[$Word]))
                {
                    $WordId = $this->WordIdCache[$Word];
                }
                else
                {
                    # (word is escaped because it is placed inside a quoted
                    #       SQL string)
                    $WordId = $DB->Query("SELECT WordId FROM SearchWords "
                            ."WHERE WordText='".addslashes($Word)."'", "WordId");
                    $this->WordIdCache[$Word] = $WordId;
                }

                # if word is not in DB
                if ($WordId == FALSE)
                {
                    # add word to DB
                    $DB->Query("INSERT INTO SearchWords (WordText) VALUES ('"
                            .addslashes($Word)."')");

                    # get record ID for newly added word
                    $WordId = $DB->LastInsertId("SearchWords");
                    $this->WordIdCache[$Word] = $WordId;
                }

                # update count for word
                $this->UpdateWordCount($WordId, $ResourceId, $FieldId, 1);

                # if text should be included in keyword searches
                if ($IncludeInKeyword)
                {
                    # update keyword field count for word
                    $this->UpdateWordCount(
                            $WordId, $ResourceId, $KeywordFieldId, $Weight);
                }
            }
        }
    }
}

# define flags used for indicating field types
# (these are the values stored under "FieldType" in the field info array
#       that is passed to the SearchEngine constructor)

# text field (skipped by SearchForNonTextMatches())
define("SEARCHFIELD_TEXT", 1);
# numeric field (value is compared directly against the field's DB column)
define("SEARCHFIELD_NUMERIC", 2);
# date field (condition is built by SPTDate for the field's DB column)
define("SEARCHFIELD_DATE", 3);
# date range field (condition is built by SPTDate for the DB column whose
#       name is the field's DB column name with "Begin" appended)
define("SEARCHFIELD_DATERANGE", 4);


# (includes required only for SPT search engine object)
require_once("include/SPT--SPTDatabase.php");
require_once("include/SPT--Resource.php");
require_once("include/SPT--MetadataSchema.php");

# Search engine configured for the Scout Portal Toolkit:  field information
# is built from the SPT metadata schema, field text is retrieved through
# Resource objects, and phrase matching is done with queries against the
# SPT database tables.
class SPTSearchEngine extends SearchEngine {

    # constructor entry point for PHP 5 and later (declared ahead of the
    #       class-named constructor and just hands off to it, so that the
    #       object is set up identically whichever one PHP invokes)
    function __construct()
    {
        $this->SPTSearchEngine();
    }

    # object constructor (PHP 4 style)
    function SPTSearchEngine()
    {
        # start with no field info, so that an array is always passed on
        $FieldInfo = array();

        # build field info from SPT metadata schema
        $this->Schema = new MetadataSchema();
        $Fields = $this->Schema->GetFields();
        foreach ($Fields as $Field)
        {
            $FieldName = $Field->Name();
            $FieldInfo[$FieldName]["Weight"] = $Field->SearchWeight();
            $FieldInfo[$FieldName]["InKeywordSearch"] = $Field->IncludeInKeywordSearch();
            $FieldInfo[$FieldName]["DBFieldName"] = $Field->DBFieldName();

            # translate metadata field type into search field type
            # (no search field type is set for any metadata field type
            #       that is not listed here)
            switch ($Field->Type())
            {
                case MDFTYPE_TEXT:
                case MDFTYPE_PARAGRAPH:
                case MDFTYPE_USER:
                case MDFTYPE_CONTROLLEDNAME:
                case MDFTYPE_OPTION:
                case MDFTYPE_IMAGE:
                    $FieldInfo[$FieldName]["FieldType"] = SEARCHFIELD_TEXT;
                    break;

                case MDFTYPE_NUMBER:
                case MDFTYPE_FLAG:
                    $FieldInfo[$FieldName]["FieldType"] = SEARCHFIELD_NUMERIC;
                    break;

                case MDFTYPE_TREE:
                    $FieldInfo[$FieldName]["FieldType"]             = SEARCHFIELD_TEXT;
                    $FieldInfo[$FieldName]["IntersectionTableName"] = "ResourceClassInts";
                    $FieldInfo[$FieldName]["AttributeTableName"]    = "Classifications";
                    $FieldInfo[$FieldName]["AttributeIdFieldName"]  = "ClassificationId";
                    $FieldInfo[$FieldName]["AttributeFieldName"]    = "SegmentName";
                    break;

                case MDFTYPE_DATE:
                    $FieldInfo[$FieldName]["FieldType"] = SEARCHFIELD_DATERANGE;
                    break;

                case MDFTYPE_TIMESTAMP:
                    $FieldInfo[$FieldName]["FieldType"] = SEARCHFIELD_DATE;
                    break;
            }
        }

        # create a database handle
        $DB = new SPTDatabase();

        # pass database handle and config values to real search engine object
        $this->SearchEngine($DB, $FieldInfo);
    }

    # overloaded version of method to retrieve text from DB
    #   $ResourceId - ID of resource to retrieve text for
    #   $FieldName - name of metadata field to retrieve
    #   returns value of field as supplied by the Resource object
    function GetResourceFieldText($ResourceId, $FieldName)
    {
        # get resource object
        $Resource = new Resource($ResourceId);

        # retrieve text from resource object and return to caller
        return $Resource->Get($FieldName);
    }

    # overloaded version of method to retrieve resource/phrase match list
    #   $FieldName - name of metadata field to look for phrase in
    #   $Phrase - phrase to look for (matched case-insensitively)
    #   returns array of IDs of resources where the field contains the
    #       phrase (empty if nothing matched, the field is not known, or
    #       phrases are not handled for the field's type)
    function GetResourcePhraseMatchList($FieldName, $Phrase)
    {
        # start with no matches
        $MatchList = array();

        # nothing can match a field that is not in the schema
        $Field = $this->Schema->GetFieldByName($FieldName);
        if (!is_object($Field)) {  return $MatchList;  }

        # normalize and escape search phrase for use in SQL query
        $SearchPhrase = strtolower(addslashes($Phrase));

        # build query for matching list based on field type
        # (query is left as NULL when there is nothing to run, so that rows
        #       from some earlier query are never mistaken for matches)
        $Query = NULL;
        switch ($Field->Type())
        {
            case MDFTYPE_TEXT:
            case MDFTYPE_PARAGRAPH:
                $Query = "SELECT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(".$Field->DBFieldName().")) ";
                break;

            case MDFTYPE_IMAGE:
                $Query = "SELECT ResourceId FROM Resources "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(".$Field->DBFieldName()."AltText)) ";
                break;

            case MDFTYPE_CONTROLLEDNAME:
            case MDFTYPE_OPTION:
                $Query = "SELECT DISTINCT ResourceNameInts.ResourceId "
                        ."FROM ResourceNameInts, ControlledNameTypes, ControlledNames "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ControlledName)) "
                        ."AND ControlledNames.ControlledNameId = ResourceNameInts.ControlledNameId "
                        ."AND ControlledNameTypes.ControlledNameTypeName = '".addslashes($FieldName)."' "
                        ."AND ControlledNameTypes.ControlledNameTypeId = ControlledNames.ControlledNameTypeId";
                break;

            case MDFTYPE_TREE:
                $Query = "SELECT DISTINCT ResourceClassInts.ResourceId "
                        ."FROM ResourceClassInts, ClassificationTypes, Classifications "
                        ."WHERE POSITION('".$SearchPhrase."' IN LOWER(ClassificationName)) "
                        ."AND Classifications.ClassificationId = ResourceClassInts.ClassificationId "
                        ."AND ClassificationTypes.ClassificationTypeName = '".addslashes($FieldName)."' "
                        ."AND ClassificationTypes.ClassificationTypeId = Classifications.ClassificationTypeId";
                break;

            case MDFTYPE_USER:
                # look up ID of user whose user name or real name contains
                #       the phrase
                # NOTE(review): only a single user ID comes back from this
                #       lookup, so presumably just the first matching user
                #       is considered -- confirm against Database::Query()
                $UserId = $this->DB->Query("SELECT UserId FROM APUsers "
                                           ."WHERE POSITION('".$SearchPhrase."' IN LOWER(UserName)) "
                                           ."OR POSITION('".$SearchPhrase."' IN LOWER(RealName))", "UserId");
                if ($UserId != NULL)
                {
                    $Query = "SELECT ResourceId FROM Resources "
                            ."WHERE ".$Field->DBFieldName()." = ".(int)$UserId;
                }
                break;

            case MDFTYPE_NUMBER:
                # (test is made on the same integer value that is queried)
                $Number = (int)$SearchPhrase;
                if ($Number > 0)
                {
                    $Query = "SELECT ResourceId FROM Resources "
                            ."WHERE ".$Field->DBFieldName()." = ".$Number;
                }
                break;

            case MDFTYPE_FLAG:
            case MDFTYPE_DATE:
            case MDFTYPE_TIMESTAMP:
                # (these types not yet handled by search engine for phrases)
                break;
        }

        # if there was a query to run
        if ($Query !== NULL)
        {
            # build match list based on results returned from DB
            $this->DB->Query($Query);
            while ($Record = $this->DB->FetchRow())
            {
                $MatchList[] = $Record["ResourceId"];
            }
        }

        # return list of matching resources to caller
        return $MatchList;
    }

    # metadata schema that field info is built from and fields are looked
    #       up in
    var $Schema;
}


?>
