<?PHP

#
#   FILE:  SPT--Recommender.php
#
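#   METHODS PROVIDED:
#       Recommender($DB, $ItemTableName, $RatingTableName, $ItemIdFieldName,
#               $UserIdFieldName, $RatingFieldName, $ContentFields)
#           - constructor
#       DebugLevel($Setting)
#           - set level of debugging output
#       Recommend($UserId, $StartingResult, $NumberOfResults)
#           - get list of recommended items for user
#       AddResultFilterFunction($FunctionName)
#           - register callback used to filter results
#       NumberOfResults()
#           - get total number of results found by last Recommend() call
#       SearchTime()
#           - get time taken by last search
#       GetSourceList($UserId, $RecommendedItemId)
#           - get list of rated items that led to a recommendation
#       UpdateForItems($StartingItemId, $NumberOfItems)
#           - update stored correlations for a range of items
#       UpdateForItem($ItemId, $FullPass)
#           - update stored correlations for a single item
#       DropItem($ItemId)
#           - remove all stored correlations for an item
#       PruneCorrelations()
#           - discard correlations that are at or below average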
#
#   AUTHOR:  Edward Almasy
#
#   Part of the Scout Portal Toolkit
#   Copyright 2002 Internet Scout Project
#   http://scout.cs.wisc.edu
#

require_once(dirname(__FILE__)."/SPT--SPTDatabase.php");


class Recommender {

    # ---- PUBLIC INTERFACE --------------------------------------------------

    # object constructor (PHP 5+ entry point)
    #   PHP 8 no longer treats a method named after the class as a
    #   constructor, so this forwards to the original Recommender() method,
    #   which subclasses may also call explicitly
    function __construct(&$DB, $ItemTableName, $RatingTableName, 
            $ItemIdFieldName, $UserIdFieldName, $RatingFieldName,
            $ContentFields)
    {
        $this->Recommender($DB, $ItemTableName, $RatingTableName,
                $ItemIdFieldName, $UserIdFieldName, $RatingFieldName,
                $ContentFields);
    }

    # object constructor (PHP 4-style name, also callable as a normal method)
    #   $DB - database object used for all queries (stored by reference)
    #   $ItemTableName - name of table holding the items
    #   $RatingTableName - name of table holding user ratings of items
    #   $ItemIdFieldName - name of the item ID column
    #   $UserIdFieldName - name of the user ID column in the rating table
    #   $RatingFieldName - name of the rating value column in the rating table
    #   $ContentFields - array of content field definitions, indexed by
    #           field name, each with "FieldType" and "Weight" entries
    #           (plus table/field names for controlled name fields)
    function Recommender(&$DB, $ItemTableName, $RatingTableName, 
            $ItemIdFieldName, $UserIdFieldName, $RatingFieldName,
            $ContentFields)
    {
        # set default parameters
        $this->ContentCorrelationThreshold = 1;

        # save database object
        $this->DB =& $DB;

        # save new configuration values
        $this->ItemTableName = $ItemTableName;
        $this->RatingTableName = $RatingTableName;
        $this->ItemIdFieldName = $ItemIdFieldName;
        $this->UserIdFieldName = $UserIdFieldName;
        $this->RatingFieldName = $RatingFieldName;
        $this->ContentFields = $ContentFields;

        # start with no result filter callbacks registered
        $this->FilterFuncs = array();

        # set default debug state
        $this->DebugLevel = 0;
    }

    # set debugging output level
    #   $Setting - new level (methods in this class print progress messages
    #           when the level exceeds their individual thresholds)
    function DebugLevel($Setting)
    {
        $NewLevel = $Setting;
        $this->DebugLevel = $NewLevel;
    }


    # ---- recommendation methods

    # get recommendations for a user, based on items the user has rated
    #   $UserId - ID of user to generate recommendations for
    #   $StartingResult - offset of first result to return (default 0)
    #   $NumberOfResults - maximum number of results to return (default 10)
    #   returns array of recommendation values, indexed by item ID, with
    #           the highest values first
    #   (the total number of results found is available afterward via
    #           NumberOfResults())
    function Recommend($UserId, $StartingResult = 0, $NumberOfResults = 10)
    {
        if ($this->DebugLevel > 0) {  print("REC:  Recommend({$UserId}, {$StartingResult}, {$NumberOfResults})<br>\n");  }

        # load in user ratings
        $Ratings = array();
        $DB =& $this->DB;
        $DB->Query("SELECT ".$this->ItemIdFieldName.", ".$this->RatingFieldName
                ." FROM ".$this->RatingTableName
                ." WHERE ".$this->UserIdFieldName." = ".$UserId);
        while ($Row = $DB->FetchRow())
        {
            $Ratings[$Row[$this->ItemIdFieldName]] = 
                    $Row[$this->RatingFieldName];
        }
        if ($this->DebugLevel > 1) {  print("REC:  user has rated ".count($Ratings)." items<br>\n");  }

        # for each item that user has rated
        $RecVals = array();
        foreach ($Ratings as $ItemId => $ItemRating)
        {
            # for each content correlation available for that item
            #       (each pair is stored only once, with the lower item ID
            #       in ItemIdA, so the rated item may be on either side)
            $DB->Query("SELECT Correlation, ItemIdA, ItemIdB "
                    ."FROM RecContentCorrelations "
                    ."WHERE ItemIdA = ".$ItemId." OR ItemIdB = ".$ItemId);
            while ($Row = $DB->FetchRow())
            {
                # pick out the item at the other end of the correlation
                $OtherItemId = ($Row["ItemIdA"] == $ItemId)
                        ? $Row["ItemIdB"] : $Row["ItemIdA"];

                # multiply that correlation by normalized rating and add
                #       resulting value to recommendation value for that item
                #       (ratings below 50 count against the other item)
                $Contribution = $Row["Correlation"] * ($ItemRating - 50);
                if (isset($RecVals[$OtherItemId]))
                {
                    $RecVals[$OtherItemId] += $Contribution;
                }
                else
                {
                    $RecVals[$OtherItemId] = $Contribution;
                }
            }
        }
        if ($this->DebugLevel > 1) {  print("REC:  found ".count($RecVals)." total recommendations<br>\n");  }

        # calculate result threshold as twice the (rounded) average
        #       correlation between items (zero if no correlations stored)
        $AverageCorrelation = $DB->Query("SELECT AVG(Correlation) "
                ."AS Average FROM RecContentCorrelations", "Average");
        $ResultThreshold = is_numeric($AverageCorrelation)
                ? (round($AverageCorrelation) * 2) : 0;

        # for each recommended item
        foreach ($RecVals as $ItemId => $RecVal)
        {
            # remove item from list if user already rated it
            if (isset($Ratings[$ItemId]))
            {
                unset($RecVals[$ItemId]);  
            }
            else
            {
                # scale recommendation value back to match thresholds
                $RecVals[$ItemId] = round($RecVal / 50);

                # remove item from recommendation list if value is below threshold
                if ($RecVals[$ItemId] < $ResultThreshold)
                {  
                    unset($RecVals[$ItemId]);  
                }
            }
        }
        if ($this->DebugLevel > 1) {  print("REC:  found ".count($RecVals)." positive recommendations<br>\n");  }

        # sort recommendation list by value (highest first)
        arsort($RecVals, SORT_NUMERIC);

        # save total number of results available
        $this->NumberOfResultsAvailable = count($RecVals);

        # trim result list to match range requested by caller
        #       (slicing the keys rather than the array keeps item IDs as keys)
        $RecValKeys = array_slice(
                array_keys($RecVals), $StartingResult, $NumberOfResults);
        $RecValSegment = array();
        foreach ($RecValKeys as $Key) 
        {  
            $RecValSegment[$Key] = $RecVals[$Key];  
        }

        # return recommendation list to caller
        return $RecValSegment;
    }

    # register a callback for filtering results
    #   $FunctionName - name of function that takes an item ID and returns
    #           TRUE if that item should be discarded
    #           (see FilterOnSuppliedFunctions())
    function AddResultFilterFunction($FunctionName)
    {
        # append name to whatever callbacks have already been registered
        $Callbacks = isset($this->FilterFuncs) ? $this->FilterFuncs : array();
        $Callbacks[] = $FunctionName;
        $this->FilterFuncs = $Callbacks;
    }

    # get total number of results found by the most recent Recommend() call
    #       (before trimming to the range requested by the caller)
    function NumberOfResults()
    {
        $TotalAvailable = $this->NumberOfResultsAvailable;
        return $TotalAvailable;
    }

    # get recorded time for last search
    #       (NOTE: nothing in this file assigns LastSearchTime, so unless
    #       it is set elsewhere this returns an unset value)
    function SearchTime()
    {
        $ElapsedTime = $this->LastSearchTime;
        return $ElapsedTime;
    }

    # get list of items rated by user that contributed to a recommendation
    #   $UserId - ID of user who received the recommendation
    #   $RecommendedItemId - ID of item that was recommended
    #   returns array of correlation values, indexed by ID of the rated
    #           item, in descending order of correlation
    #   (only items the user rated at 50 or above are considered)
    function GetSourceList($UserId, $RecommendedItemId)
    {
        # pull list of correlations from DB
        #       (a correlation qualifies if one side is the recommended item
        #       and either side is an item rated by the user)
        $RatingItemIdField = $this->RatingTableName.".".$this->ItemIdFieldName;
        $this->DB->Query("SELECT * FROM RecContentCorrelations, ".$this->RatingTableName
                ." WHERE (ItemIdA = ".$RecommendedItemId
                        ." OR ItemIdB = ".$RecommendedItemId.")"
                        ." AND ".$this->UserIdFieldName." = ".$UserId
                        ." AND (RecContentCorrelations.ItemIdA = ".$RatingItemIdField
                        ." OR RecContentCorrelations.ItemIdB = ".$RatingItemIdField.")"
                        ." AND ".$this->RatingFieldName." >= 50 "
                ." ORDER BY Correlation DESC");

        # for each correlation
        $SourceList = array();
        while ($Row = $this->DB->FetchRow())
        {
            # pick out the item at the other end from the recommended item
            if ($Row["ItemIdA"] == $RecommendedItemId)
            {
                $ItemId = $Row["ItemIdB"];
            }
            else
            {
                $ItemId = $Row["ItemIdA"];
            }

            # add item to recommendation source list
            $SourceList[$ItemId] = $Row["Correlation"];
        }

        # return recommendation source list to caller
        return $SourceList;
    }


    # ---- database update methods

    # update stored correlations for a batch of items
    #   $StartingItemId - lowest item ID to update (inclusive)
    #   $NumberOfItems - maximum number of items to update in this call
    #   returns ID of the last item examined (the last item updated if the
    #           batch limit was reached), or NULL if there are no items
    function UpdateForItems($StartingItemId, $NumberOfItems)
    {
        if ($this->DebugLevel > 0) {  print("REC:  UpdateForItems({$StartingItemId}, {$NumberOfItems})<br>\n");  }
        # make sure we have item IDs available
        $this->LoadItemIds();
        $ItemIds = isset($this->ItemIds) ? $this->ItemIds : array();

        # for every item
        $ItemsUpdated = 0;
        $LastItemId = NULL;
        foreach ($ItemIds as $ItemId)
        {
            $LastItemId = $ItemId;

            # if item ID is within requested range
            if ($ItemId >= $StartingItemId)
            {
                # update recommender info for item
                #       (full pass, so only pairs with later items are done)
                if ($this->DebugLevel > 1) {  print("REC:  doing item {$ItemId}<br>\n");  }
                $this->UpdateForItem($ItemId, TRUE);
                $ItemsUpdated++;

                # if we have done requested number of items
                if ($ItemsUpdated >= $NumberOfItems)
                {
                    # bail out
                    if ($this->DebugLevel > 1) {  print("REC:  bailing out with item {$ItemId}<br>\n");  }
                    return $ItemId;
                }
            }
        }

        # return ID of last item examined to caller
        return $LastItemId;
    }

    # rebuild stored correlations for a single item
    #   $ItemId - ID of item to update
    #   $FullPass - TRUE if caller is working through all items in ascending
    #           ID order (as UpdateForItems() does), in which case only
    #           pairs with higher-ID items are recalculated; FALSE (default)
    #           to recalculate correlations with every other item
    function UpdateForItem($ItemId, $FullPass = FALSE)
    {   
        if ($this->DebugLevel > 1) {  print("REC:  updating for item \"".$ItemId."\"<br>\n");  }
        $DB =& $this->DB;

        # make sure we have item IDs available
        $this->LoadItemIds();
        $ItemIds = isset($this->ItemIds) ? $this->ItemIds : array();

        # lock access to tables to speed up processing
# (commenting out table locking until it can be extended to handle tables
#       specified in the ContentFields array)
#        $DB->Query("LOCK TABLES ".$this->ItemTableName." READ, "
#                .$this->RatingTableName." READ, "
#                ."RecContentCorrelations WRITE");

        # clear existing correlations that are about to be recalculated
        #       (pairs are stored with the lower ID in ItemIdA, so on a
        #       full pass only rows with this item as ItemIdA are redone,
        #       while otherwise rows with this item on either side are)
        $DeleteCondition = "ItemIdA = ".$ItemId;
        if ($FullPass == FALSE)
        {
            $DeleteCondition .= " OR ItemIdB = ".$ItemId;
        }
        $DB->Query("DELETE FROM RecContentCorrelations "
                ."WHERE ".$DeleteCondition);

        # for every item
        foreach ($ItemIds as $Id)
        {
            # if not a full pass or item is later in list than current item
            if (($FullPass == FALSE) || ($Id > $ItemId))
            {
                # update correlation value for item and target item
                $this->UpdateContentCorrelation($ItemId, $Id);
            }
        }

        # release lock on tables
#        $DB->Query("UNLOCK TABLES");
    }

    # remove every stored correlation that involves the specified item
    #   $ItemId - ID of item being dropped
    function DropItem($ItemId)
    {
        # item may appear on either side of a correlation
        $DeleteQuery = "DELETE FROM RecContentCorrelations"
                ." WHERE ItemIdA = {$ItemId} OR ItemIdB = {$ItemId}";
        $this->DB->Query($DeleteQuery);
    }

    # discard all stored correlations that are at or below the average
    #       correlation value
    function PruneCorrelations()
    {
        # get average correlation
        $AverageCorrelation = $this->DB->Query("SELECT AVG(Correlation) "
                ."AS Average FROM RecContentCorrelations", "Average");

        # nothing to prune if no average is available (e.g. table is empty),
        #       and building the DELETE without a value would be invalid SQL
        if (!is_numeric($AverageCorrelation)) {  return;  }

        # dump all correlations at or below average
        $this->DB->Query("DELETE FROM RecContentCorrelations "
                ."WHERE Correlation <= ".$AverageCorrelation);
    }


    # ---- PRIVATE INTERFACE -------------------------------------------------

    # minimum total correlation a pair must reach to be stored in the DB
    #       (set to 1 by the constructor)
    var $ContentCorrelationThreshold;
    # content field definitions, indexed by field name (each entry has
    #       "FieldType" and "Weight", plus table/field names for
    #       controlled name fields)
    var $ContentFields;
    # name of table containing items
    var $ItemTableName;
    # name of table containing user ratings
    var $RatingTableName;
    # name of item ID column
    var $ItemIdFieldName;
    # name of user ID column in rating table
    var $UserIdFieldName;
    # name of rating value column in rating table
    var $RatingFieldName;
    # list of all item IDs in ascending order (loaded on demand by LoadItemIds())
    var $ItemIds;
    # database object used for all queries
    var $DB;
    # names of callbacks registered via AddResultFilterFunction()
    var $FilterFuncs;
    # value returned by SearchTime() (not assigned anywhere in this file)
    var $LastSearchTime;
    # total result count from most recent Recommend() call
    var $NumberOfResultsAvailable;
    # current debug output level (0 = no debug output)
    var $DebugLevel;


    # load list of all item IDs (in ascending order) into $this->ItemIds,
    #       if not already loaded
    function LoadItemIds()
    {
        # if item IDs not already loaded
        if (!isset($this->ItemIds))
        {
            # start with empty list so that an empty item table still
            #       leaves us with an array (and is not queried again)
            $this->ItemIds = array();

            # load item IDs from DB
            $this->DB->Query("SELECT ".$this->ItemIdFieldName." AS Id FROM "
                    .$this->ItemTableName." ORDER BY ".$this->ItemIdFieldName);
            while ($Item = $this->DB->FetchRow())
            {
                $this->ItemIds[] = $Item["Id"];
            }
        }
    }

    # get parsed content (array of normalized words) for one field of an item
    #   $ItemId - ID of item to retrieve data for
    #   $RequestedFieldName - name of content field (key in ContentFields)
    #   returns array of words for the field (empty array if no data)
    #   (all content fields for an item are loaded and cached together, with
    #           data for at most 1000 items retained in the cache)
    function GetFieldData($ItemId, $RequestedFieldName)
    {
        # caches persist between calls
        static $ItemData;           # parsed words, by item ID and field name
        static $ControlledNames;    # controlled names, by field name and name ID
        static $CachedItemList;     # IDs of cached items, most recent first

        # if data not already loaded
        if (!isset($ItemData[$ItemId][$RequestedFieldName]))
        {
            # load item record from DB
            $DB =& $this->DB;
            $DB->Query("SELECT * FROM ".$this->ItemTableName
                    ." WHERE ".$this->ItemIdFieldName." = ".$ItemId);
            $Record = $DB->FetchRow();

            # for each content field
            foreach ($this->ContentFields as $FieldName => $FieldAttributes)
            {
                # parse and store content data based on content type
                switch ($FieldAttributes["FieldType"])
                {
                    case CONTENTFIELDTYPE_TEXT:
                        # treat missing record or empty field as no text
                        $RawText = (is_array($Record) && isset($Record[$FieldName]))
                                ? $Record[$FieldName] : "";

                        # normalize text and break into word array
                        $ItemData[$ItemId][$FieldName] = 
                                $this->NormalizeAndParseText($RawText);
                        break;

                    case CONTENTFIELDTYPE_CONTROLLEDNAME:
                        # get IDs for all controlled names
                        #       (collected before names are looked up, since
                        #       lookups issue further queries on the same
                        #       DB object)
                        $DB->Query("SELECT "
                                .$FieldAttributes["AttributeIdFieldName"]." "
                                ."FROM ".$FieldAttributes["IntersectionTableName"]." "
                                ."WHERE ".$this->ItemIdFieldName." = ".$ItemId);
                        $ControlledNameIds = array();
                        while ($Row = $DB->FetchRow())
                        {
                            $ControlledNameIds[] = 
                                    $Row[$FieldAttributes["AttributeIdFieldName"]];
                        }

                        # for each ID
                        $Names = array();
                        foreach ($ControlledNameIds as $ControlledNameId)
                        {
                            # if controlled name not already loaded
                            if (!isset($ControlledNames[$FieldName][$ControlledNameId]))
                            {
                                # load name in from database
                                $ControlledNames[$FieldName][$ControlledNameId] = 
                                        $DB->Query("SELECT ".$FieldAttributes["AttributeFieldName"]
                                                ." FROM ".$FieldAttributes["AttributeTableName"]
                                                ." WHERE ".$FieldAttributes["AttributeIdFieldName"]
                                                ." = ".$ControlledNameId,
                                                $FieldAttributes["AttributeFieldName"]);
                            }

                            # add name to list for this field
                            $Names[] = $ControlledNames[$FieldName][$ControlledNameId];
                        }

                        # join names with spaces (so that words from adjacent
                        #       names do not run together), then normalize
                        #       and break into word array
                        $ItemData[$ItemId][$FieldName] = 
                                $this->NormalizeAndParseText(implode(" ", $Names));
                        break;
                }
            }

            # add item to list of cached items
            if (!isset($CachedItemList)) {  $CachedItemList = array();  }
            array_unshift($CachedItemList, $ItemId);

            # if more items than cache limit
            if (count($CachedItemList) > 1000)
            {
                # remove oldest item from list and dump item data
                $DumpedItemId = array_pop($CachedItemList);
                unset($ItemData[$DumpedItemId]);
            }
        }

        # return cached data to caller (no words if field was not loaded)
        return isset($ItemData[$ItemId][$RequestedFieldName])
                ? $ItemData[$ItemId][$RequestedFieldName] : array();
    }

    # calculate and store the content correlation between two items
    #   $ItemIdA, $ItemIdB - IDs of the two items (order does not matter;
    #           nothing is done if they are the same item)
    #   (the stored value is the sum over all content fields of that
    #           field's correlation multiplied by the field's weight)
    function UpdateContentCorrelation($ItemIdA, $ItemIdB)
    {
        if ($this->DebugLevel > 6) {  print("REC:  updating correlation between items $ItemIdA and $ItemIdB<br>\n");  }

        # bail out if two items are the same
        if ($ItemIdA == $ItemIdB) {  return;  }

        # for each content field
        $TotalCorrelation = 0;
        foreach ($this->ContentFields as $FieldName => $FieldAttributes)
        {
            # load data
            $ItemAData = $this->GetFieldData($ItemIdA, $FieldName);
            $ItemBData = $this->GetFieldData($ItemIdB, $FieldName);

            # start from zero for each field, so that a field of unknown
            #       type contributes nothing (rather than reusing the
            #       value from the previous field)
            $Correlation = 0;

            # call appropriate routine to get correlation
            switch (intval($FieldAttributes["FieldType"]))
            {
                case CONTENTFIELDTYPE_TEXT:
                case CONTENTFIELDTYPE_CONTROLLEDNAME:
                    $Correlation = $this->CalcTextCorrelation(
                            $ItemAData, $ItemBData);
                    break;
            }

            # add correlation multiplied by weight to total
            $TotalCorrelation += $Correlation * $FieldAttributes["Weight"];
        }

        # save new correlation
        $this->ContentCorrelation($ItemIdA, $ItemIdB, $TotalCorrelation);
    }

    # convert text into an array of normalized words
    #   $Text - text to parse (may contain HTML tags)
    #   returns array of lower-case words with HTML tags, punctuation,
    #           and stop words removed (array keys are not renumbered
    #           after stop words are removed)
    function NormalizeAndParseText($Text)
    {
        # words that carry no content (the empty string is included so that
        #       empty text yields an empty word list)
        static $StopWords = array(
                "a",
                "about",
                "also",
                "an",
                "and",
                "are",
                "as",
                "at",
                "be",
                "but",
                "by",
                "can",
                "either",
                "for",
                "from",
                "has",
                "he",
                "her",
                "here",
                "hers",
                "him",
                "his",
                "how",
                "i",
                "if",
                "in",
                "into",
                "is",
                "it",
                "its",
                "me",
                "neither",
                "no",
                "nor",
                "not",
                "of",
                "on",
                "or",
                "so",
                "she",
                "than",
                "that",
                "the",
                "their",
                "them",
                "then",
                "there",
                "they",
                "this",
                "through",
                "to",
                "too",
                "very",
                "what",
                "when",
                "where",
                "while",
                "who",
                "why",
                "will",
                "you",
                "");

        # strip any HTML tags
        $Text = strip_tags($Text);

        # replace punctuation with spaces
        #       (a character class is needed here; without the brackets the
        #       pattern only matches the punctuation characters appearing
        #       together as one literal sequence)
        $Text = preg_replace("/[,.?\\-()\\[\\]\"]/", " ", $Text);

        # normalize whitespace
        $Text = trim(preg_replace("/[\\s]+/", " ", $Text));

        # convert to all lower case
        $Text = strtolower($Text);

        # split text into arrays of words
        $Words = explode(" ", $Text);

        # filter out all stop words
        $Words = array_diff($Words, $StopWords);

        # return word array to caller
        return $Words;
    }

    # score the similarity of two word lists
    #   $WordsA, $WordsB - arrays of words
    #   returns number of entries in $WordsA that also appear in $WordsB
    #           (a word repeated in $WordsA is counted once per occurrence)
    function CalcTextCorrelation($WordsA, $WordsB)
    {
        return count(array_intersect($WordsA, $WordsB));
    }

    # store or retrieve the content correlation for a pair of items
    #   $ItemIdA, $ItemIdB - IDs of the two items (in either order)
    #   $NewCorrelation - value to store, or -1 (default) to look up the
    #           currently stored value
    #   returns value stored or retrieved, or 0 if the new value was below
    #           the storage threshold or no stored value was found
    function ContentCorrelation($ItemIdA, $ItemIdB, $NewCorrelation = -1)
    {
        # pairs are always keyed with the lower item ID first
        if ($ItemIdA > $ItemIdB)
        {
            list($ItemIdA, $ItemIdB) = array($ItemIdB, $ItemIdA);
        }

        # if no new value was supplied then this is a lookup
        if ($NewCorrelation == -1)
        {
            # retrieve correlation value from DB (zero if none found)
            $StoredValue = $this->DB->Query(
                    "SELECT Correlation FROM RecContentCorrelations "
                            ."WHERE ItemIdA = {$ItemIdA} AND ItemIdB = {$ItemIdB}",
                    "Correlation");
            return ($StoredValue == FALSE) ? 0 : $StoredValue;
        }

        # values that do not reach the threshold are not stored
        if (!($NewCorrelation >= $this->ContentCorrelationThreshold))
        {
            return 0;
        }

        # insert new correlation value in DB and report it back
        $this->DB->Query("INSERT INTO RecContentCorrelations "
                ."(ItemIdA, ItemIdB, Correlation) "
                ."VALUES ({$ItemIdA}, {$ItemIdB}, {$NewCorrelation})");
        return $NewCorrelation;
    }

    # remove results rejected by any registered filter callback
    #   $Results - array of results, indexed by item ID
    #   returns $Results without the entries for which any callback
    #           registered via AddResultFilterFunction() returned TRUE
    function FilterOnSuppliedFunctions($Results)
    {
        # if filter functions have been set
        #       (list may never have been initialized if none were added,
        #       so check with empty() rather than count())
        if (!empty($this->FilterFuncs))
        {
            # for each result
            foreach ($Results as $ResourceId => $Result)
            {
                # for each filter function
                foreach ($this->FilterFuncs as $FuncName)
                {
                    # if filter function return TRUE for result resource
                    if ($FuncName($ResourceId))
                    {
                        # discard result
                        if ($this->DebugLevel > 2) {  print("REC:      filter callback rejected resource {$ResourceId}<br>\n");  }
                        unset($Results[$ResourceId]);

                        # no need to consult remaining filters for this result
                        break;
                    }
                }
            }
        }

        # return filtered list to caller
        return $Results;
    }
}

# define content field types
#       (values for the "FieldType" entry in content field definitions)
# free text stored directly in a column of the item table
define("CONTENTFIELDTYPE_TEXT", 1);
# names stored in a separate attribute table and linked to items via an
#       intersection table
define("CONTENTFIELDTYPE_CONTROLLEDNAME", 2);


# recommender preconfigured with the table and field names used by SPT
class SPTRecommender extends Recommender {

    # object constructor (PHP 5+ entry point)
    #   PHP 8 no longer treats a method named after the class as a
    #   constructor, so this forwards to the original SPTRecommender() method
    function __construct()
    {
        $this->SPTRecommender();
    }

    # object constructor (PHP 4-style name)
    function SPTRecommender()
    {
        # set up recommender configuration values for SPT
        $ItemTableName = "Resources";
        $RatingTableName = "ResourceRatings";
        $ItemIdFieldName = "ResourceId";
        $UserIdFieldName = "UserId";
        $RatingFieldName = "Rating";
        $ContentFields = array(
                "Title" => array(
                        "FieldType"             => CONTENTFIELDTYPE_TEXT,
                        "Weight"                => 20),
                "AlternateTitle" => array(
                        "FieldType"             => CONTENTFIELDTYPE_TEXT,
                        "Weight"                => 15),
                "Description" => array(
                        "FieldType"             => CONTENTFIELDTYPE_TEXT,
                        "Weight"                => 1),
                "Classification" => array(
                        "FieldType"             => CONTENTFIELDTYPE_CONTROLLEDNAME,
                        "Weight"                => 10,
                        "IntersectionTableName" => "ResourceClassInts",
                        "AttributeTableName"    => "Classifications",
                        "AttributeIdFieldName"  => "ClassificationId",
                        "AttributeFieldName"    => "SegmentName"),
                );

        # create a database connection for recommender to use
        #       (plain assignment, as assigning the result of "new" by
        #       reference is a parse error in PHP 7 and later)
        $DB = new SPTDatabase();

        # pass configuration info to real recommender object
        $this->Recommender($DB, $ItemTableName, $RatingTableName,
                $ItemIdFieldName, $UserIdFieldName, $RatingFieldName,
                $ContentFields);
    }
}


?>
