<?PHP
#
#   FILE:  ImportDataExecute.php
#
#   Part of the Collection Workflow Integration System (CWIS)
#   Copyright 2004-2013 Edward Almasy and Internet Scout Research Group
#   http://scout.wisc.edu/cwis/
#

# ----- EXPORTED FUNCTIONS ---------------------------------------------------

# ----- LOCAL FUNCTIONS ------------------------------------------------------

# emit whatever debugging output has been collected in $DebugInfo
function PrintDebugInfo()
{
    global $DebugInfo;

    echo $DebugInfo;
}

# translate an imported term into a controlled name ID, adding the
# controlled name if needed
#   $Field - metadata field the term belongs to (only its Id() is used)
#   $Value - term text as read from the import file
# returns the ID reported by the ControlledName object, or the trimmed
#   text unchanged when it is empty (no ControlledName is constructed then)
function AddControlledName($Field, $Value)
{
    global $ControlledNameCount;

    $Term = trim($Value);

    # blank values need no lookup
    if (empty($Term))
    {
        return $Term;
    }

    $CName = new ControlledName(NULL, $Term, $Field->Id());
    $TermId = $CName->Id();

    # count every name whose status is anything other than "already exists"
    if ($CName->Status() != ControlledName::STATUS_EXISTS)
    {
        $ControlledNameCount++;
    }

    return $TermId;
}

# translate an imported classification into a classification ID, adding
# the classification if needed
#   $Field - metadata field the classification belongs to (only its Id()
#       is used)
#   $Value - classification text as read from the import file
# returns the ID reported by the Classification object, or the trimmed
#   text unchanged when it is empty (no Classification is constructed then)
function AddClassification($Field, $Value)
{
    global $ClassificationCount;

    $Name = trim($Value);

    # blank values need no lookup
    if (empty($Name))
    {
        return $Name;
    }

    $Class = new Classification(NULL, $Name, $Field->Id());
    $ClassId = $Class->Id();

    # when the status is OK, add the number of segments that were created
    if ($Class->Status() == Classification::CLASSSTAT_OK)
    {
        $ClassificationCount += $Class->SegmentsCreated();
    }

    return $ClassId;
}

# first time through: read the header line of the import file, check each
# column name, and record the column names for the passes that follow
#   reads globals:  $fp (open import file), $Debug, $AF
#   writes globals: $NameArray (column names in file order),
#       $NumberOfFields, $TotalLineCount, $FSeek, $ReferenceArray, and
#       $DebugInfo (only when $Debug is set)
# a column name must either be one of the old-style special names
#   (ControlledName, ControlledNameTypeName, ClassificationName,
#   ClassificationTypeId) or be known to the metadata schema; otherwise an
#   error message is stored in the session, a jump to the ImportData page
#   is requested, $ImportComplete is set to 1, and $FSeek is reset to 0
function FirstTimeThrough()
{
    global $TotalLineCount, $FSeek, $AF;
    global $fp, $NameArray;
    global $NumberOfFields, $Debug, $DebugInfo;
    global $ImportComplete;
    global $ReferenceArray;

    # column names used by old style import files (not schema field names)
    $SpecialNames = array("ControlledName", "ControlledNameTypeName",
            "ClassificationName", "ClassificationTypeId");

    $Schema = new MetadataSchema();
    $ReferenceArray = array();

    # start from an empty column list, so that names left over from an
    # earlier import cannot end up mixed in with this file's header
    $NameArray = array();

    # read in header line from import file (with the same length limit
    # that is used for data lines, so that a long header is not split in
    # two and its tail mistaken for the first data line)
    $fline = fgets($fp, 2000000);
    $FSeek += strlen($fline);

    if ($Debug)
        $DebugInfo .= "fline=$fline<br>";

    # parse line from import file
    $Vars = str_getcsv($fline, "\t");

    $NumberOfFields = 0;
    foreach ($Vars as $Var)
    {
        $Var = trim($Var);

        # anything that is not an old style special column must be the
        # name of an existing metadata field
        if (!in_array($Var, $SpecialNames, TRUE))
        {
            $Field = $Schema->GetFieldByName($Var);
            if (is_null($Field))
            {
                $_SESSION["ImportComplete"] = $ImportComplete;
                $ErrorMessage =
                    "Error: Unknown metadata field name = \"".$Var.
                    "\" encountered.<br>".
                    "This must match the field name ".
                    "in the Metadata Field Editor.<br>".
                    "Please select Back and correct the problem ".
                    "on the first line.";
                $_SESSION["ErrorMessage"] = $ErrorMessage;
                $AF->SetJumpToPage("ImportData");
                $ImportComplete = 1;
                $FSeek = 0;
                return;
            }
        }

        # save the column name (field name or special name)
        $NameArray[] = $Var;
        $NumberOfFields++;
    }

    # count the first header line
    $TotalLineCount = 1;
}

# main import loop: process the next batch of data lines from the import file
#   at most 50 lines are handled per call, reading from the current position
#   of $fp; for each line:
#     - the values are split on tabs and matched to the column names in
#       $NameArray (the counts must agree)
#     - values for controlled name, option, and tree fields are replaced by
#       IDs via AddControlledName() / AddClassification()
#     - the resource to update is located via the unique field (plus the
#       description when the unique field is "Title"), and created when
#       no match exists
#     - every non-empty value is stored on the resource, except values
#       for reference fields, which are collected in
#       $ReferenceArray[FieldId][ResourceId][] instead
#   reads globals:  $fp, $NameArray, $NumberOfFields, $UniqueField,
#       $ReleaseFlag, $User, $SysConfig, $Debug, $AF
#   writes globals: $FSeek, $TotalLineCount, $ImportComplete,
#       $ResourceCount, $UniqueFieldValue, $ReferenceArray, $DebugInfo
#       ($ControlledNameCount and $ClassificationCount are updated by the
#       helper functions)
#   on a field count mismatch or when several resources match the unique
#   field, an error message is stored in the session, a jump to the
#   ImportData page is requested, $ImportComplete is set to 1, $FSeek is
#   reset to 0, and the function returns immediately
function DoWhileLoop()
{
    global $fp, $FSeek, $UniqueFieldValue, $ImportComplete, $AF;
    global $User, $ResourceCount, $ControlledNameCount, $ClassificationCount;
    global $TotalLineCount, $ReleaseFlag, $UniqueField;
    global $NumberOfFields, $Debug, $DebugInfo, $SysConfig, $NameArray;
    global $ReferenceArray;

    $Schema = new MetadataSchema();
    $RFactory = new ResourceFactory();
    $SearchEngine = new SPTSearchEngine();
    $Recommender = new SPTRecommender();

    # number of lines handled per call
    $MaxLinesPerPass = 50;

    $LineCount = 0;
    $Resource = NULL;
    $LastUniqueFieldValue = NULL;
    $UniqueFieldValue = NULL;

    # field objects and special (old format) values, indexed by column;
    # these carry over from line to line, as do the two lookup values below
    $FieldArray = array();
    $SpecialArray = array();
    $UniqueFieldDBName = NULL;
    $Description = "";

    # user current user for AddedById, LastModifiedById
    # current date for DateOfRecordCreation
    $CurrentUserId = $User->Get("UserId");
    $TodaysDate = date("Y-m-d H:i:s");

    while (!feof($fp) && $LineCount < $MaxLinesPerPass
            && $ImportComplete == 0)
    {
        # read in line from import file
        $fline = fgets($fp, 2000000);

        # update variables
        $LineCount++;
        $TotalLineCount++;

        if ($Debug)
            $DebugInfo .= "Line $TotalLineCount: fline=$fline<br>";

        $FSeek += strlen($fline);
        $_SESSION["FSeek"] = $FSeek;

        $ValueArray = array();

        # parse line from import file
        $Vars = str_getcsv($fline, "\t");

        # bail out if line was empty or end of file encountered
        # NOTE(review): a line is discarded here whenever feof() already
        #   reports TRUE after reading it -- confirm that import files
        #   always end with a line terminator
        $NumberOfVars = count($Vars);
        if ($NumberOfVars < 1 || feof($fp))
        {
            $ImportComplete = 1;
            break;
        }

        # make sure number of vars on line match number of fields in header
        if ($NumberOfVars != $NumberOfFields)
        {
            $_SESSION["ImportComplete"] = $ImportComplete;
            $ErrorMessage =
                "Error: incorrect field count on line $TotalLineCount.<br>".
                "Expected $NumberOfFields, encountered $NumberOfVars<br>".
                "Correct the problem and try importing again.<br>";

            # list each expected column with the value found for it
            foreach ($NameArray as $Index => $Name)
            {
                if ($Index < $NumberOfVars)
                {
                    $ErrorMessage .= "[".sprintf("%02d", $Index)."] "
                        .htmlspecialchars($Name)." = <i>"
                        .htmlspecialchars($Vars[$Index])."</i><br>\n";
                }
                else
                {
                    $ErrorMessage .= "[".sprintf("%02d", $Index)."] "
                        .htmlspecialchars($Name)." is missing<br>\n";
                }
            }
            $_SESSION["ErrorMessage"] = $ErrorMessage;
            $AF->SetJumpToPage("ImportData");
            $ImportComplete = 1;
            $FSeek = 0;
            return;
        }

        # process each var and cache it's value
        foreach ($Vars as $Index => $Var)
        {
            # translate backslashed tabs and newlines
            $Var = str_replace(array('\t', '\n'),
                               array("\t", "\n"),
                               $Var);

            $Field = $Schema->GetFieldByName($NameArray[$Index]);
            if (is_object($Field))
            {
                if ($Field->Type() == MetadataSchema::MDFTYPE_CONTROLLEDNAME ||
                    $Field->Type() == MetadataSchema::MDFTYPE_OPTION)
                {
                    # create new controlled name if needed, replace alpha value
                    $Var = AddControlledName($Field, $Var);
                }
                else if ($Field->Type() == MetadataSchema::MDFTYPE_TREE)
                {
                    # create new classification if needed, replace alpha value
                    $Var = AddClassification($Field, $Var);
                }
                $FieldArray[$Index] = $Field;
                $ValueArray[$Field->Id()] = $Var;
            }
            else
            {
                # column is not a schema field (old format special column)
                $SpecialArray[$Index] = trim($Var);
            }
        }

        # old format with ControlledName/ControlleNameTypeName pairs
        $Key = array_search("ControlledName", $NameArray);
        if ($Key !== FALSE && !is_null($Key))
        {
            $Value = $SpecialArray[$Key];
            $Key = array_search("ControlledNameTypeName", $NameArray);
            $Field = $Schema->GetFieldByName($SpecialArray[$Key]);
            if (is_object($Field))
            {
                $Value = AddControlledName($Field, $Value);
                $ValueArray[$Field->Id()] = $Value;
            }
        }

        # old format with ClassificationName/ClassificationTypeId pairs
        $Key = array_search("ClassificationName", $NameArray);
        if ($Key !== FALSE && !is_null($Key))
        {
            $Value = $SpecialArray[$Key];
            $Key = array_search("ClassificationTypeId", $NameArray);

            # compensate for bad data (missing type id)
            if (!is_numeric($SpecialArray[$Key]))
                $SpecialArray[$Key] = $SysConfig->BrowsingFieldId();

            # NOTE(review): a numeric type ID is handed to GetFieldByName()
            #   here, while IDs are looked up with GetField() elsewhere in
            #   this function -- confirm that this lookup can succeed
            $Field = $Schema->GetFieldByName($SpecialArray[$Key]);
            if (is_object($Field))
            {
                $Value = AddClassification($Field, $Value);
                $ValueArray[$Field->Id()] = $Value;
            }
        }

        # grab the UniqueField and Description values from the array
        $UniqueFieldKey = array_search($UniqueField, $NameArray);
        if ($UniqueFieldKey !== FALSE && !is_null($UniqueFieldKey))
        {
            $Field = $FieldArray[$UniqueFieldKey];
            $UniqueFieldValue = addslashes($ValueArray[$Field->Id()]);
            $UniqueFieldDBName = $Field->DBFieldName();
        }
        $DescriptionKey = array_search("Description", $NameArray);
        if ($DescriptionKey !== FALSE && !is_null($DescriptionKey))
        {
            $Field = $FieldArray[$DescriptionKey];
            $Description = addslashes($ValueArray[$Field->Id()]);
        }

        if ($Debug)
            $DebugInfo .= "$UniqueField = $UniqueFieldValue<br>";

        # look for the target resource only when the unique value changes;
        # otherwise the values on this line go to the resource that is
        # already current (if any)
        if (!empty($UniqueFieldValue) &&
                $UniqueFieldValue != $LastUniqueFieldValue)
        {
            if ($UniqueField == "Title")
                $Resources = $RFactory->GetItemIds(
                    $UniqueFieldDBName."=\"".$UniqueFieldValue."\" ".
                    "AND Description=\"".$Description."\"");
            else
                $Resources = $RFactory->GetItemIds(
                    $UniqueFieldDBName."=\"".$UniqueFieldValue."\"");

            if (count($Resources) == 0)
            {
                # create new temporary field
                $Resource = Resource::Create(MetadataSchema::SCHEMAID_DEFAULT);

                # handle special fields

                if (array_search("Added By Id", $NameArray) === FALSE)
                {
                    $Resource->Set("Added By Id", $CurrentUserId);
                }

                if (array_search("Last Modified By Id", $NameArray) === FALSE)
                {
                    $Resource->Set("Last Modified By Id", $CurrentUserId);
                }

                $Key = array_search("Date Of Record Creation", $NameArray);
                if ($Key === FALSE)
                {
                    $Resource->Set("Date Of Record Creation", $TodaysDate);
                }
                else
                {
                    # use only the part of the value before the first space
                    $Field = $FieldArray[$Key];
                    $DORC = explode(" ", $ValueArray[$Field->Id()]);
                    $Date = new Date($DORC[0]);
                    $DateBegin = $Date->BeginDate();
                    $Resource->Set("Date Of Record Creation", $DateBegin);
                }

                # (strict comparison is required here: array_search() returns
                #  index 0 when this is the first column, and 0 == FALSE)
                $Key = array_search("Date Last Modified", $NameArray);
                if ($Key === FALSE)
                {
                    $Resource->Set("Date Last Modified", $TodaysDate);
                }
                else
                {
                    # use only the part of the value before the first space
                    $Field = $FieldArray[$Key];
                    $DORC = explode(" ", $ValueArray[$Field->Id()]);
                    $Date = new Date($DORC[0]);
                    $DateBegin = $Date->BeginDate();
                    $Resource->Set("Date Last Modified", $DateBegin);
                }

                # convert to real resource
                $Resource->IsTempResource(FALSE);
                $ResourceId = $Resource->Id();

                # make sure search and recommender databases are updated
                $SearchEngine->QueueUpdateForItem($ResourceId);
                $Recommender->QueueUpdateForItem($ResourceId);

                if ($Debug)
                    $DebugInfo .= "ResourceId = $ResourceId<br>";

                # keep track of number of resources added
                $ResourceCount++;
                $_SESSION["ResourceCount"] = $ResourceCount;

                # cache the last title
                $LastUniqueFieldValue = $UniqueFieldValue;
            }
            # this resource already exists
            else if (count($Resources) == 1)
            {
                # should only be one matching Resources record
                $ResourceId = $Resources[0];
                $Resource = new Resource($ResourceId);
            }
            # duplicate resources exist!
            else if (count($Resources) > 1)
            {
                $_SESSION["ImportComplete"] = $ImportComplete;
                $ErrorMessage =
                    "Error: Multiple Resources with $UniqueField = \"".
                        $UniqueFieldValue.
                    "\" encountered.<br>".
                    "Please select Back and correct the problem on line ".
                    $TotalLineCount.".";
                $_SESSION["ErrorMessage"] = $ErrorMessage;
                $AF->SetJumpToPage("ImportData");
                $ImportComplete = 1;
                $FSeek = 0;
                return;
            }
        }

        # Deal with release flag setting
        $Key = array_search("Release Flag", $NameArray);
        if ($ReleaseFlag != -1 || $Key === FALSE)
        {
            # Asked not to use value in file, or file doesn't have such a value
            if (is_object($Resource))
            {
                $Resource->Set("Release Flag", $ReleaseFlag);

                # Blow away the file value, if it exists
                if ($Key !== FALSE)
                {
                    $Field = $FieldArray[$Key];
                    $ValueArray[$Field->Id()] = 0;
                }
            }
        }

        # now set each Resource field
        foreach ($ValueArray as $FieldId => $Value)
        {
            if (!empty($Value))
            {
                if (is_object($Resource))
                {
                    if ($Debug)
                        $DebugInfo .= "ResourceId=".$Resource->Id().
                            ": Setting FieldId $FieldId to $Value<br>";

                    # values for reference fields are only collected here
                    # (per field and resource), not stored on the resource
                    $Field = $Schema->GetField($FieldId);
                    if (is_object($Field) &&
                        $Field->Type() == MetadataSchema::MDFTYPE_REFERENCE)
                        $ReferenceArray[$Field->Id()][$Resource->Id()] []=
                            $Value;
                    else
                        $Resource->SetByFieldId($FieldId, $Value);
                }
            }
        }
    }
}

# ----- MAIN -----------------------------------------------------------------

# non-standard global variables
global $ClassificationCount;
global $ControlledNameCount;
global $Debug;
global $DebugInfo;
global $FSeek;
global $ImportComplete;
global $NameArray;
global $NumberOfFields;
global $ReleaseFlag;
global $ResourceCount;
global $TotalLineCount;
global $UniqueField;
global $UniqueFieldValue;
global $fp;
global $ReferenceArray;

# check if current user is authorized
if (!CheckAuthorization(PRIV_SYSADMIN, PRIV_COLLECTIONADMIN)) {  return;  }

# Be sure we're able to gracefully deal with screwy line endings:
ini_set("auto_detect_line_endings", true);

# load up passed thru vars (import state kept in the session between page
# loads); variable variables are used so that no code has to be assembled
# from strings and run through eval()
foreach (array("FSeek", "ImportComplete", "ResourceCount", "ControlledNameCount",
               "ClassificationCount", "TotalLineCount", "NameArray", "TempFile",
               "NumberOfFields", "UniqueField", "ReferenceArray") as $Var)
{
    if (isset($_SESSION[$Var]))
    {
        $$Var = $_SESSION[$Var];
    }
}

# a value posted with the form takes precedence over one saved in the session
if (isset($_POST["ReleaseFlag"]))
    $ReleaseFlag = $_POST["ReleaseFlag"];
elseif (isset($_SESSION["ReleaseFlag"]))
    $ReleaseFlag = $_SESSION["ReleaseFlag"];

# use the posted unique field unless it is "-1"; fall back to "Title" when
# no unique field is known at all
if (isset($_POST["F_UniqueField"]) &&
    $_POST["F_UniqueField"] != "-1")
    $UniqueField = $_POST["F_UniqueField"];
else if (is_null($UniqueField))
    $UniqueField = "Title";

if (isset($_POST["Debug"]))
    $Debug = $_POST["Debug"];
elseif (isset($_SESSION["Debug"]))
    $Debug = $_SESSION["Debug"];

# check for Cancel button
if (isset($_POST["Submit"]) && $_POST["Submit"] == "Cancel")
{
    $ImportComplete = 1; # Required to make the PostProc call work.
    $AF->SetJumpToPage("SysAdmin");
    return;
}

## Initialize import variables (they'll be null the first time through):
foreach (array("ImportComplete", "FSeek", "ResourceCount",
               "ControlledNameCount", "ClassificationCount") as $Var)
{
    if (!isset($$Var))
    {
        $$Var = 0;
    }
}

# open import file for reading
#   - new upload: validate it, copy it into the tmp directory next to this
#     script, and open the copy
#   - import already under way ($FSeek > 0): reopen the copy made earlier
#   - otherwise: report why no usable upload is available
if (isset($_FILES['F_FileName']['tmp_name']) &&
    is_uploaded_file($_FILES['F_FileName']['tmp_name']))
{
    $UploadedPath = $_FILES['F_FileName']['tmp_name'];
    $UploadedName = $_FILES['F_FileName']['name'];

    if ($Debug)
        $DebugInfo = "filename=".$UploadedPath.
            "<br>";

    # zero length or tiny file
    if ($_FILES['F_FileName']['size'] == 0)
    {
        $ErrorMessage = "Error: File doesn't exist or is empty.";
        $_SESSION["ErrorMessage"] = $ErrorMessage;
        $AF->SetJumpToPage("ImportData");
        return;
    }
    # files with illegal extensions (matched anywhere in the file name)
    elseif (preg_match('/\.(php|js|zip|gz|xz|xml|html)/i', $UploadedName))
    {
        $ErrorMessage = "Error: File type is not allowed.";
        $_SESSION["ErrorMessage"] = $ErrorMessage;
        $AF->SetJumpToPage("ImportData");
        return;
    }

    # a newly uploaded file always starts a new import: discard any
    # position, counts, and column names that an earlier, unfinished import
    # may have left behind in the session
    $FSeek = 0;
    $ImportComplete = 0;
    $ResourceCount = 0;
    $ControlledNameCount = 0;
    $ClassificationCount = 0;
    $NameArray = array();

    $ScriptFileName = isset($_SERVER["PATH_TRANSLATED"]) ? $_SERVER['PATH_TRANSLATED']
            : (isset($_SERVER["SCRIPT_FILENAME"]) ? $_SERVER["SCRIPT_FILENAME"]
            : (isset($_SERVER["PATH_INFO"]) ? $_SERVER["PATH_INFO"]
            : exit("ERROR: unable to determine current file name")));

    # (realpath() returns FALSE for a missing directory; report the intended
    #  path in that case rather than a bare "/")
    $TempDirPath = dirname($ScriptFileName).
        "/tmp";
    $RealTempDir = realpath($TempDirPath);
    $TempDir = (($RealTempDir !== FALSE) ? $RealTempDir : $TempDirPath)."/";

    # make sure destination dir exists
    if ($RealTempDir === FALSE || !file_exists($TempDir))
    {
        $ErrorMessage = "Error: Destination directory ".$TempDir.
                        " doesn't exist.";
        $_SESSION["ErrorMessage"] = $ErrorMessage;
        $AF->SetJumpToPage("ImportData");
        return;
    }

    # make sure destination dir is writable
    if (!is_writable($TempDir))
    {
        $ErrorMessage = "Error: Destination directory ".$TempDir.
                        " is not writable.";
        $_SESSION["ErrorMessage"] = $ErrorMessage;
        $AF->SetJumpToPage("ImportData");
        return;
    }

    # (basename() guards against directory components in the client
    #  supplied file name)
    $TempFile = dirname($ScriptFileName).
        "/tmp/".
        basename($UploadedName);

    if (!copy($UploadedPath, $TempFile))
    {
        $ErrorMessage = "Error: Unable to copy uploaded file to ".
                        $TempDir.".";
        $_SESSION["ErrorMessage"] = $ErrorMessage;
        $AF->SetJumpToPage("ImportData");
        return;
    }

    $FileType = mime_content_type($TempFile);
    if (preg_match('/^text\/plain/', $FileType)==0)
    {
        # do not leave a rejected file behind in the tmp directory
        unlink($TempFile);

        $ErrorMessage = "Error: File type is not allowed.";
        $_SESSION["ErrorMessage"] = $ErrorMessage;
        $AF->SetJumpToPage("ImportData");
        return;
    }

    $fp = fopen($TempFile, 'r');
    unlink($UploadedPath);

    if ($fp === FALSE)
    {
        $ErrorMessage = "Error: Unable to open import file.";
        $_SESSION["ErrorMessage"] = $ErrorMessage;
        $AF->SetJumpToPage("ImportData");
        return;
    }
}
else if ($FSeek > 0)
{
    # continuing an import: reopen the copy made on the first pass
    $fp = (isset($TempFile) && is_readable($TempFile))
            ? fopen($TempFile, 'r') : FALSE;

    if ($fp === FALSE)
    {
        $ErrorMessage = "Error: Unable to open import file.";
        $_SESSION["ErrorMessage"] = $ErrorMessage;
        $AF->SetJumpToPage("ImportData");
        return;
    }
}
else
{
    # (when no upload information is present at all, fall back to code 0,
    #  which yields the generic message)
    $UploadError = isset($_FILES['F_FileName']['error'])
            ? $_FILES['F_FileName']['error'] : 0;

    switch ($UploadError)
    {
        # no error; possible file attack!
        case 0:
            $ErrorMessage = "There was a problem with your upload.";
            break;

        # uploaded file exceeds the upload_max_filesize directive in php.ini
        case 1:
        # uploaded file exceeds the MAX_FILE_SIZE directive that
        # was specified in the html form
        case 2:
            $ErrorMessage = "The file you are trying to upload is too big.";
            break;

        # uploaded file was only partially uploaded
        case 3:
            $ErrorMessage =
                "The file you are trying upload was only partially uploaded.";
            break;

        # no file was uploaded
        case 4:
            $ErrorMessage = "You must select a file for upload.";
            break;

        # a default error, just in case!
        default:
            $ErrorMessage = "There was a problem with your upload.  ("
                    .$UploadError.")";
            break;
    }
    $_SESSION["ErrorMessage"] = $ErrorMessage;
    $AF->SetJumpToPage("ImportData");
    return;
}

# first time through (nothing read from the file yet): handle the header line
if ($FSeek == 0)
    FirstTimeThrough();

# seek to the next line
if ($FSeek > 0)
    fseek($fp, $FSeek);

# the main work happens here
DoWhileLoop();

# end of file reached?
if (feof($fp))
    $ImportComplete = 1;

# register some key variables for other html code (and for the next pass);
# variable variables are used so that no code has to be assembled from
# strings and run through eval()
foreach (array("ReleaseFlag", "ImportComplete", "ResourceCount", "ControlledNameCount",
               "ClassificationCount", "TotalLineCount", "NameArray", "FSeek",
               "TempFile", "NumberOfFields", "ReferenceArray", "UniqueField",
               "Debug") as $Var)
{
    $_SESSION[$Var] = $$Var;
}

#  Time to auto-refresh?
if ($ImportComplete == 0)
    $AutoRefreshToPage = "index.php?P=ImportDataExecute";
else
{
    # messages about references that could not be resolved (unmatched or
    # ambiguous targets)
    global $ReferenceMessages;

    $ReferenceMessages = array();
    # If we're done with our import, then try to resolve any
    # references it contained:

    $RFactory = new ResourceFactory();

    # database fields a reference target is matched against, in order of
    # preference
    $MatchFields = array("Url", "Title", "AlternateTitle");

    # Foreach reference field:
    foreach ($ReferenceArray as $FieldId => $ResourceReferences)
    {
        # Foreach resource having a value for that field:
        foreach ($ResourceReferences as $ResourceId => $RefTargets)
        {
            $ThisResource = new Resource($ResourceId);

            # For each value that it has:
            foreach ($RefTargets as $RefTarget)
            {
                ## Look first for matching URLs, then titles, then
                ## alternate titles, stopping at the first field that
                ## yields any match:
                $Candidates = array();
                foreach ($MatchFields as $MatchField)
                {
                    $Candidates = $RFactory->GetItemIds(
                        $MatchField."=\"".addslashes($RefTarget)."\"");
                    if (count($Candidates) != 0)
                        break;
                }

                # Complain if there was no match at all:
                if (count($Candidates)==0)
                    $ReferenceMessages []=
                        "Unable to resolve reference from '".
                        $ThisResource->Get("Title")."' to '".$RefTarget."'";

                # If there was just one match, add the reference to the field:
                elseif(count($Candidates)==1)
                {
                    $Value = $ThisResource->GetByFieldId($FieldId);
                    $Value []= $Candidates[0];
                    $ThisResource->SetByFieldId( $FieldId, $Value );
                }
                # If somehow there were a pile of matches,
                # complain about that too.
                else
                    $ReferenceMessages []=
                        "Reference from '".$ThisResource->Get("Title")."' ".
                        "to '".$RefTarget."' is not unique";
            }
        }
    }
}

# set the title for this page
PageTitle("Import Data");

# ask the application framework to call our post-processing function,
# handing it the import file name, the open file handle, and the
# completion flag
$AF->AddPostProcessingCall(
    "PostProcessingFn", $TempFile, $fp, $ImportComplete);

# post-processing call: once the import is complete, close and remove the
# temporary import file and clear the import state kept in the session
#   $TempFile - name of the temporary copy of the uploaded file
#   $fp - handle of the import file (ignored unless it is an open resource)
#   $ImportComplete - 1 when the import is complete; for any other value
#       this function does nothing
function PostProcessingFn($TempFile, $fp, $ImportComplete)
{
    if ($ImportComplete != 1)
    {
        return;
    }

    # close the import file (the handle may never have been opened)
    if (is_resource($fp))
    {
        fclose($fp);
    }

    # remove temporary uploaded file (if there is one)
    if (is_string($TempFile) && is_file($TempFile))
    {
        unlink($TempFile);
    }

    # forget the state of this import
    foreach (array("FSeek", "ImportComplete", "ResourceCount", "ControlledNameCount",
                   "ClassificationCount", "TotalLineCount", "NameArray", "TempFile",
                   "NumberOfFields", "UniqueField", "ReleaseFlag",
                   "Debug", "ReferenceArray") as $Var)
    {
        unset($_SESSION[$Var]);
    }
}

?>
