Commit c3f40d84 authored by bligny

Update search : textual search on the columns instead of token based search

parent 1c428058
......@@ -93,14 +93,16 @@ foreach ($sims as $doc => $score) {
}
$htmlout .= "</ul>\n";
// $output_mode defined in /twbackends/phpAPI/parameters_details.php
if ($output_mode == "html") {
    // HTML mode: a heading with the displayed/matched counts, followed by
    // the document list accumulated in $htmlout.
    echo '<br/><h4><font color="#0000FF"> Full text of top '.$nb_displayed."/".count($sims).' related publications:</font></h4>';
    echo $htmlout;
}
else {
    // JSON mode: a single array literal with unique keys. The stale
    // pre-change 'hits'/'nhits' entries (the second one without a trailing
    // comma) made this literal a syntax error and are removed.
    echo json_encode(array(
        'hits' => $jsonout, // documents content
        'nhits' => $nb_displayed, // number of documents displayed
        'ntotal' => $totalfound // total number of matching documents found
    ));
}
......
......@@ -99,14 +99,58 @@ else {
// DO THE SEARCH
// -------------
$searchcols = $my_conf["node".$ntid][$dbtype]['qcols'];
//var_dump($searchcols);
// a - split the query
$qtokens = preg_split('/[\p{Z}\p{P}\p{C}]+/u', $_GET["query"]);
// b - compute freq similarity per doc
$sims = array();
// for each token
$totalfound = 0;

// ------------ Textual search -------------
// Counts, for every row of $base, the case-insensitive occurrences of the
// query text in the columns listed in $searchcols. Rows with at least one
// occurrence are recorded in $sims as 'd<row index>' => occurrence count
// (same shape as what the token based search builds) and counted in
// $totalfound.

// Format input search string, ex : "[\"medical device\"]".
// A missing or non-string "query" parameter is treated as an empty query.
$searchinput = (isset($_GET["query"]) && is_string($_GET["query"])) ? $_GET["query"] : '';
// Strip the leading [" and trailing "] wrapper (any run of those characters).
$searchinput = ltrim($searchinput, "[\"");
$searchinput = rtrim($searchinput, "\"]");
//echodump("Search input :", $searchinput); // ex "medical device"

// The query is literal text, not a regular expression: escape the regex
// metacharacters and the "/" delimiter so that inputs such as "c++" or
// "a/b" neither break the pattern nor change its meaning.
$searchpattern = "/" . preg_quote($searchinput, "/") . "/i"; // ex "/medical device/i"
//echodump("Search pattern :", $searchpattern);

// $base is a 2D array (array of rows, each row an array indexed by column
// name), ex : $base[0]["abstract"] is the "abstract" column of the first row.
// An empty query is skipped: the empty pattern would match at every
// position of every column and report every document as a hit.
if ($searchinput !== '') {
    foreach ($base as $idrow => $row) {
        // total number of occurrences over all searched columns of this row
        $res = 0;
        foreach ($searchcols as $col) {
            // skip columns that are absent (or not text) in this row
            if (!isset($row[$col]) || !is_scalar($row[$col])) {
                continue;
            }
            // preg_match_all returns false on error: count it as 0 hits
            $res += (int) preg_match_all($searchpattern, (string) $row[$col]);
        }
        // at least one occurrence: register the document
        if ($res > 0) {
            $docid = 'd' . $idrow;
            //echo "doc ". $docid.", ".$res ." occurrence(s) found \n";
            $totalfound++;
            // build sims array similar to what token search does
            $sims[$docid] = $res;
        }
    }
}
// echo "total doc found :". $totalfound."\n";
/*
// ------- Token based search search ------
// Split the query
$qtokens = preg_split('/[\p{Z}\p{P}\p{C}]+/u', $_GET["query"]);
// echodump("tockens :", $qtokens);
// for each token, compute freq similarity per doc
for ($k=0 ; $k < count($qtokens) ; $k++) {
$tok = $qtokens[$k];
......@@ -148,6 +192,7 @@ else {
}
}
*/
// c - sorted score per doc
//
......@@ -159,13 +204,9 @@ else {
}
}
/**
 * Builds the cache entry name for the CSV search base of a graph database.
 *
 * Centralised here so that cache reads and cache writes always use the
 * same conventional name.
 *
 * @param string $graphdbname name of the graph database
 * @return string "twbackendmem/<graphdbname>/csv_search_base"
 */
function mem_entry_name($graphdbname) {
    return sprintf("twbackendmem/%s/csv_search_base", $graphdbname);
}
?>
......@@ -503,10 +503,14 @@ function getTopPapers(qWords, nodetypeId, chosenAPI, tgtDivId) {
data: urlParams,
contentType: "application/json",
success : function(data){
cbDisplay(data.hits)
// debug
// console.log(data);
cbDisplay(data.hits);
},
error: function(){
console.log(`Not found: relatedDocs for ${apiurl}`)
// debug
// console.log(apiurl + '/info_div.php?'+urlParams)
cbDisplay([{ "error": stockErrMsg }])
}
});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment