Commit c3f40d84 authored by bligny

Update search: textual search on the columns instead of token-based search

parent 1c428058
...@@ -93,14 +93,16 @@ foreach ($sims as $doc => $score) { ...@@ -93,14 +93,16 @@ foreach ($sims as $doc => $score) {
} }
$htmlout .= "</ul>\n"; $htmlout .= "</ul>\n";
// $output_mode defined in /twbackends/phpAPI/parameters_details.php
if ($output_mode == "html") { if ($output_mode == "html") {
echo '<br/><h4><font color="#0000FF"> Full text of top '.$nb_displayed."/".count($sims).' related publications:</font></h4>'; echo '<br/><h4><font color="#0000FF"> Full text of top '.$nb_displayed."/".count($sims).' related publications:</font></h4>';
echo $htmlout; echo $htmlout;
} }
else { else {
echo json_encode(array( echo json_encode(array(
'hits' => $jsonout, 'hits' => $jsonout, // documents content
'nhits' => $nb_displayed 'nhits' => $nb_displayed, // number of documents displayed
'ntotal' => $totalfound // total number of matching documents found
)); ));
} }
......
...@@ -99,14 +99,58 @@ else { ...@@ -99,14 +99,58 @@ else {
// DO THE SEARCH // DO THE SEARCH
// ------------- // -------------
$searchcols = $my_conf["node".$ntid][$dbtype]['qcols']; $searchcols = $my_conf["node".$ntid][$dbtype]['qcols'];
//var_dump($searchcols);
// a - split the query
$qtokens = preg_split('/[\p{Z}\p{P}\p{C}]+/u', $_GET["query"]);
// b - compute freq similarity per doc
$sims = array(); $sims = array();
// for each token $totalfound=0;
// ------------ Textual search -------------
// Literal, case-insensitive search of the query text in every searchable
// column ($searchcols) of every row of $base (an array of rows, each row
// being an array indexed by column name, e.g. $base[0]["abstract"]).
// For each row with at least one occurrence:
//   - $sims['d'.<row index>] receives the total number of occurrences found
//     in the row (same docid => score shape as the token-based search);
//   - $totalfound is incremented.

// The query arrives wrapped, ex : "[\"medical device\"]".
// Strip the leading [ and " characters and the trailing " and ] characters.
$searchinput = (isset($_GET["query"]) && is_string($_GET["query"])) ? $_GET["query"] : "";
$searchinput = ltrim($searchinput, "[\"");
$searchinput = rtrim($searchinput, "\"]");
//echodump("Search input :", $searchinput); // ex "medical device"

// preg_quote() escapes the regex metacharacters and the "/" delimiter, so the
// user input is matched as plain text and cannot break or alter the pattern.
$searchpattern = "/".preg_quote($searchinput, "/")."/i"; // ex "/medical device/i"
//echodump("Search pattern :", $searchpattern);

// An empty needle would give the pattern "//i", which matches at every
// position of every text: in that case skip the scan and report no hit.
if ($searchinput !== "") {
    foreach ($base as $idrow => $row) {
        // total number of occurrences over all searchable columns of this row
        $res = 0;
        foreach ($searchcols as $col) {
            // NOTE(review): assumes a row may lack a configured column or hold
            // a null value there -- such columns are simply ignored
            if (!isset($row[$col])) {
                continue;
            }
            $nbfound = preg_match_all($searchpattern, $row[$col]);
            // preg_match_all() returns false on error: only add real counts
            if ($nbfound !== false) {
                $res += $nbfound;
            }
        }
        if ($res > 0) {
            $totalfound++;
            // build sims array similar to what token search does
            $sims['d'.$idrow] = $res;
        }
    }
}
// echo "total doc found :". $totalfound."\n";
/*
// ------- Token-based search ------
// Split the query
$qtokens = preg_split('/[\p{Z}\p{P}\p{C}]+/u', $_GET["query"]);
// echodump("tokens :", $qtokens);
// for each token, compute freq similarity per doc
for ($k=0 ; $k < count($qtokens) ; $k++) { for ($k=0 ; $k < count($qtokens) ; $k++) {
$tok = $qtokens[$k]; $tok = $qtokens[$k];
...@@ -148,6 +192,7 @@ else { ...@@ -148,6 +192,7 @@ else {
} }
} }
*/
// c - sorted score per doc // c - sorted score per doc
// //
...@@ -159,13 +204,9 @@ else { ...@@ -159,13 +204,9 @@ else {
} }
} }
// just to make sure we use the same conventional name at cache read/write // just to make sure we use the same conventional name at cache read/write
function mem_entry_name($graphdbname) { function mem_entry_name($graphdbname) {
return "twbackendmem/".$graphdbname."/csv_search_base"; return "twbackendmem/".$graphdbname."/csv_search_base";
} }
?> ?>
...@@ -503,10 +503,14 @@ function getTopPapers(qWords, nodetypeId, chosenAPI, tgtDivId) { ...@@ -503,10 +503,14 @@ function getTopPapers(qWords, nodetypeId, chosenAPI, tgtDivId) {
data: urlParams, data: urlParams,
contentType: "application/json", contentType: "application/json",
success : function(data){ success : function(data){
cbDisplay(data.hits) // debug
// console.log(data);
cbDisplay(data.hits);
}, },
error: function(){ error: function(){
console.log(`Not found: relatedDocs for ${apiurl}`) console.log(`Not found: relatedDocs for ${apiurl}`)
// debug
// console.log(apiurl + '/info_div.php?'+urlParams)
cbDisplay([{ "error": stockErrMsg }]) cbDisplay([{ "error": stockErrMsg }])
} }
}); });
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment