Commit c4b46d94 authored by Romain Loth's avatar Romain Loth

memcached for localDB CSV searchbase

with this optional cache, indexation happens only once every 1800s, and subsequent queries retrieve the search base (aka postings) from a serialized version in cache
parent 9e43cc8b
## Related Documents PHP backend
##### Provides an API for "topPapers" search queries in tinawebJS
Main use case is support for one-doc-by-row CSV files.
#### Prerequisites
You need a web server with PHP support (PHP > 5.0).
For instance, on Ubuntu 16 with an nginx server, you can install PHP 7 FPM:
```
sudo apt install php7.0-fpm
```
And then add this kind of configuration entry in `nginx.conf`:
```
location ~ \.php$ {
include snippets/fastcgi-php.conf;
fastcgi_pass unix:/run/php/php7.0-fpm.sock;
fastcgi_param SCRIPT_FILENAME /your/path/to/LOCALDB/$fastcgi_script_name;
# ---------------------
}
```
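This is enough to run any gargantext-style gexf + CSV set.
Optionally, and especially in production, you should add memcached support for faster CSV search: it lets the backend cache the CSV postings base instead of re-indexing the CSV on every query. The commands below install the PHP client extension; a memcached server must also be running and reachable at the host/port the backend is configured with.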
```
sudo apt install php-memcached
sudo service php7.0-fpm restart
```
For legacy cortext-style databases, you'll also need sqlite:
```
sudo apt install sqlite3
```
#### Usage
For a given graph source and database, you will need to **fill in the right settings in `db.json`** in order to associate the graph source file (e.g. gexf) with this related docs API.
£TODO explain after new specifications implemented
......@@ -2,7 +2,7 @@
// manage the dynamical additional information in the left panel.
ini_set('display_errors',1);
ini_set('display_startup_errors',1);
error_reporting(-1);
// error_reporting(-1);
// relative path to dirname "/line"
$project_root = "../";
......@@ -27,24 +27,49 @@ else {
// echodump("columns to index",$idxcolsbytype);
include('csv_indexation.php');
// DO THE INDEXATION
// DO THE INDEXATION (or RETRIEVE CACHED ONE)
// we use cache if memcached is present (and if we indexed the csv already)
// $can_use_cache = False
include('csv_indexation.php');
$csv_search_base = NULL;
if(class_exists('Memcached')){
$mcd = new Memcached;
$mcd->addServer($memserver, $memport);
// £TODO use memcached or something to store a serialized version of csv_search_base
// + add all (sem+soc) columns for the index to work !!
$csv_search_base = parse_and_index_csv($project_root.$graphdb, $idxcolsbytype, $csvsep, $csvquote);
// test if we indexed it already
$csv_search_base_seri = $mcd->get(mem_entry_name($graphdb));
if ($csv_search_base_seri !== False) {
// echo("Using cached base<br>");
$csv_search_base = json_decode($csv_search_base_seri, $assoc=true);
}
}
if (! $csv_search_base) {
// echo("Creating new base<br>");
// indexing ----------------------------------------------------------------------
// must know about all (sem+soc) cols typed, even if each search doesn't need them
$csv_search_base = parse_and_index_csv($project_root.$graphdb,
$idxcolsbytype,
$csvsep, $csvquote);
// -------------------------------------------------------------------------------
if(class_exists('Memcached')){
// **store** in cache for 1/2h
$mcd->set(mem_entry_name($graphdb), json_encode($csv_search_base), 1800);
}
}
$base = $csv_search_base[0];
$postings = $csv_search_base[1];
// echodump("postings", $postings);
// echodump("base", $base);
// DO THE SEARCH
// DO THE SEARCH
// -------------
$searchcols = json_decode($_GET['searchin']);
// a - split the query
......@@ -105,6 +130,12 @@ else {
}
/**
 * Builds the memcached entry name under which the indexed CSV search base
 * of a given graph database is stored.
 *
 * Both the cache read and the cache write must go through this function so
 * that they always agree on the same conventional name.
 *
 * Memcached keys are limited to 250 bytes and (with the ASCII protocol) may
 * not contain whitespace or control characters. When the readable name would
 * break one of these rules (e.g. a CSV path containing a space), a hashed
 * variant is returned instead, so that caching keeps working for that file.
 *
 * @param string $graphdbname  name/path of the graph database (CSV) file
 * @return string              a valid memcached key for this database
 */
function mem_entry_name($graphdbname) {
  $readable_key = "twbackendmem/".$graphdbname."/csv_search_base";

  // \x00-\x20 covers control chars and space, \x7f is DEL
  $too_long = strlen($readable_key) > 250;
  $has_forbidden_chars = preg_match('/[\x00-\x20\x7f]/', $readable_key) === 1;

  if ($too_long || $has_forbidden_chars) {
    // md5 is only used here to get a short, deterministic, key-safe token
    return "twbackendmem/md5-".md5($graphdbname)."/csv_search_base";
  }

  // usual case: keep the human-readable name
  return $readable_key;
}
?>
......@@ -37,7 +37,7 @@ TW.conf = (function(TW){
TWConf.getRelatedDocs = true
TWConf.relatedDocsMax = 10
TWConf.relatedDocsType = "twitter" // accepted: "twitter" | "wosLocalDB"
TWConf.relatedDocsType = "wosLocalDB" // accepted: "twitter" | "wosLocalDB"
// POSSible: "elastic"
TWConf.relatedDocsAPIS = {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment