"
exit 1
fi
name="$1"
path="$2"
limit="$3"
stack --docker exec gargantext-import -- true "user1" "$name" gargantext.ini "$limit" "$path"

==> devops/docker/docker-install <==
#!/bin/bash
if git --version;
then
echo "git installed, ok"
else
sudo apt update && sudo apt install git
fi
if docker --version;
then
echo "Docker installed, ok"
else
curl -sSL https://get.docker.com/ | sh
echo "Docker has been installed"
echo "Configure your user rights:"
echo " sudo usermod -a -G docker $USER"
echo " or : adduser $(whoami) docker"
fi
echo "---------------script docker-----------------------------"
########################################################################
# Docker path conf
if [ -f "/usr/local/bin/docker" ]
then
echo "docker alias exists already"
else
DOCKERBIN="/usr/local/bin/docker"
sudo touch $DOCKERBIN
sudo chmod o+w $DOCKERBIN
sudo echo "#!/bin/sh
exec sudo -E /usr/bin/docker \"\$@\"" >> $DOCKERBIN
sudo chmod gou+x $DOCKERBIN
sudo chmod o-w $DOCKERBIN
fi
########################################################################
if stack --version;
then
echo "Haskell stack installed, ok"
else
curl -sSL https://get.haskellstack.org/ | sh
stack update
stack upgrade
fi
git clone https://gitlab.iscpif.fr/gargantext/haskell-gargantext.git
cd haskell-gargantext
git clone https://gitlab.iscpif.fr/gargantext/purescript-gargantext
../install-deps $(pwd)
pushd devops/docker
docker build --pull -t fpco/stack-build:lts-17.13-garg .
popd
#stack docker pull
stack --docker setup
stack --docker build
stack --docker install
# Database configuration
# CREATE USER gargantua WITH PASSWORD $(grep DB_PASS gargantext.ini)
# GRANT ALL PRIVILEGES ON DATABASE gargandbV4 to gargantua
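#
# A commented sketch of the two statements above (an illustration, not executed by this
# script). It assumes a local PostgreSQL instance and that gargantext.ini sits in the
# current directory; devops/postgres/create remains the maintained way to set this up.
#   DB_PASS=$(grep DB_PASS gargantext.ini | awk '{print $3}')
#   DB_NAME=$(grep DB_NAME gargantext.ini | awk '{print $3}')
#   sudo -u postgres psql -c "CREATE USER gargantua WITH PASSWORD '${DB_PASS}'"
#   sudo -u postgres psql -c "GRANT ALL PRIVILEGES ON DATABASE \"${DB_NAME}\" TO gargantua"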

==> devops/docker/docker-postgres <==
#!/bin/bash
set -eu
docker stop dbgarg || :
docker rm --volumes dbgarg || :
export PGPASSWORD="$(grep DB_PASS gargantext.ini | \awk '{print $3}')"
docker run --name dbgarg -e POSTGRES_USER=gargantua -e POSTGRES_DB=gargandbV5 -e POSTGRES_PASSWORD=${PGPASSWORD} -d postgres
sleep 3
docker run -e PGPASSWORD -i --rm --link dbgarg:postgres postgres psql -h postgres -U gargantua -d gargandbV5 < devops/postgres/schema.sql
#docker run -e PGPASSWORD -i --rm --link dbgarg:postgres postgres psql -h postgres -U gargantua -d gargandbV5 < gargantext.dump
#docker run -e PGPASSWORD -i --rm --link dbgarg:postgres postgres dropdb -h postgres -U gargantua gargandbV5

==> devops/docker/docker-prod <==
#!/bin/bash
stack --docker exec gargantext-server -- --run Prod --ini gargantext.ini

==> devops/install-corenlp <==
#!/bin/bash
if [ ! -d coreNLP ]; then
mkdir -v coreNLP
fi
pushd coreNLP
wget https://dl.gargantext.org/coreNLP.tar.bz2
tar xvjf coreNLP.tar.bz2
pushd home/debian/CoreNLP
./startServer.sh

==> devops/install-deps <==
#!/bin/bash
DIR=$1
DEPS_DIR=${DIR}/deps
function clone_or_update() {
REPO=$1
echo "Checking repo ${REPO}"
# strip only dir name from URL
DIR=${REPO##*/}
# strip the remaining '.git' suffix
RAW_DIR=${DIR%.*}
if [ -d "${RAW_DIR}" ]; then
pushd ${RAW_DIR}
git pull
popd
else
git clone "$@"
fi
}
if [ ! -d "${DEPS_DIR}" ]; then
mkdir ${DIR}/deps
fi
cd ${DIR}/deps
clone_or_update https://gitlab.iscpif.fr/gargantext/clustering-louvain.git
clone_or_update https://github.com/np/servant-job.git
clone_or_update https://github.com/np/patches-map
clone_or_update https://gitlab.com/npouillard/patches-class.git
clone_or_update https://github.com/delanoe/haskell-opaleye
clone_or_update https://github.com/delanoe/hsparql -b next --single-branch

==> devops/maintenance/index.html <==
index
Welcome,
This server is under maintenance, see you back soon.
(Sure, we take care of your data, no worries :)
Best regards, The GarganText Team CNRS/ISCPIF

==> devops/maintenance/index.md <==
![](http://dl.gargantext.org/Logo_V4-min.png "V4 Logo")
Welcome,
This server is under maintenance, see you back soon.
(Sure, we take care of your data, no worries :)
Best regards,
The GarganText Team
CNRS/ISCPIF

==> devops/misc/build-doc <==
#!/bin/bash
stack haddock --no-haddock-deps

==> devops/misc/check-repo-cycles.jq <==
.state |
to_entries[] | .key as $ty | .value |
to_entries[] | .key as $list | .value as $m | .value |
("\($list): \($m|length) size" | debug) as $_ |
to_entries[] | .key as $ngram | .value |
select(.root) | debug |
select( # We keep only records with errors
.root and # We need .root
(
$m[.root].root != null or # A root should have no root itself
.parent != .root and # We need .parent different .root
$m[.parent].root != .root # The parent's root should be the same root.
)
) |
{$ty, $list, $ngram, data: .}

==> devops/misc/stats.jq <==
jq < repo.json '.state |= map_values(map_values(length)) | .history |= length'

==> devops/nginx/default <==
##
# You should look at the following URL's in order to grasp a solid understanding
# of Nginx configuration files in order to fully unleash the power of Nginx.
# http://wiki.nginx.org/Pitfalls
# http://wiki.nginx.org/QuickStart
# http://wiki.nginx.org/Configuration
#
# Generally, you will want to move this file somewhere, and start with a clean
# file but keep this around for reference. Or just disable in sites-enabled.
#
# Please see /usr/share/doc/nginx-doc/examples/ for more detailed examples.
##
server {
server_name doc.gargantext.org;
add_header Cache-Control "no-cache";
root /var/www/html/;
index index.html;
listen 443 ssl; # managed by Certbot
ssl_certificate /etc/letsencrypt/live/v4.gargantext.org/fullchain.pem; # managed by Certbot
ssl_certificate_key /etc/letsencrypt/live/v4.gargantext.org/privkey.pem; # managed by Certbot
include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
}
# Comment this for maintenance only
server {
server_name v4.gargantext.org;
#server_name maintenanceOnly.gargantext.org;
add_header Cache-Control "no-cache";
root /var/www/maintenance/;
index index.html;
listen 443 ssl; # managed by Certbot
ssl_certificate /etc/letsencrypt/live/v4.gargantext.org/fullchain.pem; # managed by Certbot
ssl_certificate_key /etc/letsencrypt/live/v4.gargantext.org/privkey.pem; # managed by Certbot
include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
}
server {
if ($host = v4.gargantext.org) {
return 301 https://$host$request_uri;
} # managed by Certbot
listen 80;
server_name v4.gargantext.org;
add_header Cache-Control "no-cache";
location '/.well-known/acme-challenge' {
root /var/www/gargantext;
}
# Always redirect to https
return 301 https://v4.gargantext.org$request_uri;
}
server {
listen 443;
listen [::]:443 ssl;
server_name v4.gargantext.org;
# Some options configurations:
# https://github.com/h5bp/server-configs-nginx/blob/master/h5bp/location/expires.conf
add_header Cache-Control "no-cache";
# SSL configuration
#
# listen 443 ssl default_server;
# listen [::]:443 ssl default_server;
ssl on;
ssl_certificate /etc/letsencrypt/live/v4.gargantext.org/fullchain.pem; # managed by Certbot
ssl_certificate_key /etc/letsencrypt/live/v4.gargantext.org/privkey.pem; # managed by Certbot
# Note: You should disable gzip for SSL traffic.
# See: https://bugs.debian.org/773332
#
# Read up on ssl_ciphers to ensure a secure configuration.
# See: https://bugs.debian.org/765782
#
# Self signed certs generated by the ssl-cert package
# Don't use them in a production server!
#
# include snippets/snakeoil.conf;
client_max_body_size 800M;
client_body_timeout 12;
client_header_timeout 12;
keepalive_timeout 15;
send_timeout 10;
root /var/www/html;
index index.html;
#add_header Access-Control-Allow-Origin $http_origin always;
# Add index.php to the list if you are using PHP
#index index.html index.htm index.nginx-debian.html;
# CORS config borrowed from: https://gist.github.com/pauloricardomg/7084524
# NP: not sure we need CORS yet
#
if ($http_origin ~* (^https?://(127.0.0.1|localhost|v4\.gargantext\.com))) {
set $cors "1";
}
#
# Cross-Origin Resource Sharing
if ($request_method = "OPTIONS") {
set $cors "${cors}o";
}
# SSL CERT renewal
location '/.well-known/acme-challenge' {
alias /var/www/gargantext/.well-known/acme-challenge ;
}
location /api {
# limit_except OPTIONS {
# auth_basic "Gargantext's Development Version";
# auth_basic_user_file /etc/nginx/haskell_gargantext.htpasswd;
# }
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Host $http_host;
proxy_redirect off;
proxy_pass http://127.0.0.1:8008;
}
location / {
# https://stackoverflow.com/a/48708812
# limit_except OPTIONS {
# auth_basic "Gargantext's Development Version";
# auth_basic_user_file /etc/nginx/haskell_gargantext.htpasswd;
# }
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Host $http_host;
proxy_redirect off;
proxy_pass http://127.0.0.1:8008;
}
#access_log off;
access_log /var/log/nginx/access.log;
error_log /var/log/nginx/error.log;
}
server {
if ($host = doc.gargantext.org) {
return 301 https://$host$request_uri;
} # managed by Certbot
listen 80;
server_name doc.gargantext.org;
return 404; # managed by Certbot
}
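# To enable this configuration (a sketch, assuming the stock Debian layout where this file
# lives in /etc/nginx/sites-available/):
#   sudo ln -s /etc/nginx/sites-available/default /etc/nginx/sites-enabled/
#   sudo nginx -t && sudo systemctl reload nginx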

==> devops/nginx/frames/default <==
##
# You should look at the following URL's in order to grasp a solid understanding
# of Nginx configuration files in order to fully unleash the power of Nginx.
# https://www.nginx.com/resources/wiki/start/
# https://www.nginx.com/resources/wiki/start/topics/tutorials/config_pitfalls/
# https://wiki.debian.org/Nginx/DirectoryStructure
#
# In most cases, administrators will remove this file from sites-enabled/ and
# leave it as reference inside of sites-available where it will continue to be
# updated by the nginx packaging team.
#
# This file will automatically load configuration files provided by other
# applications, such as Drupal or Wordpress. These applications will be made
# available underneath a path with that package name, such as /drupal8.
#
# Please see /usr/share/doc/nginx-doc/examples/ for more detailed examples.
##
# Default server configuration
#
#upstream backend_istex{
# server 127.0.0.1:8080;
#}
#upstream backend_cillex{
# server 127.0.0.1:7080;
#}
#events {
# worker_connections 2000;
#}
server {
server_name write.frame.gargantext.org;
location / {
# include proxy_params;
proxy_pass http://localhost:3000;
# proxy_http_version 1.1;
# proxy_set_header Upgrade $http_upgrade;
# proxy_set_header Connection "upgrade";
# proxy_set_header Host $host;
# proxy_cookie_path / "/; secure; HttpOnly; SameSite=lax";
#proxy_set_header X-Real-IP $remote_addr;
# proxy_cookie_domain $host $host;
# proxy_ignore_headers Cache-Control Expires Set-Cookie;
}
location /socket.io {
proxy_pass http://localhost:3000;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "Upgrade";
proxy_set_header Host $host;
}
listen 443 ssl; # managed by Certbot
ssl_certificate /etc/letsencrypt/live/searx.frame.gargantext.org/fullchain.pem; # managed by Certbot
ssl_certificate_key /etc/letsencrypt/live/searx.frame.gargantext.org/privkey.pem; # managed by Certbot
include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
}
server {
server_name write2.frame.gargantext.org;
location / {
# include proxy_params;
proxy_pass http://localhost:3001;
# proxy_http_version 1.1;
# proxy_set_header Upgrade $http_upgrade;
# proxy_set_header Connection "upgrade";
# proxy_set_header Host $host;
# proxy_cookie_path / "/; secure; HttpOnly; SameSite=lax";
#proxy_set_header X-Real-IP $remote_addr;
# proxy_cookie_domain $host $host;
# proxy_ignore_headers Cache-Control Expires Set-Cookie;
}
listen 443 ssl; # managed by Certbot
ssl_certificate /etc/letsencrypt/live/write2.frame.gargantext.org/fullchain.pem; # managed by Certbot
ssl_certificate_key /etc/letsencrypt/live/write2.frame.gargantext.org/privkey.pem; # managed by Certbot
include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
}
server {
server_name calc.frame.gargantext.org;
location / {
include proxy_params;
proxy_pass http://localhost:8000;
}
listen 443 ssl; # managed by Certbot
ssl_certificate /etc/letsencrypt/live/searx.frame.gargantext.org/fullchain.pem; # managed by Certbot
ssl_certificate_key /etc/letsencrypt/live/searx.frame.gargantext.org/privkey.pem; # managed by Certbot
include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
}
#server {
# server_name hackmd.gargantext.org;
# location / {
# include proxy_params;
# proxy_pass http://localhost:8000;
# }
#
# listen 443 ssl; # managed by Certbot
# ssl_certificate /etc/letsencrypt/live/cillex.gargantext.org/fullchain.pem; # managed by Certbot
# ssl_certificate_key /etc/letsencrypt/live/cillex.gargantext.org/privkey.pem; # managed by Certbot
# include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
# ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
#
#}
server {
server_name istex.frame.gargantext.org;
location / {
include proxy_params;
proxy_pass http://localhost:8080;
}
listen 443 ssl; # managed by Certbot
ssl_certificate /etc/letsencrypt/live/searx.frame.gargantext.org/fullchain.pem; # managed by Certbot
ssl_certificate_key /etc/letsencrypt/live/searx.frame.gargantext.org/privkey.pem; # managed by Certbot
include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
}
server {
server_name istex.gargantext.org;
location / {
include proxy_params;
proxy_pass http://localhost:8080;
}
listen 443 ssl; # managed by Certbot
ssl_certificate /etc/letsencrypt/live/searx.frame.gargantext.org/fullchain.pem; # managed by Certbot
ssl_certificate_key /etc/letsencrypt/live/searx.frame.gargantext.org/privkey.pem; # managed by Certbot
include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
}
server {
server_name searx.frame.gargantext.org;
location / {
include proxy_params;
proxy_pass http://localhost:8181;
}
listen 443 ssl; # managed by Certbot
ssl_certificate /etc/letsencrypt/live/searx.frame.gargantext.org/fullchain.pem; # managed by Certbot
ssl_certificate_key /etc/letsencrypt/live/searx.frame.gargantext.org/privkey.pem; # managed by Certbot
include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
}
# Virtual Host configuration for example.com
#
# You can move that to a different file under sites-available/ and symlink that
# to sites-enabled/ to enable it.
#
#server {
# listen 80;
# listen [::]:80;
#
# server_name example.com;
#
# root /var/www/example.com;
# index index.html;
#
# location / {
# try_files $uri $uri/ =404;
# }
#}
server {
if ($host = cillex.gargantext.org) {
return 301 https://$host$request_uri;
} # managed by Certbot
server_name cillex.gargantext.org;
return 404; # managed by Certbot
listen 443 ssl; # managed by Certbot
ssl_certificate /etc/letsencrypt/live/searx.frame.gargantext.org/fullchain.pem; # managed by Certbot
ssl_certificate_key /etc/letsencrypt/live/searx.frame.gargantext.org/privkey.pem; # managed by Certbot
include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
}
server {
if ($host = searx.frame.gargantext.org) {
return 301 https://$host$request_uri;
} # managed by Certbot
server_name searx.frame.gargantext.org;
listen 80;
return 404; # managed by Certbot
}
server {
if ($host = istex.frame.gargantext.org) {
return 301 https://$host$request_uri;
} # managed by Certbot
server_name istex.frame.gargantext.org;
listen 80;
return 404; # managed by Certbot
}
server {
if ($host = calc.frame.gargantext.org) {
return 301 https://$host$request_uri;
} # managed by Certbot
server_name calc.frame.gargantext.org;
listen 80;
return 404; # managed by Certbot
}
server {
if ($host = write.frame.gargantext.org) {
return 301 https://$host$request_uri;
} # managed by Certbot
server_name write.frame.gargantext.org;
listen 80;
return 404; # managed by Certbot
}
server {
if ($host = write2.frame.gargantext.org) {
return 301 https://$host$request_uri;
} # managed by Certbot
server_name write2.frame.gargantext.org;
listen 80;
return 404; # managed by Certbot
}
server {
if ($host = istex.gargantext.org) {
return 301 https://$host$request_uri;
} # managed by Certbot
server_name istex.gargantext.org;
listen 80;
return 404; # managed by Certbot
}
server {
if ($host = cillex.gargantext.org) {
return 301 https://$host$request_uri;
} # managed by Certbot
server_name cillex.gargantext.org;
listen 80;
return 404; # managed by Certbot
}

==> devops/nginx/install <==
sudo apt install apache2-utils
htpasswd -c /etc/nginx/haskell_gargantext.htpasswd username1
sudo apt-get install certbot python-certbot-nginx
sudo certbot --nginx

==> devops/openStack/install <==
#!/bin/bash
########################################################################
# Open Stack only: attach volumes
# attach the volume created (OS interface or API)
sudo fdisk -l
sudo fdisk /dev/vdb   # interactive: n, p, t, 83, w
sudo mkfs.ext4 /dev/vdb1
sudo blkid
# copy UUID in fstab (same parameters)
sudo vim /etc/fstab
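# Example /etc/fstab entry (a sketch; substitute the UUID reported by blkid above and the
# mount point you actually use, e.g. /srv where the gargantua home is placed later):
#   UUID=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx  /srv  ext4  defaults  0  2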
########################################################################
sudo sed -i "s/stretch/buster/g" /etc/apt/sources.list
sudo apt update
sudo apt dist-upgrade
# sudo reboot #recommended
########################################################################

==> devops/postgres/create <==
#!/bin/bash
# sudo su postgres
# postgresql://$USER:$PW@localhost/$DB
PW="C8kdcUrAQy66U"
DB="gargandbV5"
USER="gargantua"
psql -c "CREATE USER \"${USER}\""
psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
psql -c "DROP DATABASE IF EXISTS \"${DB}\""
createdb "${DB}"
psql "${DB}" < schema.sql
psql -c "ALTER DATABASE \"${DB}\" OWNER to \"${USER}\""

==> devops/postgres/schema <==
#!/bin/bash
DB="gargandbV5"
rm ../../tmp*
rm ../../repo*
psql -c "drop database IF EXISTS \"${DB}\""
createdb "${DB}"
psql "${DB}" < schema.sql

==> devops/postgres/schema.sql <==
CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
CREATE EXTENSION IF NOT EXISTS tsm_system_rows;
CREATE EXTENSION pgcrypto;
-----------------------------------------------------------------
CREATE TABLE public.auth_user (
id SERIAL,
password CHARACTER varying(128) NOT NULL,
last_login TIMESTAMP with time zone,
is_superuser BOOLEAN NOT NULL,
username CHARACTER varying(150) NOT NULL,
first_name CHARACTER varying(30) NOT NULL,
last_name CHARACTER varying(30) NOT NULL,
email CHARACTER varying(254) NOT NULL,
is_staff BOOLEAN NOT NULL,
is_active BOOLEAN NOT NULL,
date_joined TIMESTAMP with time zone DEFAULT now() NOT NULL,
PRIMARY KEY (id)
);
ALTER TABLE public.auth_user OWNER TO gargantua;
-- TODO add publication_date
-- TODO typename -> type_id
CREATE TABLE public.nodes (
id SERIAL,
hash_id CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
typename INTEGER NOT NULL,
user_id INTEGER NOT NULL,
parent_id INTEGER REFERENCES public.nodes(id) ON DELETE CASCADE ,
name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
date TIMESTAMP with time zone DEFAULT now() NOT NULL,
hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
search tsvector,
PRIMARY KEY (id),
FOREIGN KEY (user_id) REFERENCES public.auth_user(id) ON DELETE CASCADE
);
ALTER TABLE public.nodes OWNER TO gargantua;
--------------------------------------------------------------
-- | Ngrams
CREATE TABLE public.ngrams (
id SERIAL,
terms CHARACTER varying(255),
n INTEGER,
PRIMARY KEY (id)
);
ALTER TABLE public.ngrams OWNER TO gargantua;
-- | Ngrams PosTag
CREATE TABLE public.ngrams_postag (
id SERIAL,
lang_id INTEGER,
algo_id INTEGER,
postag CHARACTER varying(5),
ngrams_id INTEGER NOT NULL,
lemm_id INTEGER NOT NULL,
score INTEGER DEFAULT 1 ::integer NOT NULL,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE,
FOREIGN KEY (lemm_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.ngrams_postag OWNER TO gargantua;
--------------------------------------------------------------
CREATE TABLE public.node_ngrams (
id SERIAL,
node_id INTEGER NOT NULL,
node_subtype INTEGER,
ngrams_id INTEGER NOT NULL,
ngrams_type INTEGER, -- change to ngrams_field? (no for pedagogic reason)
ngrams_field INTEGER,
ngrams_tag INTEGER,
ngrams_class INTEGER,
weight double precision,
PRIMARY KEY (id),
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_ngrams OWNER TO gargantua;
CREATE TABLE public.node_nodengrams_nodengrams (
node_id INTEGER NOT NULL,
node_ngrams1_id INTEGER NOT NULL,
node_ngrams2_id INTEGER NOT NULL,
weight double precision,
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (node_ngrams1_id) REFERENCES public.node_ngrams(id) ON DELETE CASCADE,
FOREIGN KEY (node_ngrams2_id) REFERENCES public.node_ngrams(id) ON DELETE CASCADE,
PRIMARY KEY (node_id, node_ngrams1_id, node_ngrams2_id)
);
ALTER TABLE public.node_nodengrams_nodengrams OWNER TO gargantua;
--------------------------------------------------------------
--------------------------------------------------------------
--
--
--CREATE TABLE public.nodes_ngrams_ngrams (
-- node_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
-- ngram1_id integer NOT NULL REFERENCES public.ngrams(id) ON DELETE CASCADE,
-- ngram2_id integer NOT NULL REFERENCES public.ngrams(id) ON DELETE CASCADE,
-- weight double precision,
-- PRIMARY KEY (node_id,ngram1_id,ngram2_id)
--);
--
--ALTER TABLE public.nodes_ngrams_ngrams OWNER TO gargantua;
---------------------------------------------------------------
-- TODO nodes_nodes(node1_id int, node2_id int, edge_type int , weight real)
CREATE TABLE public.nodes_nodes (
node1_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
node2_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
score REAL,
category INTEGER,
PRIMARY KEY (node1_id, node2_id)
);
ALTER TABLE public.nodes_nodes OWNER TO gargantua;
---------------------------------------------------------------
CREATE TABLE public.node_node_ngrams (
node1_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
node2_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
ngrams_type INTEGER,
weight double precision,
PRIMARY KEY (node1_id, node2_id, ngrams_id, ngrams_type)
);
ALTER TABLE public.node_node_ngrams OWNER TO gargantua;
CREATE TABLE public.node_node_ngrams2 (
node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
weight double precision,
PRIMARY KEY (node_id, nodengrams_id)
);
ALTER TABLE public.node_node_ngrams2 OWNER TO gargantua;
--------------------------------------------------------------
--CREATE TABLE public.nodes_ngrams_repo (
-- version integer NOT NULL,
-- patches jsonb DEFAULT '{}'::jsonb NOT NULL,
-- PRIMARY KEY (version)
--);
--ALTER TABLE public.nodes_ngrams_repo OWNER TO gargantua;
---------------------------------------------------------
-- If needed for rights management at row level
-- CREATE EXTENSION IF NOT EXISTS acl WITH SCHEMA public;
CREATE TABLE public.rights (
user_id INTEGER NOT NULL REFERENCES public.auth_user(id) ON DELETE CASCADE,
node_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
rights INTEGER NOT NULL,
PRIMARY KEY (user_id, node_id)
);
ALTER TABLE public.rights OWNER TO gargantua;
------------------------------------------------------------
------------------------------------------------------------
-- INDEXES
CREATE INDEX ON public.auth_user USING btree (username varchar_pattern_ops);
CREATE UNIQUE INDEX ON public.auth_user USING btree (username);
CREATE INDEX ON public.rights USING btree (user_id,node_id);
CREATE INDEX ON public.nodes USING gin (hyperdata);
CREATE INDEX ON public.nodes USING btree (user_id, typename, parent_id);
CREATE INDEX ON public.nodes USING btree (id, typename, date ASC);
CREATE INDEX ON public.nodes USING btree (id, typename, date DESC);
CREATE INDEX ON public.nodes USING btree (typename, id);
CREATE UNIQUE INDEX ON public.nodes USING btree (hash_id);
-- CREATE UNIQUE INDEX ON public.nodes USING btree (((hyperdata ->> 'uniqId'::text)));
-- CREATE UNIQUE INDEX ON public.nodes USING btree (((hyperdata ->> 'uniqIdBdd'::text)));
-- CREATE UNIQUE INDEX ON public.nodes USING btree (typename, parent_id, ((hyperdata ->> 'uniqId'::text)));
CREATE UNIQUE INDEX ON public.ngrams (terms); -- TEST GIN
CREATE INDEX ON public.ngrams USING btree (id, terms);
CREATE UNIQUE INDEX ON public.ngrams_postag (lang_id,algo_id,postag,ngrams_id,lemm_id);
CREATE INDEX ON public.node_ngrams USING btree (node_id,node_subtype);
CREATE UNIQUE INDEX ON public.node_ngrams USING btree (node_id,node_subtype, ngrams_id);
CREATE UNIQUE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id);
CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, category);
CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type);
CREATE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id);
CREATE INDEX ON public.node_node_ngrams USING btree (ngrams_id, node2_id);
CREATE INDEX ON public.node_node_ngrams USING btree (ngrams_type);
CREATE INDEX ON public.node_nodengrams_nodengrams USING btree (node_id, node_ngrams1_id, node_ngrams2_id);
CREATE INDEX ON public.node_nodengrams_nodengrams USING btree (node_ngrams1_id);
CREATE INDEX ON public.node_nodengrams_nodengrams USING btree (node_ngrams2_id);
CREATE INDEX ON public.node_node_ngrams2 USING btree (node_id);
CREATE INDEX ON public.node_node_ngrams2 USING btree (nodengrams_id);
CREATE INDEX ON public.node_node_ngrams2 USING btree (node_id, nodengrams_id);
------------------------------------------------------------
------------------------------------------------------------------------
-- Ngrams Full DB Extraction Optim
-- TODO remove hard parameter and move elsewhere
CREATE OR REPLACE function node_pos(int, int) returns bigint
AS 'SELECT count(id) from nodes
WHERE id < $1
AND typename = $2
'
LANGUAGE SQL immutable;
--drop index node_by_pos;
create index node_by_pos on nodes using btree(node_pos(id,typename));
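-- Example (sketch): with the functional index above, an ordered scan such as
--   SELECT id, node_pos(id, typename) AS pos
--   FROM nodes
--   ORDER BY node_pos(id, typename)
--   LIMIT 100;
-- can use node_by_pos for the ORDER BY instead of sorting the whole table.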

==> devops/postgres/upgrade/0.0.2.6.sql <==
CREATE TABLE public.ngrams_postag (
id SERIAL,
lang_id INTEGER,
algo_id INTEGER,
postag CHARACTER varying(5),
ngrams_id INTEGER NOT NULL,
lemm_id INTEGER NOT NULL,
score INTEGER DEFAULT 1 ::integer NOT NULL,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE,
FOREIGN KEY (lemm_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
) ;
-- ALTER TABLE public.ngrams_postag OWNER TO gargantua;
CREATE UNIQUE INDEX ON public.ngrams_postag (lang_id,algo_id,postag,ngrams_id,lemm_id);

==> devops/tmux.sh <==
#!/bin/bash
tmux new -d -s gargantext './server' \; \
split-window -h -d 'cd ./purescript-gargantext ; ./server' \; \
select-pane -t 1 \; \
split-window -d 'cd deps/CoreNLP ; ./startServer.sh'

==> devops/ubuntu/install <==
#!/bin/bash
# ____ _ _
# / ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
# | | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
# | |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
# \____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
# |___/
# This recipe is for a specific setup (OpenStack && Debian)
# Step-by-step installation process (mainly for advanced users)
# For others: a docker image is coming
########################################################################
# To be sure to have updated packages
sudo apt update
sudo apt upgrade
# Tmux to avoid disconnections during the installation process
# Htop for visual monitoring
sudo apt install tmux htop
########################################################################
#sudo sed -i "s/stretch/buster/g" /etc/apt/sources.list
#sudo apt update
#sudo apt dist-upgrade
# sudo reboot #recommended
########################################################################
sudo apt update
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-11 nginx libigraph0-dev libgfortran-8-dev
sudo apt install git
#git config --global user.email "contact@gargantext.org"
#git config --global user.name "Gargantua"
########################################################################
echo "Which user?"
read USER
USER="gargantua"
sudo adduser --disabled-password --gecos "" $USER
########################################################################
#cd /home
#sudo mv -if /home/$USER /srv/
#sudo ln -s /srv/$USER
sudo apt install curl
curl -sSL https://get.haskellstack.org/ | sh
sudo su $USER
stack update
stack upgrade
git clone https://gitlab.iscpif.fr/gargantext/haskell-gargantext.git
cd haskell-gargantext
##########
git clone https://gitlab.iscpif.fr/gargantext/purescript-gargantext
cd purescript-gargantext
# as sudoer
curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
sudo apt update
sudo apt install yarn
# as user
yarn install && yarn install-ps && yarn build
# temporary bug (help welcome)
cp src/index.html dist/index.html
#########################################################################
# stack install should be enough, but the full process is spelled out in the following steps
stack setup && stack build && stack install
#########################################################################
# build deps
#!/bin/bash
mkdir deps
cd deps
git clone https://gitlab.iscpif.fr/gargantext/clustering-louvain-cplusplus.git
cd clustering-louvain-cplusplus
./install
cd ..
sudo apt install default-jdk
wget https://dl.gargantext.org/coreNLP.tar.bz2
tar xvjf coreNLP.tar.bz2
# CoreNLP needs to be started
# ./startServer.sh
# Specific to our servers
#### Configure timezone and locale ###################################
# todo sync dates
#echo "########### LOCALES & TZ #################"
#echo "Europe/Paris" > /etc/timezone
#dpkg-reconfigure --frontend=noninteractive tzdata
##ENV TZ "Europe/Paris"
#
#sed -i -e 's/# en_GB.UTF-8 UTF-8/en_GB.UTF-8 UTF-8/' /etc/locale.gen && \
#sed -i -e 's/# fr_FR.UTF-8 UTF-8/fr_FR.UTF-8 UTF-8/' /etc/locale.gen && \
#locale-gen && \
#update-locale LANG=fr_FR.UTF-8 && \
#update-locale LANGUAGE=fr_FR.UTF-8 && \
#update-locale LC_ALL=fr_FR.UTF-8
#################################################################
# Database configuration
# CREATE USER gargantua WITH PASSWORD $(grep DB_PASS gargantext.ini)
# GRANT ALL PRIVILEGES ON DATABASE gargandbV4 to gargantua
#######################################################################
## POSTGRESQL DATA (as ROOT)
#######################################################################
PGVersion=11
GARGDATA="/srv/gargantua/gargandata"
mkdir $GARGDATA
sudo apt install rsync
sudo sed -iP "s%^data_directory.*%data_directory = '$GARGDATA'%" /etc/postgresql/$PGVersion/main/postgresql.conf
sudo rsync -av /var/lib/postgresql/$PGVersion/main $GARGDATA
# configure the database with script in devops/postgres
# edit gargantext.ini
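#
# Sketch of those two steps (paths relative to the haskell-gargantext checkout;
# devops/postgres/create expects to be run as the postgres user):
#   (cd devops/postgres && ./create)   # creates the role, gargandbV5 and loads schema.sql
#   $EDITOR gargantext.ini             # set DB_PASS, SECRET_KEY, FILEPATHs and FRAME_* URLs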
..........,,;;;;,,,oKXNNNNNNNNNXXXXXKK0OOxdl::ccc:::::;;;;,,,'..
.........';;;;;;,,,,'''''''''dXNMMMMMMMMMMMMMMMMMMMWWWNNNWNNNNNNXXXXKKK0Oxddlcc::::::::;;;,,,'....
.........',,,;;;;,,,'''''''''''''''''''oXNMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWWWWNNNNNNNXXXXKKK0Oxddlcccc::;;
.::cccllc:;''''''''''''''''''''.''''''''oXNMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNNNXXXXKK0O
...';;;;;;;;;,'''''''''''''''''''''''''''''oXNMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWWWNNNNNNX0
;;,,'''''''''''''''''''''''''''''''''''''''oXNMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
'''''''''''''''''''''''''''''''''',;:::cclldkOOOOO00000KKKXXNWWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNX
'''''''''''''''''''''''''''';:loodkkOO0KKXXkc:;;;;:::::cccloodxkkO0NMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNX
''''''''''''''''''','''''',,:okKKNMMMMMMMMMk;,'''''''',''''''',;;cd0MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNX
''''''''''''''''''''''',:lxxk0KNNWMWWWNXKKKkoooolllcc:;,''''''',,:lOMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
''''''''''''''''''',:llx0XWWMMWWWX0kddolcccx0KXXXXKKK0Okxxo:;'''';:OMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
'''''''''''''''''':oOXXNMMMMNXOddl:,'''''''oXNMMMMMMMMMMWWX0l'''';:kMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
'''''''''''''''':lONWMMMMNXXkl:,,''''''''''dXNMMMMMMMMMMMMWWx,,'';:kMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,'''''''''''',::xXNMMMMWWOoo:''''''''''''''dXNMMMMWNNNNWWWMMk:;'';cOMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,'''''''''''':oo0MMMMMMWXd,,,,''''''''''',,dXNMMMMNXXXXNWWMMOc;'';cOMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
'''''''''''''lOOXMMMMMMXkl'''''''''''''''''dXNMMWNKOOOO0KKNMKxoccod0MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,'''''''''',:kWWWMMMWWWk:,'''''''''''''''''dXNMMXOkxxxxxxx0NNNXKKKXNMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,''''''''''cxKMMMMMMNKKo'''''''''''''''''''dXNMMXOkxxxxxxx0NWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
'''''''''''oKNMMMMMMKddc'''''''''''''''''''dXWMMNX0OOOkO00XWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,''''''''''dNWMMMMMM0oo:'''''''''''''''''''oXNMMWWX000OKNNWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,'''''''',,xWWMMMMMMOcc;'''''''''''''''''''oXNMMMWNNNXXNWWWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,''''''',;;kMMMMMMMMk;;,'''''''''''''''''''dXWMMMMMMMMWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,''''''',::kMMMMMMMMk;;,'''''''''''''''''''dXNMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,''''''';::kMMMMMMMMk;;,'''''''''''''''''''dXNMMNKOkkkkkkkkkkkkkkkkkkkkkOOOXMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,''''''',;;kMMMMMMMMO::;'''''''''''''''''''dXNMMKxo::::;,,,,,,,,,,,;;;;:cll0MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,''''''',,,kMMMMMMMMOcc;'''''''''''''''''''oXNMMXkocc::;,,,''''''''',,;:loo0MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,'''''''',,xWWMMMMMM0ll:'''''''''''''''''''oXNMMNX0OOOkkxxc'''''',,cxxkkO00XMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,''''''''''oKNMMMMMMKkkc'''''''''''''''''''dXWMMMMMMMMMWWWx,,'''',;kMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,''''''''''cdKMMMMMMNXXo'''''''''''''''''''dXWMMMMMMMMMMMMk,,'''',:kMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,'''''''''',;xWWWMMMMWWk:;'''''''''''''''''dXNMMMMMMMMMMMMk,,'''',:kMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNX
,''''''''''''lkkKMMMMMMX0o'''''''''''''''''dXNMMMMMMMMMMMMx,,'''',:kMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,'''''''''''':llOMMMMMMWWx,,,''''''''''''''dXNWWMMMMMMMMMMk,,'''',:kMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
,'''''''''''',::xXNMMMMMW0oo:''''''''''''''dXNMMMMMMMMMMMMk,,'''',;kMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWX
;,,''''''''''''':lONWMMMMWNNOl:''''''''''''oXNMMMMMMMMMMMMx,,'''',:kMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNX
.';,,,,''''''''''':dONNWMMMMWXOddl:,'''''''dXNMMMMMMMMMMMMk,,'''',:kMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNX
.',,,;,''''',,''',:ookKNMMMMWWWXOxddlc:;;xXNWWWWWWWNXXKKd,,'''',:kMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNX
.'',:;''''''''''''''cxKMMMMMMMMMWNN0xollxKXWWWWNXXK0Okkl,''''';:kWWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNX
....';;,''''''''''';cokk0XWMMMMWWWXK0OOkxxxxdddddoolcc;'''',,:lOMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNK0
...,;,,,,'''',,'',,,:odkkO0KXXNNWWWk:;'''',,,;;:ccclodxkkO0XMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNNNX0kko:;,
.';;;;,'''''''''''',,;:clloodddxxxxxxxkkkOO000KXNWWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNNXX0kocc:;'..
....';,''''''''''''''''''''''dXNMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNNXX0koc:;;'.
..,;;;,''''''''''''''''''dXNMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWWWNXKOOd::;;'.
.cc:,''''''''''''''''''dXWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMNXXK0dcc.
...,;;,,,''''''''''''''dXWMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMWNNNNKOdcc::,...
..'',;,''''''''''''oXNMMMMMMMMMMMMMMMMMMMMMMMMMMWNNNXKOOd::::,..
..';;,'''''''''oKNMMMMMMMMMMMMMMMMMMMWWNXXKOd::::,...
..';;,,,''''oXNMMMMMMMMMMMMWWNXKOOdc:;;,.. TODO, too big, what diet for this ascii art ?
.::;;,''''oXNWWMMMMMMMMMMWNK0d::.
..',,,,,''oKNMMMMWWWNX0OOdc:;,..
....';;o0KXXKOdcc:;,...
.ccdOO00xl.

==> docs/search-api.org <==
#+TITLE: Searx API request
This is related to issue
https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/70
#+begin_src restclient
:domain := "https://searx.frame.gargantext.org"
POST :domain/
Content-Type: application/x-www-form-urlencoded
category_general=1&q=banach%20space&pageno=1&time_range=None&language=en-US&format=json
#+end_src
#+RESULTS:
#+BEGIN_SRC js
{
"query": "banach space",
"number_of_results": 93700.0,
"results": [
{
"url": "https://en.wikipedia.org/wiki/Banach_space",
"title": "Banach space",
"engine": "wikipedia",
"parsed_url": [
"https",
"en.wikipedia.org",
"/wiki/Banach_space",
"",
"",
""
],
"engines": [
"wikipedia"
],
"positions": [
1
],
"score": 1.0,
"category": "general",
"pretty_url": "https://en.wikipedia.org/wiki/Banach_space"
},
{
"url": "http://mathworld.wolfram.com/BanachSpace.html",
"title": "Banach Space -- from Wolfram MathWorld",
"content": "10/05/2021 · A Banach space is a complete vector space with a norm . Two norms and are called equivalent if they give the same topology , which is equivalent to the existence of constants and such that. (1) and. (2) hold for all . In the finite-dimensional case, all norms are equivalent.",
"engine": "bing",
"parsed_url": [
"http",
"mathworld.wolfram.com",
"/BanachSpace.html",
"",
"",
""
],
"engines": [
"bing"
],
"positions": [
1
],
"score": 1.0,
"category": "general",
"pretty_url": "http://mathworld.wolfram.com/BanachSpace.html"
},
{
"url": "https://en.wikipedia.org/wiki/List_of_Banach_spaces",
"title": "List of Banach spaces - Wikipedia",
"content": "25 lignes · Classical Banach spaces. According to Diestel (1984, Chapter VII), the classical Banach …",
"engine": "bing",
"parsed_url": [
"https",
"en.wikipedia.org",
"/wiki/List_of_Banach_spaces",
"",
"",
""
],
"engines": [
"bing"
],
"positions": [
2
],
"score": 0.5,
"category": "general",
"pretty_url": "https://en.wikipedia.org/wiki/List_of_Banach_spaces"
},
{
"url": "https://encyclopediaofmath.org/wiki/Banach_space",
"title": "Banach space - Encyclopedia of Mathematics",
"content": "According to Diestel (1984, Chapter VII), the classical Banach spaces are those defined by Dunford & Schwartz (1958), which is the source for the following table. Here K denotes the field of real numbers or complex numbers and I is a closed and bounded interval [a,b]. The number p is a real number with 1 < p < ∞, and q is its Hölder conjugate (also with 1 < q < ∞), so that the next equation holds: $${\\displaystyle {\\frac {1}{q}}+{\\frac {1}{p}}=1,}$$According to Diestel (1984, Chapter VII), the classical Banach spaces are those defined by Dunford & Schwartz (1958), which is the source for the following table. Here K denotes the field of real numbers or complex numbers and I is a closed and bounded interval [a,b]. The number p is a real number with 1 < p < ∞, and q is its Hölder conjugate (also with 1 < q < ∞), so that the next equation holds: $${\\displaystyle {\\frac {1}{q}}+{\\frac {1}{p}}=1,}$$and thus $${\\displaystyle q={\\frac {p}{p-1}}.}$$The symbol Σ denotes a σ-algebra of sets, and Ξ denotes just an algebra of sets (for spaces only requiring finite additivity, such as the ba space). The symbol μ denotes a positive measure: that is, a real-valued positive set function defined on a σ-algebra which is countably additive.",
"engine": "bing",
"parsed_url": [
"https",
"encyclopediaofmath.org",
"/wiki/Banach_space",
"",
"",
""
],
"engines": [
"bing"
],
"positions": [
3
],
"score": 0.3333333333333333,
"category": "general",
"pretty_url": "https://encyclopediaofmath.org/wiki/Banach_space"
},
{
"url": "https://www.techopedia.com/definition/17852/banach-space",
"title": "What is Banach Space? - Definition from Techopedia",
"content": "22/03/2017 · In functional analysis, a Banach space is a normed vector space that allows vector length to be computed. When the vector space is normed, that means that each vector other than the zero vector has a length that is greater than zero. The length and distance between two vectors can thus be computed. The vector space is complete, meaning a Cauchy sequence of vectors in a Banach space …",
"engine": "bing",
"parsed_url": [
"https",
"www.techopedia.com",
"/definition/17852/banach-space",
"",
"",
""
],
"engines": [
"bing"
],
"positions": [
4
],
"score": 0.25,
"category": "general",
"pretty_url": "https://www.techopedia.com/definition/17852/banach-space"
},
{
"url": "https://www.sciencedirect.com/topics/mathematics/banach-spaces",
"title": "Banach Spaces - an overview | ScienceDirect Topics",
"content": "A Banach spaceis a complete normed linear space. Example 4.3 The spaces RN,CNare vector spaces which are also complete metric spaces with any of the norms ∥⋅∥p, hence they are Banach spaces. Similarly C(E), Lp(E) are Banach spaces with norms indicated above. □",
"engine": "bing",
"parsed_url": [
"https",
"www.sciencedirect.com",
"/topics/mathematics/banach-spaces",
"",
"",
""
],
"engines": [
"bing"
],
"positions": [
5
],
"score": 0.2,
"category": "general",
"pretty_url": "https://www.sciencedirect.com/topics/mathematics/banach-spaces"
},
{
"url": "https://people.math.gatech.edu/~heil/handouts/banach.pdf",
"title": "Banach Spaces - gatech.edu",
"content": "07/09/2006 · have already said that “a Banach space is complete” if every Cauchy sequence in the space converges. The term “complete sequences” defined in this section is a completely separate definition that applies to sets of vectors in a Hilbert or Banach space (although we …",
"engine": "bing",
"parsed_url": [
"https",
"people.math.gatech.edu",
"/~heil/handouts/banach.pdf",
"",
"",
""
],
"engines": [
"bing"
],
"positions": [
6
],
"score": 0.16666666666666666,
"category": "general",
"pretty_url": "https://people.math.gatech.edu/~heil/handouts/banach.pdf"
},
{
"url": "https://ncatlab.org/nlab/show/Banach+space",
"title": "Banach space in nLab",
"content": "",
"engine": "bing",
"parsed_url": [
"https",
"ncatlab.org",
"/nlab/show/Banach+space",
"",
"",
""
],
"engines": [
"bing"
],
"positions": [
7
],
"score": 0.14285714285714285,
"category": "general",
"pretty_url": "https://ncatlab.org/nlab/show/Banach+space"
},
{
"url": "https://www.numerade.com/books/chapter/structure-of-banach-spaces/",
"title": "Structure of Banach Spaces | Functional Analysis",
"content": "Structure of Banach Spaces, Functional Analysis and InfiniteDimensional Geometry - Marián Fabian, Petr Habala, Petr Hájek | All the textbook answers and step-b…",
"engine": "bing",
"parsed_url": [
"https",
"www.numerade.com",
"/books/chapter/structure-of-banach-spaces/",
"",
"",
""
],
"engines": [
"bing"
],
"positions": [
8
],
"score": 0.125,
"category": "general",
"pretty_url": "https://www.numerade.com/books/chapter/structure-of-banach-spaces/"
},
{
"url": "http://www.ma.huji.ac.il/~razk/iWeb/My_Site/Teaching_files/Banach.pdf",
"title": "2. Banach spaces - ma.huji.ac.il",
"content": "Definition 2.1A Banach space is a complete, normed, vector space. Comment 2.1Completeness is a metric space concept. In a normed space the metric is d(x,y)=x−y. Note that this metric satisfies the following “special\" properties: ¿ The underlying space is a vector space.",
"engine": "bing",
"parsed_url": [
"http",
"www.ma.huji.ac.il",
"/~razk/iWeb/My_Site/Teaching_files/Banach.pdf",
"",
"",
""
],
"engines": [
"bing"
],
"positions": [
9
],
"score": 0.1111111111111111,
"category": "general",
"pretty_url": "http://www.ma.huji.ac.il/~razk/iWeb/My_Site/Teaching_files/Banach.pdf"
}
],
"answers": [],
"corrections": [],
"infoboxes": [
{
"infobox": "Banach space",
"id": "https://en.wikipedia.org/wiki/Banach_space",
"content": "In mathematics, more specifically in functional analysis, a Banach space (pronounced [ˈbanax]) is a complete normed vector space. Thus, a Banach space is a vector space with a metric that allows the computation of vector length and distance between vectors and is complete in the sense that a Cauchy sequence of vectors always converges to a well defined limit that is within the space.",
"img_src": null,
"urls": [
{
"title": "Wikipedia",
"url": "https://en.wikipedia.org/wiki/Banach_space"
},
{
"title": "Wikidata",
"url": "https://www.wikidata.org/wiki/Q194397?uselang=en"
}
],
"engine": "wikidata",
"attributes": [
{
"label": "Inception",
"value": "1920"
}
]
}
],
"suggestions": [],
"unresponsive_engines": []
}
// POST https://searx.frame.gargantext.org/
// HTTP/1.1 200 OK
// Server: nginx/1.14.2
// Date: Tue, 27 Jul 2021 17:20:48 GMT
// Content-Type: application/json
// Content-Length: 8020
// Connection: keep-alive
// Server-Timing: total;dur=1826.455, total_0_go;dur=248.527, total_1_wp;dur=352.718, total_2_bi;dur=628.671, total_3_wd;dur=1822.518, load_0_go;dur=234.185, load_1_wp;dur=348.323, load_2_bi;dur=595.242, load_3_wd;dur=1778.783
// Request duration: 2.159931s
#+END_SRC
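
For reference, a minimal shell equivalent of the request above (a sketch; same endpoint and
form parameters as the restclient block):

#+begin_src bash
curl -s 'https://searx.frame.gargantext.org/' \
     -H 'Content-Type: application/x-www-form-urlencoded' \
     --data 'category_general=1&q=banach%20space&pageno=1&time_range=None&language=en-US&format=json'
#+end_src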

==> gargantext.ini_toModify <==
[gargantext]
# Main url serving the FrontEnd
URL = http://localhost
# Main API url serving the BackEnd
URL_BACKEND_API = http://localhost:8008/api/v1.0
# Needed to instantiate the first users and first data
MASTER_USER = gargantua
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = PASSWORD_TO_CHANGE
# Data path to local files
DATA_FILEPATH = FILEPATH_TO_CHANGE
# Data path to local files (do not use quotes)
REPO_FILEPATH = FILEPATH_TO_CHANGE
# [external]
# FRAMES (i.e. iframe sources used in various places on the frontend)
#FRAME_WRITE_URL = http://write.frame.gargantext.org/
FRAME_WRITE_URL = URL_TO_CHANGE
#FRAME_CALC_URL = http://calc.frame.gargantext.org/
FRAME_CALC_URL = URL_TO_CHANGE
FRAME_VISIO_URL = URL_TO_CHANGE
FRAME_SEARX_URL = URL_TO_CHANGE
FRAME_ISTEX_URL = URL_TO_CHANGE
MAX_DOCS_SCRAPERS = 10000
[server]
# Server config (TODO connect in ReaderMonad)
ALLOWED_ORIGIN = http://localhost
ALLOWED_ORIGIN_PORT = 8008
ALLOWED_HOST = localhost
ALLOWED_HOST_PORT = 3000
JWT_SETTINGS = TODO
[network]
# Emails From address (sent by smtp)
MAIL = username@gargantext.org
HOST = localhost
# if remote smtp host
# HOST_USER = user
# HOST_password = password
[database]
# PostgreSQL access
DB_HOST = 127.0.0.1
DB_PORT = 5432
DB_NAME = gargandbV5
DB_USER = gargantua
DB_PASS = PASSWORD_TO_CHANGE
[logs]
LOG_FILE = /var/log/gargantext/backend.log
LOG_LEVEL = LevelDebug
LOG_FORMATTER = verbose

==> nix/pinned-20.09.nix <==
# this version of nixpkgs contains liblapack at ?
# this version of nixpkgs contains gsl at ?
import (builtins.fetchGit {
# Descriptive name to make the store path easier to identify
name = "nixos-20.09";
url = "https://github.com/nixos/nixpkgs/";
# `git ls-remote https://github.com/nixos/nixpkgs-channels nixos-20.09`
ref = "refs/heads/nixos-20.09";
rev = "69f3a9705014ce75b0489404210995fb6f29836e";
})
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/nix/pinned-21.05.nix 0000664 0000000 0000000 00000000271 14124644201 0025607 0 ustar 00root root 0000000 0000000 import (builtins.fetchGit {
name = "nixos-21.05";
url = "https://github.com/nixos/nixpkgs";
ref = "refs/heads/nixos-21.05";
rev = "7e9b0dff974c89e070da1ad85713ff3c20b0ca97";
})
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/nix/pkgs.nix 0000664 0000000 0000000 00000001404 14124644201 0024632 0 ustar 00root root 0000000 0000000 { pkgs ? import ./pinned-21.05.nix {} }:
rec {
inherit pkgs;
ghc = pkgs.haskell.compiler.ghc8104;
hsBuildInputs = [
ghc
pkgs.cabal-install
];
nonhsBuildInputs = with pkgs; [
bzip2
git
gmp
gsl
#haskell-language-server
hlint
igraph
liblapack
lzma
pcre
pkgconfig
postgresql
xz
zlib
blas
gfortran7
# gfortran7.cc.lib
];
libPaths = pkgs.lib.makeLibraryPath nonhsBuildInputs;
shellHook = ''
export LD_LIBRARY_PATH="${pkgs.gfortran7.cc.lib}:${libPaths}:$LD_LIBRARY_PATH"
export LIBRARY_PATH="${pkgs.gfortran7.cc.lib}:${libPaths}"
'';
shell = pkgs.mkShell {
name = "gargantext-shell";
buildInputs = hsBuildInputs ++ nonhsBuildInputs;
inherit shellHook;
};
}

==> nix/shell.nix <==
(import ./pkgs.nix {}).shell

==> nix/stack-shell.nix <==
let ourpkgs = import ./pkgs.nix {};
pkgs = ourpkgs.pkgs;
in
pkgs.haskell.lib.buildStackProject rec {
name = "gargantext";
ghc = ourpkgs.ghc;
buildInputs = ourpkgs.nonhsBuildInputs;
shellHook = ourpkgs.shellHook;
}
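# Note (an assumption, not verified against this repo's stack.yaml): stack only uses this
# shell when its Nix integration is enabled, e.g.
#   nix:
#     enable: true
#     shell-file: nix/stack-shell.nix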

==> package.yaml <==
name: gargantext
version: '0.0.3.9.1'
synopsis: Search, map, share
description: Please see README.md
category: Data
author: Gargantext Team
maintainer: team@gargantext.org
copyright:
- ! 'Copyright: (c) 2017-Present: see git logs and README'
license: AGPL-3
homepage: https://gargantext.org
ghc-options: -Wall
dependencies:
- extra
- text
default-extensions:
- DataKinds
- DeriveGeneric
- FlexibleContexts
- FlexibleInstances
- GeneralizedNewtypeDeriving
- MultiParamTypeClasses
- NamedFieldPuns
- NoImplicitPrelude
- OverloadedStrings
- RankNTypes
- RecordWildCards
library:
source-dirs: src
ghc-options:
- -Wincomplete-uni-patterns
- -Wincomplete-record-updates
- -Wmissing-signatures
- -Wunused-binds
- -Wunused-imports
- -Werror
- -freduction-depth=300
exposed-modules:
- Gargantext
- Gargantext.API
- Gargantext.API.Dev
- Gargantext.API.HashedResponse
- Gargantext.API.Node
- Gargantext.API.Node.File
- Gargantext.API.Ngrams
- Gargantext.API.Ngrams.Tools
- Gargantext.API.Ngrams.Types
- Gargantext.API.Admin.Settings
- Gargantext.API.Admin.EnvTypes
- Gargantext.API.Admin.Types
- Gargantext.API.Prelude
- Gargantext.Core
- Gargantext.Core.NodeStory
- Gargantext.Core.Methods.Distances
- Gargantext.Core.Types
- Gargantext.Core.Types.Individu
- Gargantext.Core.Types.Main
- Gargantext.Core.Utils.Prefix
- Gargantext.Database.Action.Flow
- Gargantext.Database.Action.Flow.Types
- Gargantext.Database.Action.User.New
- Gargantext.Database.Query.Table.User
- Gargantext.Database.Query.Table.Node
- Gargantext.Database.Query.Table.Node.UpdateOpaleye
- Gargantext.Database.Query.Table.NgramsPostag
- Gargantext.Database.Prelude
- Gargantext.Database.Admin.Trigger.Init
- Gargantext.Database.Admin.Config
- Gargantext.Database.Admin.Types.Hyperdata
- Gargantext.Database.Admin.Types.Node
- Gargantext.Core.Text
- Gargantext.Core.Text.Context
- Gargantext.Core.Text.Corpus.Parsers
- Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
- Gargantext.Core.Text.Corpus.API
- Gargantext.Core.Text.Corpus.Parsers.CSV
- Gargantext.Core.Text.List.Formats.CSV
- Gargantext.Core.Text.Metrics
- Gargantext.Core.Text.Metrics.TFICF
- Gargantext.Core.Text.Metrics.CharByChar
- Gargantext.Core.Text.Metrics.Count
- Gargantext.Core.Text.Search
- Gargantext.Core.Text.Terms
- Gargantext.Core.Text.Terms.Mono
- Gargantext.Core.Text.Terms.Multi.Lang.En
- Gargantext.Core.Text.Terms.Multi.Lang.Fr
- Gargantext.Core.Text.Terms.Multi.RAKE
- Gargantext.Core.Text.Terms.WithList
- Gargantext.Core.Viz.Graph
- Gargantext.Core.Viz.Graph.Tools
- Gargantext.Core.Viz.Graph.Tools.IGraph
- Gargantext.Core.Viz.Graph.Index
- Gargantext.Core.Viz.AdaptativePhylo
- Gargantext.Core.Viz.Phylo.PhyloMaker
- Gargantext.Core.Viz.Phylo.PhyloTools
- Gargantext.Core.Viz.Phylo.PhyloExport
- Gargantext.Core.Viz.Phylo.SynchronicClustering
- Gargantext.Core.Viz.Types
dependencies:
- HSvm
- KMP
- MissingH
- MonadRandom
- QuickCheck
- Unique
- accelerate
- accelerate-arithmetic
- accelerate-utility
- aeson
- aeson-lens
- aeson-pretty
- array
- async
- attoparsec
- auto-update
- base >=4.7 && <5
- base16-bytestring
- base64-bytestring
- blaze-html
- blaze-markup
- blaze-svg
- bytestring
- case-insensitive
- cassava
- cereal # (IGraph)
- cborg
- conduit
- conduit-extra
- containers
- contravariant
- crawlerHAL
- crawlerISTEX
- crawlerIsidore
- crawlerPubMed
- cryptohash
- data-time-segment
- deepseq
- directory
- duckling
- exceptions
- fast-logger
- fclabels
- fgl
- filelock
- filepath
- formatting
- full-text-search
- fullstop
- gargantext-prelude
- graphviz
- hashable
- haskell-igraph
- hlcm
- hsparql
- hstatistics
- http-api-data
- http-client
- http-client-tls
- http-conduit
- http-media
- http-types
- hxt
- ini
- insert-ordered-containers
- jose
- json-stream
- lens
- located-base
- logging-effect
- matrix
- monad-control
- monad-logger
- mtl
- natural-transformation
- opaleye
- pandoc
- parallel
- parsec
- patches-class
- patches-map
- path
- path-io
- postgresql-simple
- pretty-simple
- probability
- product-profunctors
- profunctors
- protolude
- pureMD5
- quickcheck-instances
- rake
- random
- rdf4h
- regex-compat
- regex-tdfa
- resource-pool
- resourcet
- safe
- scientific
- semigroups
- serialise
- servant
- servant-auth
- servant-auth-server >= 0.4.4.0
- servant-auth-swagger
- servant-blaze
- servant-cassava
- servant-client
- servant-job
- servant-mock
- servant-multipart
- servant-server
- servant-static-th
- servant-swagger
- servant-swagger-ui
- servant-xml
- simple-reflect
- singletons # (IGraph)
- split
- stemmer
- swagger2
- tagsoup
- template-haskell
- temporary
- text-metrics
- time
- time-locale-compat
- timezone-series
- transformers
- transformers-base
- unordered-containers
- utf8-string
- uuid
- validity
- vector
- wai
- wai-app-static
- wai-cors
- wai-extra
- warp
- wreq
- xml-conduit
- xml-types
- xmlbf
- yaml
- zip
- zlib
executables:
gargantext-server:
main: Main.hs
source-dirs: bin/gargantext-server
ghc-options:
- -O2
- -Wcompat
- -Wmissing-signatures
- -rtsopts
- -threaded
- -with-rtsopts=-N
- -fprof-auto
dependencies:
- base
- containers
- gargantext
- gargantext-prelude
- vector
- cassava
- ini
- optparse-generic
- unordered-containers
- full-text-search
gargantext-cli:
main: Main.hs
source-dirs: bin/gargantext-cli
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
dependencies:
- aeson
- async
- base
- bytestring
- containers
- gargantext
- gargantext-prelude
- vector
- cassava
- ini
- optparse-generic
- split
- unordered-containers
- full-text-search
gargantext-adaptative-phylo:
main: Main.hs
source-dirs: bin/gargantext-adaptative-phylo
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
dependencies:
- aeson
- async
- base
- bytestring
- containers
- directory
- gargantext
- gargantext-prelude
- vector
- parallel
- cassava
- ini
- optparse-generic
- split
- unordered-containers
- cryptohash
- time
gargantext-import:
main: Main.hs
source-dirs: bin/gargantext-import
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
dependencies:
- gargantext
- gargantext-prelude
- base
- servant-server
gargantext-init:
main: Main.hs
source-dirs: bin/gargantext-init
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
dependencies:
- gargantext
- gargantext-prelude
- base
gargantext-upgrade:
main: Main.hs
source-dirs: bin/gargantext-upgrade
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
dependencies:
- gargantext
- gargantext-prelude
- base
gargantext-admin:
main: Main.hs
source-dirs: bin/gargantext-admin
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
dependencies:
- gargantext
- gargantext-prelude
- base
gargantext-cbor2json:
main: Main.hs
source-dirs: bin/gargantext-cbor2json
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
dependencies:
- gargantext
- gargantext-prelude
- base
- bytestring
- aeson
- serialise
tests:
garg-test:
main: Main.hs
source-dirs: src-test
default-extensions:
- DataKinds
- DeriveGeneric
- FlexibleContexts
- FlexibleInstances
- GeneralizedNewtypeDeriving
- MultiParamTypeClasses
- NoImplicitPrelude
- OverloadedStrings
- RankNTypes
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
dependencies:
- base
- gargantext
- gargantext-prelude
- hspec
- QuickCheck
- quickcheck-instances
- time
- parsec
- duckling
- text
- unordered-containers
# garg-doctest:
# main: Main.hs
# source-dirs: src-doctest
# ghc-options:
# - -O2
# - -Wcompat
# - -Wmissing-signatures
# - -rtsopts
# - -threaded
# - -with-rtsopts=-N
# dependencies:
# - doctest
# - Glob
# - QuickCheck
# - base
# - gargantext
# default-extensions:
# - DataKinds
# - DeriveGeneric
# - FlexibleContexts
# - FlexibleInstances
# - GeneralizedNewtypeDeriving
# - MultiParamTypeClasses
# - NoImplicitPrelude
# - OverloadedStrings
# - RankNTypes
#
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/run 0000775 0000000 0000000 00000000177 14124644201 0023110 0 ustar 00root root 0000000 0000000 #!/bin/bash
figlet "GarganText"
./bin/gargantext_stop; ./bin/gargantext_tmux && tmux a -t gargantext ; ./bin/gargantext_stop
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/server 0000775 0000000 0000000 00000000316 14124644201 0023605 0 ustar 00root root 0000000 0000000 #!/bin/bash
FOLDER="logs"
FILE=$(date +%Y%m%d%H%M.log)
LOGFILE=$FOLDER"/"$FILE
mkdir -p $FOLDER
~/.local/bin/gargantext-server --ini gargantext.ini --run Dev +RTS > $LOGFILE 2>&1 & tail -F $LOGFILE # -p
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-doctest/ 0000775 0000000 0000000 00000000000 14124644201 0024603 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-doctest/Main.hs 0000664 0000000 0000000 00000000204 14124644201 0026017 0 ustar 00root root 0000000 0000000 import System.FilePath.Glob
import Test.DocTest
import Gargantext.Prelude
main :: IO ()
main = glob "src/Gargantext/" >>= doctest
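-- NOTE: the corresponding garg-doctest target is currently commented out in
-- package.yaml; doctest is simply handed whatever paths glob returns for the
-- pattern above.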
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/ 0000775 0000000 0000000 00000000000 14124644201 0024115 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Core/ 0000775 0000000 0000000 00000000000 14124644201 0025005 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Core/Text.hs 0000664 0000000 0000000 00000014556 14124644201 0026300 0 ustar 00root root 0000000 0000000
{-|
Module : Core.Text
Description : Basic tests to avoid quick regression
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Core.Text where
{-
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Viz.Graph (Graph(..))
import Gargantext.Prelude
import Test.Hspec
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Text as DT
import Gargantext.Core.Viz.Graph.Tools (PartitionMethod(..))
import Gargantext.Core.Viz.Graph.Tools.IGraph (spinglass)
import Gargantext.Core.Methods.Distances (Distance(..))
-- | https://en.wikipedia.org/wiki/Text_mining
testText_en :: Text
testText_en = DT.pack "Text mining, also referred to as text data mining, roughly equivalent to text analytics, is the process of deriving high-quality information from text. High-quality information is typically derived through the devising of patterns and trends through means such as statistical pattern learning. Text mining usually involves the process of structuring the input text (usually parsing, along with the addition of some derived linguistic features and the removal of others, and subsequent insertion into a database), deriving patterns within the structured data, and finally evaluation and interpretation of the output. 'High quality' in text mining usually refers to some combination of relevance, novelty, and interestingness. Typical text mining tasks include text categorization, text clustering, concept/entity extraction, production of granular taxonomies, sentiment analysis, document summarization, and entity relation modeling (i.e., learning relations between named entities). Text analysis involves information retrieval, lexical analysis to study word frequency distributions, pattern recognition, tagging/annotation, information extraction, data mining techniques including link and association analysis, visualization, and predictive analytics. The overarching goal is, essentially, to turn text into data for analysis, via application of natural language processing (NLP) and analytical methods. A typical application is to scan a set of documents written in a natural language and either model the document set for predictive classification purposes or populate a database or search index with the information extracted."
testText_en_2 :: Text
testText_en_2 = DT.pack "It is hard to detect important articles in a specific context. Information retrieval techniques based on full text search can be inaccurate to identify main topics and they are not able to provide an indication about the importance of the article. Generating a citation network is a good way to find most popular articles but this approach is not context aware. The text around a citation mark is generally a good summary of the referred article. So citation context analysis presents an opportunity to use the wisdom of crowd for detecting important articles in a context sensitive way. In this work, we analyze citation contexts to rank articles properly for a given topic. The model proposed uses citation contexts in order to create a directed and edge-labeled citation network based on the target topic. Then we apply common ranking algorithms in order to find important articles in this newly created network. We showed that this method successfully detects a good subset of most prominent articles in a given topic. The biggest contribution of this approach is that we are able to identify important articles for a given search term even though these articles do not contain this search term. This technique can be used in other linked documents including web pages, legal documents, and patents as well as scientific papers."
-- | https://fr.wikipedia.org/wiki/Fouille_de_textes
testText_fr :: Text
testText_fr = DT.pack "La fouille de textes ou « l'extraction de connaissances » dans les textes est une spécialisation de la fouille de données et fait partie du domaine de l'intelligence artificielle. Cette technique est souvent désignée sous l'anglicisme text mining. Elle désigne un ensemble de traitements informatiques consistant à extraire des connaissances selon un critère de nouveauté ou de similarité dans des textes produits par des humains pour des humains. Dans la pratique, cela revient à mettre en algorithme un modèle simplifié des théories linguistiques dans des systèmes informatiques d'apprentissage et de statistiques. Les disciplines impliquées sont donc la linguistique calculatoire, l'ingénierie des langues, l'apprentissage artificiel, les statistiques et l'informatique."
termTests :: Text
termTests = "It is hard to detect important articles in a specific context. Information retrieval techniques based on full text search can be inaccurate to identify main topics and they are not able to provide an indication about the importance of the article. Generating a citation network is a good way to find most popular articles but this approach is not context aware. The text around a citation mark is generally a good summary of the referred article. So citation context analysis presents an opportunity to use the wisdom of crowd for detecting important articles in a context sensitive way. In this work, we analyze citation contexts to rank articles properly for a given topic. The model proposed uses citation contexts in order to create a directed and edge-labeled citation network based on the target topic. Then we apply common ranking algorithms in order to find important articles in this newly created network. We showed that this method successfully detects a good subset of most prominent articles in a given topic. The biggest contribution of this approach is that we are able to identify important articles for a given search term even though these articles do not contain this search term. This technique can be used in other linked documents including web pages, legal documents, and patents as well as scientific papers."
-- | Ngrams Test
-- >>> ngramsTest testText
-- 248
--ngramsTest :: Text -> Int
--ngramsTest x = length ws
-- where
-- --txt = concat <$> lines <$> clean <$> readFile filePath
-- txt = clean x
-- -- | Number of sentences
-- --ls = sentences $ txt
-- -- | Number of monograms used in the full text
-- ws = ngrams $ txt
-- -- | stem ngrams
-- TODO
-- group ngrams
--ocs = occ $ ws
--
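-- A toy stand-in for the commented-out ngramsTest above (hypothetical sketch
-- only: plain whitespace tokenisation instead of the real clean/ngrams
-- pipeline).
--
-- >>> toyWordCount (DT.pack "text mining is fun")
-- 4
toyWordCount :: Text -> Int
toyWordCount = length . DT.words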
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Core/Text/ 0000775 0000000 0000000 00000000000 14124644201 0025731 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Core/Text/Examples.hs 0000664 0000000 0000000 00000012367 14124644201 0030054 0 ustar 00root root 0000000 0000000 {-|
Module : Core.Text.Examples
Description : Minimal Examples to test behavior of the functions.
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This file is intended for these purposes:
- documentation for teaching and research
- learning the basics of Haskell, a scientific programming language
- behavioral tests (to be completed with unit tests and scale tests)
This document defines the basics of Text handling in Gargantext:
- What is a term ?
- What is a sentence ?
- What is a paragraph ?
-}
{-# LANGUAGE BangPatterns #-}
module Core.Text.Examples
where
{-
import Data.Array.Accelerate (toList, Matrix)
import Data.Map (Map)
import Data.Ord (Down(..))
import Data.Text (Text)
import Data.Tuple.Extra (both)
import Gargantext.Core (Lang(EN))
import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
import Gargantext.Core.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Core.Text.Metrics.Count (Grouped)
import Gargantext.Core.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Core.Text.Terms (TermType(MonoMulti), terms)
import Gargantext.Core.Types (Terms(..), Label)
import Gargantext.Core.Viz.Graph.Index
import Gargantext.Prelude
import qualified Data.Array.Accelerate as DAA
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Text as Text
-- | Sentences
-- Let there be a list of Texts: ['Data.Text.Text']. Each Text in this example is a sentence.
--
-- >>> ex_sentences
-- ["There is a table with a glass of wine and a spoon.","I can see the glass on the table.","There was only a spoon on that table.","The glass just fall from the table, pouring wine everywhere.","I wish the glass did not contain wine."]
ex_sentences :: [Text]
ex_sentences = [ "There is a table with a glass of wine and a spoon."
, "I can see the glass on the table."
, "There was only a spoon on that table."
, "The glass just fall from the table, pouring wine everywhere."
, "I wish the glass did not contain wine."
]
-- | From list to simple text as paragraph.
-- 'Data.Text.intercalate' the sentences with a space; the result is a paragraph.
--
-- >>> Text.intercalate (Text.pack " ") ex_sentences
-- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
ex_paragraph :: Text
ex_paragraph = Text.intercalate " " ex_sentences
-- | Split the paragraph back into sentences using Contexts of text.
-- See 'Gargantext.Core.Text.Context' for more.
--
-- >>> ex_sentences == splitBy (Sentences 0) ex_paragraph
-- True
-- | Terms reordered to visually check occurrences
-- Split text by sentence and then extract ngrams.
--
-- >>> mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
-- [[["table"],["glass"],["wine"],["spoon"]],[["glass"],["table"]],[["spoon"],["table"]],[["glass"],["table"],["wine"]],[["glass"],["wine"]]]
ex_terms :: IO [[Terms]]
ex_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
-- | Test the Occurrences
--
-- >>> occurrences <$> List.concat <$> ex_terms
-- fromList [(fromList ["glass"],fromList [(["glass"],4)]),(fromList ["spoon"],fromList [(["spoon"],2)]),(fromList ["tabl"],fromList [(["table"],4)]),(fromList ["wine"],fromList [(["wine"],3)])]
ex_occ :: IO (Map Grouped (Map Terms Int))
ex_occ = occurrences <$> List.concat <$> ex_terms
-- | Test the cooccurrences
-- Use the 'Gargantext.Core.Text.Metrics.Count.cooc' function.
--
-- >>> cooc <$> ex_terms
-- fromList [((["glass"],["glass"]),4),((["spoon"],["glass"]),1),((["spoon"],["spoon"]),2),((["table"],["glass"]),3),((["table"],["spoon"]),2),((["table"],["table"]),4),((["wine"],["glass"]),3),((["wine"],["spoon"]),1),((["wine"],["table"]),2),((["wine"],["wine"]),3)]
ex_cooc :: IO (Map (Label, Label) Int)
ex_cooc = cooc <$> ex_terms
-- | Tests the specificity and genericity
--
-- >>> ex_cooc_mat
-- (fromList [(["glass"],0),(["spoon"],1),(["table"],2),(["wine"],3)],Matrix (Z :. 4 :. 4)
-- [ 4, 0, 0, 0,
-- 1, 2, 0, 0,
-- 3, 2, 4, 0,
-- 3, 1, 2, 3],Matrix (Z :. 4 :. 4)
-- [ 1.0, 0.25, 0.75, 0.75,
-- 0.0, 1.0, 1.0, 0.5,
-- 0.0, 0.0, 1.0, 0.5,
-- 0.0, 0.0, 0.0, 1.0],(Vector (Z :. 4) [0.5833333333333334,0.5833333333333334,0.75,0.5833333333333334],Vector (Z :. 4) [-0.5833333333333334,-0.4166666666666667,0.41666666666666674,0.5833333333333334]))
ex_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector GenericityInclusion, DAA.Vector SpecificityExclusion))
ex_cooc_mat = do
m <- ex_cooc
let (ti,_) = createIndices m
let mat_cooc = cooc2mat Triangle ti m
pure ( ti
, mat_cooc
, incExcSpeGen_proba mat_cooc
, incExcSpeGen mat_cooc
)
ex_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
ex_incExcSpeGen = incExcSpeGen_sorted <$> ex_cooc
incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat Triangle ti m)
where
(ti,fi) = createIndices m
ordonne x = sortWith (Down . snd)
$ zip (map snd $ Map.toList fi) (toList x)
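-- | A toy co-occurrence count, to make the notions above concrete without the
-- Gargantext API. This is only an illustrative sketch (hypothetical helper,
-- not used elsewhere): it counts, for each unordered pair of
-- whitespace-separated tokens, in how many of the example sentences both
-- tokens appear. Punctuation is not stripped, unlike the real pipeline.
--
-- >>> Map.lookup ("glass", "wine") (toyCooc ex_sentences)
-- Just 2
toyCooc :: [Text] -> Map (Text, Text) Int
toyCooc sents = Map.fromListWith (+)
  [ ((w1, w2), 1)
  | s  <- sents
  , let ws = List.nub (Text.words s)   -- distinct tokens of one sentence
  , w1 <- ws
  , w2 <- ws
  , w1 < w2                            -- one key per unordered pair
  ]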
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Core/Text/Flow.hs 0000664 0000000 0000000 00000005744 14124644201 0027206 0 ustar 00root root 0000000 0000000 {-|
Module : Core.Text.Flow
Description : From text to viz, the flow of texts in Gargantext
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
From text to viz, all the flow of texts in Gargantext.
-}
module Core.Text.Flow
where
{-
import qualified Data.Text as T
--import Data.Text.IO (readFile)
import Database.PostgreSQL.Simple (Connection)
import GHC.IO (FilePath)
--import Gargantext.Core (Lang)
import Gargantext.Core.Types (CorpusId)
-}
{-
____ _____ _
/ ___| __ _ _ __ __ _ __ _ _ _|_ _|____ _| |_
| | _ / _` | '__/ _` |/ _` | '_ \| |/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | | __/> <| |_
\____|\__,_|_| \__, |\__,_|_| |_|_|\___/_/\_\\__|
|___/
-}
{-
contextText :: [T.Text]
contextText = ["The dog is an animal."
,"The bird is an animal."
,"The dog is an animal."
,"The animal is a bird or a dog ?"
,"The table is an object."
,"The pen is an object."
,"The object is a pen or a table ?"
,"The girl is a human."
,"The boy is a human."
,"The boy or the girl are human."
]
-- | Control the flow of text
data TextFlow = CSV FilePath
| FullText FilePath
| Contexts [T.Text]
| DBV3 Connection CorpusId
| Query T.Text
-}
{-
textFlow :: TermType Lang -> TextFlow -> IO Graph
textFlow termType workType = do
contexts <- case workType of
FullText path -> splitBy (Sentences 5) <$> readFile path
CSV path -> readCsvOn [csv_title, csv_abstract] path
Contexts ctxt -> pure ctxt
DBV3 con corpusId -> catMaybes <$> map (\n -> hyperdataDocumentV3_title (_node_hyperdata n) <> hyperdataDocumentV3_abstract (_node_hyperdata n))<$> runReaderT (getDocumentsV3WithParentId corpusId) con
_ -> undefined -- TODO Query not supported
textFlow' termType contexts
textFlow' :: TermType Lang -> [T.Text] -> IO Graph
textFlow' termType contexts = do
-- Context :: Text -> [Text]
-- Contexts = Paragraphs n | Sentences n | Chars n
myterms <- extractTerms termType contexts
-- TermsType = Mono | Multi | MonoMulti
-- myterms # filter (\t -> not . elem t stopList)
-- # groupBy (Stem|GroupList|Ontology)
--printDebug "terms" myterms
--printDebug "myterms" (sum $ map length myterms)
-- Building the map list
-- compute co-presences of terms, i.e. cooccurrences of terms in the same context of text
-- Cooc = Map (Term, Term) Int
let myCooc1 = coocOn (_terms_label) myterms
--printDebug "myCooc1 size" (M.size myCooc1)
-- Remove Apax: appears one time only => lighting the matrix
let myCooc2 = Map.filter (>1) myCooc1
--printDebug "myCooc2 size" (M.size myCooc2)
--printDebug "myCooc2" myCooc2
g <- cooc2graph myCooc2
pure g
-}
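{-
A minimal usage sketch of the pipeline above (hypothetical: it assumes the
imports the disabled code needs -- TermType, Lang, extractTerms, the graph
accessors, etc. -- were restored; _graph_nodes and _graph_edges are the
assumed field names of 'Gargantext.Core.Viz.Graph.Graph'):

demoFlow :: IO ()
demoFlow = do
  g <- textFlow' (MonoMulti EN) contextText
  printDebug "nodes" (length $ _graph_nodes g)
  printDebug "edges" (length $ _graph_edges g)
-}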
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Graph/ 0000775 0000000 0000000 00000000000 14124644201 0025156 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Graph/Clustering.hs 0000664 0000000 0000000 00000137744 14124644201 0027651 0 ustar 00root root 0000000 0000000
{-|
Module : Graph.Clustering
Description : Basic tests to avoid quick regression
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Graph.Clustering where
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Viz.Graph (Graph(..))
import Gargantext.Core.Viz.Graph.Tools (PartitionMethod(..), doDistanceMap)
import Gargantext.Core.Viz.Graph.Tools.IGraph (spinglass)
import Gargantext.Prelude
import Test.Hspec
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import Gargantext.Core.Methods.Distances (Distance(..))
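-- | The fixture below ('myCooc') is a symmetric co-occurrence map: each key
-- is a pair of ngrams and the value is their co-occurrence count.
-- A hypothetical helper (sketch only, not used by the test) showing how such
-- a fixture could be assembled from plain text pairs:
--
-- mkCooc :: [((Text, Text), Int)] -> HashMap (NgramsTerm, NgramsTerm) Int
-- mkCooc = HashMap.fromList . fmap (\((t1, t2), n) -> ((NgramsTerm t1, NgramsTerm t2), n))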
myCooc :: HashMap (NgramsTerm, NgramsTerm) Int
myCooc = HashMap.fromList [((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {unNgramsTerm = "sqrt"}),12),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "data"}),8),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "transverse momentum"}),6),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "gev au"}),7),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "rhic"}),13),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "sqrt"}),27),((NgramsTerm {unNgramsTerm = "pi ^"},NgramsTerm {unNgramsTerm = "s"}),8),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "p"}),27),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "s"}),10),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),5),((NgramsTerm {unNgramsTerm = "eta"},NgramsTerm {unNgramsTerm = "gev"}),9),((NgramsTerm {unNgramsTerm = "phi"},NgramsTerm {unNgramsTerm = "transverse momentum"}),3),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "hadrons"}),4),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "gev"}),9),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "gev/c"}),3),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "nn"}),80),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "particle"}),3),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "p collisions"}),6),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "sqrt"}),88),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "gev/c"}),5),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "gev au"}),5),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "p_t"}),5),((NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"},NgramsTerm {unNgramsTerm = "sqrt"}),10),((NgramsTerm {unNgramsTerm = "rhic"},NgramsTerm {unNgramsTerm = "s_nn"}),12),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "p"}),21),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "sqrt"}),6),((NgramsTerm {unNgramsTerm = "au-au collisions"},NgramsTerm {unNgramsTerm = "gev"}),12),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),8),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "rhic"}),3),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "eta"}),4),((NgramsTerm {unNgramsTerm = "eta"},NgramsTerm {unNgramsTerm = "sqrt"}),9),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "particle"}),8),((NgramsTerm {unNgramsTerm = "au-au collisions"},NgramsTerm {unNgramsTerm = "sqrt"}),10),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "s_nn"}),12),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "particle"}),4),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "phenix experiment"}),4),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "nn"}),65),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "number"}),3),((NgramsTerm {unNgramsTerm = "p_t"},NgramsTerm {unNgramsTerm = "s_nn"}),11),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "not found"}),7),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "number"}),4),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "pi ^"}),7),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "spectra"}),3),((NgramsTerm {unNgramsTerm = 
"d"},NgramsTerm {unNgramsTerm = "s"}),9),((NgramsTerm {unNgramsTerm = "second part"},NgramsTerm {unNgramsTerm = "thesis"}),4),((NgramsTerm {unNgramsTerm = "eta"},NgramsTerm {unNgramsTerm = "pi ^"}),5),((NgramsTerm {unNgramsTerm = "midrapidity"},NgramsTerm {unNgramsTerm = "p collisions"}),3),((NgramsTerm {unNgramsTerm = "phenix experiment"},NgramsTerm {unNgramsTerm = "s_nn"}),13),((NgramsTerm {unNgramsTerm = "number"},NgramsTerm {unNgramsTerm = "s_nn"}),5),((NgramsTerm {unNgramsTerm = "nuclear modification factors"},NgramsTerm {unNgramsTerm = "p collisions"}),3),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "number"}),4),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "s"}),8),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "p"}),6),((NgramsTerm {unNgramsTerm = "lambda"},NgramsTerm {unNgramsTerm = "rhic"}),4),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "pi ^"}),13),((NgramsTerm {unNgramsTerm = "lambda"},NgramsTerm {unNgramsTerm = "p"}),3),((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {unNgramsTerm = "pi ^"}),3),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "nn"}),10),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "mid-rapidity"}),7),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "p_t"}),3),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "yields"}),7),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "phenix experiment"}),9),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "rhic"}),7),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "gev/c"}),6),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "pi ^"}),4),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "central collisions"}),3),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "not found"}),7),((NgramsTerm {unNgramsTerm = "s"},NgramsTerm {unNgramsTerm = "s_nn"}),7),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "phenix experiment"}),5),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "energy"}),3),((NgramsTerm {unNgramsTerm = "ratio"},NgramsTerm {unNgramsTerm = "s"}),4),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "nn"}),6),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = "p"}),4),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "number"}),11),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "spectra"}),5),((NgramsTerm {unNgramsTerm = "central collisions"},NgramsTerm {unNgramsTerm = "s_nn"}),3),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "nuclear modification factors"}),4),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = "nn"}),6),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "phenix experiment"}),8),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "not found"}),3),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "p collisions"}),3),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "not found"}),15),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "phenix experiment"}),9),((NgramsTerm {unNgramsTerm = "s_nn"},NgramsTerm {unNgramsTerm = "yields"}),3),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "s"}),14),((NgramsTerm 
{unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "s"}),18),((NgramsTerm {unNgramsTerm = "not found"},NgramsTerm {unNgramsTerm = "phi"}),4),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "yields"}),3),((NgramsTerm {unNgramsTerm = "nuclear modification factors"},NgramsTerm {unNgramsTerm = "rhic"}),5),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "gev au"}),3),((NgramsTerm {unNgramsTerm = "phenix experiment"},NgramsTerm {unNgramsTerm = "phi"}),3),((NgramsTerm {unNgramsTerm = "phi"},NgramsTerm {unNgramsTerm = "s"}),9),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "midrapidity"}),4),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "function"}),6),((NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"},NgramsTerm {unNgramsTerm = "spectra"}),3),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "p"}),5),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "p_t"}),9),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "p_t"}),8),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = "eta"}),3),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "spectra"}),6),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "ratio"}),5),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "p collisions"}),6),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "measurements"}),23),((NgramsTerm {unNgramsTerm = "p collisions"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),5),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "phi"}),10),((NgramsTerm {unNgramsTerm = "ratio"},NgramsTerm {unNgramsTerm = "sqrt"}),6),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "sqrt"}),25),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "sqrt"}),21),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "particle"}),5),((NgramsTerm {unNgramsTerm = "p collisions"},NgramsTerm {unNgramsTerm = "rhic"}),10),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "particle"}),4),((NgramsTerm {unNgramsTerm = "central collisions"},NgramsTerm {unNgramsTerm = "phenix experiment"}),3),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "hadrons"}),8),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "transverse momentum"}),3),((NgramsTerm {unNgramsTerm = "s"},NgramsTerm {unNgramsTerm = "yields"}),5),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "function"}),3),((NgramsTerm {unNgramsTerm = "central collisions"},NgramsTerm {unNgramsTerm = "s"}),3),((NgramsTerm {unNgramsTerm = "phi"},NgramsTerm {unNgramsTerm = "sqrt"}),9),((NgramsTerm {unNgramsTerm = "elliptic flow"},NgramsTerm {unNgramsTerm = "hadrons"}),3),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),3),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "spectra"}),3),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "pi ^"}),3),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "phi"}),7),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "p"}),4),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "measurements"}),14),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "ratio"}),7),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "suppression"}),4),((NgramsTerm {unNgramsTerm = "au 
collisions"},NgramsTerm {unNgramsTerm = "s_nn"}),17),((NgramsTerm {unNgramsTerm = "phenix experiment"},NgramsTerm {unNgramsTerm = "yields"}),5),((NgramsTerm {unNgramsTerm = "number"},NgramsTerm {unNgramsTerm = "yields"}),3),((NgramsTerm {unNgramsTerm = "rhic"},NgramsTerm {unNgramsTerm = "transverse momentum"}),5),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "d"}),3),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "gev"}),69),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "centralities"}),20),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "centrality dependence"}),8),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "rhic"}),6),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "nn"}),99),((NgramsTerm {unNgramsTerm = "pi ^"},NgramsTerm {unNgramsTerm = "pi ^"}),22),((NgramsTerm {unNgramsTerm = "eta"},NgramsTerm {unNgramsTerm = "nn"}),7),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "p"}),4),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "phi"}),6),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "mid-rapidity"}),18),((NgramsTerm {unNgramsTerm = "midrapidity"},NgramsTerm {unNgramsTerm = "midrapidity"}),13),((NgramsTerm {unNgramsTerm = "eu"},NgramsTerm {unNgramsTerm = "ph"}),5),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "suppression"}),5),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "gev/c"}),30),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "pi ^"}),4),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "s_nn"}),3),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "measurements"}),58),((NgramsTerm {unNgramsTerm = "yields"},NgramsTerm {unNgramsTerm = "yields"}),15),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "measurements"}),6),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "suppression"}),3),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "p collisions"}),3),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "p"}),50),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "data"}),44),((NgramsTerm {unNgramsTerm = "sqrt"},NgramsTerm {unNgramsTerm = "sqrt"}),118),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "particle"}),12),((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {unNgramsTerm = "p"}),5),((NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),13),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "centrality dependence"}),18),((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {unNgramsTerm = "gev au"}),14),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "s_nn"}),5),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "function"}),31),((NgramsTerm {unNgramsTerm = "p collisions"},NgramsTerm {unNgramsTerm = "p collisions"}),20),((NgramsTerm {unNgramsTerm = "eu"},NgramsTerm {unNgramsTerm = "eu"}),9),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "central au"}),14),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "sqrt"}),98),((NgramsTerm {unNgramsTerm = "spectra"},NgramsTerm {unNgramsTerm = "spectra"}),16),((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {unNgramsTerm = "relativistic heavy ion 
collider"}),3),((NgramsTerm {unNgramsTerm = "au-au collisions"},NgramsTerm {unNgramsTerm = "nn"}),13),((NgramsTerm {unNgramsTerm = "particle"},NgramsTerm {unNgramsTerm = "sqrt"}),10),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "gev"}),125),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "d"}),32),((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {unNgramsTerm = "nn"}),10),((NgramsTerm {unNgramsTerm = "s_nn"},NgramsTerm {unNgramsTerm = "s_nn"}),20),((NgramsTerm {unNgramsTerm = "nuclear modification factors"},NgramsTerm {unNgramsTerm = "nuclear modification factors"}),9),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "ratio"}),6),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "centralities"}),28),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "rhic"}),9),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "au"}),101),((NgramsTerm {unNgramsTerm = "central collisions"},NgramsTerm {unNgramsTerm = "central collisions"}),9),((NgramsTerm {unNgramsTerm = "thesis"},NgramsTerm {unNgramsTerm = "thesis"}),13),((NgramsTerm {unNgramsTerm = "ph"},NgramsTerm {unNgramsTerm = "ph"}),12),((NgramsTerm {unNgramsTerm = "eta"},NgramsTerm {unNgramsTerm = "eta"}),13),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "particle"}),8),((NgramsTerm {unNgramsTerm = "case study"},NgramsTerm {unNgramsTerm = "case study"}),24),((NgramsTerm {unNgramsTerm = "rhic"},NgramsTerm {unNgramsTerm = "rhic"}),66),((NgramsTerm {unNgramsTerm = "particle"},NgramsTerm {unNgramsTerm = "particle"}),27),((NgramsTerm {unNgramsTerm = "number"},NgramsTerm {unNgramsTerm = "number"}),32),((NgramsTerm {unNgramsTerm = "au-au collisions"},NgramsTerm {unNgramsTerm = "au-au collisions"}),14),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "p"}),18),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),5),((NgramsTerm {unNgramsTerm = "au-au collisions"},NgramsTerm {unNgramsTerm = "eta"}),3),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),4),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "sqrt"}),76),((NgramsTerm {unNgramsTerm = "lambda"},NgramsTerm {unNgramsTerm = "lambda"}),10),((NgramsTerm {unNgramsTerm = "eta"},NgramsTerm {unNgramsTerm = "p"}),3),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = "energy"}),25),((NgramsTerm {unNgramsTerm = "central collisions"},NgramsTerm {unNgramsTerm = "yields"}),4),((NgramsTerm {unNgramsTerm = "elliptic flow"},NgramsTerm {unNgramsTerm = "elliptic flow"}),14),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "gev"}),73),((NgramsTerm {unNgramsTerm = "number"},NgramsTerm {unNgramsTerm = "p"}),5),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "spectra"}),9),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "function"}),12),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "pi ^"}),5),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "number"}),4),((NgramsTerm {unNgramsTerm = "elliptic flow"},NgramsTerm {unNgramsTerm = "p_t"}),4),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "s_nn"}),7),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "sqrt"}),11),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "mid-rapidity"}),3),((NgramsTerm {unNgramsTerm = "nuclear modification factors"},NgramsTerm {unNgramsTerm = 
"suppression"}),3),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = "measurements"}),3),((NgramsTerm {unNgramsTerm = "phenix experiment"},NgramsTerm {unNgramsTerm = "rhic"}),15),((NgramsTerm {unNgramsTerm = "number"},NgramsTerm {unNgramsTerm = "rhic"}),6),((NgramsTerm {unNgramsTerm = "p_t"},NgramsTerm {unNgramsTerm = "rhic"}),8),((NgramsTerm {unNgramsTerm = "pi ^"},NgramsTerm {unNgramsTerm = "sqrt"}),11),((NgramsTerm {unNgramsTerm = "not found"},NgramsTerm {unNgramsTerm = "rhic"}),28),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "phenix experiment"}),13),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "nuclear modification factors"}),7),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "s"}),16),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "pi ^"}),9),((NgramsTerm {unNgramsTerm = "elliptic flow"},NgramsTerm {unNgramsTerm = "not found"}),6),((NgramsTerm {unNgramsTerm = "elliptic flow"},NgramsTerm {unNgramsTerm = "s"}),9),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "central au"}),8),((NgramsTerm {unNgramsTerm = "phenix experiment"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),6),((NgramsTerm {unNgramsTerm = "not found"},NgramsTerm {unNgramsTerm = "p"}),16),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "midrapidity"}),3),((NgramsTerm {unNgramsTerm = "rhic"},NgramsTerm {unNgramsTerm = "s"}),23),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "p_t"}),12),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "measurements"}),6),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "energy"}),7),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "p_t"}),4),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "pi ^"}),8),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "s"}),10),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "midrapidity"}),8),((NgramsTerm {unNgramsTerm = "midrapidity"},NgramsTerm {unNgramsTerm = "sqrt"}),8),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "nuclear modification factors"}),5),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = "s_nn"}),3),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "energy"}),3),((NgramsTerm {unNgramsTerm = "p collisions"},NgramsTerm {unNgramsTerm = "p_t"}),5),((NgramsTerm {unNgramsTerm = "lambda"},NgramsTerm {unNgramsTerm = "measurements"}),3),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "function"}),3),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "yields"}),5),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "not found"}),3),((NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"},NgramsTerm {unNgramsTerm = "yields"}),3),((NgramsTerm {unNgramsTerm = "phenix experiment"},NgramsTerm {unNgramsTerm = "transverse momentum"}),5),((NgramsTerm {unNgramsTerm = "pi ^"},NgramsTerm {unNgramsTerm = "ratio"}),3),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "mid-rapidity"}),5),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "mid-rapidity"}),7),((NgramsTerm {unNgramsTerm = "p collisions"},NgramsTerm {unNgramsTerm = "s"}),13),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "central au"}),14),((NgramsTerm {unNgramsTerm = "central collisions"},NgramsTerm {unNgramsTerm = "nn"}),3),((NgramsTerm {unNgramsTerm = "central 
au"},NgramsTerm {unNgramsTerm = "ratio"}),4),((NgramsTerm {unNgramsTerm = "p_t"},NgramsTerm {unNgramsTerm = "transverse momentum"}),3),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "midrapidity"}),7),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "nuclear modification factors"}),6),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "spectra"}),6),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "s_nn"}),8),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "function"}),6),((NgramsTerm {unNgramsTerm = "nuclear modification factors"},NgramsTerm {unNgramsTerm = "sqrt"}),5),((NgramsTerm {unNgramsTerm = "s"},NgramsTerm {unNgramsTerm = "transverse momentum"}),9),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "yields"}),5),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "phenix experiment"}),3),((NgramsTerm {unNgramsTerm = "p collisions"},NgramsTerm {unNgramsTerm = "phenix experiment"}),6),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "ratio"}),4),((NgramsTerm {unNgramsTerm = "s"},NgramsTerm {unNgramsTerm = "spectra"}),8),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "phi"}),9),((NgramsTerm {unNgramsTerm = "nuclear modification factors"},NgramsTerm {unNgramsTerm = "phenix experiment"}),3),((NgramsTerm {unNgramsTerm = "rhic"},NgramsTerm {unNgramsTerm = "suppression"}),6),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "s_nn"}),5),((NgramsTerm {unNgramsTerm = "nuclear modification factors"},NgramsTerm {unNgramsTerm = "s"}),6),((NgramsTerm {unNgramsTerm = "p collisions"},NgramsTerm {unNgramsTerm = "s_nn"}),5),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "p collisions"}),4),((NgramsTerm {unNgramsTerm = "midrapidity"},NgramsTerm {unNgramsTerm = "s"}),8),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),6),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "hadrons"}),3),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "phi"}),3),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "eta"}),4),((NgramsTerm {unNgramsTerm = "midrapidity"},NgramsTerm {unNgramsTerm = "not found"}),5),((NgramsTerm {unNgramsTerm = "midrapidity"},NgramsTerm {unNgramsTerm = "phenix experiment"}),5),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "transverse momentum"}),5),((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {unNgramsTerm = "measurements"}),4),((NgramsTerm {unNgramsTerm = "phenix experiment"},NgramsTerm {unNgramsTerm = "spectra"}),3),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "measurements"}),7),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "data"}),5),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "ratio"}),3),((NgramsTerm {unNgramsTerm = "s_nn"},NgramsTerm {unNgramsTerm = "transverse momentum"}),4),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "suppression"}),6),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "rhic"}),28),((NgramsTerm {unNgramsTerm = "au-au collisions"},NgramsTerm {unNgramsTerm = "phi"}),3),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "gev/c"}),9),((NgramsTerm {unNgramsTerm = "eta"},NgramsTerm {unNgramsTerm = "measurements"}),3),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "nn"}),15),((NgramsTerm {unNgramsTerm = "au 
collisions"},NgramsTerm {unNgramsTerm = "p"}),20),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "nn"}),13),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "elliptic flow"}),3),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "yields"}),4),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "ratio"}),6),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "gev au"}),4),((NgramsTerm {unNgramsTerm = "rhic"},NgramsTerm {unNgramsTerm = "sqrt"}),31),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "suppression"}),3),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "gev/c"}),15),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "nn"}),8),((NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"},NgramsTerm {unNgramsTerm = "s_nn"}),4),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "phenix experiment"}),7),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "transverse momentum"}),10),((NgramsTerm {unNgramsTerm = "particle"},NgramsTerm {unNgramsTerm = "rhic"}),5),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "rhic"}),23),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "s_nn"}),6),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "p"}),16),((NgramsTerm {unNgramsTerm = "p_t"},NgramsTerm {unNgramsTerm = "pi ^"}),5),((NgramsTerm {unNgramsTerm = "spectra"},NgramsTerm {unNgramsTerm = "yields"}),3),((NgramsTerm {unNgramsTerm = "phenix experiment"},NgramsTerm {unNgramsTerm = "pi ^"}),5),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "p collisions"}),7),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "particle"}),3),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "p collisions"}),7),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "hadrons"}),10),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "p"}),4),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "gev/c"}),24),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "measurements"}),4),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "p_t"}),6),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "elliptic flow"}),10),((NgramsTerm {unNgramsTerm = "lambda"},NgramsTerm {unNgramsTerm = "not found"}),3),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "s"}),8),((NgramsTerm {unNgramsTerm = "elliptic flow"},NgramsTerm {unNgramsTerm = "sqrt"}),10),((NgramsTerm {unNgramsTerm = "lambda"},NgramsTerm {unNgramsTerm = "s"}),7),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "nn"}),11),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "s_nn"}),9),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "data"}),4),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "rhic"}),27),((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {unNgramsTerm = "s_nn"}),4),((NgramsTerm {unNgramsTerm = "not found"},NgramsTerm {unNgramsTerm = "pi ^"}),8),((NgramsTerm {unNgramsTerm = "elliptic flow"},NgramsTerm {unNgramsTerm = "gev"}),11),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "sqrt"}),22),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "not found"}),61),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = 
"hadrons"}),3),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "mid-rapidity"}),6),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "number"}),12),((NgramsTerm {unNgramsTerm = "phenix experiment"},NgramsTerm {unNgramsTerm = "sqrt"}),19),((NgramsTerm {unNgramsTerm = "number"},NgramsTerm {unNgramsTerm = "sqrt"}),10),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = "transverse momentum"}),3),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "phenix experiment"}),20),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "transverse momentum"}),4),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "p_t"}),16),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "s"}),64),((NgramsTerm {unNgramsTerm = "lambda"},NgramsTerm {unNgramsTerm = "nn"}),6),((NgramsTerm {unNgramsTerm = "not found"},NgramsTerm {unNgramsTerm = "sqrt"}),62),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "not found"}),56),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "yields"}),3),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "rhic"}),5),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "number"}),10),((NgramsTerm {unNgramsTerm = "pi ^"},NgramsTerm {unNgramsTerm = "rhic"}),8),((NgramsTerm {unNgramsTerm = "s"},NgramsTerm {unNgramsTerm = "sqrt"}),100),((NgramsTerm {unNgramsTerm = "phi"},NgramsTerm {unNgramsTerm = "yields"}),3),((NgramsTerm {unNgramsTerm = "central collisions"},NgramsTerm {unNgramsTerm = "centralities"}),3),((NgramsTerm {unNgramsTerm = "number"},NgramsTerm {unNgramsTerm = "particle"}),4),((NgramsTerm {unNgramsTerm = "central collisions"},NgramsTerm {unNgramsTerm = "measurements"}),3),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "pi ^"}),6),((NgramsTerm {unNgramsTerm = "pi ^"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),3),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "p_t"}),10),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "s"}),85),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "phenix experiment"}),16),((NgramsTerm {unNgramsTerm = "particle"},NgramsTerm {unNgramsTerm = "s"}),10),((NgramsTerm {unNgramsTerm = "not found"},NgramsTerm {unNgramsTerm = "particle"}),3),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "p"}),6),((NgramsTerm {unNgramsTerm = "p_t"},NgramsTerm {unNgramsTerm = "sqrt"}),15),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "nn"}),6),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "spectra"}),4),((NgramsTerm {unNgramsTerm = "central collisions"},NgramsTerm {unNgramsTerm = "gev"}),6),((NgramsTerm {unNgramsTerm = "rhic"},NgramsTerm {unNgramsTerm = "spectra"}),6),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = "rhic"}),4),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "central collisions"}),5),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "phenix experiment"}),18),((NgramsTerm {unNgramsTerm = "s"},NgramsTerm {unNgramsTerm = "suppression"}),6),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "not found"}),55),((NgramsTerm {unNgramsTerm = "account"},NgramsTerm {unNgramsTerm = "number"}),3),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "yields"}),12),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "s"}),66),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm 
{unNgramsTerm = "gev/c"}),5),((NgramsTerm {unNgramsTerm = "sqrt"},NgramsTerm {unNgramsTerm = "yields"}),9),((NgramsTerm {unNgramsTerm = "midrapidity"},NgramsTerm {unNgramsTerm = "nn"}),7),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "p_t"}),11),((NgramsTerm {unNgramsTerm = "midrapidity"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),4),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "rhic"}),8),((NgramsTerm {unNgramsTerm = "central collisions"},NgramsTerm {unNgramsTerm = "sqrt"}),5),((NgramsTerm {unNgramsTerm = "p_t"},NgramsTerm {unNgramsTerm = "suppression"}),3),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "nuclear modification factors"}),6),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "p collisions"}),3),((NgramsTerm {unNgramsTerm = "p collisions"},NgramsTerm {unNgramsTerm = "pi ^"}),4),((NgramsTerm {unNgramsTerm = "phenix experiment"},NgramsTerm {unNgramsTerm = "suppression"}),5),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "number"}),7),((NgramsTerm {unNgramsTerm = "nuclear modification factors"},NgramsTerm {unNgramsTerm = "p"}),5),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "yields"}),8),((NgramsTerm {unNgramsTerm = "midrapidity"},NgramsTerm {unNgramsTerm = "p"}),6),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "transverse momentum"}),4),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "gev"}),22),((NgramsTerm {unNgramsTerm = "sqrt"},NgramsTerm {unNgramsTerm = "suppression"}),6),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "transverse momentum"}),9),((NgramsTerm {unNgramsTerm = "eta"},NgramsTerm {unNgramsTerm = "transverse momentum"}),3),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "nn"}),10),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "measurements"}),3),((NgramsTerm {unNgramsTerm = "nuclear modification factors"},NgramsTerm {unNgramsTerm = "pi ^"}),4),((NgramsTerm {unNgramsTerm = "au-au collisions"},NgramsTerm {unNgramsTerm = "transverse momentum"}),3),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "nuclear modification factors"}),3),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "mid-rapidity"}),5),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "transverse momentum"}),3),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "d"}),17),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "measurements"}),5),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "suppression"}),5),((NgramsTerm {unNgramsTerm = "particle"},NgramsTerm {unNgramsTerm = "suppression"}),3),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "ratio"}),3),((NgramsTerm {unNgramsTerm = "pi ^"},NgramsTerm {unNgramsTerm = "spectra"}),4),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "centrality dependence"}),12),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "particle"}),11),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "sqrt"}),77),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "transverse momentum"}),4),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "p collisions"}),18),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "s_nn"}),8),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = 
"s_nn"}),11),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "au collisions"}),76),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "suppression"}),7),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "p collisions"}),8),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "gev"}),12),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "gev/c"}),4),((NgramsTerm {unNgramsTerm = "stability"},NgramsTerm {unNgramsTerm = "stability"}),8),((NgramsTerm {unNgramsTerm = "second part"},NgramsTerm {unNgramsTerm = "second part"}),8),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "rhic"}),8),((NgramsTerm {unNgramsTerm = "elliptic flow"},NgramsTerm {unNgramsTerm = "nn"}),9),((NgramsTerm {unNgramsTerm = "aspects"},NgramsTerm {unNgramsTerm = "aspects"}),9),((NgramsTerm {unNgramsTerm = "p_t"},NgramsTerm {unNgramsTerm = "s"}),9),((NgramsTerm {unNgramsTerm = "s_nn"},NgramsTerm {unNgramsTerm = "sqrt"}),20),((NgramsTerm {unNgramsTerm = "suppression"},NgramsTerm {unNgramsTerm = "suppression"}),16),((NgramsTerm {unNgramsTerm = "not found"},NgramsTerm {unNgramsTerm = "s"}),64),((NgramsTerm {unNgramsTerm = "not found"},NgramsTerm {unNgramsTerm = "not found"}),543),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "gev"}),14),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "hadrons"}),19),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "centrality dependence"}),12),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "sqrt"}),13),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "mid-rapidity"}),3),((NgramsTerm {unNgramsTerm = "phenix experiment"},NgramsTerm {unNgramsTerm = "s"}),10),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "pi ^"}),4),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "phi"}),4),((NgramsTerm {unNgramsTerm = "particle"},NgramsTerm {unNgramsTerm = "s_nn"}),3),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "rhic"}),18),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "centralities"}),19),((NgramsTerm {unNgramsTerm = "number"},NgramsTerm {unNgramsTerm = "s"}),11),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "s_nn"}),19),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "suppression"}),9),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "nn"}),13),((NgramsTerm {unNgramsTerm = "p_t"},NgramsTerm {unNgramsTerm = "phenix experiment"}),8),((NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"},NgramsTerm {unNgramsTerm = "rhic"}),7),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "au collisions"}),102),((NgramsTerm {unNgramsTerm = "transverse momentum"},NgramsTerm {unNgramsTerm = "transverse momentum"}),15),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "nuclear modification factors"}),3),((NgramsTerm {unNgramsTerm = "eta"},NgramsTerm {unNgramsTerm = "gev/c"}),3),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "ph"}),3),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "ratio"}),4),((NgramsTerm {unNgramsTerm = "number"},NgramsTerm {unNgramsTerm = "phenix experiment"}),4),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "measurements"}),15),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "ratio"}),5),((NgramsTerm {unNgramsTerm = "phenix experiment"},NgramsTerm 
{unNgramsTerm = "phenix experiment"}),25),((NgramsTerm {unNgramsTerm = "phi"},NgramsTerm {unNgramsTerm = "phi"}),12),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "eu"}),4),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),5),((NgramsTerm {unNgramsTerm = "centrality dependence"},NgramsTerm {unNgramsTerm = "sqrt"}),10),((NgramsTerm {unNgramsTerm = "ratio"},NgramsTerm {unNgramsTerm = "ratio"}),11),((NgramsTerm {unNgramsTerm = "account"},NgramsTerm {unNgramsTerm = "account"}),17),((NgramsTerm {unNgramsTerm = "au-au collisions"},NgramsTerm {unNgramsTerm = "rhic"}),3),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "p"}),12),((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {unNgramsTerm = "rhic"}),7),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "d"}),15),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "s_nn"}),15),((NgramsTerm {unNgramsTerm = "s"},NgramsTerm {unNgramsTerm = "s"}),118),((NgramsTerm {unNgramsTerm = "p_t"},NgramsTerm {unNgramsTerm = "p_t"}),17),((NgramsTerm {unNgramsTerm = "p collisions"},NgramsTerm {unNgramsTerm = "transverse momentum"}),3),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "rhic"}),18),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "p_t"}),9),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "sqrt"}),14),((NgramsTerm {unNgramsTerm = "au-au collisions"},NgramsTerm {unNgramsTerm = "s"}),10),((NgramsTerm {unNgramsTerm = "pi ^"},NgramsTerm {unNgramsTerm = "s_nn"}),4),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "function"}),13),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "not found"}),63),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "s_nn"}),4),((NgramsTerm {unNgramsTerm = "nuclear modification factors"},NgramsTerm {unNgramsTerm = "phi"}),3),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "lambda"}),7),((NgramsTerm {unNgramsTerm = "d"},NgramsTerm {unNgramsTerm = "pi ^"}),3),((NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"},NgramsTerm {unNgramsTerm = "s"}),6),((NgramsTerm {unNgramsTerm = "nuclear modification factors"},NgramsTerm {unNgramsTerm = "ratio"}),3),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "energy"}),7),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "p_t"}),6),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "nuclear modification factors"}),4),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "nuclear modification factors"}),4),((NgramsTerm {unNgramsTerm = "spectra"},NgramsTerm {unNgramsTerm = "suppression"}),3),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "mid-rapidity"}),12),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "s"}),89),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "spectra"}),5),((NgramsTerm {unNgramsTerm = "lambda"},NgramsTerm {unNgramsTerm = "sqrt"}),7),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "phenix experiment"}),9),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "number"}),9),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "gev"}),10),((NgramsTerm {unNgramsTerm = "eta"},NgramsTerm {unNgramsTerm = "s"}),7),((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {unNgramsTerm = "not found"}),7),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = 
"midrapidity"}),7),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "phenix experiment"}),3),((NgramsTerm {unNgramsTerm = "data"},NgramsTerm {unNgramsTerm = "s"}),5),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "lambda"}),5),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "s"}),22),((NgramsTerm {unNgramsTerm = "au-au collisions"},NgramsTerm {unNgramsTerm = "not found"}),8),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "phenix experiment"}),13),((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {unNgramsTerm = "s"}),8),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "mid-rapidity"}),15),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "energy"}),8),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "ratio"}),3),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "particle"}),6),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "function"}),13),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = "particle"}),4),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "pi ^"}),8),((NgramsTerm {unNgramsTerm = "gev/c"},NgramsTerm {unNgramsTerm = "yields"}),5),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "pi ^"}),5),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "spectra"}),7),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "mid-rapidity"}),13),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "lambda"}),5),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "gev"}),14),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "centralities"}),5),((NgramsTerm {unNgramsTerm = "rhic"},NgramsTerm {unNgramsTerm = "yields"}),4),((NgramsTerm {unNgramsTerm = "number"},NgramsTerm {unNgramsTerm = "p collisions"}),3),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "sqrt"}),12),((NgramsTerm {unNgramsTerm = "mid-rapidity"},NgramsTerm {unNgramsTerm = "phi"}),3),((NgramsTerm {unNgramsTerm = "spectra"},NgramsTerm {unNgramsTerm = "sqrt"}),10),((NgramsTerm {unNgramsTerm = "pi ^"},NgramsTerm {unNgramsTerm = "suppression"}),3),((NgramsTerm {unNgramsTerm = "not found"},NgramsTerm {unNgramsTerm = "p collisions"}),6),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = "sqrt"}),10),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = "gev"}),9),((NgramsTerm {unNgramsTerm = "central collisions"},NgramsTerm {unNgramsTerm = "gev/c"}),4),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "number"}),3),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "spectra"}),9),((NgramsTerm {unNgramsTerm = "central au"},NgramsTerm {unNgramsTerm = "suppression"}),4),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "gev/c"}),18),((NgramsTerm {unNgramsTerm = "phi"},NgramsTerm {unNgramsTerm = "rhic"}),3),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "phenix experiment"}),9),((NgramsTerm {unNgramsTerm = "elliptic flow"},NgramsTerm {unNgramsTerm = "measurements"}),3),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "s"}),5),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "gev au"}),12),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "elliptic flow"}),10),((NgramsTerm {unNgramsTerm = "p"},NgramsTerm {unNgramsTerm = "suppression"}),5),((NgramsTerm {unNgramsTerm = 
"gev/c"},NgramsTerm {unNgramsTerm = "measurements"}),11),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "p collisions"}),11),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "eta"}),4),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "transverse momentum"}),14),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "hadrons"}),11),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "number"}),3),((NgramsTerm {unNgramsTerm = "energy"},NgramsTerm {unNgramsTerm = "s"}),7),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "data"}),6),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "relativistic heavy ion collider"}),7),((NgramsTerm {unNgramsTerm = "au collisions"},NgramsTerm {unNgramsTerm = "nn"}),67),((NgramsTerm {unNgramsTerm = "ratio"},NgramsTerm {unNgramsTerm = "rhic"}),4),((NgramsTerm {unNgramsTerm = "sqrt"},NgramsTerm {unNgramsTerm = "transverse momentum"}),13),((NgramsTerm {unNgramsTerm = "hadrons"},NgramsTerm {unNgramsTerm = "sqrt"}),11),((NgramsTerm {unNgramsTerm = "p collisions"},NgramsTerm {unNgramsTerm = "sqrt"}),16),((NgramsTerm {unNgramsTerm = "function"},NgramsTerm {unNgramsTerm = "p_t"}),6),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "p"}),10),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "p"}),14),((NgramsTerm {unNgramsTerm = "centralities"},NgramsTerm {unNgramsTerm = "rhic"}),6),((NgramsTerm {unNgramsTerm = "gev"},NgramsTerm {unNgramsTerm = "p collisions"}),15),((NgramsTerm {unNgramsTerm = "measurements"},NgramsTerm {unNgramsTerm = "rhic"}),15),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "transverse momentum"}),7),((NgramsTerm {unNgramsTerm = "au"},NgramsTerm {unNgramsTerm = "hadrons"}),8),((NgramsTerm {unNgramsTerm = "nn"},NgramsTerm {unNgramsTerm = "suppression"}),6)]
test :: IO ()
test = hspec $ do
describe "Cross" $ do
let
(distanceMap,_,_) = doDistanceMap Conditional 0 myCooc
it "Partition test" $ do
partitions <- spinglass 1 distanceMap
let
result = List.length partitions > 1
shouldBe True result
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Graph/Distance.hs 0000664 0000000 0000000 00000001177 14124644201 0027252 0 ustar 00root root 0000000 0000000
{-|
Module : Graph.Distance
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Graph.Distance where
import Test.Hspec
{-
import Gargantext.Core.Methods.Matrix.Accelerate.Utils (cross', matrix)
import Gargantext.Prelude
test :: IO ()
test = hspec $ do
describe "Cross" $ do
let result = cross' $ matrix 3 ([1,1..] :: [Double])
it "compare" $ do
shouldBe result (matrix 3 ([2,2..] :: [Double]))
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Main.hs 0000664 0000000 0000000 00000001530 14124644201 0025334 0 ustar 00root root 0000000 0000000 {-|
Module : Main.hs
Description : Main for Gargantext Tests
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
--import qualified Ngrams.Lang.Fr as Fr
--import qualified Ngrams.Lang as Lang
import qualified Ngrams.Lang.Occurrences as Occ
import qualified Ngrams.Metrics as Metrics
import qualified Parsers.Date as PD
-- import qualified Graph.Distance as GD
import qualified Graph.Clustering as Graph
import qualified Utils.Crypto as Crypto
main :: IO ()
main = do
-- Occ.parsersTest
-- Lang.ngramsExtractionTest FR
-- Lang.ngramsExtractionTest EN
-- Metrics.main
Graph.test
PD.testFromRFC3339
-- GD.test
Crypto.test
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Ngrams/ 0000775 0000000 0000000 00000000000 14124644201 0025344 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Ngrams/Lang.hs 0000664 0000000 0000000 00000001140 14124644201 0026555 0 ustar 00root root 0000000 0000000
{-|
Module : Ngrams.Lang
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Ngrams.Lang where
{-
import Gargantext.Prelude (IO())
import Gargantext.Core (Lang(..))
import qualified Ngrams.Lang.Fr as Fr
import qualified Ngrams.Lang.En as En
ngramsExtractionTest :: Lang -> IO ()
ngramsExtractionTest FR = Fr.ngramsExtractionTest
ngramsExtractionTest EN = En.ngramsExtractionTest
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Ngrams/Lang/ 0000775 0000000 0000000 00000000000 14124644201 0026225 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Ngrams/Lang/En.hs 0000664 0000000 0000000 00000005652 14124644201 0027133 0 ustar 00root root 0000000 0000000 {-|
Module : Ngrams.Lang.En
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# LANGUAGE ScopedTypeVariables #-}
module Ngrams.Lang.En where
{-
import Data.List ((!!))
import Data.Text (Text)
import Test.Hspec
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
-- TODO this import is not used anymore
import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
-- use instead
-- import Gargantext.Text.Terms (extractNgramsT)
ngramsExtractionTest :: IO ()
ngramsExtractionTest = hspec $ do
describe "Ngrams extraction in English Language" $ do
let textTest = [ "Alcoholic extract of Kaempferia galanga was tested for analgesic and antiinflammatory activities in animal models. ", "Three doses, 300 mg/kg, 600 mg/kg and 1200 mg/kg of the plant extract prepared as a suspension in 2 ml of 2% gum acacia were used. ", " Acute and sub acute inflammatory activities were studied in rats by carrageenan induced paw edema and cotton pellet induced granuloma models respectively. ", "In both models, the standard drug used was aspirin 100 mg/kg. ", "Two doses 600 mg/kg and 1200 mg/kg of plant extract exhibited significant (P<0.001) antiinflammatory activity in carrageenan model and cotton pellet granuloma model in comparison to control. ", "Analgesic activity was studied in rats using hot plate and tail-flick models. ", "Codeine 5 mg/kg and vehicle served as standard and control respectively. ", "The two doses of plant extract exhibited significant analgesic activity in tail flick model (P<0.001) and hot plate model (P<0.001) in comparison to control. ", "In conclusion K. galanga possesses antiinflammatory and analgesic activities. "] :: [Text]
it "\"Of\" seperates two ngrams" $ do
t1 <- map (selectNgrams EN) <$> extractNgrams EN (textTest !! 0)
t1 `shouldBe` [[("Alcoholic extract of Kaempferia galanga","NN","LOCATION"),("analgesic activities","NN+CC","O"),("antiinflammatory activities","NN+CC","O"),("animal models","NN","O")]]
it "Tests the conjunction of coordination in two ngrams with its adjectives" $ do
t2 <- map (selectNgrams EN) <$> extractNgrams EN (textTest !! 2)
t2 `shouldBe` [[("Acute activities","NN+CC","O"),("sub acute inflammatory activities","NN+CC","O"),("rats","NNS","O"),("carrageenan","NN","O"),("paw edema","NN","O"),("cotton pellet","NN","O"),("granuloma models","NN","O")]]
it "Tests nouns with preposition and determinants" $ do
let t = "Donald Trump is president of the United-States of America."
t2 <- map (selectNgrams EN) <$> extractNgrams EN t
t2 `shouldBe` [[("Donald Trump","NNP","PERSON"),("president of the United-States of America","NN","LOCATION")]]
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Ngrams/Lang/Fr.hs 0000664 0000000 0000000 00000005454 14124644201 0027140 0 ustar 00root root 0000000 0000000 {-|
Module : Ngrams.Lang.Fr
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# LANGUAGE ScopedTypeVariables #-}
module Ngrams.Lang.Fr where
{-
import Test.Hspec
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
-- TODO this import is not used anymore
import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
-- use instead
-- import Gargantext.Text.Terms (extractNgramsT)
ngramsExtractionTest :: IO ()
ngramsExtractionTest = hspec $ do
describe "Behavioral tests: ngrams extraction in French Language" $ do
it "Groupe : adjectif et nom commun" $ do
let textFr = "Le beau texte fut écrit."
testFr <- map (selectNgrams FR) <$> (extractNgrams FR) textFr
testFr `shouldBe` [[("beau texte","NC","O")]]
it "Groupe : adjectifs et nom commun" $ do
let textFr = "Le beau petit texte fut écrit."
testFr <- map (selectNgrams FR) <$> (extractNgrams FR) textFr
testFr `shouldBe` [[("beau petit texte","NC","O")]]
-- `shouldBe` [[("beau texte","NC","O"),("petit texte","NC","O")]] ?
it "Groupe : nom commun et adjectif" $ do
let textFr = "Le livre blanc fut écrit."
testFr <- map (selectNgrams FR) <$> (extractNgrams FR) textFr
testFr `shouldBe` [[("livre blanc","NC","O")]]
it "Groupe : nom commun et adjectifs avec conjonction" $ do
let textFr = "Le livre blanc et rouge."
testFr <- map (selectNgrams FR) <$> (extractNgrams FR) textFr
testFr `shouldBe` [[("livre blanc","NC","O"),("livre rouge","NC","O")]]
-- `shouldBe` [[("livre blanc et rouge","N","O")]] ?
it "Groupe: Nom commun + préposition + Nom commun" $ do
let textFr0 = "Le problème du jour est résolu."
testFr0 <- map (selectNgrams FR) <$> (extractNgrams FR) textFr0
testFr0 `shouldBe` [[("problème du jour","NC","O")]]
it "Groupe: Nom commun + préposition + déterminant + Nom commun" $ do
let textFr0 = "Emmanuel Macron est le président de la France."
testFr0 <- map (selectNgrams FR) <$> (extractNgrams FR) textFr0
testFr0 `shouldBe` [[("Emmanuel Macron","NPP","PERSON"),("président de la France","NC","LOCATION")]]
it "Groupe: Nom commun + préposition + Nom commun + prép + Nom commun" $ do
let textFr1 = "L'heure d'arrivée des coureurs dépend de la météo du jour."
testFr1 <- map (selectNgrams FR) <$> (extractNgrams FR) textFr1
testFr1 `shouldBe` [[("heure d' arrivée des coureurs","NC","O"),("météo du jour","NC","O")]]
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Ngrams/Lang/Occurrences.hs 0000664 0000000 0000000 00000003575 14124644201 0031046 0 ustar 00root root 0000000 0000000 {-|
Module : Ngrams.Lang.Occurrences
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# LANGUAGE ScopedTypeVariables #-}
module Ngrams.Lang.Occurrences where
{-
import Test.Hspec
import Data.Either (Either(Right))
import Gargantext.Prelude
import Gargantext.Text.Metrics.Occurrences (parseOccurrences)
parsersTest :: IO ()
parsersTest = hspec $ do
describe "Parser for occurrences" $ do
let txt = "internet"
it "returns the result of one parsing" $ do
parseOccurrences "internet" "internet" `shouldBe` Right 1
-- | Context of Text should be toLower
it "returns the result of one parsing not case sensitive" $ do
let txtCase = "Internet"
parseOccurrences txtCase "internet" `shouldBe` Right 1
it "returns the result of one parsing after space" $ do
parseOccurrences txt " internet"
`shouldBe` Right 1
it "returns the result of one parsing after chars" $ do
parseOccurrences txt "l'internet"
`shouldBe` Right 1
it "returns the result of multiple parsing" $ do
parseOccurrences txt "internet internet of things"
`shouldBe` Right 2
it "returns the result of multiple parsing separated by text" $ do
parseOccurrences txt "internet in the internet of things"
`shouldBe` Right 2
it "returns the result of multiple parsing separated by punctuation" $ do
parseOccurrences txt "internet. In the internet of things, internet like; internet?"
`shouldBe` Right 4
-- describe "Parser for nodes" $ do
-- it "returns the result of one parsing after space" $ do
-- occOfCorpus 249509 "sciences" `shouldReturn` 7
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Ngrams/Metrics.hs 0000664 0000000 0000000 00000012040 14124644201 0027303 0 ustar 00root root 0000000 0000000 {-|
Module : Ngrams.Metrics
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE CPP #-}
--module Ngrams.Metrics (main) where
module Ngrams.Metrics where
{-
import Data.Text (Text)
import qualified Data.Text as T
import Data.Ratio
import Test.Hspec
import Test.QuickCheck
import Gargantext.Prelude
import Gargantext.Text.Metrics
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative
#endif
instance Arbitrary Text where
arbitrary = T.pack <$> arbitrary
main :: IO ()
main = hspec spec
spec :: Spec
spec = do
describe "levenshtein" $ do
testSwap levenshtein
context "with concrete examples" $ do
testPair levenshtein "kitten" "sitting" 3
testPair levenshtein "cake" "drake" 2
testPair levenshtein "saturday" "sunday" 3
testPair levenshtein "red" "wax" 3
#if __GLASGOW_HASKELL__ >= 710
testPair levenshtein "a😀c" "abc" 1
#endif
testPair levenshtein "lucky" "lucky" 0
testPair levenshtein "" "" 0
describe "levenshteinNorm" $ do
testSwap levenshteinNorm
testPair levenshteinNorm "kitten" "sitting" (4 % 7)
testPair levenshteinNorm "cake" "drake" (3 % 5)
testPair levenshteinNorm "saturday" "sunday" (5 % 8)
testPair levenshteinNorm "red" "wax" (0 % 1)
#if __GLASGOW_HASKELL__ >= 710
testPair levenshteinNorm "a😀c" "abc" (2 % 3)
#endif
testPair levenshteinNorm "lucky" "lucky" (1 % 1)
testPair levenshteinNorm "" "" (1 % 1)
describe "damerauLevenshtein" $ do
testSwap damerauLevenshtein
testPair damerauLevenshtein "veryvery long" "very long" 4
testPair damerauLevenshtein "thing" "think" 1
testPair damerauLevenshtein "nose" "ones" 2
testPair damerauLevenshtein "thing" "sign" 3
testPair damerauLevenshtein "red" "wax" 3
#if __GLASGOW_HASKELL__ >= 710
testPair damerauLevenshtein "a😀c" "abc" 1
#endif
testPair damerauLevenshtein "lucky" "lucky" 0
testPair damerauLevenshtein "" "" 0
describe "damerauLevenshteinNorm" $ do
testSwap damerauLevenshteinNorm
testPair damerauLevenshteinNorm "veryvery long" "very long" (9 % 13)
testPair damerauLevenshteinNorm "thing" "think" (4 % 5)
testPair damerauLevenshteinNorm "nose" "ones" (1 % 2)
testPair damerauLevenshteinNorm "thing" "sign" (2 % 5)
testPair damerauLevenshteinNorm "red" "wax" (0 % 1)
#if __GLASGOW_HASKELL__ >= 710
testPair damerauLevenshteinNorm "a😀c" "abc" (2 % 3)
#endif
testPair damerauLevenshteinNorm "lucky" "lucky" (1 % 1)
testPair damerauLevenshteinNorm "" "" (1 % 1)
describe "hamming" $ do
testSwap hamming
testPair hamming "karolin" "kathrin" (Just 3)
testPair hamming "karolin" "kerstin" (Just 3)
testPair hamming "1011101" "1001001" (Just 2)
testPair hamming "2173896" "2233796" (Just 3)
testPair hamming "toned" "roses" (Just 3)
testPair hamming "red" "wax" (Just 3)
#if __GLASGOW_HASKELL__ >= 710
testPair hamming "a😀c" "abc" (Just 1)
#endif
testPair hamming "lucky" "lucky" (Just 0)
testPair hamming "" "" (Just 0)
testPair hamming "small" "big" Nothing
describe "overlap" $ do
testSwap overlap
testPair overlap "fly" "butterfly" (1 % 1)
testPair overlap "night" "nacht" (3 % 5)
testPair overlap "context" "contact" (5 % 7)
testPair overlap "red" "wax" (0 % 1)
#if __GLASGOW_HASKELL__ >= 710
testPair overlap "a😀c" "abc" (2 % 3)
#endif
testPair overlap "lucky" "lucky" (1 % 1)
describe "jaccard" $ do
testSwap jaccard
testPair jaccard "xxx" "xyx" (1 % 2)
testPair jaccard "night" "nacht" (3 % 7)
testPair jaccard "context" "contact" (5 % 9)
#if __GLASGOW_HASKELL__ >= 710
testPair overlap "a😀c" "abc" (2 % 3)
#endif
testPair jaccard "lucky" "lucky" (1 % 1)
-- | Test that given function returns the same results when order of
-- arguments is swapped.
testSwap :: (Eq a, Show a) => (Text -> Text -> a) -> SpecWith ()
testSwap f = context "if we swap the arguments" $
it "produces the same result" $
property $ \a b -> f a b === f b a
-- | Create spec for given metric function applying it to two 'Text' values
-- and comparing the result with expected one.
testPair :: (Eq a, Show a)
=> (Text -> Text -> a) -- ^ Function to test
-> Text -- ^ First input
-> Text -- ^ Second input
-> a -- ^ Expected result
-> SpecWith ()
testPair f a b r = it ("‘" <> T.unpack a <> "’ and ‘" <> T.unpack b <> "’") $
f a b `shouldBe` r
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Parsers/ 0000775 0000000 0000000 00000000000 14124644201 0025534 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Parsers/Date.hs 0000664 0000000 0000000 00000002702 14124644201 0026746 0 ustar 00root root 0000000 0000000 {-|
Module : Parsers.Date
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Parsers.Date where
import Test.Hspec
import Test.QuickCheck
import Control.Applicative ((<*>))
import Data.Either (Either(..))
import Data.Time (ZonedTime(..))
import Data.Text (pack, Text)
import Text.Parsec.Error (ParseError)
import Duckling.Time.Types (toRFC3339)
-----------------------------------------------------------
import Gargantext.Prelude
import Gargantext.Core.Text.Corpus.Parsers.Date.Parsec (fromRFC3339)
import Parsers.Types
-----------------------------------------------------------
fromRFC3339Inv :: Either ParseError ZonedTime -> Text
fromRFC3339Inv (Right z) = toRFC3339 z
fromRFC3339Inv (Left pe) = panic . pack $ show pe
testFromRFC3339 :: IO ()
testFromRFC3339 = hspec $ do
describe "Test fromRFC3339: " $ do
it "is the inverse of Duckling's toRFC3339" $ property $
((==) <*> (fromRFC3339 . fromRFC3339Inv)) . Right . looseZonedTimePrecision
-- \x -> uncurry (==) $ (,) <*> (fromRFC3339 . fromRFC3339Inv) $ Right $ looseZonedTimePrecision x
-- \x -> let e = Right x :: Either ParseError ZonedTime
-- in fmap looseZonedTimePrecision e == (fromRFC3339 . fromRFC3339Inv ) (fmap looseZonedTimePrecision e)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Parsers/Types.hs 0000664 0000000 0000000 00000003753 14124644201 0027204 0 ustar 00root root 0000000 0000000 {-|
Module : Parsers.Types
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE StandaloneDeriving #-}
module Parsers.Types where
import Gargantext.Prelude
import Test.QuickCheck
import Test.QuickCheck.Instances ()
import Text.Parsec.Pos
import Text.Parsec.Error (ParseError, Message(..), newErrorMessage)
import Data.Time.LocalTime (ZonedTime (..), TimeZone (..), TimeOfDay(..), LocalTime(..))
import Data.Eq (Eq(..))
import Data.Either (Either(..))
deriving instance Eq ZonedTime
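-- The helpers below deliberately drop sub-minute precision and normalise the
-- timezone, so that the QuickCheck round-trip property in Parsers.Date can
-- compare 'ZonedTime' values while ignoring fields the RFC3339 conversion does
-- not preserve exactly.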
looseTimeOfDayPrecision :: TimeOfDay -> TimeOfDay
looseTimeOfDayPrecision (TimeOfDay h m _) = TimeOfDay h m 0
looseLocalTimePrecision :: LocalTime -> LocalTime
looseLocalTimePrecision (LocalTime ld ltd) = LocalTime ld $ looseTimeOfDayPrecision ltd
looseTimeZonePrecision :: TimeZone -> TimeZone
looseTimeZonePrecision (TimeZone zm _ _) = TimeZone zm False "CET"
looseZonedTimePrecision :: ZonedTime -> ZonedTime
looseZonedTimePrecision (ZonedTime lt tz) = ZonedTime (looseLocalTimePrecision lt) $ looseTimeZonePrecision tz
loosePrecisionEitherPEZT :: Either ParseError ZonedTime -> Either ParseError ZonedTime
loosePrecisionEitherPEZT (Right zt) = Right $ looseZonedTimePrecision zt
loosePrecisionEitherPEZT pe = pe
instance Arbitrary Message where
arbitrary = do
msgContent <- arbitrary
oneof $ return <$> [SysUnExpect msgContent
, UnExpect msgContent
, Expect msgContent
, Message msgContent
]
instance Arbitrary SourcePos where
arbitrary = do
sn <- arbitrary
l <- arbitrary
c <- arbitrary
return $ newPos sn l c
instance Arbitrary ParseError where
arbitrary = do
sp <- arbitrary
msg <- arbitrary
return $ newErrorMessage msg sp
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Parsers/WOS.hs 0000664 0000000 0000000 00000000465 14124644201 0026545 0 ustar 00root root 0000000 0000000 {-|
Module : Parsers.WOS
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Parsers.WOS where
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Utils/ 0000775 0000000 0000000 00000000000 14124644201 0025215 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src-test/Utils/Crypto.hs 0000664 0000000 0000000 00000002432 14124644201 0027032 0 ustar 00root root 0000000 0000000
{-|
Module : Utils.Crypto
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Utils.Crypto where
import Data.Text (Text)
import Test.Hspec
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash
import Gargantext.Prelude.Utils
-- | Crypto Hash tests
test :: IO ()
test = hspec $ do
describe "Hash String with frontend works" $ do
let text = "To hash with backend" :: Text
let hashed = "8a69a94d164279af2b7d1443ce08da6184b3d7e815406076e148159c284b53c3" :: Hash
-- ^ hash from frontend with text above
it "compare" $ do
hash text `shouldBe` hashed
describe "Hash List with backend works" $ do
let list = ["a","b"] :: [Text]
let hashed = "ab19ec537f09499b26f0f62eed7aefad46ab9f498e06a7328ce8e8ef90da6d86" :: Hash
-- ^ hash from frontend with text above
it "compare" $ do
hash list `shouldBe` hashed
------------------------------------------------------------------------
-- | TODO property based tests
describe "Hash works with any order of list" $ do
let hash1 = hash (["a","b"] :: [Text])
let hash2 = hash (["b","a"] :: [Text])
it "compare" $ do
hash1 `shouldBe` hash2
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/ 0000775 0000000 0000000 00000000000 14124644201 0023140 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext.hs 0000664 0000000 0000000 00000001206 14124644201 0025577 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext
Description : Textmining Collaborative Platform
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
@Gargantext@: search, map, share
-}
module Gargantext ( module Gargantext.API
, module Gargantext.Core
, module Gargantext.Database
, module Gargantext.Prelude
-- , module Gargantext.Core.Viz
) where
import Gargantext.API
import Gargantext.Core
import Gargantext.Database
import Gargantext.Prelude
--import Gargantext.Core.Viz
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/ 0000775 0000000 0000000 00000000000 14124644201 0025244 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API.hs 0000664 0000000 0000000 00000017467 14124644201 0026230 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API
Description : REST API declaration
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main (RESTful) API of the Gargantext instance.
The Garg-API is typed to derive the documentation, the mock and tests.
This API is indeed typed in order to be able to derive both the server
and the client sides.
The Garg-API-Monad enables:
- Security (WIP)
- Features (WIP)
- Database connection (long term)
- In Memory stack management (short term)
- Logs (WIP)
Thanks to Yann Esposito for our discussions at the start and to Nicolas
Pouillard (who mainly made it).
-}
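-- A minimal, self-contained sketch of what "typed API" means with Servant.
-- 'ExampleAPI', 'exampleServer' and 'examplePing' are hypothetical names used
-- for illustration only; they are not part of Gargantext:
--
-- > {-# LANGUAGE DataKinds, OverloadedStrings, TypeOperators #-}
-- > import Data.Text (Text)
-- > import Servant
-- > import Servant.Client (ClientM, client)
-- >
-- > type ExampleAPI = "ping" :> Get '[JSON] Text
-- >
-- > exampleServer :: Server ExampleAPI   -- the server handler, derived from the type
-- > exampleServer = pure "pong"
-- >
-- > examplePing :: ClientM Text          -- the client function, derived from the same type
-- > examplePing = client (Proxy :: Proxy ExampleAPI)
--
-- The GargAPI below relies on this same mechanism to derive the server, the
-- client used in tests, and the Swagger documentation.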
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.API
where
import Control.Exception (finally)
import Control.Lens
import Control.Monad.Reader (runReaderT)
import Data.List (lookup)
import Data.Text.Encoding (encodeUtf8)
import Data.Text.IO (putStrLn)
import Data.Validity
import GHC.Base (Applicative)
import GHC.Generics (Generic)
import Gargantext.API.Admin.Auth.Types (AuthContext)
import Gargantext.API.Admin.Settings (newEnv)
import Gargantext.API.Admin.Types (FireWall(..), PortNumber, cookieSettings, jwtSettings, settings)
import Gargantext.API.Ngrams (saveNodeStory)
import Gargantext.API.Prelude
import Gargantext.API.Routes
import Gargantext.API.Server (server)
import Gargantext.Core.NodeStory
import Gargantext.Prelude hiding (putStrLn)
import Network.HTTP.Types hiding (Query)
import Network.Wai
import Network.Wai.Handler.Warp hiding (defaultSettings)
import Network.Wai.Middleware.Cors
import Network.Wai.Middleware.RequestLogger
import Servant
import System.IO (FilePath)
data Mode = Dev | Mock | Prod
deriving (Show, Read, Generic)
-- | startGargantext takes as parameters the port number and the Ini file path.
startGargantext :: Mode -> PortNumber -> FilePath -> IO ()
startGargantext mode port file = do
env <- newEnv port file
portRouteInfo port
app <- makeApp env
mid <- makeDevMiddleware mode
run port (mid app) `finally` stopGargantext env
portRouteInfo :: PortNumber -> IO ()
portRouteInfo port = do
putStrLn " ----Main Routes----- "
putStrLn $ "http://localhost:" <> toUrlPiece port <> "/index.html"
putStrLn $ "http://localhost:" <> toUrlPiece port <> "/swagger-ui"
-- TODO clean this Monad condition (more generic) ?
stopGargantext :: HasNodeStorySaver env => env -> IO ()
stopGargantext env = do
putStrLn "----- Stopping gargantext -----"
runReaderT saveNodeStory env
{-
startGargantextMock :: PortNumber -> IO ()
startGargantextMock port = do
portRouteInfo port
application <- makeMockApp . MockEnv $ FireWall False
run port application
-}
----------------------------------------------------------------------
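-- | Minimal firewall used by the (mock) middleware below: a request passes when
--   it carries the expected Origin and Host headers, or when the firewall flag
--   is switched off.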
fireWall :: Applicative f => Request -> FireWall -> f Bool
fireWall req fw = do
let origin = lookup "Origin" (requestHeaders req)
let host = lookup "Host" (requestHeaders req)
if origin == Just (encodeUtf8 "http://localhost:8008")
&& host == Just (encodeUtf8 "localhost:3000")
|| (not $ unFireWall fw)
then pure True
else pure False
{-
-- makeMockApp :: Env -> IO (Warp.Settings, Application)
makeMockApp :: MockEnv -> IO Application
makeMockApp env = do
let serverApp = appMock
-- logWare <- mkRequestLogger def { destination = RequestLogger.Logger $ env^.logger }
--logWare <- mkRequestLogger def { destination = RequestLogger.Logger "/tmp/logs.txt" }
let checkOriginAndHost app req resp = do
blocking <- fireWall req (env ^. menv_firewall)
case blocking of
True -> app req resp
False -> resp ( responseLBS status401 []
"Invalid Origin or Host header")
let corsMiddleware = cors $ \_ -> Just CorsResourcePolicy
-- { corsOrigins = Just ([env^.settings.allowedOrigin], False)
{ corsOrigins = Nothing -- == /*
, corsMethods = [ methodGet , methodPost , methodPut
, methodDelete, methodOptions, methodHead]
, corsRequestHeaders = ["authorization", "content-type"]
, corsExposedHeaders = Nothing
, corsMaxAge = Just ( 60*60*24 ) -- one day
, corsVaryOrigin = False
, corsRequireOrigin = False
, corsIgnoreFailures = False
}
--let warpS = Warp.setPort (8008 :: Int) -- (env^.settings.appPort)
-- $ Warp.defaultSettings
--pure (warpS, logWare $ checkOriginAndHost $ corsMiddleware $ serverApp)
pure $ logStdoutDev $ checkOriginAndHost $ corsMiddleware $ serverApp
-}
makeDevMiddleware :: Mode -> IO Middleware
makeDevMiddleware mode = do
-- logWare <- mkRequestLogger def { destination = RequestLogger.Logger $ env^.logger }
-- logWare <- mkRequestLogger def { destination = RequestLogger.Logger "/tmp/logs.txt" }
-- let checkOriginAndHost app req resp = do
-- blocking <- fireWall req (env ^. menv_firewall)
-- case blocking of
-- True -> app req resp
-- False -> resp ( responseLBS status401 []
-- "Invalid Origin or Host header")
--
let corsMiddleware = cors $ \_ -> Just CorsResourcePolicy
-- { corsOrigins = Just ([env^.settings.allowedOrigin], False)
{ corsOrigins = Nothing -- == /*
, corsMethods = [ methodGet , methodPost , methodPut
, methodDelete, methodOptions, methodHead]
, corsRequestHeaders = ["authorization", "content-type"]
, corsExposedHeaders = Nothing
, corsMaxAge = Just ( 60*60*24 ) -- one day
, corsVaryOrigin = False
, corsRequireOrigin = False
, corsIgnoreFailures = False
}
--let warpS = Warp.setPort (8008 :: Int) -- (env^.settings.appPort)
-- $ Warp.defaultSettings
--pure (warpS, logWare . checkOriginAndHost . corsMiddleware)
case mode of
Prod -> pure $ logStdout . corsMiddleware
_ -> pure $ logStdoutDev . corsMiddleware
---------------------------------------------------------------------
-- | API Global
---------------------------------------------------------------------
---------------------------
-- TODO-SECURITY admin only: withAdmin
-- Question: How do we mark admins?
{-
serverGargAdminAPI :: GargServer GargAdminAPI
serverGargAdminAPI = roots
:<|> nodesAPI
-}
---------------------------------------------------------------------
--gargMock :: Server GargAPI
--gargMock = mock apiGarg Proxy
---------------------------------------------------------------------
makeApp :: EnvC env => env -> IO Application
makeApp env = serveWithContext api cfg <$> server env
where
cfg :: Servant.Context AuthContext
cfg = env ^. settings . jwtSettings
:. env ^. settings . cookieSettings
-- :. authCheck env
:. EmptyContext
--appMock :: Application
--appMock = serve api (swaggerFront :<|> gargMock :<|> serverStatic)
---------------------------------------------------------------------
api :: Proxy API
api = Proxy
apiGarg :: Proxy GargAPI
apiGarg = Proxy
---------------------------------------------------------------------
{- UNUSED
--import GHC.Generics (D1, Meta (..), Rep, Generic)
--import GHC.TypeLits (AppendSymbol, Symbol)
---------------------------------------------------------------------
-- Type Family for the Documentation
type family TypeName (x :: *) :: Symbol where
TypeName Int = "Int"
TypeName Text = "Text"
TypeName x = GenericTypeName x (Rep x ())
type family GenericTypeName t (r :: *) :: Symbol where
GenericTypeName t (D1 ('MetaData name mod pkg nt) f x) = name
type Desc t n = Description (AppendSymbol (TypeName t) (AppendSymbol " | " n))
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/ 0000775 0000000 0000000 00000000000 14124644201 0025655 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/ 0000775 0000000 0000000 00000000000 14124644201 0026705 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/Auth.hs 0000664 0000000 0000000 00000011055 14124644201 0030144 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Admin.Auth
Description : Server API Auth Module
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
The main authorizations of Gargantext are managed in this module
-- 1: Implement the Server / Client JWT authentication
-> Client towards Python Backend
-> Server towards Purescript Front-End
-- 2: Implement the Auth API backend
https://github.com/haskell-servant/servant-auth
TODO-ACCESS Critical
-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.API.Admin.Auth
( auth
, withAccess
)
where
import Control.Lens (view)
import Data.Text.Lazy (toStrict)
import Data.Text.Lazy.Encoding (decodeUtf8)
import Servant
import Servant.Auth.Server
import qualified Gargantext.Prelude.Crypto.Auth as Auth
import Gargantext.API.Admin.Types
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Prelude (HasJoseError(..), joseError, HasServerError, GargServerC)
import Gargantext.Core.Types.Individu (User(..), Username, GargPassword(..))
import Gargantext.Database.Query.Tree (isDescendantOf, isIn)
import Gargantext.Database.Query.Tree.Root (getRoot)
import Gargantext.Database.Schema.Node (NodePoly(_node_id))
import Gargantext.Database.Admin.Types.Node (NodeId(..), UserId)
import Gargantext.Database.Prelude (Cmd', CmdM, HasConnectionPool, HasConfig)
import Gargantext.Prelude hiding (reverse)
import Gargantext.Database.Query.Table.User
---------------------------------------------------
-- | Main functions of authorization
makeTokenForUser :: (HasSettings env, HasJoseError err)
=> NodeId -> Cmd' env err Token
makeTokenForUser uid = do
jwtS <- view $ settings . jwtSettings
e <- liftBase $ makeJWT (AuthenticatedUser uid) jwtS Nothing
-- TODO-SECURITY here we can implement token expiration ^^.
either joseError (pure . toStrict . decodeUtf8) e
-- TODO not sure about the encoding...
checkAuthRequest :: (HasSettings env, HasConnectionPool env, HasJoseError err, HasConfig env)
=> Username
-> GargPassword
-> Cmd' env err CheckAuth
checkAuthRequest u (GargPassword p) = do
candidate <- head <$> getUsersWith u
case candidate of
Nothing -> pure InvalidUser
Just (UserLight _id _u _email h) ->
case Auth.checkPassword (Auth.mkPassword p) (Auth.PasswordHash h) of
Auth.PasswordCheckFail -> pure InvalidPassword
Auth.PasswordCheckSuccess -> do
muId <- head <$> getRoot (UserName u)
case _node_id <$> muId of
Nothing -> pure InvalidUser
Just uid -> do
token <- makeTokenForUser uid
pure $ Valid token uid
auth :: (HasSettings env, HasConnectionPool env, HasJoseError err, HasConfig env)
=> AuthRequest -> Cmd' env err AuthResponse
auth (AuthRequest u p) = do
checkAuthRequest' <- checkAuthRequest u p
case checkAuthRequest' of
InvalidUser -> pure $ AuthResponse Nothing (Just $ AuthInvalid "Invalid user")
InvalidPassword -> pure $ AuthResponse Nothing (Just $ AuthInvalid "Invalid password")
Valid to trId -> pure $ AuthResponse (Just $ AuthValid to trId) Nothing
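-- Illustrative exchange (hypothetical values; the JSON field names assume that
-- 'unPrefix' simply strips the record prefixes declared in
-- Gargantext.API.Admin.Auth.Types):
--
-- > request : {"username": "user1", "password": "..."}
-- > success : {"valid": {"token": "<jwt>", "tree_id": 42}, "inval": null}
-- > failure : {"valid": null, "inval": {"message": "Invalid password"}}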
--type instance BasicAuthCfg = BasicAuthData -> IO (AuthResult AuthenticatedUser)
{-
instance FromBasicAuthData AuthenticatedUser where
fromBasicAuthData authData authCheckFunction = authCheckFunction authData
authCheck :: forall env. env
-> BasicAuthData
-> IO (AuthResult AuthenticatedUser)
authCheck _env (BasicAuthData login password) = pure $
maybe Indefinite Authenticated $ TODO
-}
withAccessM :: (CmdM env err m, HasServerError err)
=> UserId
-> PathId
-> m a
-> m a
withAccessM uId (PathNode id) m = do
d <- id `isDescendantOf` NodeId uId
if d then m else m -- serverError err401
withAccessM uId (PathNodeNode cId docId) m = do
_a <- isIn cId docId -- TODO use one query for all ?
_d <- cId `isDescendantOf` NodeId uId
if True -- a && d
then m
else m
withAccess :: forall env err m api.
(GargServerC env err m, HasServer api '[]) =>
Proxy api -> Proxy m -> UserId -> PathId ->
ServerT api m -> ServerT api m
withAccess p _ uId id = hoistServer p f
where
f :: forall a. m a -> m a
f = withAccessM uId id
{- | Collaborative Schema
User at his root can create Teams Folder
User can create Team in Teams Folder.
User can invite User in Team as NodeNode only if Team in his parents.
All users can access to the Team folder as if they were owner.
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/Auth/ 0000775 0000000 0000000 00000000000 14124644201 0027606 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/Auth/Types.hs 0000664 0000000 0000000 00000007143 14124644201 0031253 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Admin.Auth.Types
Description : Types for Server API Auth Module
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Admin.Auth.Types
where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Servant.Auth.Server
import Test.QuickCheck (elements, oneof)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Gargantext.Core.Types.Individu (Username, GargPassword(..), arbitraryUsername, arbitraryPassword)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Node (NodeId(..), ListId, DocId)
import Gargantext.Prelude hiding (reverse)
---------------------------------------------------
-- | Main types for AUTH API
data AuthRequest = AuthRequest { _authReq_username :: Username
, _authReq_password :: GargPassword
}
deriving (Generic)
-- TODO: Use an HTTP error to wrap AuthInvalid
data AuthResponse = AuthResponse { _authRes_valid :: Maybe AuthValid
, _authRes_inval :: Maybe AuthInvalid
}
deriving (Generic)
data AuthInvalid = AuthInvalid { _authInv_message :: Text }
deriving (Generic)
data AuthValid = AuthValid { _authVal_token :: Token
, _authVal_tree_id :: TreeId
}
deriving (Generic)
type Token = Text
type TreeId = NodeId
data CheckAuth = InvalidUser | InvalidPassword | Valid Token TreeId
deriving (Eq)
newtype AuthenticatedUser = AuthenticatedUser
{ _authUser_id :: NodeId
} deriving (Generic)
$(deriveJSON (unPrefix "_authUser_") ''AuthenticatedUser)
instance ToSchema AuthenticatedUser where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_authUser_")
instance ToJWT AuthenticatedUser
instance FromJWT AuthenticatedUser
-- TODO-SECURITY why is the CookieSettings necessary?
type AuthContext = '[JWTSettings, CookieSettings] -- , BasicAuthCfg
-- | Instances
$(deriveJSON (unPrefix "_authReq_") ''AuthRequest)
instance ToSchema AuthRequest where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_authReq_")
instance Arbitrary AuthRequest where
arbitrary = elements [ AuthRequest u p
| u <- arbitraryUsername
, p <- arbitraryPassword
]
$(deriveJSON (unPrefix "_authRes_") ''AuthResponse)
instance ToSchema AuthResponse where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_authRes_")
instance Arbitrary AuthResponse where
arbitrary = oneof [ AuthResponse Nothing . Just <$> arbitrary
, flip AuthResponse Nothing . Just <$> arbitrary ]
$(deriveJSON (unPrefix "_authInv_") ''AuthInvalid)
instance ToSchema AuthInvalid where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_authInv_")
instance Arbitrary AuthInvalid where
arbitrary = elements [ AuthInvalid m
| m <- [ "Invalid user", "Invalid password"]
]
$(deriveJSON (unPrefix "_authVal_") ''AuthValid)
instance ToSchema AuthValid where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_authVal_")
instance Arbitrary AuthValid where
arbitrary = elements [ AuthValid to tr
| to <- ["token0", "token1"]
, tr <- [1..3]
]
data PathId = PathNode NodeId | PathNodeNode ListId DocId haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/EnvTypes.hs 0000664 0000000 0000000 00000005472 14124644201 0031026 0 ustar 00root root 0000000 0000000 -- |
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Admin.EnvTypes where
import Control.Lens
import Data.Pool (Pool)
import Database.PostgreSQL.Simple (Connection)
import GHC.Generics (Generic)
import Network.HTTP.Client (Manager)
import Servant.Client (BaseUrl)
import Servant.Job.Async (HasJobEnv(..), Job)
import System.Log.FastLogger
import qualified Servant.Job.Core
import Gargantext.API.Ngrams.Types (HasRepoVar(..), HasRepoSaver(..), HasRepo(..), RepoEnv(..))
import Gargantext.API.Admin.Types
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig(..))
import Gargantext.Prelude
import Gargantext.Prelude.Config (GargConfig(..))
import Gargantext.Core.NodeStory
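-- | Runtime environment of a running Gargantext server: HTTP settings, a
--   fast-logger handle, the PostgreSQL connection pool, the Ngrams repo and
--   NodeStory state, an HTTP 'Manager', the server's own base URL, the async
--   scraper job environment and the ini-file configuration.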
data Env = Env
{ _env_settings :: !Settings
, _env_logger :: !LoggerSet
, _env_pool :: !(Pool Connection)
, _env_repo :: !RepoEnv
, _env_nodeStory :: !NodeStoryEnv
, _env_manager :: !Manager
, _env_self_url :: !BaseUrl
, _env_scrapers :: !ScrapersEnv
, _env_config :: !GargConfig
}
deriving (Generic)
makeLenses ''Env
instance HasConfig Env where
hasConfig = env_config
instance HasConnectionPool Env where
connPool = env_pool
instance HasNodeStoryEnv Env where
hasNodeStory = env_nodeStory
instance HasNodeStoryVar Env where
hasNodeStoryVar = hasNodeStory . nse_getter
instance HasNodeStorySaver Env where
hasNodeStorySaver = hasNodeStory . nse_saver
instance HasSettings Env where
settings = env_settings
-- Specific to Repo
instance HasRepoVar Env where
repoVar = repoEnv . repoVar
instance HasRepoSaver Env where
repoSaver = repoEnv . repoSaver
instance HasRepo Env where
repoEnv = env_repo
instance Servant.Job.Core.HasEnv Env (Job JobLog JobLog) where
_env = env_scrapers . Servant.Job.Core._env
instance HasJobEnv Env JobLog JobLog where
job_env = env_scrapers
data MockEnv = MockEnv
{ _menv_firewall :: !FireWall
}
deriving (Generic)
makeLenses ''MockEnv
data DevEnv = DevEnv
{ _dev_env_settings :: !Settings
, _dev_env_repo :: !RepoEnv
, _dev_env_config :: !GargConfig
, _dev_env_pool :: !(Pool Connection)
, _dev_env_nodeStory :: !NodeStoryEnv
}
makeLenses ''DevEnv
instance HasConfig DevEnv where
hasConfig = dev_env_config
instance HasConnectionPool DevEnv where
connPool = dev_env_pool
instance HasSettings DevEnv where
settings = dev_env_settings
instance HasNodeStoryEnv DevEnv where
hasNodeStory = dev_env_nodeStory
instance HasNodeStoryVar DevEnv where
hasNodeStoryVar = hasNodeStory . nse_getter
instance HasNodeStorySaver DevEnv where
hasNodeStorySaver = hasNodeStory . nse_saver
instance HasRepoVar DevEnv where
repoVar = repoEnv . repoVar
instance HasRepoSaver DevEnv where
repoSaver = repoEnv . repoSaver
instance HasRepo DevEnv where
repoEnv = dev_env_repo
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/FrontEnd.hs 0000664 0000000 0000000 00000001105 14124644201 0030755 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Admin.FrontEnd
Description : Server FrontEnd API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Loads all static files for the front-end.
-}
{-# LANGUAGE TypeOperators #-}
---------------------------------------------------------------------
module Gargantext.API.Admin.FrontEnd where
import Servant
type FrontEndAPI = Raw
frontEndServer :: Server FrontEndAPI
frontEndServer = serveDirectoryFileServer "./purescript-gargantext/dist"
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/Orchestrator.hs0000664 0000000 0000000 00000006016 14124644201 0031723 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Admin.Orchestrator
Description : Jobs Orchestrator
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.API.Admin.Orchestrator where
import Control.Lens hiding (elements)
import Data.Aeson
import Servant
import Servant.Job.Async
import Servant.Job.Client
import qualified Data.ByteString.Lazy.Char8 as LBS
import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.Prelude
callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
=> JobServerURL e Schedule o
-> (URL -> Schedule)
-> m o
callJobScrapy jurl schedule = do
progress $ NewTask jurl
out <- view job_output <$>
retryOnTransientFailure (clientCallbackJob' jurl
(fmap (const ()) . scrapySchedule . schedule))
progress $ Finished jurl Nothing
pure out
logConsole :: ToJSON a => a -> IO ()
logConsole = LBS.putStrLn . encode
callScraper :: MonadClientJob m => URL -> ScraperInput -> m JobLog
callScraper url input =
callJobScrapy jurl $ \cb ->
Schedule
{ s_project = "gargantext"
, s_spider = input ^. scin_spider
, s_setting = []
, s_jobid = Nothing
, s_version = Nothing
, s_extra =
[("query", input ^.. scin_query . _Just)
,("user", [input ^. scin_user])
,("corpus", [input ^. scin_corpus . to toUrlPiece])
,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
,("limit", input ^.. scin_limit . _Just . to toUrlPiece)
,("url", input ^.. scin_local_file . _Just)
,("count_only", input ^.. scin_count_only . _Just . to toUrlPiece)
,("callback", [toUrlPiece cb])]
}
where
jurl :: JobServerURL JobLog Schedule JobLog
jurl = JobServerURL url Callback
pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
-> (e -> IO ()) -> IO JobLog
pipeline scrapyurl client_env input log_status = do
e <- runJobMLog client_env log_status $ callScraper scrapyurl input
either (panic . cs . show) pure e -- TODO throwError
-- TODO integrate to ServerT
-- use:
-- * serveJobsAPI instead of simpleServeJobsAPI
-- * JobFunction instead of simpleJobFunction
-- TODO:
-- * HasSelfUrl or move self_url to settings
-- * HasScrapers or move scrapers to settings
-- * EnvC env
{- NOT USED YET
import Data.Text
import Servant.Job.Server
import Servant.Job.Utils (extendBaseUrl)
import Gargantext.API.Admin.Types
scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
scrapyOrchestrator env = do
apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
defaultSettings (extendBaseUrl ("scraper" :: Text) $ env ^. env_self_url)
(env ^. env_manager) (LogEvent logConsole) $
simpleServeJobsAPI (env ^. env_scrapers) .
simpleJobFunction . pipeline (URL $ env ^. settings . scrapydUrl)
-} haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/Orchestrator/ 0000775 0000000 0000000 00000000000 14124644201 0031364 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/Orchestrator/Scrapy/ 0000775 0000000 0000000 00000000000 14124644201 0032546 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/Orchestrator/Scrapy/Schedule.hs 0000664 0000000 0000000 00000003131 14124644201 0034634 0 ustar 00root root 0000000 0000000 {-|
Module      : Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
Description : Client for the scrapyd job scheduling API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
where
import Control.Lens
import Data.Aeson
import GHC.Generics
import Protolude
import Servant
import Servant.Client
import Servant.Job.Utils (jsonOptions)
import Web.FormUrlEncoded hiding (parseMaybe)
import qualified Data.HashMap.Strict as H
------------------------------------------------------------------------
data Schedule = Schedule
{ s_project :: !Text
, s_spider :: !Text
, s_setting :: ![Text]
, s_jobid :: !(Maybe Text)
, s_version :: !(Maybe Text)
, s_extra :: ![(Text,[Text])]
}
deriving (Generic)
data ScheduleResponse = ScheduleResponse
{ r_status :: !Text
, r_jobid :: !Text
}
deriving (Generic)
instance FromJSON ScheduleResponse where
parseJSON = genericParseJSON (jsonOptions "r_")
instance ToForm Schedule where
toForm s =
Form . H.fromList $
[("project", [s_project s])
,("spider", [s_spider s])
,("setting", s_setting s)
,("jobid", s_jobid s ^.. _Just)
,("_version", s_version s ^.. _Just)
] ++ s_extra s
type Scrapy =
"schedule.json" :> ReqBody '[FormUrlEncoded] Schedule
:> Post '[JSON] ScheduleResponse
scrapyAPI :: Proxy Scrapy
scrapyAPI = Proxy
scrapySchedule :: Schedule -> ClientM ScheduleResponse
scrapySchedule = client scrapyAPI
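-- A minimal usage sketch (not part of the original module): assuming a
-- @Manager@ and the scrapyd 'BaseUrl' are at hand, a 'Schedule' can be
-- submitted with servant-client's 'runClientM'. The names @manager@,
-- @scrapydBase@ and the spider name are illustrative placeholders.
--
-- > let sched = Schedule { s_project = "gargantext"
-- >                      , s_spider  = "pubmed"
-- >                      , s_setting = []
-- >                      , s_jobid   = Nothing
-- >                      , s_version = Nothing
-- >                      , s_extra   = [] }
-- > res <- runClientM (scrapySchedule sched) (mkClientEnv manager scrapydBase)
-- > -- res :: Either ClientError ScheduleResponse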
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/Orchestrator/Types.hs 0000664 0000000 0000000 00000007151 14124644201 0032751 0 ustar 00root root 0000000 0000000 {-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Admin.Orchestrator.Types
where
import Control.Lens hiding (elements)
import Data.Aeson
import Data.Proxy
import Data.Swagger hiding (URL, url, port)
import Data.Text (Text)
import GHC.Generics hiding (to)
import Servant
import Servant.Job.Async
import Servant.Job.Types
import Servant.Job.Utils (jsonOptions)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.Core.Types (TODO(..))
import Gargantext.Prelude
------------------------------------------------------------------------
instance Arbitrary a => Arbitrary (JobStatus 'Safe a) where
arbitrary = panic "TODO"
instance Arbitrary a => Arbitrary (JobOutput a) where
arbitrary = JobOutput <$> arbitrary
-- | Main Types
-- TODO IsidoreAuth
data ExternalAPIs = All
| PubMed
| HAL
| IsTex
| Isidore
deriving (Show, Eq, Enum, Bounded, Generic)
-- | Main Instances
instance FromJSON ExternalAPIs
instance ToJSON ExternalAPIs
externalAPIs :: [ExternalAPIs]
externalAPIs = [minBound..maxBound]
instance Arbitrary ExternalAPIs
where
arbitrary = elements externalAPIs
instance ToSchema ExternalAPIs
instance ToSchema URL where
declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
data ScraperInput = ScraperInput
{ _scin_spider :: !Text
, _scin_query :: !(Maybe Text)
, _scin_user :: !Text
, _scin_corpus :: !Int
, _scin_report_every :: !(Maybe Int)
, _scin_limit :: !(Maybe Int)
, _scin_local_file :: !(Maybe Text)
, _scin_count_only :: !(Maybe Bool)
}
deriving Generic
makeLenses ''ScraperInput
instance FromJSON ScraperInput where
parseJSON = genericParseJSON $ jsonOptions "_scin_"
-- Proposal to replace the Corpus.API.Query type, which seems too generically named.
data ScraperEvent = ScraperEvent
{ _scev_message :: !(Maybe Text)
, _scev_level :: !(Maybe Text)
, _scev_date :: !(Maybe Text)
}
deriving (Show, Generic)
instance Arbitrary ScraperEvent where
arbitrary = ScraperEvent <$> elements [Nothing, Just "test message"]
<*> elements [Nothing, Just "INFO", Just "WARN"]
<*> elements [Nothing, Just "2018-04-18"]
instance ToJSON ScraperEvent where
toJSON = genericToJSON $ jsonOptions "_scev_"
instance FromJSON ScraperEvent where
parseJSON = genericParseJSON $ jsonOptions "_scev_"
data JobLog = JobLog
{ _scst_succeeded :: !(Maybe Int)
, _scst_failed :: !(Maybe Int)
, _scst_remaining :: !(Maybe Int)
, _scst_events :: !(Maybe [ScraperEvent])
}
deriving (Show, Generic)
makeLenses ''JobLog
instance Arbitrary JobLog where
arbitrary = JobLog
<$> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
instance ToJSON JobLog where
toJSON = genericToJSON $ jsonOptions "_scst_"
instance FromJSON JobLog where
parseJSON = genericParseJSON $ jsonOptions "_scst_"
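-- Illustrative note (an assumption, not taken from the original source):
-- with 'jsonOptions "_scst_"' stripping the field prefix, a 'JobLog' is
-- expected to serialise roughly as:
--
-- > { "succeeded": 10, "failed": 0, "remaining": 5, "events": [] }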
instance ToSchema JobLog -- TODO _scst_ prefix
instance ToSchema ScraperInput -- TODO _scin_ prefix
instance ToSchema ScraperEvent -- TODO _scev_ prefix
instance ToParamSchema Offset -- where
-- toParamSchema = panic "TODO"
instance ToParamSchema Limit -- where
-- toParamSchema = panic "TODO"
type ScrapersEnv = JobEnv JobLog JobLog
type ScraperAPI = AsyncJobsAPI JobLog ScraperInput JobLog
------------------------------------------------------------------------
type AsyncJobs event ctI input output =
AsyncJobsAPI' 'Unsafe 'Safe ctI '[JSON] Maybe event input output
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/Settings.hs 0000664 0000000 0000000 00000016606 14124644201 0031052 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Admin.Settings
Description : Settings of the API (Server and Client)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO-SECURITY: Critical
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Admin.Settings
where
import Codec.Serialise (Serialise(), serialise, deserialise)
import Control.Concurrent
import Control.Debounce (mkDebounce, defaultDebounceSettings, debounceFreq, debounceAction)
import Control.Lens
import Control.Monad.Logger
import Control.Monad.Reader
import Data.Maybe (fromMaybe)
import Data.Pool (Pool, createPool)
import Database.PostgreSQL.Simple (Connection, connect, close, ConnectInfo)
import Gargantext.Core.NodeStory
import Gargantext.Prelude.Config (GargConfig(..), {-gc_repofilepath,-} readConfig)
import Network.HTTP.Client.TLS (newTlsManager)
import Servant.Auth.Server (defaultJWTSettings, CookieSettings(..), XsrfCookieSettings(..), defaultCookieSettings, defaultXsrfCookieSettings, readKey, writeKey)
import Servant.Client (parseBaseUrl)
import Servant.Job.Async (newJobEnv, defaultSettings)
import System.Directory
import System.FileLock (tryLockFile, unlockFile, SharedExclusive(Exclusive))
import System.IO (FilePath, hClose)
import System.IO.Temp (withTempFile)
import System.Log.FastLogger
import qualified Data.ByteString.Lazy as L
import Gargantext.API.Admin.EnvTypes
import Gargantext.API.Admin.Types
import Gargantext.API.Ngrams.Types (NgramsRepo, HasRepo(..), RepoEnv(..), r_version, initRepo, renv_var, renv_lock)
import Gargantext.Database.Prelude (databaseParameters, HasConfig(..))
import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_repofilepath)
devSettings :: FilePath -> IO Settings
devSettings jwkFile = do
jwkExists <- doesFileExist jwkFile
when (not jwkExists) $ writeKey jwkFile
jwk <- readKey jwkFile
pure $ Settings
{ _allowedOrigin = "http://localhost:8008"
, _allowedHost = "localhost:3000"
, _appPort = 3000
, _logLevelLimit = LevelDebug
-- , _dbServer = "localhost"
, _sendLoginEmails = LogEmailToConsole
, _scrapydUrl = fromMaybe (panic "Invalid scrapy URL") $ parseBaseUrl "http://localhost:6800"
, _cookieSettings = defaultCookieSettings { cookieXsrfSetting = Just xsrfCookieSetting } -- TODO-SECURITY tune
, _jwtSettings = defaultJWTSettings jwk -- TODO-SECURITY tune
}
where
xsrfCookieSetting = defaultXsrfCookieSettings { xsrfExcludeGet = True }
{- NOT USED YET
import System.Environment (lookupEnv)
reqSetting :: FromHttpApiData a => Text -> IO a
reqSetting name = do
e <- fromMaybe (panic $ "Missing " <> name) <$> lookupEnv (unpack name)
pure $ either (panic $ "Unable to parse " <> name) identity $ parseUrlPiece $ pack e
optSetting :: FromHttpApiData a => Text -> a -> IO a
optSetting name d = do
me <- lookupEnv (unpack name)
case me of
Nothing -> pure d
Just e -> pure $ either (panic $ "Unable to parse " <> name) identity $ parseUrlPiece $ pack e
settingsFromEnvironment :: IO Settings
settingsFromEnvironment =
Settings <$> (encodeUtf8 <$> reqSetting "ALLOWED_ORIGIN")
<*> (encodeUtf8 <$> reqSetting "ALLOWED_HOST")
<*> optSetting "PORT" 3000
<*> (parseLogLevel <$> optSetting "LOG_LEVEL" "warn")
<*> reqSetting "DB_SERVER"
<*> (parseJwk <$> reqSetting "JWT_SECRET")
<*> optSetting "SEND_EMAIL" SendEmailViaAws
-}
-----------------------------------------------------------------------
-- | RepoDir FilePath configuration
type RepoDirFilePath = FilePath
repoSnapshot :: RepoDirFilePath -> FilePath
repoSnapshot repoDir = repoDir <> "/repo.cbor"
-- This assumes we own the lock on repoSnapshot.
repoSaverAction :: RepoDirFilePath -> Serialise a => a -> IO ()
repoSaverAction repoDir a = do
withTempFile repoDir "tmp-repo.cbor" $ \fp h -> do
printDebug "repoSaverAction" fp
L.hPut h $ serialise a
hClose h
renameFile fp (repoSnapshot repoDir)
--{-
-- The use of mkDebounce makes sure that repoSaverAction is not called too often.
-- If repoSaverAction starts taking longer than the debounceFreq, then the
-- debounceFreq should be increased.
mkRepoSaver :: RepoDirFilePath -> MVar NgramsRepo -> IO (IO ())
mkRepoSaver repoDir repo_var = mkDebounce settings'
where
settings' = defaultDebounceSettings
{ debounceFreq = let n = 6 :: Int in 10^n -- 1 second
, debounceAction = withMVar repo_var (repoSaverAction repoDir)
-- Here this is not just `readMVar` but `takeMVar`.
-- Namely, while repoSaverAction is saving, no other change
-- can be made to the MVar.
-- This might not be efficient and may be reconsidered later.
-- However, it enables us to safely perform a *final* save.
-- See `cleanEnv`.
-- Future work:
-- Add a new MVar just for saving.
}
readRepoEnv :: FilePath -> IO RepoEnv
readRepoEnv repoDir = do
-- Does file exist ? :: Bool
_repoDir <- createDirectoryIfMissing True repoDir
repoFile <- doesFileExist (repoSnapshot repoDir)
-- Is file not empty ? :: Bool
repoExists <- if repoFile
then (>0) <$> getFileSize (repoSnapshot repoDir)
else pure False
mlock <- tryLockFile (repoSnapshot repoDir) Exclusive
lock <- maybe (panic "Repo file already locked") pure mlock
mvar <- newMVar =<<
if repoExists
then do
-- e_repo <- eitherDecodeStrict <$> deserialise <$> L.readFile repoSnapshot
repo <- deserialise <$> L.readFile (repoSnapshot repoDir)
-- repo <- either fail pure e_repo
let archive = (repoSnapshot repoDir) <> ".v" <> show (repo ^. r_version)
copyFile (repoSnapshot repoDir) archive
pure repo
else
pure initRepo
-- TODO save in DB here
saver <- mkRepoSaver repoDir mvar
pure $ RepoEnv { _renv_var = mvar, _renv_saver = saver, _renv_lock = lock }
--}
devJwkFile :: FilePath
devJwkFile = "dev.jwk"
newEnv :: PortNumber -> FilePath -> IO Env
newEnv port file = do
manager_env <- newTlsManager
settings' <- devSettings devJwkFile <&> appPort .~ port -- TODO read from 'file'
when (port /= settings' ^. appPort) $
panic "TODO: conflicting settings of port"
config_env <- readConfig file
self_url_env <- parseBaseUrl $ "http://0.0.0.0:" <> show port
dbParam <- databaseParameters file
pool <- newPool dbParam
repo <- readRepoEnv (_gc_repofilepath config_env)
nodeStory_env <- readNodeStoryEnv (_gc_repofilepath config_env)
scrapers_env <- newJobEnv defaultSettings manager_env
logger <- newStderrLoggerSet defaultBufSize
pure $ Env
{ _env_settings = settings'
, _env_logger = logger
, _env_pool = pool
, _env_repo = repo
, _env_nodeStory = nodeStory_env
, _env_manager = manager_env
, _env_scrapers = scrapers_env
, _env_self_url = self_url_env
, _env_config = config_env
}
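-- Note on 'newPool' below: 'createPool' from Data.Pool takes, in order, the
-- resource allocator, the destructor, the number of stripes, the idle time
-- before an unused resource is released, and the maximum number of resources
-- per stripe. Here: 1 stripe, a 1-hour idle timeout, and at most 8 open
-- connections.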
newPool :: ConnectInfo -> IO (Pool Connection)
newPool param = createPool (connect param) close 1 (60*60) 8
--{-
cleanEnv :: (HasConfig env, HasRepo env) => env -> IO ()
cleanEnv env = do
r <- takeMVar (env ^. repoEnv . renv_var)
repoSaverAction (env ^. hasConfig . gc_repofilepath) r
unlockFile (env ^. repoEnv . renv_lock)
--}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/Types.hs 0000664 0000000 0000000 00000002301 14124644201 0030341 0 ustar 00root root 0000000 0000000 -- |
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Admin.Types where
import Control.Lens
import Control.Monad.Logger
import Data.ByteString (ByteString)
import GHC.Enum
import GHC.Generics (Generic)
import Gargantext.Prelude
import Servant.Auth.Server (JWTSettings, CookieSettings(..))
import Servant.Client (BaseUrl)
type PortNumber = Int
data SendEmailType = SendEmailViaAws
| LogEmailToConsole
| WriteEmailToFile
deriving (Show, Read, Enum, Bounded, Generic)
data Settings = Settings
{ _allowedOrigin :: !ByteString -- allowed origin for CORS
, _allowedHost :: !ByteString -- allowed host for CORS
, _appPort :: !PortNumber
, _logLevelLimit :: !LogLevel -- log level from the monad-logger package
-- , _dbServer :: Text
-- ^ this is not used yet
, _jwtSettings :: !JWTSettings
, _cookieSettings :: !CookieSettings
, _sendLoginEmails :: !SendEmailType
, _scrapydUrl :: !BaseUrl
}
makeLenses ''Settings
class HasSettings env where
settings :: Getter env Settings
instance HasSettings Settings where
settings = identity
data FireWall = FireWall { unFireWall :: Bool }
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Admin/Utils.hs 0000664 0000000 0000000 00000001243 14124644201 0030341 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Admin.Utils
Description : Server API main Types
Copyright : (c) CNRS, 2017-Present
License : BSD3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mainly copied from Servant.Job.Utils (Thanks)
-}
module Gargantext.API.Admin.Utils
where
import Data.Maybe (fromMaybe)
import Gargantext.Prelude
import Prelude (String)
import qualified Data.Text as T
infixr 4 ?|
-- Reverse infix form of "fromMaybe"
(?|) :: Maybe a -> a -> a
(?|) = flip fromMaybe
infixr 4 ?!
-- Reverse infix form of "fromJust" with a custom error message
(?!) :: Maybe a -> String -> a
(?!) ma' msg = ma' ?| panic (T.pack msg)
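-- Usage sketch (illustrative only; @mPort@ and @mConfig@ are hypothetical
-- values, not part of the original module):
--
-- > mPort   ?| 8080                      -- default value when Nothing
-- > mConfig ?! "missing configuration"   -- panic with this message when Nothing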
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Count.hs 0000664 0000000 0000000 00000010402 14124644201 0027276 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Count
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Count API part of Gargantext.
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE DeriveAnyClass #-}
module Gargantext.API.Count
where
import Data.Aeson hiding (Error)
import Data.Aeson.TH (deriveJSON)
import Data.Either
import Data.List (permutations)
import Data.Swagger
import Data.Text (Text, pack)
import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Prelude
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
-----------------------------------------------------------------------
-- TODO-ACCESS: CanCount
-- TODO-EVENTS: No events as this is a read only query.
type CountAPI = Post '[JSON] Counts
-----------------------------------------------------------------------
data Scraper = Pubmed | Hal | IsTex | Isidore
deriving (Eq, Show, Generic, Enum, Bounded)
scrapers :: [Scraper]
scrapers = [minBound..maxBound]
instance FromJSON Scraper
instance ToJSON Scraper
instance Arbitrary Scraper where
arbitrary = elements scrapers
instance ToSchema Scraper
-----------------------------------------------------------------------
data QueryBool = QueryBool Text
deriving (Eq, Show, Generic)
queries :: [QueryBool]
queries = [QueryBool (pack "(X OR X') AND (Y OR Y') NOT (Z OR Z')")]
--queries = [QueryBool (pack "(X + X') * (Y + Y') - (Z + Z')")]
instance Arbitrary QueryBool where
arbitrary = elements queries
instance FromJSON QueryBool
instance ToJSON QueryBool
instance ToSchema QueryBool
-----------------------------------------------------------------------
data Query = Query { query_query :: QueryBool
, query_name :: Maybe [Scraper]
}
deriving (Eq, Show, Generic)
instance FromJSON Query
instance ToJSON Query
instance Arbitrary Query where
arbitrary = elements [ Query q (Just n)
| q <- queries
, n <- take 10 $ permutations scrapers
]
instance ToSchema Query where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")
-----------------------------------------------------------------------
type Code = Integer
type Error = Text
type Errors = [Error]
-----------------------------------------------------------------------
data Message = Message Code Errors
deriving (Eq, Show, Generic)
toMessage :: [(Code, Errors)] -> [Message]
toMessage = map (\(c,err) -> Message c err)
messages :: [Message]
messages = toMessage $ [ (400, ["Ill formed query "])
, (300, ["API connexion error "])
, (300, ["Internal Gargantext Error "])
] <> take 10 ( repeat (200, [""]))
instance Arbitrary Message where
arbitrary = elements messages
instance FromJSON Message
instance ToJSON Message
instance ToSchema Message
-----------------------------------------------------------------------
data Counts = Counts { results :: [Either Message Count]
} deriving (Eq, Show, Generic)
instance FromJSON Counts
instance ToJSON Counts
instance Arbitrary Counts where
arbitrary = elements [Counts [ Right (Count Pubmed (Just 20 ))
, Right (Count IsTex (Just 150))
, Right (Count Hal (Just 150))
]
]
instance ToSchema Counts
-----------------------------------------------------------------------
data Count = Count { count_name :: Scraper
, count_count :: Maybe Int
}
deriving (Eq, Show, Generic)
$(deriveJSON (unPrefix "count_") ''Count)
instance ToSchema Count where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "count_")
--instance Arbitrary Count where
-- arbitrary = Count <$> arbitrary <*> arbitrary <*> arbitrary
-----------------------------------------------------------------------
count :: Monad m => Query -> m Counts
count _ = undefined
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Dev.hs 0000664 0000000 0000000 00000004521 14124644201 0026731 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Dev
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
-- Use only for dev/repl
module Gargantext.API.Dev where
import Control.Exception (finally)
import Control.Monad (fail)
import Control.Monad.Reader (runReaderT)
import Gargantext.API.Admin.EnvTypes
import Gargantext.API.Admin.Settings
import Gargantext.API.Ngrams (saveNodeStory)
import Gargantext.API.Prelude
import Gargantext.Core.NodeStory
import Gargantext.Database.Prelude
import Gargantext.Prelude
import Gargantext.Prelude.Config (GargConfig(..), readConfig)
import Servant
import System.IO (FilePath)
type IniPath = FilePath
-------------------------------------------------------------------
withDevEnv :: IniPath -> (DevEnv -> IO a) -> IO a
withDevEnv iniPath k = do
env <- newDevEnv
k env `finally` cleanEnv env
where
newDevEnv = do
cfg <- readConfig iniPath
dbParam <- databaseParameters iniPath
nodeStory_env <- readNodeStoryEnv (_gc_repofilepath cfg)
pool <- newPool dbParam
repo <- readRepoEnv (_gc_repofilepath cfg)
setts <- devSettings devJwkFile
pure $ DevEnv
{ _dev_env_pool = pool
, _dev_env_repo = repo
, _dev_env_nodeStory = nodeStory_env
, _dev_env_settings = setts
, _dev_env_config = cfg
}
-- | Run Cmd Sugar for the Repl (GHCI)
runCmdRepl :: Show err => Cmd'' DevEnv err a -> IO a
runCmdRepl f = withDevEnv "gargantext.ini" $ \env -> runCmdDev env f
runCmdReplServantErr :: Cmd'' DevEnv ServerError a -> IO a
runCmdReplServantErr = runCmdRepl
-- In particular this writes the repo file after running
-- the command.
-- This function is constrained to the DevEnv rather than
-- using HasConnectionPool and HasRepoVar.
runCmdDev :: (Show err) => DevEnv -> Cmd'' DevEnv err a -> IO a
runCmdDev env f =
(either (fail . show) pure =<< runCmd env f)
`finally`
runReaderT saveNodeStory env
runCmdDevNoErr :: DevEnv -> Cmd' DevEnv () a -> IO a
runCmdDevNoErr = runCmdDev
runCmdDevServantErr :: DevEnv -> Cmd' DevEnv ServerError a -> IO a
runCmdDevServantErr = runCmdDev
runCmdReplEasy :: Cmd'' DevEnv GargError a -> IO a
runCmdReplEasy f = withDevEnv "gargantext.ini" $ \env -> runCmdDev env f
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Flow.hs 0000664 0000000 0000000 00000001133 14124644201 0027116 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Flow
Description : Main Flow API DataTypes
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.API.Flow
where
-- import Gargantext.API.Prelude
import Gargantext.Prelude
data InputFlow = TextsInput
| NgramsInput
| ListInput
data Flow = EndFlow
| Texts InputFlow [Flow]
| Ngrams InputFlow [Flow]
| Lists InputFlow [Flow]
data OutputFlow
flow :: Flow -> OutputFlow
flow = undefined
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/HashedResponse.hs 0000664 0000000 0000000 00000001475 14124644201 0031133 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.HashedResponse
Description :
Copyright : (c) CNRS, 2020-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.API.HashedResponse where
import Data.Aeson
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.Prelude
import qualified Gargantext.Prelude.Crypto.Hash as Crypto (hash)
data HashedResponse a = HashedResponse { hash :: Text, value :: a }
deriving (Generic)
instance ToSchema a => ToSchema (HashedResponse a)
instance ToJSON a => ToJSON (HashedResponse a) where
toJSON = genericToJSON defaultOptions
constructHashedResponse :: ToJSON a => a -> HashedResponse a
constructHashedResponse v = HashedResponse { hash = Crypto.hash $ encode v, value = v }
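-- Usage sketch: wrap any JSON-encodable payload so clients can cache on the
-- hash (see e.g. 'getScatter' in Gargantext.API.Metrics, which returns
-- 'constructHashedResponse chart'). Illustrative example:
--
-- > constructHashedResponse ([1,2,3] :: [Int])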
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Job.hs 0000664 0000000 0000000 00000003411 14124644201 0026722 0 ustar 00root root 0000000 0000000 module Gargantext.API.Job where
import Control.Lens (over, _Just)
import Data.IORef
import Data.Maybe
import Gargantext.Prelude
import Gargantext.API.Admin.Orchestrator.Types
jobLogInit :: Int -> JobLog
jobLogInit rem =
JobLog { _scst_succeeded = Just 0
, _scst_remaining = Just rem
, _scst_failed = Just 0
, _scst_events = Just [] }
jobLogSuccess :: JobLog -> JobLog
jobLogSuccess jl = over (scst_succeeded . _Just) (+ 1) $
over (scst_remaining . _Just) (\x -> x - 1) jl
jobLogFail :: JobLog -> JobLog
jobLogFail jl = over (scst_failed . _Just) (+ 1) $
over (scst_remaining . _Just) (\x -> x - 1) jl
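-- Worked example (illustrative, derived from the definitions above):
--
-- > jobLogSuccess (jobLogInit 2)
-- > == JobLog { _scst_succeeded = Just 1
-- >           , _scst_failed    = Just 0
-- >           , _scst_remaining = Just 1
-- >           , _scst_events    = Just [] }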
jobLogFailTotal :: JobLog -> JobLog
jobLogFailTotal (JobLog { _scst_succeeded = mSucc
, _scst_remaining = mRem
, _scst_failed = mFail
, _scst_events = evt }) =
JobLog { _scst_succeeded = mSucc
, _scst_remaining = newRem
, _scst_failed = newFail
, _scst_events = evt }
where
(newRem, newFail) = case mRem of
Nothing -> (Nothing, mFail)
Just rem -> (Just 0, (+ rem) <$> mFail)
jobLogEvt :: JobLog -> ScraperEvent -> JobLog
jobLogEvt jl evt = over (scst_events . _Just) (\evts -> (evt:evts)) jl
runJobLog :: MonadBase IO m => Int -> (JobLog -> m ()) -> m (m (), m (), m JobLog)
runJobLog num logStatus = do
jlRef <- liftBase $ newIORef $ jobLogInit num
return (logRefF jlRef, logRefSuccessF jlRef, getRefF jlRef)
where
logRefF ref = do
jl <- liftBase $ readIORef ref
logStatus jl
logRefSuccessF ref = do
jl <- liftBase $ readIORef ref
let jl' = jobLogSuccess jl
liftBase $ writeIORef ref jl'
logStatus jl'
getRefF ref = do
liftBase $ readIORef ref
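-- Usage sketch for 'runJobLog' (mirrors how it is used in
-- Gargantext.API.Ngrams.tableNgramsPostChartsAsync): the returned triple gives
-- an action that reports the current log, one that records a success step, and
-- one that reads back the final 'JobLog'. @someWorkStep@ is a hypothetical
-- unit of work.
--
-- > (logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
-- > logRef                -- report the initial state
-- > _ <- someWorkStep     -- do one unit of work
-- > logRefSuccess         -- mark one step as succeeded
-- > getRef                -- read the final JobLog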
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Metrics.hs 0000664 0000000 0000000 00000031670 14124644201 0027626 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Metrics
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Metrics API
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Metrics
where
import Control.Lens
import Data.Text (Text)
import Data.Time (UTCTime)
import Data.Vector (Vector)
import Gargantext.API.HashedResponse
import Gargantext.API.Ngrams.NgramsTree
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.Text.Metrics (Scored(..), {-normalizeGlobal,-} normalizeLocal)
import Gargantext.Core.Types (CorpusId, Limit, ListId, ListType(..))
import Gargantext.Core.Viz.Chart
import Gargantext.Core.Viz.Types
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataList(..), hl_chart, hl_pie, hl_scatter, hl_tree)
import Gargantext.Database.Admin.Types.Metrics (ChartMetrics(..), Metric(..), Metrics(..))
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Servant
import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Database.Action.Metrics as Metrics
-------------------------------------------------------------
-- | Scatter metrics API
type ScatterAPI = Summary "SepGen IncExc metrics"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> QueryParam "limit" Int
:> Get '[JSON] (HashedResponse Metrics)
:<|> Summary "Scatter update"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> QueryParam "limit" Int
:> Post '[JSON] ()
:<|> "hash" :> Summary "Scatter Hash"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> Get '[JSON] Text
scatterApi :: NodeId -> GargServer ScatterAPI
scatterApi id' = getScatter id'
:<|> updateScatter id'
:<|> getScatterHash id'
getScatter :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> Maybe Limit
-> m (HashedResponse Metrics)
getScatter cId maybeListId tabType _maybeLimit = do
listId <- case maybeListId of
Just lid -> pure lid
Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let HyperdataList { _hl_scatter = scatterMap } = node ^. node_hyperdata
mChart = HashMap.lookup tabType scatterMap
chart <- case mChart of
Just chart -> pure chart
Nothing -> do
updateScatter' cId maybeListId tabType Nothing
pure $ constructHashedResponse chart
updateScatter :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> Maybe Limit
-> m ()
updateScatter cId maybeListId tabType maybeLimit = do
printDebug "[updateScatter] cId" cId
printDebug "[updateScatter] maybeListId" maybeListId
printDebug "[updateScatter] tabType" tabType
printDebug "[updateScatter] maybeLimit" maybeLimit
_ <- updateScatter' cId maybeListId tabType maybeLimit
pure ()
updateScatter' :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> Maybe Limit
-> m Metrics
updateScatter' cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics cId maybeListId tabType maybeLimit
let
metrics = fmap (\(Scored t s1 s2) -> Metric { m_label = unNgramsTerm t
, m_x = s1
, m_y = s2
, m_cat = listType t ngs' })
$ fmap normalizeLocal scores
listType t m = maybe (panic errorMsg) fst $ HashMap.lookup t m
errorMsg = "API.Node.metrics: key absent"
listId <- case maybeListId of
Just lid -> pure lid
Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let hl = node ^. node_hyperdata
scatterMap = hl ^. hl_scatter
_ <- updateHyperdata listId $ hl { _hl_scatter = HashMap.insert tabType (Metrics metrics) scatterMap }
pure $ Metrics metrics
getScatterHash :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> m Text
getScatterHash cId maybeListId tabType = do
hash <$> getScatter cId maybeListId tabType Nothing
-------------------------------------------------------------
-- | Chart metrics API
type ChartApi = Summary " Chart API"
:> QueryParam "from" UTCTime
:> QueryParam "to" UTCTime
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> Get '[JSON] (HashedResponse (ChartMetrics Histo))
:<|> Summary "Chart update"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> QueryParam "limit" Int
:> Post '[JSON] ()
:<|> "hash" :> Summary "Chart Hash"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> Get '[JSON] Text
chartApi :: NodeId -> GargServer ChartApi
chartApi id' = getChart id'
:<|> updateChart id'
:<|> getChartHash id'
-- TODO add start / end
getChart :: FlowCmdM env err m =>
CorpusId
-> Maybe UTCTime
-> Maybe UTCTime
-> Maybe ListId
-> TabType
-> m (HashedResponse (ChartMetrics Histo))
getChart cId _start _end maybeListId tabType = do
listId <- case maybeListId of
Just lid -> pure lid
Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let chartMap = node ^. node_hyperdata ^. hl_chart
mChart = HashMap.lookup tabType chartMap
chart <- case mChart of
Just chart -> pure chart
Nothing -> do
updateChart' cId maybeListId tabType Nothing
pure $ constructHashedResponse chart
updateChart :: HasNodeError err =>
CorpusId
-> Maybe ListId
-> TabType
-> Maybe Limit
-> Cmd err ()
updateChart cId maybeListId tabType maybeLimit = do
printDebug "[updateChart] cId" cId
printDebug "[updateChart] maybeListId" maybeListId
printDebug "[updateChart] tabType" tabType
printDebug "[updateChart] maybeLimit" maybeLimit
_ <- updateChart' cId maybeListId tabType maybeLimit
pure ()
updateChart' :: HasNodeError err =>
CorpusId
-> Maybe ListId
-> TabType
-> Maybe Limit
-> Cmd err (ChartMetrics Histo)
updateChart' cId maybeListId tabType _maybeLimit = do
listId <- case maybeListId of
Just lid -> pure lid
Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let hl = node ^. node_hyperdata
chartMap = hl ^. hl_chart
h <- histoData cId
_ <- updateHyperdata listId $ hl { _hl_chart = HashMap.insert tabType (ChartMetrics h) chartMap }
pure $ ChartMetrics h
getChartHash :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> m Text
getChartHash cId maybeListId tabType = do
hash <$> getChart cId Nothing Nothing maybeListId tabType
-------------------------------------------------------------
-- | Pie metrics API
type PieApi = Summary "Pie Chart"
:> QueryParam "from" UTCTime
:> QueryParam "to" UTCTime
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> Get '[JSON] (HashedResponse (ChartMetrics Histo))
:<|> Summary "Pie Chart update"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> QueryParam "limit" Int
:> Post '[JSON] ()
:<|> "hash" :> Summary "Pie Hash"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> Get '[JSON] Text
pieApi :: NodeId -> GargServer PieApi
pieApi id' = getPie id'
:<|> updatePie id'
:<|> getPieHash id'
getPie :: FlowCmdM env err m
=> CorpusId
-> Maybe UTCTime
-> Maybe UTCTime
-> Maybe ListId
-> TabType
-> m (HashedResponse (ChartMetrics Histo))
getPie cId _start _end maybeListId tabType = do
listId <- case maybeListId of
Just lid -> pure lid
Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let pieMap = node ^. node_hyperdata ^. hl_pie
mChart = HashMap.lookup tabType pieMap
chart <- case mChart of
Just chart -> pure chart
Nothing -> do
updatePie' cId maybeListId tabType Nothing
pure $ constructHashedResponse chart
updatePie :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> Maybe Limit
-> m ()
updatePie cId maybeListId tabType maybeLimit = do
printDebug "[updatePie] cId" cId
printDebug "[updatePie] maybeListId" maybeListId
printDebug "[updatePie] tabType" tabType
printDebug "[updatePie] maybeLimit" maybeLimit
_ <- updatePie' cId maybeListId tabType maybeLimit
pure ()
updatePie' :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> Maybe Limit
-> m (ChartMetrics Histo)
updatePie' cId maybeListId tabType _maybeLimit = do
listId <- case maybeListId of
Just lid -> pure lid
Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let hl = node ^. node_hyperdata
pieMap = hl ^. hl_pie
p <- chartData cId (ngramsTypeFromTabType tabType) MapTerm
_ <- updateHyperdata listId $ hl { _hl_pie = HashMap.insert tabType (ChartMetrics p) pieMap }
pure $ ChartMetrics p
getPieHash :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> m Text
getPieHash cId maybeListId tabType = do
hash <$> getPie cId Nothing Nothing maybeListId tabType
-------------------------------------------------------------
-- | Tree metrics API
type TreeApi = Summary " Tree API"
:> QueryParam "from" UTCTime
:> QueryParam "to" UTCTime
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> QueryParamR "listType" ListType
:> Get '[JSON] (HashedResponse (ChartMetrics (Vector NgramsTree)))
:<|> Summary "Tree Chart update"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> QueryParamR "listType" ListType
:> Post '[JSON] ()
:<|> "hash" :>
Summary "Tree Hash"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> QueryParamR "listType" ListType
:> Get '[JSON] Text
treeApi :: NodeId -> GargServer TreeApi
treeApi id' = getTree id'
:<|> updateTree id'
:<|> getTreeHash id'
getTree :: FlowCmdM env err m
=> CorpusId
-> Maybe UTCTime
-> Maybe UTCTime
-> Maybe ListId
-> TabType
-> ListType
-> m (HashedResponse (ChartMetrics (Vector NgramsTree)))
getTree cId _start _end maybeListId tabType listType = do
listId <- case maybeListId of
Just lid -> pure lid
Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let treeMap = node ^. node_hyperdata ^. hl_tree
mChart = HashMap.lookup tabType treeMap
chart <- case mChart of
Just chart -> pure chart
Nothing -> do
updateTree' cId maybeListId tabType listType
pure $ constructHashedResponse chart
updateTree :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> ListType
-> m ()
updateTree cId maybeListId tabType listType = do
printDebug "[updateTree] cId" cId
printDebug "[updateTree] maybeListId" maybeListId
printDebug "[updateTree] tabType" tabType
printDebug "[updateTree] listType" listType
_ <- updateTree' cId maybeListId tabType listType
pure ()
updateTree' :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> ListType
-> m (ChartMetrics (Vector NgramsTree))
updateTree' cId maybeListId tabType listType = do
listId <- case maybeListId of
Just lid -> pure lid
Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let hl = node ^. node_hyperdata
treeMap = hl ^. hl_tree
t <- treeData cId (ngramsTypeFromTabType tabType) listType
_ <- updateHyperdata listId $ hl { _hl_tree = HashMap.insert tabType (ChartMetrics t) treeMap }
pure $ ChartMetrics t
getTreeHash :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> ListType
-> m Text
getTreeHash cId maybeListId tabType listType = do
hash <$> getTree cId Nothing Nothing maybeListId tabType listType
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Ngrams.hs 0000664 0000000 0000000 00000072601 14124644201 0027446 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Ngrams
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Ngrams API
-- | TODO
get ngrams filtered by NgramsType
add get
-}
{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.API.Ngrams
( TableNgramsApi
, TableNgramsApiGet
, TableNgramsApiPut
, getTableNgrams
, setListNgrams
--, rmListNgrams TODO fix before exporting
, apiNgramsTableCorpus
, apiNgramsTableDoc
, NgramsStatePatch
, NgramsTablePatch
, NgramsTableMap
, NgramsTerm(..)
, NgramsElement(..)
, mkNgramsElement
, RootParent(..)
, MSet
, mSetFromList
, mSetToList
, Repo(..)
, r_version
, r_state
, r_history
, NgramsRepo
, NgramsRepoElement(..)
, saveNodeStory
, initRepo
, RepoEnv(..)
, renv_var
, renv_lock
, TabType(..)
, QueryParamR
, TODO
-- Internals
, getNgramsTableMap
, dumpJsonTableMap
, tableNgramsPull
, tableNgramsPut
, Version
, Versioned(..)
, VersionedWithCount(..)
, currentVersion
, listNgramsChangedSince
)
where
import Control.Concurrent
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
import Control.Monad.Reader
import Data.Aeson hiding ((.=))
import Data.Either (Either(..))
import Data.Foldable
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import Data.Monoid
import Data.Ord (Down(..))
import Data.Patch.Class (Action(act), Transformable(..), ours)
import Data.Swagger hiding (version, patch)
import Data.Text (Text, isInfixOf, unpack)
import Data.Text.Lazy.IO as DTL
import Formatting (hprint, int, (%))
import GHC.Generics (Generic)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude
import Gargantext.Core.NodeStory
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
import Gargantext.API.Ngrams.Tools
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms)
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
import Gargantext.Prelude hiding (log)
import Gargantext.Prelude.Clock (hasTime, getTime)
import Prelude (error)
import Servant hiding (Patch)
import Servant.Job.Async (JobFunction(..), serveJobsAPI)
import System.IO (stderr)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Aeson.Text as DAT
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Map.Strict.Patch as PM
import qualified Data.Set as S
import qualified Data.Set as Set
import qualified Gargantext.API.Metrics as Metrics
import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
{-
-- TODO sequences of modifications (Patchs)
type NgramsIdPatch = Patch NgramsId NgramsPatch
ngramsPatch :: Int -> NgramsPatch
ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty
toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
toEdit n p = Edit n p
ngramsIdPatch :: Patch NgramsId NgramsPatch
ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
, replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
, replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
]
-- applyPatchBack :: Patch -> IO Patch
-- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
-}
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
{-
-- TODO: Replace.old is ignored, which means that if the current list is
-- `MapTerm` and the patch is `Replace CandidateTerm StopTerm`, then
-- the list is going to be `StopTerm` while it should keep `MapTerm`.
-- However this should not happen in non-conflicting situations.
mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
mkListsUpdate nt patches =
[ (ngramsTypeId nt, ng, listTypeId lt)
| (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
, lt <- patch ^.. patch_list . new
]
mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
-> NgramsType
-> NgramsTablePatch
-> [(NgramsTypeId, NgramsParent, NgramsChild)]
mkChildrenGroups addOrRem nt patches =
[ (ngramsTypeId nt, parent, child)
| (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
, child <- patch ^.. patch_children . to addOrRem . folded
]
-}
------------------------------------------------------------------------
saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
=> m ()
saveNodeStory = liftBase =<< view hasNodeStorySaver
listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
ngramsStatePatchConflictResolution
:: TableNgrams.NgramsType
-> NgramsTerm
-> ConflictResolutionNgramsPatch
ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
= (ours, (const ours, ours), (False, False))
-- (False, False) mean here that Mod has always priority.
-- (True, False) <- would mean priority to the left (same as ours).
-- undefined {- TODO think this through -}, listTypeConflictResolution)
-- Current state:
-- Insertions are not considered as patches,
-- they do not extend history,
-- they do not bump version.
insertNewOnly :: a -> Maybe b -> a
insertNewOnly m = maybe m (const $ error "insertNewOnly: impossible")
-- TODO error handling
{- unused
-- TODO refactor with putListNgrams
copyListNgrams :: RepoCmdM env err m
=> NodeId -> NodeId -> NgramsType
-> m ()
copyListNgrams srcListId dstListId ngramsType = do
var <- view repoVar
liftBase $ modifyMVar_ var $
pure . (r_state . at ngramsType %~ (Just . f . something))
saveNodeStory
where
f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)
-- TODO refactor with putListNgrams
-- The list must be non-empty!
-- The added ngrams must be non-existent!
addListNgrams :: RepoCmdM env err m
=> NodeId -> NgramsType
-> [NgramsElement] -> m ()
addListNgrams listId ngramsType nes = do
var <- view repoVar
liftBase $ modifyMVar_ var $
pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
saveNodeStory
where
m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
-}
-- | TODO: incr the Version number
-- && should use patch
-- UNSAFE
setListNgrams :: HasNodeStory env err m
=> NodeId
-> TableNgrams.NgramsType
-> Map NgramsTerm NgramsRepoElement
-> m ()
setListNgrams listId ngramsType ns = do
printDebug "[setListNgrams]" (listId, ngramsType)
getter <- view hasNodeStory
var <- liftBase $ (getter ^. nse_getter) [listId]
liftBase $ modifyMVar_ var $
pure . ( unNodeStory
. at listId . _Just
. a_state
. at ngramsType
.~ Just ns
)
saveNodeStory
currentVersion :: HasNodeStory env err m
=> ListId -> m Version
currentVersion listId = do
nls <- getRepo' [listId]
pure $ nls ^. unNodeStory . at listId . _Just . a_version
newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
newNgramsFromNgramsStatePatch p =
[ text2ngrams (unNgramsTerm n)
| (n,np) <- p ^.. _PatchMap
-- . each . _PatchMap
. each . _NgramsTablePatch
. _PatchMap . ifolded . withIndex
, _ <- np ^.. patch_new . _Just
]
commitStatePatch :: HasNodeStory env err m
=> ListId
-> Versioned NgramsStatePatch'
-> m (Versioned NgramsStatePatch')
commitStatePatch listId (Versioned p_version p) = do
printDebug "[commitStatePatch]" listId
var <- getNodeStoryVar [listId]
vq' <- liftBase $ modifyMVar var $ \ns -> do
let
a = ns ^. unNodeStory . at listId . _Just
q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
(p', q') = transformWith ngramsStatePatchConflictResolution p q
a' = a & a_version +~ 1
& a_state %~ act p'
& a_history %~ (p' :)
{-
-- Ideally we would like to check these properties. However:
-- * They should be checked only to debug the code. The client data
-- should be able to trigger these.
-- * What kind of error should they throw (we are in IO here)?
-- * Should we keep modifyMVar?
-- * Should we throw the validation in an Exception, catch it around
-- modifyMVar and throw it back as an Error?
assertValid $ transformable p q
assertValid $ applicable p' (r ^. r_state)
-}
printDebug "[commitStatePatch] a version" (a ^. a_version)
printDebug "[commitStatePatch] a' version" (a' ^. a_version)
pure ( ns & unNodeStory . at listId .~ (Just a')
, Versioned (a' ^. a_version) q'
)
saveNodeStory
-- Save new ngrams
_ <- insertNgrams (newNgramsFromNgramsStatePatch p)
pure vq'
-- This is a special case of tableNgramsPut where the input patch is empty.
tableNgramsPull :: HasNodeStory env err m
=> ListId
-> TableNgrams.NgramsType
-> Version
-> m (Versioned NgramsTablePatch)
tableNgramsPull listId ngramsType p_version = do
printDebug "[tableNgramsPull]" (listId, ngramsType)
var <- getNodeStoryVar [listId]
r <- liftBase $ readMVar var
let
a = r ^. unNodeStory . at listId . _Just
q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
q_table = q ^. _PatchMap . at ngramsType . _Just
pure (Versioned (a ^. a_version) q_table)
-- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
-- Apply the given patch to the DB and return the patch to be applied on the
-- client.
-- TODO-ACCESS check
tableNgramsPut :: ( HasNodeStory env err m
, HasInvalidError err
, HasSettings env
)
=> TabType
-> ListId
-> Versioned NgramsTablePatch
-> m (Versioned NgramsTablePatch)
tableNgramsPut tabType listId (Versioned p_version p_table)
| p_table == mempty = do
printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
let ngramsType = ngramsTypeFromTabType tabType
tableNgramsPull listId ngramsType p_version
| otherwise = do
printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
let ngramsType = ngramsTypeFromTabType tabType
(p, p_validity) = PM.singleton ngramsType p_table
assertValid p_validity
ret <- commitStatePatch listId (Versioned p_version p)
<&> v_data %~ (view (_PatchMap . at ngramsType . _Just))
pure ret
tableNgramsPostChartsAsync :: ( HasNodeStory env err m
, FlowCmdM env err m
, HasNodeError err
, HasSettings env
)
=> UpdateTableNgramsCharts
-> (JobLog -> m ())
-> m JobLog
tableNgramsPostChartsAsync utn logStatus = do
let tabType = utn ^. utn_tab_type
let listId = utn ^. utn_list_id
node <- getNode listId
let nId = node ^. node_id
_uId = node ^. node_user_id
mCId = node ^. node_parent_id
-- printDebug "[tableNgramsPostChartsAsync] tabType" tabType
-- printDebug "[tableNgramsPostChartsAsync] listId" listId
case mCId of
Nothing -> do
printDebug "[tableNgramsPostChartsAsync] can't update charts, no parent, nId" nId
pure $ jobLogFail $ jobLogInit 1
Just cId -> do
case tabType of
Authors -> do
-- printDebug "[tableNgramsPostChartsAsync] Authors, updating Pie, cId" cId
(logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
logRef
_ <- Metrics.updatePie cId (Just listId) tabType Nothing
logRefSuccess
getRef
Institutes -> do
-- printDebug "[tableNgramsPostChartsAsync] Institutes, updating Tree, cId" cId
-- printDebug "[tableNgramsPostChartsAsync] updating tree StopTerm, cId" cId
(logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
logRef
_ <- Metrics.updateTree cId (Just listId) tabType StopTerm
-- printDebug "[tableNgramsPostChartsAsync] updating tree CandidateTerm, cId" cId
logRefSuccess
_ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
-- printDebug "[tableNgramsPostChartsAsync] updating tree MapTerm, cId" cId
logRefSuccess
_ <- Metrics.updateTree cId (Just listId) tabType MapTerm
logRefSuccess
getRef
Sources -> do
-- printDebug "[tableNgramsPostChartsAsync] Sources, updating chart, cId" cId
(logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
logRef
_ <- Metrics.updatePie cId (Just listId) tabType Nothing
logRefSuccess
getRef
Terms -> do
-- printDebug "[tableNgramsPostChartsAsync] Terms, updating Metrics (Histo), cId" cId
(logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
logRef
{-
_ <- Metrics.updateChart cId (Just listId) tabType Nothing
logRefSuccess
_ <- Metrics.updatePie cId (Just listId) tabType Nothing
logRefSuccess
_ <- Metrics.updateScatter cId (Just listId) tabType Nothing
logRefSuccess
_ <- Metrics.updateTree cId (Just listId) tabType StopTerm
logRefSuccess
_ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
logRefSuccess
_ <- Metrics.updateTree cId (Just listId) tabType MapTerm
-}
logRefSuccess
getRef
_ -> do
printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
pure $ jobLogFail $ jobLogInit 1
{-
{ _ne_list :: ListType
If we merge the parents/children we can potentially create cycles!
, _ne_parent :: Maybe NgramsTerm
, _ne_children :: MSet NgramsTerm
}
-}
getNgramsTableMap :: HasNodeStory env err m
=> NodeId
-> TableNgrams.NgramsType
-> m (Versioned NgramsTableMap)
getNgramsTableMap nodeId ngramsType = do
v <- getNodeStoryVar [nodeId]
repo <- liftBase $ readMVar v
pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version)
(repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
dumpJsonTableMap :: HasNodeStory env err m
=> Text
-> NodeId
-> TableNgrams.NgramsType
-> m ()
dumpJsonTableMap fpath nodeId ngramsType = do
m <- getNgramsTableMap nodeId ngramsType
liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
pure ()
type MinSize = Int
type MaxSize = Int
-- | TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ...
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
-- TODO: should take only one ListId
getTableNgrams :: forall env err m.
(HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
=> NodeType -> NodeId -> TabType
-> ListId -> Limit -> Maybe Offset
-> Maybe ListType
-> Maybe MinSize -> Maybe MaxSize
-> Maybe OrderBy
-> (NgramsTerm -> Bool)
-> m (VersionedWithCount NgramsTable)
getTableNgrams _nType nId tabType listId limit_ offset
listType minSize maxSize orderBy searchQuery = do
t0 <- getTime
-- lIds <- selectNodesWithUsername NodeList userMaster
let
ngramsType = ngramsTypeFromTabType tabType
offset' = maybe 0 identity offset
listType' = maybe (const True) (==) listType
minSize' = maybe (const True) (<=) minSize
maxSize' = maybe (const True) (>=) maxSize
selected_node n = minSize' s
&& maxSize' s
&& searchQuery (n ^. ne_ngrams)
&& listType' (n ^. ne_list)
where
s = n ^. ne_size
selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
---------------------------------------
sortOnOrder Nothing = identity
sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
---------------------------------------
filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
filteredNodes tableMap = rootOf <$> list & filter selected_node
where
rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
(tableMap ^. at r)
)
(ne ^. ne_root)
list = tableMap ^.. each
---------------------------------------
selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
selectAndPaginate tableMap = roots <> inners
where
list = tableMap ^.. each
rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
(tableMap ^. at r)
)
(ne ^. ne_root)
selected_nodes = list & take limit_
. drop offset'
. filter selected_node
. sortOnOrder orderBy
roots = rootOf <$> selected_nodes
rootsSet = Set.fromList (_ne_ngrams <$> roots)
inners = list & filter (selected_inner rootsSet)
---------------------------------------
setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
setScores False table = pure table
setScores True table = do
let ngrams_terms = table ^.. each . ne_ngrams
t1 <- getTime
occurrences <- getOccByNgramsOnlyFast' nId
listId
ngramsType
ngrams_terms
t2 <- getTime
liftBase $ hprint stderr
("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
(length ngrams_terms) t1 t2
{-
occurrences <- getOccByNgramsOnlySlow nType nId
(lIds <> [listId])
ngramsType
ngrams_terms
-}
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
pure $ table & each %~ setOcc
---------------------------------------
-- lists <- catMaybes <$> listsWith userMaster
-- trace (show lists) $
-- getNgramsTableMap ({-lists <>-} listIds) ngramsType
let scoresNeeded = needsScores orderBy
tableMap1 <- getNgramsTableMap listId ngramsType
t1 <- getTime
tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
. Map.mapWithKey ngramsElementFromRepo
fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
. filteredNodes
let fltrCount = length $ fltr ^. v_data . _NgramsTable
t2 <- getTime
tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
. setScores (not scoresNeeded)
. selectAndPaginate
t3 <- getTime
liftBase $ hprint stderr
("getTableNgrams total=" % hasTime
% " map1=" % hasTime
% " map2=" % hasTime
% " map3=" % hasTime
% " sql=" % (if scoresNeeded then "map2" else "map3")
% "\n"
) t0 t3 t0 t1 t1 t2 t2 t3
pure $ toVersionedWithCount fltrCount tableMap3
scoresRecomputeTableNgrams :: forall env err m.
(HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
=> NodeId -> TabType -> ListId -> m Int
scoresRecomputeTableNgrams nId tabType listId = do
tableMap <- getNgramsTableMap listId ngramsType
_ <- tableMap & v_data %%~ setScores
. Map.mapWithKey ngramsElementFromRepo
pure $ 1
where
ngramsType = ngramsTypeFromTabType tabType
setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
setScores table = do
let ngrams_terms = table ^.. each . ne_ngrams
occurrences <- getOccByNgramsOnlyFast' nId
listId
ngramsType
ngrams_terms
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
pure $ table & each %~ setOcc
-- APIs
-- TODO: find a better place for the code above, All APIs stay here
data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
deriving (Generic, Enum, Bounded, Read, Show)
instance FromHttpApiData OrderBy
where
parseUrlPiece "TermAsc" = pure TermAsc
parseUrlPiece "TermDesc" = pure TermDesc
parseUrlPiece "ScoreAsc" = pure ScoreAsc
parseUrlPiece "ScoreDesc" = pure ScoreDesc
parseUrlPiece _ = Left "Unexpected value of OrderBy"
instance ToParamSchema OrderBy
instance FromJSON OrderBy
instance ToJSON OrderBy
instance ToSchema OrderBy
instance Arbitrary OrderBy
where
arbitrary = elements [minBound..maxBound]
needsScores :: Maybe OrderBy -> Bool
needsScores (Just ScoreAsc) = True
needsScores (Just ScoreDesc) = True
needsScores _ = False
type TableNgramsApiGet = Summary " Table Ngrams API Get"
:> QueryParamR "ngramsType" TabType
:> QueryParamR "list" ListId
:> QueryParamR "limit" Limit
:> QueryParam "offset" Offset
:> QueryParam "listType" ListType
:> QueryParam "minTermSize" MinSize
:> QueryParam "maxTermSize" MaxSize
:> QueryParam "orderBy" OrderBy
:> QueryParam "search" Text
:> Get '[JSON] (VersionedWithCount NgramsTable)
type TableNgramsApiPut = Summary " Table Ngrams API Change"
:> QueryParamR "ngramsType" TabType
:> QueryParamR "list" ListId
:> ReqBody '[JSON] (Versioned NgramsTablePatch)
:> Put '[JSON] (Versioned NgramsTablePatch)
type RecomputeScoresNgramsApiGet = Summary " Recompute scores for ngrams table"
:> QueryParamR "ngramsType" TabType
:> QueryParamR "list" ListId
:> "recompute" :> Post '[JSON] Int
type TableNgramsApiGetVersion = Summary " Table Ngrams API Get Version"
:> QueryParamR "ngramsType" TabType
:> QueryParamR "list" ListId
:> Get '[JSON] Version
type TableNgramsApi = TableNgramsApiGet
:<|> TableNgramsApiPut
:<|> RecomputeScoresNgramsApiGet
:<|> "version" :> TableNgramsApiGetVersion
:<|> TableNgramsAsyncApi
type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
:> "async"
:> "charts"
:> "update"
:> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
=> NodeId
-> TabType
-> ListId
-> Limit
-> Maybe Offset
-> Maybe ListType
-> Maybe MinSize -> Maybe MaxSize
-> Maybe OrderBy
-> Maybe Text -- full text search
-> m (VersionedWithCount NgramsTable)
getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
where
searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf mt nt
getTableNgramsVersion :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
=> NodeId
-> TabType
-> ListId
-> m Version
getTableNgramsVersion _nId _tabType listId = currentVersion listId
-- TODO: limit?
-- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
-- The line above looks like a waste of computation just to get the version.
-- See the comment about listNgramsChangedSince.
-- | Text search is deactivated for now in the ngrams-by-document view.
getTableNgramsDoc :: (HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
=> DocId -> TabType
-> ListId -> Limit -> Maybe Offset
-> Maybe ListType
-> Maybe MinSize -> Maybe MaxSize
-> Maybe OrderBy
-> Maybe Text -- full text search
-> m (VersionedWithCount NgramsTable)
getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orderBy _mt = do
ns <- selectNodesWithUsername NodeList userMaster
let ngramsType = ngramsTypeFromTabType tabType
ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
let searchQuery (NgramsTerm nt) = flip S.member (S.fromList ngs) nt
getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
apiNgramsTableCorpus :: ( GargServerC env err m
)
=> NodeId -> ServerT TableNgramsApi m
apiNgramsTableCorpus cId = getTableNgramsCorpus cId
:<|> tableNgramsPut
:<|> scoresRecomputeTableNgrams cId
:<|> getTableNgramsVersion cId
:<|> apiNgramsAsync cId
apiNgramsTableDoc :: ( GargServerC env err m
)
=> DocId -> ServerT TableNgramsApi m
apiNgramsTableDoc dId = getTableNgramsDoc dId
:<|> tableNgramsPut
:<|> scoresRecomputeTableNgrams dId
:<|> getTableNgramsVersion dId
:<|> apiNgramsAsync dId
apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
apiNgramsAsync _dId =
serveJobsAPI $
JobFunction $ \i log ->
let
log' x = do
printDebug "tableNgramsPostChartsAsync" x
liftBase $ log x
in tableNgramsPostChartsAsync i log'
-- Did the given list of ngrams change since the given version?
-- The returned value is a versioned boolean value, meaning that one always retrieves the
-- latest version.
-- If the given version is negative then one simply receives the latest version and True.
-- Using this function is more precise than simply comparing the latest version number
-- with the local version number: there might be no change to this particular list
-- even though the version number has changed because of other lists.
--
-- Here the added value is to strike a compromise between precision, computation, and bandwidth:
-- * currentVersion: good computation, good bandwidth, bad precision.
-- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
-- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
listNgramsChangedSince :: HasNodeStory env err m
=> ListId -> TableNgrams.NgramsType -> Version -> m (Versioned Bool)
listNgramsChangedSince listId ngramsType version
| version < 0 =
Versioned <$> currentVersion listId <*> pure True
| otherwise =
tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty)
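-- A minimal usage sketch (illustrative only, not part of the module API;
-- `checkListChanged` and `localVersion` are invented names):
{-
checkListChanged :: HasNodeStory env err m
                 => ListId -> TableNgrams.NgramsType -> Version -> m ()
checkListChanged listId ngramsType localVersion = do
  Versioned latest flag <- listNgramsChangedSince listId ngramsType localVersion
  printDebug "listNgramsChangedSince" (latest, flag)
-}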
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Ngrams/ 0000775 0000000 0000000 00000000000 14124644201 0027104 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Ngrams/List.hs 0000664 0000000 0000000 00000024264 14124644201 0030363 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Ngrams.List
Description : Get Ngrams (lists)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Ngrams.List
where
import Control.Lens hiding (elements, Indexed)
import Data.Aeson
import Data.Either (Either(..))
import Data.HashMap.Strict (HashMap)
import Data.Map (Map, toList, fromList)
import Data.Maybe (catMaybes)
import Data.Set (Set)
import Data.Text (Text, concat, pack)
import Data.Vector (Vector)
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams)
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node
import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude
import Network.HTTP.Media ((//), (/:))
import Servant
import Servant.Job.Async
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Csv as Csv
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Text as Text
import qualified Data.Vector as Vec
import qualified Prelude as Prelude
import qualified Protolude as P
------------------------------------------------------------------------
-- | TODO refactor
{-
type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|> PostAPI
:<|> CSVPostAPI
api :: ListId -> GargServer API
api l = get l :<|> postAsync l :<|> csvPostAsync l
-}
----------------------
type GETAPI = Summary "Get List"
:> "lists"
:> Capture "listId" ListId
:> Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
getApi :: GargServer GETAPI
getApi = get
data HTML
instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8")
instance ToJSON a => MimeRender HTML a where
mimeRender _ = encode
----------------------
type JSONAPI = Summary "Update List"
:> "lists"
:> Capture "listId" ListId
:> "add"
:> "form"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog
jsonApi :: GargServer JSONAPI
jsonApi = postAsync
----------------------
type CSVAPI = Summary "Update List (legacy v3 CSV)"
:> "lists"
:> Capture "listId" ListId
:> "csv"
:> "add"
:> "form"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] WithTextFile JobLog
csvApi :: GargServer CSVAPI
csvApi = csvPostAsync
------------------------------------------------------------------------
get :: HasNodeStory env err m =>
ListId -> m (Headers '[Header "Content-Disposition" Text] NgramsList)
get lId = do
lst <- get' lId
let (NodeId id') = lId
return $ addHeader (concat [ "attachment; filename=GarganText_NgramsList-"
, pack $ show id'
, ".json"
]
) lst
get' :: HasNodeStory env err m
=> ListId -> m NgramsList
get' lId = fromList
<$> zip ngramsTypes
<$> mapM (getNgramsTableMap lId) ngramsTypes
------------------------------------------------------------------------
-- TODO : purge list
-- TODO talk
post :: FlowCmdM env err m
=> ListId
-> NgramsList
-> m Bool
post l m = do
-- TODO check with Version for optim
printDebug "New list as file" l
_ <- mapM (\(nt, Versioned _v ns) -> setListNgrams l nt ns) $ toList m
-- TODO reindex
pure True
------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here)
reIndexWith :: ( HasNodeStory env err m
, FlowCmdM env err m
)
=> CorpusId
-> ListId
-> NgramsType
-> Set ListType
-> m ()
reIndexWith cId lId nt lts = do
-- Getting [NgramsTerm]
ts <- List.concat
<$> map (\(k,vs) -> k:vs)
<$> HashMap.toList
<$> getTermsWith identity [lId] nt lts
-- printDebug "ts" ts
  -- Keeping only the ngrams with at most one occurrence (orphans)
occs <- getOccByNgramsOnlyFast' cId lId nt ts
-- printDebug "occs" occs
let orphans = List.concat
$ map (\t -> case HashMap.lookup t occs of
Nothing -> [t]
Just n -> if n <= 1 then [t] else [ ]
) ts
-- printDebug "orphans" orphans
-- Get all documents of the corpus
docs <- selectDocNodes cId
-- printDebug "docs length" (List.length docs)
-- Checking Text documents where orphans match
-- TODO Tests here
let
ngramsByDoc = map (HashMap.fromList)
$ map (map (\(k,v) -> (SimpleNgrams (text2ngrams k), v)))
$ map (\doc -> List.zip
(termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans)
$ Text.unlines $ catMaybes
[ doc ^. node_hyperdata . hd_title
, doc ^. node_hyperdata . hd_abstract
]
)
(List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. node_id) 1 )]])
) docs
-- printDebug "ngramsByDoc" ngramsByDoc
-- Saving the indexation in database
_ <- mapM (saveDocNgramsWith lId) ngramsByDoc
pure () -- ngramsByDoc
toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams)
toIndexedNgrams m t = Indexed <$> i <*> n
where
i = HashMap.lookup t m
n = Just (text2ngrams t)
------------------------------------------------------------------------
type PostAPI = Summary "Update List"
:> "add"
:> "form"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog
postAsync :: GargServer JSONAPI
postAsync lId =
serveJobsAPI $
JobFunction (\f log' ->
let
log'' x = do
printDebug "postAsync ListId" x
liftBase $ log' x
in postAsync' lId f log'')
postAsync' :: FlowCmdM env err m
=> ListId
-> WithFile
-> (JobLog -> m ())
-> m JobLog
postAsync' l (WithFile _ m _) logStatus = do
logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
printDebug "New list as file" l
_ <- post l m
-- printDebug "Done" r
pure JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
------------------------------------------------------------------------
type CSVPostAPI = Summary "Update List (legacy v3 CSV)"
:> "csv"
:> "add"
:> "form"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog
readCsvText :: Text -> [(Text, Text, Text)]
readCsvText t = case eDec of
Left _ -> []
Right dec -> Vec.toList dec
where
lt = BSL.fromStrict $ P.encodeUtf8 t
eDec = Csv.decodeWith
(Csv.defaultDecodeOptions { Csv.decDelimiter = fromIntegral (P.ord '\t') })
Csv.HasHeader lt :: Either Prelude.String (Vector (Text, Text, Text))
parseCsvData :: [(Text, Text, Text)] -> Map NgramsTerm NgramsRepoElement
parseCsvData lst = Map.fromList $ conv <$> lst
where
conv (_status, label, _forms) =
(NgramsTerm label, NgramsRepoElement { _nre_size = 1
, _nre_list = CandidateTerm
, _nre_root = Nothing
, _nre_parent = Nothing
, _nre_children = MSet Map.empty })
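-- A hedged worked example (the input below is invented for illustration):
-- given a tab-separated text whose first line is a header,
--   readCsvText "status\tlabel\tforms\nmap\tabelia grandiflora\t"
--     == [("map", "abelia grandiflora", "")]
-- and parseCsvData on that result yields one CandidateTerm entry keyed by
-- NgramsTerm "abelia grandiflora" (the status and forms columns are ignored).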
csvPost :: FlowCmdM env err m
=> ListId
-> Text
-> m Bool
csvPost l m = do
printDebug "[csvPost] l" l
-- printDebug "[csvPost] m" m
-- status label forms
let lst = readCsvText m
let p = parseCsvData lst
--printDebug "[csvPost] lst" lst
printDebug "[csvPost] p" p
_ <- setListNgrams l NgramsTerms p
pure True
------------------------------------------------------------------------
csvPostAsync :: GargServer CSVAPI
csvPostAsync lId =
serveJobsAPI $
JobFunction $ \f@(WithTextFile ft _ n) log' -> do
let log'' x = do
printDebug "[csvPostAsync] filetype" ft
printDebug "[csvPostAsync] name" n
liftBase $ log' x
csvPostAsync' lId f log''
csvPostAsync' :: FlowCmdM env err m
=> ListId
-> WithTextFile
-> (JobLog -> m ())
-> m JobLog
csvPostAsync' l (WithTextFile _ m _) logStatus = do
logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
_r <- csvPost l m
pure JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Ngrams/List/ 0000775 0000000 0000000 00000000000 14124644201 0030017 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Ngrams/List/Types.hs 0000664 0000000 0000000 00000003164 14124644201 0031463 0 ustar 00root root 0000000 0000000 module Gargantext.API.Ngrams.List.Types where
--{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
--import Control.Lens hiding (elements, Indexed)
import Data.Aeson
import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
import Data.Text
import Servant.Job.Utils (jsonOptions)
import Web.FormUrlEncoded (FromForm)
import Protolude
import Gargantext.API.Ngrams.Types (NgramsList)
import Gargantext.API.Node.Corpus.New.File (FileType(..))
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
------------------------------------------------------------------------
data WithFile = WithFile
{ _wf_filetype :: !FileType
, _wf_data :: !NgramsList
, _wf_name :: !Text
} deriving (Eq, Show, Generic)
--makeLenses ''WithFile
instance FromForm WithFile
instance FromJSON WithFile where
parseJSON = genericParseJSON $ jsonOptions "_wf_"
instance ToJSON WithFile where
toJSON = genericToJSON $ jsonOptions "_wf_"
instance ToSchema WithFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
------------------------------------------------------------------------
data WithTextFile = WithTextFile
{ _wtf_filetype :: !FileType
, _wtf_data :: !Text
, _wtf_name :: !Text
} deriving (Eq, Show, Generic)
--makeLenses ''WithTextFile
instance FromForm WithTextFile
instance FromJSON WithTextFile where
parseJSON = genericParseJSON $ jsonOptions "_wtf_"
instance ToJSON WithTextFile where
toJSON = genericToJSON $ jsonOptions "_wtf_"
instance ToSchema WithTextFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wtf_")
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Ngrams/NgramsTree.hs 0000664 0000000 0000000 00000004620 14124644201 0031511 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Ngrams.NgramsTree
Description : Tree of Ngrams
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Ngrams.NgramsTree
where
import Data.Aeson.TH (deriveJSON)
import Data.HashMap.Strict (HashMap)
import Data.Maybe (catMaybes)
import Data.Set (Set)
import Data.Swagger
import Data.Text (Text)
import Data.Tree
import GHC.Generics (Generic)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (ListType(..), NodeId)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Prelude
import Test.QuickCheck
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Set as Set
type Children = Text
type Root = Text
data NgramsTree = NgramsTree { mt_label :: Text
, mt_value :: Double
, mt_children :: [NgramsTree]
}
deriving (Generic, Show)
toNgramsTree :: Tree (NgramsTerm,Double) -> NgramsTree
toNgramsTree (Node (NgramsTerm l,v) xs) = NgramsTree l v (map toNgramsTree xs)
deriveJSON (unPrefix "mt_") ''NgramsTree
instance ToSchema NgramsTree where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "mt_")
instance Arbitrary NgramsTree
where
arbitrary = NgramsTree <$> arbitrary <*> arbitrary <*> arbitrary
toTree :: ListType
-> HashMap NgramsTerm (Set NodeId)
-> HashMap NgramsTerm NgramsRepoElement
-> [NgramsTree]
toTree lt vs m = map toNgramsTree $ unfoldForest buildNode roots
where
buildNode r = maybe ((r, value r),[])
(\x -> ((r, value r), mSetToList $ _nre_children x))
(HashMap.lookup r m)
value l = maybe 0 (fromIntegral . Set.size) $ HashMap.lookup l vs
rootsCandidates :: [NgramsTerm]
rootsCandidates = catMaybes
$ List.nub
$ map (\(c, c') -> case _nre_root c' of
Nothing -> Just c
_ -> _nre_root c'
) (HashMap.toList m)
roots = map fst
$ filter (\(_,l) -> l == lt)
$ catMaybes
$ map (\c -> (,) <$> Just c <*> (_nre_list <$> HashMap.lookup c m))
$ rootsCandidates
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Ngrams/Prelude.hs 0000664 0000000 0000000 00000004552 14124644201 0031046 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Ngrams.Prelude
Description : Tools to manage Ngrams Elements (from the API)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.API.Ngrams.Prelude
where
import Data.Maybe (catMaybes)
import Control.Lens (view)
import Data.Hashable (Hashable)
import Data.Validity
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (ListType)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.Context (TermList)
import qualified Data.HashMap.Strict as HM
import qualified Data.Map.Strict as Map
import qualified Data.List as List
import qualified Data.Text as Text
------------------------------------------------------------------------
-- | Tools
-- Usage example: toTermList MapTerm NgramsTerms ngramsList
toTermList :: ListType -> NgramsType -> NgramsList -> Maybe TermList
toTermList lt nt nl = toTermList' lt <$> Map.lookup nt nl
where
toTermList' :: ListType -> Versioned NgramsTableMap -> TermList
toTermList' lt' = (toTermList'' lt') . Map.toList . view v_data
toTermList'' :: ListType -> [(NgramsTerm, NgramsRepoElement)] -> TermList
toTermList'' lt'' ns = Map.toList
$ Map.mapKeys toTerm
$ Map.fromListWith (<>) (roots' <> children')
where
toTerm = Text.splitOn " " . unNgramsTerm
(roots, children) = List.partition (\(_t, nre) -> view nre_root nre == Nothing)
$ List.filter (\(_t,nre) -> view nre_list nre == lt'') ns
roots' = map (\(t,nre) -> (t, map toTerm $ unMSet $ view nre_children nre )) roots
children' = catMaybes
$ map (\(t,nre) -> (,) <$> view nre_root nre
<*> Just (map toTerm $ [t]
<> (unMSet $ view nre_children nre)
)
) children
------------------------------------------
patchMSet_toList :: (Ord a, Hashable a) => PatchMSet a -> [(a,AddRem)]
patchMSet_toList = HM.toList . unPatchMapToHashMap . unPatchMSet
unMSet :: MSet a -> [a]
unMSet (MSet a) = Map.keys a
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Ngrams/Tools.hs 0000664 0000000 0000000 00000014243 14124644201 0030544 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Ngrams.Tools
Description : Tools to manage Ngrams Elements (from the API)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.API.Ngrams.Tools
where
import Control.Concurrent
import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
import Control.Monad.Reader
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Set (Set)
import Data.Validity
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (ListType(..), NodeId, ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import Gargantext.Core.NodeStory
mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
mergeNgramsElement _neOld neNew = neNew
type RootTerm = NgramsTerm
getRepo :: RepoCmdM env err m => m NgramsRepo
getRepo = do
v <- view repoVar
liftBase $ readMVar v
getRepo' :: HasNodeStory env err m
=> [ListId] -> m NodeListStory
getRepo' listIds = do
f <- getNodeListStory
v <- liftBase $ f listIds
v' <- liftBase $ readMVar v
pure $ v'
getNodeStoryVar :: HasNodeStory env err m
=> [ListId] -> m (MVar NodeListStory)
getNodeStoryVar l = do
f <- getNodeListStory
v <- liftBase $ f l
pure v
getNodeListStory :: HasNodeStory env err m
=> m ([NodeId] -> IO (MVar NodeListStory))
getNodeListStory = do
env <- view hasNodeStory
pure $ view nse_getter env
listNgramsFromRepo :: [ListId]
-> NgramsType
-> NodeListStory
-> HashMap NgramsTerm NgramsRepoElement
listNgramsFromRepo nodeIds ngramsType repo =
HM.fromList $ Map.toList
$ Map.unionsWith mergeNgramsElement ngrams
where
ngrams = [ repo
^. unNodeStory
. at nodeId . _Just
. a_state
. at ngramsType . _Just
| nodeId <- nodeIds
]
-- TODO-ACCESS: We want to do the security check before entering here.
-- Adding a static capability parameter would be nice.
-- Ideally this is the access to `repoVar` which needs to
-- be properly guarded.
getListNgrams :: HasNodeStory env err m
=> [ListId] -> NgramsType
-> m (HashMap NgramsTerm NgramsRepoElement)
getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType
<$> getRepo' nodeIds
getTermsWith :: (HasNodeStory env err m, Eq a, Hashable a)
=> (NgramsTerm -> a) -> [ListId]
-> NgramsType -> Set ListType
-> m (HashMap a [a])
getTermsWith f ls ngt lts = HM.fromListWith (<>)
<$> map toTreeWith
<$> HM.toList
<$> HM.filter (\f' -> Set.member (fst f') lts)
<$> mapTermListRoot ls ngt
<$> getRepo' ls
where
toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
Nothing -> (f t, [])
Just r -> (f r, [f t])
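-- A hedged example (invented repo content; `listId` is an illustrative value):
-- if the lists group "dogs" under the root "dog" and both are MapTerm, then
--   getTermsWith unNgramsTerm [listId] NgramsTerms (Set.singleton MapTerm)
-- may return HM.fromList [("dog", ["dogs"])], i.e. each root mapped to its children.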
mapTermListRoot :: [ListId]
-> NgramsType
-> NodeListStory
-> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
mapTermListRoot nodeIds ngramsType repo =
(\nre -> (_nre_list nre, _nre_root nre))
<$> listNgramsFromRepo nodeIds ngramsType repo
filterListWithRootHashMap :: ListType
-> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
-> HashMap NgramsTerm (Maybe RootTerm)
filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
where
isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> l == lt
Just r -> case HM.lookup r m of
Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
Just (l',_) -> l' == lt
filterListWithRoot :: ListType
-> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
-> HashMap NgramsTerm (Maybe RootTerm)
filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
where
isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> l == lt
Just r -> case HM.lookup r m of
Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
Just (l',_) -> l' == lt
groupNodesByNgrams :: ( At root_map
, Index root_map ~ NgramsTerm
, IxValue root_map ~ Maybe RootTerm
)
=> root_map
-> HashMap NgramsTerm (Set NodeId)
-> HashMap NgramsTerm (Set NodeId)
groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
where
occs' = map toSyn (HM.toList occs)
toSyn (t,ns) = case syn ^. at t of
Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
Just r -> case r of
Nothing -> (t, ns)
Just r' -> (r',ns)
data Diagonal = Diagonal Bool
getCoocByNgrams :: Diagonal
-> HashMap NgramsTerm (Set NodeId)
-> HashMap (NgramsTerm, NgramsTerm) Int
getCoocByNgrams = getCoocByNgrams' identity
getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
=> (b -> Set c)
-> Diagonal
-> HashMap a b
-> HashMap (a, a) Int
getCoocByNgrams' f (Diagonal diag) m =
HM.fromList [( (t1,t2)
, maybe 0 Set.size $ Set.intersection
<$> (fmap f $ HM.lookup t1 m)
<*> (fmap f $ HM.lookup t2 m)
)
| (t1,t2) <- if diag then
[ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
-- more efficient to enumerate all the y <= x.
else
listToCombi identity ks
]
where ks = HM.keys m
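-- A hedged worked example (values invented for illustration): with
--   m = HM.fromList [ (NgramsTerm "a", Set.fromList [NodeId 1, NodeId 2])
--                   , (NgramsTerm "b", Set.fromList [NodeId 2, NodeId 3]) ]
-- getCoocByNgrams (Diagonal False) m contains a single entry pairing "a" and
-- "b" with count 1 (only NodeId 2 is shared); with Diagonal True the self
-- pairs ("a","a") and ("b","b") are counted as well (2 each).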
------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Ngrams/Types.hs 0000664 0000000 0000000 00000066273 14124644201 0030562 0 ustar 00root root 0000000 0000000 -- |
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
{-# OPTIONS -fno-warn-orphans #-}
module Gargantext.API.Ngrams.Types where
import Codec.Serialise (Serialise())
import Control.Category ((>>>))
import Control.Concurrent
import Control.Lens (makeLenses, makePrisms, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^?), (%~), (.~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~), Getter)
import Control.Monad.State
import Data.Aeson hiding ((.=))
import Data.Aeson.TH (deriveJSON)
import Data.Either (Either(..))
import Data.Foldable
import Data.Hashable (Hashable)
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import Data.Monoid
import Data.Patch.Class (Replace, replace, Action(act), Group, Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, MaybePatch(Mod), unMod, old, new)
import Data.Set (Set)
import Data.String (IsString, fromString)
import Data.Swagger hiding (version, patch)
import Data.Text (Text, pack, strip)
import Data.Validity
import Database.PostgreSQL.Simple.FromField (FromField, fromField, ResultError(ConversionFailed), returnError)
import GHC.Generics (Generic)
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..), ListId, NodeId, TODO)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM')
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (IsHashable(..))
import Protolude (maybeToEither)
import Servant hiding (Patch)
import Servant.Job.Utils (jsonOptions)
import System.FileLock (FileLock)
import Test.QuickCheck (elements, frequency)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Map.Strict.Patch as PM
import qualified Data.Set as Set
import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
------------------------------------------------------------------------
type QueryParamR = QueryParam' '[Required, Strict]
------------------------------------------------------------------------
--data FacetFormat = Table | Chart
data TabType = Docs | Trash | MoreFav | MoreTrash
| Terms | Sources | Authors | Institutes
| Contacts
deriving (Bounded, Enum, Eq, Generic, Ord, Show)
instance Hashable TabType
instance FromHttpApiData TabType
where
parseUrlPiece "Docs" = pure Docs
parseUrlPiece "Trash" = pure Trash
parseUrlPiece "MoreFav" = pure MoreFav
parseUrlPiece "MoreTrash" = pure MoreTrash
parseUrlPiece "Terms" = pure Terms
parseUrlPiece "Sources" = pure Sources
parseUrlPiece "Institutes" = pure Institutes
parseUrlPiece "Authors" = pure Authors
parseUrlPiece "Contacts" = pure Contacts
parseUrlPiece _ = Left "Unexpected value of TabType"
instance ToParamSchema TabType
instance ToJSON TabType
instance FromJSON TabType
instance ToSchema TabType
instance Arbitrary TabType where
arbitrary = elements [minBound .. maxBound]
instance FromJSONKey TabType where
fromJSONKey = genericFromJSONKey defaultJSONKeyOptions
instance ToJSONKey TabType where
toJSONKey = genericToJSONKey defaultJSONKeyOptions
newtype MSet a = MSet (Map a ())
deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
instance ToJSON a => ToJSON (MSet a) where
toJSON (MSet m) = toJSON (Map.keys m)
toEncoding (MSet m) = toEncoding (Map.keys m)
mSetFromSet :: Set a -> MSet a
mSetFromSet = MSet . Map.fromSet (const ())
mSetFromList :: Ord a => [a] -> MSet a
mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
-- mSetToSet :: Ord a => MSet a -> Set a
-- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
mSetToSet :: Ord a => MSet a -> Set a
mSetToSet = Set.fromList . mSetToList
mSetToList :: MSet a -> [a]
mSetToList (MSet a) = Map.keys a
instance Foldable MSet where
foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
instance (Ord a, FromJSON a) => FromJSON (MSet a) where
parseJSON = fmap mSetFromList . parseJSON
instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
-- TODO
declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
------------------------------------------------------------------------
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable)
instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t
instance Monoid NgramsTerm where
mempty = NgramsTerm ""
instance FromJSONKey NgramsTerm where
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t
instance IsString NgramsTerm where
fromString s = NgramsTerm $ pack s
instance FromField NgramsTerm
where
fromField field mb = do
v <- fromField field mb
case fromJSON v of
Success a -> pure $ NgramsTerm $ strip a
Error _err -> returnError ConversionFailed field
$ List.intercalate " " [ "cannot parse hyperdata for JSON: "
, show v
]
data RootParent = RootParent
{ _rp_root :: NgramsTerm
, _rp_parent :: NgramsTerm
}
deriving (Ord, Eq, Show, Generic)
deriveJSON (unPrefix "_rp_") ''RootParent
makeLenses ''RootParent
data NgramsRepoElement = NgramsRepoElement
{ _nre_size :: !Int
, _nre_list :: !ListType
, _nre_root :: !(Maybe NgramsTerm)
, _nre_parent :: !(Maybe NgramsTerm)
, _nre_children :: !(MSet NgramsTerm)
}
deriving (Ord, Eq, Show, Generic)
deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
-- TODO
-- if ngrams & not size => size
-- drop occurrences
makeLenses ''NgramsRepoElement
instance ToSchema NgramsRepoElement where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
instance Serialise (MSet NgramsTerm)
instance Serialise NgramsRepoElement
data NgramsElement =
NgramsElement { _ne_ngrams :: NgramsTerm
, _ne_size :: Int
, _ne_list :: ListType
, _ne_occurrences :: Int
, _ne_root :: Maybe NgramsTerm
, _ne_parent :: Maybe NgramsTerm
, _ne_children :: MSet NgramsTerm
}
deriving (Ord, Eq, Show, Generic)
deriveJSON (unPrefix "_ne_") ''NgramsElement
makeLenses ''NgramsElement
mkNgramsElement :: NgramsTerm
-> ListType
-> Maybe RootParent
-> MSet NgramsTerm
-> NgramsElement
mkNgramsElement ngrams list rp children =
NgramsElement ngrams (size (unNgramsTerm ngrams)) list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
newNgramsElement mayList ngrams =
mkNgramsElement ngrams (fromMaybe MapTerm mayList) Nothing mempty
instance ToSchema NgramsElement where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
instance Arbitrary NgramsElement where
arbitrary = elements [newNgramsElement Nothing "sport"]
------------------------------------------------------------------------
newtype NgramsTable = NgramsTable [NgramsElement]
deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
-- type NgramsList = NgramsTable
makePrisms ''NgramsTable
-- | Question: why this repetition of types in this instance?
-- Could you document it, please?
instance Each NgramsTable NgramsTable NgramsElement NgramsElement where
each = _NgramsTable . each
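-- Answer sketch: `Each s t a b` allows type-changing traversals; repeating
-- NgramsTable and NgramsElement pins this traversal to be type-preserving.
-- A hedged usage example (mockTable is defined further below):
--   (each . ne_list .~ CandidateTerm) mockTable :: NgramsTable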
-- TODO discuss
-- | TODO Check N and Weight
{-
toNgramsElement :: [NgramsTableData] -> [NgramsElement]
toNgramsElement ns = map toNgramsElement' ns
where
toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
where
p' = case p of
Nothing -> Nothing
Just x -> lookup x mapParent
c' = maybe mempty identity $ lookup t mapChildren
lt' = maybe (panic "API.Ngrams: listypeId") identity lt
mapParent :: Map Int Text
mapParent = Map.fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
mapChildren :: Map Text (Set Text)
mapChildren = Map.mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
$ Map.fromListWith (<>)
$ map (first fromJust)
$ filter (isJust . fst)
$ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
-}
mockTable :: NgramsTable
mockTable = NgramsTable
[ mkNgramsElement "animal" MapTerm Nothing (mSetFromList ["dog", "cat"])
, mkNgramsElement "cat" MapTerm (rp "animal") mempty
, mkNgramsElement "cats" StopTerm Nothing mempty
, mkNgramsElement "dog" MapTerm (rp "animal") (mSetFromList ["dogs"])
, mkNgramsElement "dogs" StopTerm (rp "dog") mempty
, mkNgramsElement "fox" MapTerm Nothing mempty
, mkNgramsElement "object" CandidateTerm Nothing mempty
, mkNgramsElement "nothing" StopTerm Nothing mempty
, mkNgramsElement "organic" MapTerm Nothing (mSetFromList ["flower"])
, mkNgramsElement "flower" MapTerm (rp "organic") mempty
, mkNgramsElement "moon" CandidateTerm Nothing mempty
, mkNgramsElement "sky" StopTerm Nothing mempty
]
where
rp n = Just $ RootParent n n
instance Arbitrary NgramsTable where
arbitrary = pure mockTable
instance ToSchema NgramsTable
------------------------------------------------------------------------
type NgramsTableMap = Map NgramsTerm NgramsRepoElement
------------------------------------------------------------------------
-- On the Client side:
--data Action = InGroup NgramsId NgramsId
-- | OutGroup NgramsId NgramsId
-- | SetListType NgramsId ListType
data PatchSet a = PatchSet
{ _rem :: Set a
, _add :: Set a
}
deriving (Eq, Ord, Show, Generic)
makeLenses ''PatchSet
makePrisms ''PatchSet
instance ToJSON a => ToJSON (PatchSet a) where
toJSON = genericToJSON $ unPrefix "_"
toEncoding = genericToEncoding $ unPrefix "_"
instance (Ord a, FromJSON a) => FromJSON (PatchSet a) where
parseJSON = genericParseJSON $ unPrefix "_"
{-
instance (Ord a, Arbitrary a) => Arbitrary (PatchSet a) where
arbitrary = PatchSet <$> arbitrary <*> arbitrary
type instance Patched (PatchSet a) = Set a
type ConflictResolutionPatchSet a = SimpleConflictResolution' (Set a)
type instance ConflictResolution (PatchSet a) = ConflictResolutionPatchSet a
instance Ord a => Semigroup (PatchSet a) where
p <> q = PatchSet { _rem = (q ^. rem) `Set.difference` (p ^. add) <> p ^. rem
, _add = (q ^. add) `Set.difference` (p ^. rem) <> p ^. add
} -- TODO Review
instance Ord a => Monoid (PatchSet a) where
mempty = PatchSet mempty mempty
instance Ord a => Group (PatchSet a) where
invert (PatchSet r a) = PatchSet a r
instance Ord a => Composable (PatchSet a) where
composable _ _ = undefined
instance Ord a => Action (PatchSet a) (Set a) where
act p source = (source `Set.difference` (p ^. rem)) <> p ^. add
instance Applicable (PatchSet a) (Set a) where
applicable _ _ = mempty
instance Ord a => Validity (PatchSet a) where
validate p = check (Set.disjoint (p ^. rem) (p ^. add)) "_rem and _add should be dijoint"
instance Ord a => Transformable (PatchSet a) where
transformable = undefined
conflicts _p _q = undefined
transformWith conflict p q = undefined conflict p q
instance ToSchema a => ToSchema (PatchSet a)
-}
type AddRem = Replace (Maybe ())
instance Serialise AddRem
remPatch, addPatch :: AddRem
remPatch = replace (Just ()) Nothing
addPatch = replace Nothing (Just ())
isRem :: Replace (Maybe ()) -> Bool
isRem = (== remPatch)
type PatchMap = PM.PatchMap
newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
deriving (Eq, Show, Generic, Validity, Semigroup, Monoid, Group,
Transformable, Composable)
unPatchMSet :: PatchMSet a -> PatchMap a AddRem
unPatchMSet (PatchMSet a) = a
type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
instance (Serialise a, Ord a) => Serialise (PatchMap a AddRem)
instance (Serialise a, Ord a) => Serialise (PatchMSet a)
-- TODO this breaks module abstraction
makePrisms ''PM.PatchMap
makePrisms ''PatchMSet
_PatchMSetIso :: Ord a => Iso' (PatchMSet a) (PatchSet a)
_PatchMSetIso = _PatchMSet . _PatchMap . iso f g . from _PatchSet
where
f :: Ord a => Map a (Replace (Maybe ())) -> (Set a, Set a)
f = Map.partition isRem >>> both %~ Map.keysSet
g :: Ord a => (Set a, Set a) -> Map a (Replace (Maybe ()))
g (rems, adds) = Map.fromSet (const remPatch) rems
<> Map.fromSet (const addPatch) adds
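-- A hedged example (invented values): a PatchMSet that removes "a" and adds "b"
-- views through _PatchMSetIso as PatchSet { _rem = Set.fromList ["a"]
--                                         , _add = Set.fromList ["b"] }.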
instance Ord a => Action (PatchMSet a) (MSet a) where
act (PatchMSet p) (MSet m) = MSet $ act p m
instance Ord a => Applicable (PatchMSet a) (MSet a) where
applicable (PatchMSet p) (MSet m) = applicable p m
instance (Ord a, ToJSON a) => ToJSON (PatchMSet a) where
toJSON = toJSON . view _PatchMSetIso
toEncoding = toEncoding . view _PatchMSetIso
instance (Ord a, FromJSON a) => FromJSON (PatchMSet a) where
parseJSON = fmap (_PatchMSetIso #) . parseJSON
instance (Ord a, Arbitrary a) => Arbitrary (PatchMSet a) where
arbitrary = (PatchMSet . PM.fromMap) <$> arbitrary
instance ToSchema a => ToSchema (PatchMSet a) where
-- TODO
declareNamedSchema _ = wellNamedSchema "" (Proxy :: Proxy TODO)
type instance Patched (PatchMSet a) = MSet a
instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
arbitrary = uncurry replace <$> arbitrary
-- If they happen to be equal then the patch is Keep.
instance ToSchema a => ToSchema (Replace a) where
declareNamedSchema (_ :: Proxy (Replace a)) = do
-- TODO Keep constructor is not supported here.
aSchema <- declareSchemaRef (Proxy :: Proxy a)
return $ NamedSchema (Just "Replace") $ mempty
& type_ ?~ SwaggerObject
& properties .~
InsOrdHashMap.fromList
[ ("old", aSchema)
, ("new", aSchema)
]
& required .~ [ "old", "new" ]
data NgramsPatch
= NgramsPatch { _patch_children :: !(PatchMSet NgramsTerm)
, _patch_list :: !(Replace ListType) -- TODO Map UserId ListType
}
| NgramsReplace { _patch_old :: !(Maybe NgramsRepoElement)
, _patch_new :: !(Maybe NgramsRepoElement)
}
deriving (Eq, Show, Generic)
-- The JSON encoding is untagged; this is OK since the field names are disjoint and thus the encoding is unambiguous.
-- TODO: the empty object should be accepted and treated as mempty.
deriveJSON (unPrefixUntagged "_") ''NgramsPatch
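-- A hedged sketch of why the untagged encoding stays decodable (assuming the
-- derived instances are lawful); `roundTrip` is an illustrative name only:
{-
roundTrip :: NgramsPatch -> Bool
roundTrip p = decode (encode p) == Just p
-}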
makeLenses ''NgramsPatch
-- TODO: This instance is simplified since we should either have the fields children and/or list
-- or the fields old and/or new.
instance ToSchema NgramsPatch where
declareNamedSchema _ = do
childrenSch <- declareSchemaRef (Proxy :: Proxy (PatchMSet NgramsTerm))
listSch <- declareSchemaRef (Proxy :: Proxy (Replace ListType))
nreSch <- declareSchemaRef (Proxy :: Proxy NgramsRepoElement)
return $ NamedSchema (Just "NgramsPatch") $ mempty
& type_ ?~ SwaggerObject
& properties .~
InsOrdHashMap.fromList
[ ("children", childrenSch)
, ("list", listSch)
, ("old", nreSch)
, ("new", nreSch)
]
instance Arbitrary NgramsPatch where
arbitrary = frequency [ (9, NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary))
, (1, NgramsReplace <$> arbitrary <*> arbitrary)
]
instance Serialise NgramsPatch
instance Serialise (Replace ListType)
instance Serialise ListType
type NgramsPatchIso =
MaybePatch NgramsRepoElement (PairPatch (PatchMSet NgramsTerm) (Replace ListType))
_NgramsPatch :: Iso' NgramsPatch NgramsPatchIso
_NgramsPatch = iso unwrap wrap
where
unwrap (NgramsPatch c l) = Mod $ PairPatch (c, l)
unwrap (NgramsReplace o n) = replace o n
wrap x =
case unMod x of
Just (PairPatch (c, l)) -> NgramsPatch c l
Nothing -> NgramsReplace (x ^? old . _Just) (x ^? new . _Just)
instance Semigroup NgramsPatch where
p <> q = _NgramsPatch # (p ^. _NgramsPatch <> q ^. _NgramsPatch)
instance Monoid NgramsPatch where
mempty = _NgramsPatch # mempty
instance Validity NgramsPatch where
validate p = p ^. _NgramsPatch . to validate
instance Transformable NgramsPatch where
transformable p q = transformable (p ^. _NgramsPatch) (q ^. _NgramsPatch)
conflicts p q = conflicts (p ^. _NgramsPatch) (q ^. _NgramsPatch)
transformWith conflict p q = (_NgramsPatch # p', _NgramsPatch # q')
where
(p', q') = transformWith conflict (p ^. _NgramsPatch) (q ^. _NgramsPatch)
type ConflictResolutionNgramsPatch =
( ConflictResolutionReplace (Maybe NgramsRepoElement)
, ( ConflictResolutionPatchMSet NgramsTerm
, ConflictResolutionReplace ListType
)
, (Bool, Bool)
)
type instance ConflictResolution NgramsPatch =
ConflictResolutionNgramsPatch
type PatchedNgramsPatch = Maybe NgramsRepoElement
type instance Patched NgramsPatch = PatchedNgramsPatch
instance Applicable (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepoElement where
applicable (PairPatch (c, l)) n = applicable c (n ^. nre_children) <> applicable l (n ^. nre_list)
instance Action (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepoElement where
act (PairPatch (c, l)) = (nre_children %~ act c)
. (nre_list %~ act l)
instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
applicable p = applicable (p ^. _NgramsPatch)
instance Action NgramsPatch (Maybe NgramsRepoElement) where
act p = act (p ^. _NgramsPatch)
newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
instance Serialise NgramsTablePatch
instance Serialise (PatchMap NgramsTerm NgramsPatch)
instance FromField NgramsTablePatch
where
fromField = fromField'
instance FromField (PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch))
where
fromField = fromField'
type instance ConflictResolution NgramsTablePatch =
NgramsTerm -> ConflictResolutionNgramsPatch
type PatchedNgramsTablePatch = Map NgramsTerm PatchedNgramsPatch
-- ~ Patched (PatchMap NgramsTerm NgramsPatch)
type instance Patched NgramsTablePatch = PatchedNgramsTablePatch
makePrisms ''NgramsTablePatch
instance ToSchema (PatchMap NgramsTerm NgramsPatch)
instance ToSchema NgramsTablePatch
instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
applicable p = applicable (p ^. _NgramsTablePatch)
ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
ngramsElementToRepo
(NgramsElement { _ne_size = s
, _ne_list = l
, _ne_root = r
, _ne_parent = p
, _ne_children = c
}) =
NgramsRepoElement
{ _nre_size = s
, _nre_list = l
, _nre_parent = p
, _nre_root = r
, _nre_children = c
}
ngramsElementFromRepo :: NgramsTerm -> NgramsRepoElement -> NgramsElement
ngramsElementFromRepo
ngrams
(NgramsRepoElement
{ _nre_size = s
, _nre_list = l
, _nre_parent = p
, _nre_root = r
, _nre_children = c
}) =
NgramsElement { _ne_size = s
, _ne_list = l
, _ne_root = r
, _ne_parent = p
, _ne_children = c
, _ne_ngrams = ngrams
, _ne_occurrences = panic $ "API.Ngrams.Types._ne_occurrences"
{-
-- Here we could use 0 if we want to avoid any `panic`.
-- It will not happen using getTableNgrams if
-- getOccByNgramsOnly provides a count of occurrences for
-- all the ngrams given.
-}
}
reRootChildren :: NgramsTerm -> ReParent NgramsTerm
reRootChildren root ngram = do
nre <- use $ at ngram
forOf_ (_Just . nre_children . folded) nre $ \child -> do
at child . _Just . nre_root ?= root
reRootChildren root child
reParent :: Maybe RootParent -> ReParent NgramsTerm
reParent rp child = do
at child . _Just %= ( (nre_parent .~ (_rp_parent <$> rp))
. (nre_root .~ (_rp_root <$> rp))
)
reRootChildren (fromMaybe child (rp ^? _Just . rp_root)) child
reParentAddRem :: RootParent -> NgramsTerm -> ReParent AddRem
reParentAddRem rp child p =
reParent (if isRem p then Nothing else Just rp) child
reParentNgramsPatch :: NgramsTerm -> ReParent NgramsPatch
reParentNgramsPatch parent ngramsPatch = do
root_of_parent <- use (at parent . _Just . nre_root)
let
root = fromMaybe parent root_of_parent
rp = RootParent { _rp_root = root, _rp_parent = parent }
itraverse_ (reParentAddRem rp) (ngramsPatch ^. patch_children . _PatchMSet . _PatchMap)
-- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
reParentNgramsTablePatch :: ReParent NgramsTablePatch
reParentNgramsTablePatch p = itraverse_ reParentNgramsPatch (p ^. _NgramsTablePatch. _PatchMap)
-- TODO FoldableWithIndex/TraversableWithIndex for PatchMap
------------------------------------------------------------------------
instance Action NgramsTablePatch (Maybe NgramsTableMap) where
act p =
fmap (execState (reParentNgramsTablePatch p)) .
act (p ^. _NgramsTablePatch)
instance Arbitrary NgramsTablePatch where
arbitrary = NgramsTablePatch <$> PM.fromMap <$> arbitrary
-- Should it be less than a Lens', to preserve PatchMap's abstraction?
-- ntp_ngrams_patches :: Lens' NgramsTablePatch (Map NgramsTerm NgramsPatch)
-- ntp_ngrams_patches = _NgramsTablePatch . undefined
type ReParent a = forall m. MonadState NgramsTableMap m => a -> m ()
------------------------------------------------------------------------
type Version = Int
data Versioned a = Versioned
{ _v_version :: Version
, _v_data :: a
}
deriving (Generic, Show, Eq)
deriveJSON (unPrefix "_v_") ''Versioned
makeLenses ''Versioned
instance (Typeable a, ToSchema a) => ToSchema (Versioned a) where
declareNamedSchema = wellNamedSchema "_v_"
instance Arbitrary a => Arbitrary (Versioned a) where
arbitrary = Versioned 1 <$> arbitrary -- TODO 1 is constant so far
------------------------------------------------------------------------
type Count = Int
data VersionedWithCount a = VersionedWithCount
{ _vc_version :: Version
, _vc_count :: Count
, _vc_data :: a
}
deriving (Generic, Show, Eq)
deriveJSON (unPrefix "_vc_") ''VersionedWithCount
makeLenses ''VersionedWithCount
instance (Typeable a, ToSchema a) => ToSchema (VersionedWithCount a) where
declareNamedSchema = wellNamedSchema "_vc_"
instance Arbitrary a => Arbitrary (VersionedWithCount a) where
arbitrary = VersionedWithCount 1 1 <$> arbitrary -- TODO 1 is constant so far
toVersionedWithCount :: Count -> Versioned a -> VersionedWithCount a
toVersionedWithCount count (Versioned version data_) = VersionedWithCount version count data_
------------------------------------------------------------------------
-- | TOREMOVE
data Repo s p = Repo
{ _r_version :: !Version
, _r_state :: !s
, _r_history :: ![p]
-- first patch in the list is the most recent
}
deriving (Generic, Show)
-- | TO REMOVE
type NgramsRepo = Repo NgramsState NgramsStatePatch
type NgramsState = Map TableNgrams.NgramsType (Map NodeId NgramsTableMap)
type NgramsStatePatch = PatchMap TableNgrams.NgramsType (PatchMap NodeId NgramsTablePatch)
----------------------------------------------------------------------
instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
parseJSON = genericParseJSON $ unPrefix "_r_"
instance (ToJSON s, ToJSON p) => ToJSON (Repo s p) where
toJSON = genericToJSON $ unPrefix "_r_"
toEncoding = genericToEncoding $ unPrefix "_r_"
instance (Serialise s, Serialise p) => Serialise (Repo s p)
makeLenses ''Repo
initRepo :: Monoid s => Repo s p
initRepo = Repo 1 mempty []
instance Serialise (PM.PatchMap NodeId NgramsTablePatch)
instance Serialise NgramsStatePatch
initMockRepo :: NgramsRepo
initMockRepo = Repo 1 s []
where
s = Map.singleton TableNgrams.NgramsTerms
$ Map.singleton 47254
$ Map.fromList
[ (n ^. ne_ngrams, ngramsElementToRepo n) | n <- mockTable ^. _NgramsTable ]
--------------------
data RepoEnv = RepoEnv
{ _renv_var :: !(MVar NgramsRepo)
, _renv_saver :: !(IO ())
, _renv_lock :: !FileLock
}
deriving (Generic)
makeLenses ''RepoEnv
type RepoCmdM env err m =
( CmdM' env err m
, HasRepo env
, HasConnectionPool env
, HasConfig env
)
class (HasRepoVar env, HasRepoSaver env)
=> HasRepo env where
repoEnv :: Getter env RepoEnv
class HasRepoVar env where
repoVar :: Getter env (MVar NgramsRepo)
class HasRepoSaver env where
repoSaver :: Getter env (IO ())
instance HasRepo RepoEnv where
repoEnv = identity
instance HasRepoVar (MVar NgramsRepo) where
repoVar = identity
instance HasRepoVar RepoEnv where
repoVar = renv_var
instance HasRepoSaver RepoEnv where
repoSaver = renv_saver
------------------------------------------------------------------------
-- Instances
instance Arbitrary NgramsRepoElement where
arbitrary = elements $ map ngramsElementToRepo ns
where
NgramsTable ns = mockTable
instance FromHttpApiData (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
where
parseUrlPiece x = maybeToEither x (decode $ cs x)
ngramsTypeFromTabType :: TabType -> TableNgrams.NgramsType
ngramsTypeFromTabType tabType =
let here = "Garg.API.Ngrams: " :: Text in
case tabType of
Sources -> TableNgrams.Sources
Authors -> TableNgrams.Authors
Institutes -> TableNgrams.Institutes
Terms -> TableNgrams.NgramsTerms
_ -> panic $ here <> "No Ngrams for this tab"
-- TODO: This `panic` would disappear with a custom NgramsType.
----
-- Async task
data UpdateTableNgramsCharts = UpdateTableNgramsCharts
{ _utn_tab_type :: !TabType
, _utn_list_id :: !ListId
} deriving (Eq, Show, Generic)
makeLenses ''UpdateTableNgramsCharts
instance FromJSON UpdateTableNgramsCharts where
parseJSON = genericParseJSON $ jsonOptions "_utn_"
instance ToJSON UpdateTableNgramsCharts where
toJSON = genericToJSON $ jsonOptions "_utn_"
instance ToSchema UpdateTableNgramsCharts where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_utn_")
------------------------------------------------------------------------
type NgramsList = (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node.hs 0000664 0000000 0000000 00000031657 14124644201 0027112 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Node
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-- TODO-SECURITY: Critical
-- TODO-ACCESS: CanGetNode
-- TODO-EVENTS: No events as this is a read only query.
Node API
-------------------------------------------------------------------
-- TODO-ACCESS: access by admin only.
-- At first let's just have an isAdmin check.
-- Later: check userId CanDeleteNodes Nothing
-- TODO-EVENTS: DeletedNodes [NodeId]
-- {"tag": "DeletedNodes", "nodes": [Int*]}
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node
where
import Data.Aeson (FromJSON, ToJSON)
import Data.Aeson.TH (deriveJSON)
import Data.Maybe
import Data.Swagger
import Data.Text (Text())
import GHC.Generics (Generic)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Gargantext.API.Admin.Auth.Types (PathId(..))
import Gargantext.API.Admin.Auth (withAccess)
import Gargantext.API.Metrics
import Gargantext.API.Ngrams (TableNgramsApi, apiNgramsTableCorpus)
import Gargantext.API.Ngrams.Types (TabType(..))
import Gargantext.API.Node.File
import Gargantext.API.Node.FrameCalcUpload (FrameCalcUploadAPI, frameCalcUploadAPI)
import Gargantext.API.Node.New
import Gargantext.API.Prelude
import Gargantext.API.Table
import Gargantext.Core.Types (NodeTableResult)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main (Tree, NodeTree)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude -- (Cmd, CmdM)
import Gargantext.Database.Query.Facet (FacetDoc, OrderBy(..))
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Children (getChildren)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.Node.Update (Update(..), update)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Query.Table.NodeNode
import Gargantext.Database.Query.Tree (tree, TreeMode(..))
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo.Legacy.LegacyAPI (PhyloAPI, phyloAPI)
import qualified Gargantext.API.Node.DocumentsFromWriteNodes as DocumentsFromWriteNodes
import qualified Gargantext.API.Node.Share as Share
import qualified Gargantext.API.Node.Update as Update
import qualified Gargantext.API.Search as Search
import qualified Gargantext.Database.Action.Delete as Action (deleteNode)
import qualified Gargantext.Database.Query.Table.Node.Update as U (update, Update(..))
{-
import qualified Gargantext.Core.Text.List.Learn as Learn
import qualified Data.Vector as Vec
--}
-- | Admin NodesAPI
-- TODO
type NodesAPI = Delete '[JSON] Int
-- | Delete Nodes
-- Be careful: this really deletes the nodes
-- Access by admin only
nodesAPI :: [NodeId] -> GargServer NodesAPI
nodesAPI = deleteNodes
------------------------------------------------------------------------
-- | TODO-ACCESS: access by admin only.
-- At first let's just have an isAdmin check.
-- Later: CanAccessAnyNode or (CanGetAnyNode, CanPutAnyNode)
-- To manage the Users roots
-- TODO-EVENTS:
-- PutNode ?
-- TODO needs design discussion.
type Roots = Get '[JSON] [Node HyperdataUser]
:<|> Put '[JSON] Int -- TODO
-- | TODO: access by admin only
roots :: GargServer Roots
roots = getNodesWithParentId Nothing
:<|> pure (panic "not implemented yet") -- TODO use patch map to update what we need
-------------------------------------------------------------------
-- | Node API Types management
-- TODO-ACCESS : access by users
-- No ownership check is needed if we strictly follow the capability model.
--
-- CanGetNode (Node, Children, TableApi, TableNgramsApiGet, PairingApi, ChartApi,
-- SearchAPI)
-- CanRenameNode (or part of CanEditNode?)
-- CanCreateChildren (PostNodeApi)
-- CanEditNode / CanPutNode TODO not implemented yet
-- CanDeleteNode
-- CanPatch (TableNgramsApi)
-- CanFavorite
-- CanMoveToTrash
type NodeAPI a = Get '[JSON] (Node a)
:<|> "rename" :> RenameApi
:<|> PostNodeApi -- TODO move to children POST
:<|> PostNodeAsync
:<|> FrameCalcUploadAPI
:<|> ReqBody '[JSON] a :> Put '[JSON] Int
:<|> "update" :> Update.API
:<|> Delete '[JSON] Int
:<|> "children" :> ChildrenApi a
-- TODO gather it
:<|> "table" :> TableApi
:<|> "ngrams" :> TableNgramsApi
:<|> "category" :> CatApi
:<|> "score" :> ScoreApi
:<|> "search" :> (Search.API Search.SearchResult)
:<|> "share" :> Share.API
-- Pairing utilities
:<|> "pairwith" :> PairWith
:<|> "pairs" :> Pairs
:<|> "pairing" :> PairingApi
-- VIZ
:<|> "metrics" :> ScatterAPI
:<|> "chart" :> ChartApi
:<|> "pie" :> PieApi
:<|> "tree" :> TreeApi
:<|> "phylo" :> PhyloAPI
-- :<|> "add" :> NodeAddAPI
:<|> "move" :> MoveAPI
:<|> "unpublish" :> Share.Unpublish
:<|> "file" :> FileApi
:<|> "async" :> FileAsyncApi
:<|> "documents-from-write-nodes" :> DocumentsFromWriteNodes.API
-- TODO-ACCESS: check userId CanRenameNode nodeId
-- TODO-EVENTS: NodeRenamed RenameNode or re-use some more general NodeEdited...
type RenameApi = Summary " Rename Node"
:> ReqBody '[JSON] RenameNode
:> Put '[JSON] [Int]
type PostNodeApi = Summary " PostNode Node with ParentId as {id}"
:> ReqBody '[JSON] PostNode
:> Post '[JSON] [NodeId]
type ChildrenApi a = Summary " Summary children"
:> QueryParam "type" NodeType
:> QueryParam "offset" Int
:> QueryParam "limit" Int
-- :> Get '[JSON] [Node a]
:> Get '[JSON] (NodeTableResult a)
------------------------------------------------------------------------
type NodeNodeAPI a = Get '[JSON] (Node a)
nodeNodeAPI :: forall proxy a. (JSONB a, ToJSON a)
=> proxy a
-> UserId
-> CorpusId
-> NodeId
-> GargServer (NodeNodeAPI a)
nodeNodeAPI p uId cId nId = withAccess (Proxy :: Proxy (NodeNodeAPI a)) Proxy uId (PathNodeNode cId nId) nodeNodeAPI'
where
nodeNodeAPI' :: GargServer (NodeNodeAPI a)
nodeNodeAPI' = getNodeWith nId p
------------------------------------------------------------------------
-- TODO: make the NodeId type indexed by `a`, then we no longer need the proxy.
nodeAPI :: forall proxy a.
( JSONB a
, FromJSON a
, ToJSON a
, MimeRender JSON a
, MimeUnrender JSON a
) => proxy a
-> UserId
-> NodeId
-> GargServer (NodeAPI a)
nodeAPI p uId id' = withAccess (Proxy :: Proxy (NodeAPI a)) Proxy uId (PathNode id') nodeAPI'
where
nodeAPI' :: GargServer (NodeAPI a)
nodeAPI' = getNodeWith id' p
:<|> rename id'
:<|> postNode uId id'
:<|> postNodeAsyncAPI uId id'
:<|> frameCalcUploadAPI uId id'
:<|> putNode id'
:<|> Update.api uId id'
:<|> Action.deleteNode (RootId $ NodeId uId) id'
:<|> getChildren id' p
-- TODO gather it
:<|> tableApi id'
:<|> apiNgramsTableCorpus id'
:<|> catApi id'
:<|> scoreApi id'
:<|> Search.api id'
:<|> Share.api (RootId $ NodeId uId) id'
-- Pairing Tools
:<|> pairWith id'
:<|> pairs id'
:<|> getPair id'
-- VIZ
:<|> scatterApi id'
:<|> chartApi id'
:<|> pieApi id'
:<|> treeApi id'
:<|> phyloAPI id' uId
:<|> moveNode (RootId $ NodeId uId) id'
-- :<|> nodeAddAPI id'
-- :<|> postUpload id'
:<|> Share.unPublish id'
:<|> fileApi uId id'
:<|> fileAsyncApi uId id'
:<|> DocumentsFromWriteNodes.api uId id'
------------------------------------------------------------------------
data RenameNode = RenameNode { r_name :: Text }
deriving (Generic)
------------------------------------------------------------------------
------------------------------------------------------------------------
type CatApi = Summary " To Categorize NodeNodes: 0 for delete, 1/null neutral, 2 favorite"
:> ReqBody '[JSON] NodesToCategory
:> Put '[JSON] [Int]
data NodesToCategory = NodesToCategory { ntc_nodesId :: [NodeId]
, ntc_category :: Int
}
deriving (Generic)
-- TODO unPrefix "ntc_" FromJSON, ToJSON, ToSchema, adapt frontend.
instance FromJSON NodesToCategory
instance ToJSON NodesToCategory
instance ToSchema NodesToCategory
catApi :: CorpusId -> GargServer CatApi
catApi = putCat
where
putCat :: CorpusId -> NodesToCategory -> Cmd err [Int]
putCat cId cs' = nodeNodesCategory $ map (\n -> (cId, n, ntc_category cs')) (ntc_nodesId cs')
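-- Example request body (sketch): the plain generic JSON instances keep the
-- record field names, so marking nodes 1 and 2 as favorites of the corpus is
-- roughly   PUT .../category   {"ntc_nodesId": [1, 2], "ntc_category": 2}
-- (NodeId values are assumed to serialise as plain integers).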
------------------------------------------------------------------------
type ScoreApi = Summary " To Score NodeNodes"
:> ReqBody '[JSON] NodesToScore
:> Put '[JSON] [Int]
data NodesToScore = NodesToScore { nts_nodesId :: [NodeId]
, nts_score :: Int
}
deriving (Generic)
-- TODO unPrefix "ntc_" FromJSON, ToJSON, ToSchema, adapt frontend.
instance FromJSON NodesToScore
instance ToJSON NodesToScore
instance ToSchema NodesToScore
scoreApi :: CorpusId -> GargServer ScoreApi
scoreApi = putScore
where
putScore :: CorpusId -> NodesToScore -> Cmd err [Int]
putScore cId cs' = nodeNodesScore $ map (\n -> (cId, n, nts_score cs')) (nts_nodesId cs')
------------------------------------------------------------------------
-- TODO adapt FacetDoc -> ListDoc (and add type of document as column)
-- Pairing utilities to move elsewhere
type PairingApi = Summary " Pairing API"
:> QueryParam "view" TabType
-- TODO change TabType -> DocType (CorpusId for pairing)
:> QueryParam "offset" Int
:> QueryParam "limit" Int
:> QueryParam "order" OrderBy
:> Get '[JSON] [FacetDoc]
----------
type Pairs = Summary "List of Pairs"
:> Get '[JSON] [AnnuaireId]
pairs :: CorpusId -> GargServer Pairs
pairs cId = do
ns <- getNodeNode cId
pure $ map _nn_node2_id ns
type PairWith = Summary "Pair a Corpus with an Annuaire"
:> "annuaire" :> Capture "annuaire_id" AnnuaireId
:> QueryParam "list_id" ListId
:> Post '[JSON] Int
pairWith :: CorpusId -> GargServer PairWith
pairWith cId aId lId = do
r <- pairing cId aId lId
_ <- insertNodeNode [ NodeNode { _nn_node1_id = cId
, _nn_node2_id = aId
, _nn_score = Nothing
, _nn_category = Nothing }]
pure r
------------------------------------------------------------------------
type TreeAPI = QueryParams "type" NodeType
:> Get '[JSON] (Tree NodeTree)
:<|> "first-level"
:> QueryParams "type" NodeType
:> Get '[JSON] (Tree NodeTree)
treeAPI :: NodeId -> GargServer TreeAPI
treeAPI id = tree TreeAdvanced id
:<|> tree TreeFirstLevel id
------------------------------------------------------------------------
-- | TODO Check that the name is less than 255 chars
rename :: NodeId -> RenameNode -> Cmd err [Int]
rename nId (RenameNode name') = U.update (U.Rename nId name')
putNode :: forall err a. (HasNodeError err, JSONB a, ToJSON a)
=> NodeId
-> a
-> Cmd err Int
putNode n h = fromIntegral <$> updateHyperdata n h
-------------------------------------------------------------
type MoveAPI = Summary "Move Node endpoint"
:> Capture "parent_id" ParentId
:> Put '[JSON] [Int]
moveNode :: User
-> NodeId
-> ParentId
-> Cmd err [Int]
moveNode _u n p = update (Move n p)
-------------------------------------------------------------
$(deriveJSON (unPrefix "r_" ) ''RenameNode )
instance ToSchema RenameNode
instance Arbitrary RenameNode where
arbitrary = elements [RenameNode "test"]
-------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/ 0000775 0000000 0000000 00000000000 14124644201 0026542 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Contact.hs 0000664 0000000 0000000 00000010231 14124644201 0030466 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Node.Contact
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Contact
where
import Data.Aeson
import Data.Either (Either(Right))
import Data.Maybe (Maybe(..))
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Servant
import Servant.Job.Async (JobFunction(..), serveJobsAPI)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Node
import Gargantext.API.Prelude (GargServer, simuLogs)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flow)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataAnnuaire(..), HyperdataContact)
import Gargantext.Database.Admin.Types.Hyperdata.Contact (hyperdataContact)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude (($), liftBase, (.), printDebug, pure)
import qualified Gargantext.Utils.Aeson as GUA
------------------------------------------------------------------------
type API = "contact" :> Summary "Contact endpoint"
:> API_Async
:<|> Capture "contact_id" NodeId
:> NodeNodeAPI HyperdataContact
api :: UserId -> CorpusId -> GargServer API
api uid cid = (api_async (RootId (NodeId uid)) cid)
:<|> (nodeNodeAPI (Proxy :: Proxy HyperdataContact) uid cid)
type API_Async = AsyncJobs JobLog '[JSON] AddContactParams JobLog
------------------------------------------------------------------------
data AddContactParams = AddContactParams { firstname :: !Text, lastname :: !Text }
| AddContactParamsAdvanced { firstname :: !Text
, lastname :: !Text
-- TODO add other fields
}
deriving (Generic)
----------------------------------------------------------------------
api_async :: User -> NodeId -> GargServer API_Async
api_async u nId =
serveJobsAPI $
JobFunction (\p log ->
let
log' x = do
printDebug "addContact" x
liftBase $ log x
in addContact u nId p (liftBase . log')
)
addContact :: (HasSettings env, FlowCmdM env err m)
=> User
-> NodeId
-> AddContactParams
-> (JobLog -> m ())
-> m JobLog
addContact u nId (AddContactParams fn ln) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
_ <- flow (Nothing :: Maybe HyperdataAnnuaire) u (Right [nId]) (Multi EN) Nothing [[hyperdataContact fn ln]]
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
addContact _uId _nId _p logStatus = do
simuLogs logStatus 10
------------------------------------------------------------------------
-- TODO unPrefix "pn_" FromJSON, ToJSON, ToSchema, adapt frontend.
instance FromJSON AddContactParams where
parseJSON = genericParseJSON (defaultOptions { sumEncoding = GUA.defaultTaggedObject })
instance ToJSON AddContactParams where
toJSON = genericToJSON (defaultOptions { sumEncoding = GUA.defaultTaggedObject })
instance ToSchema AddContactParams
instance Arbitrary AddContactParams where
arbitrary = elements [AddContactParams "Pierre" "Dupont"]
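-- Example payload (sketch): the exact shape depends on GUA.defaultTaggedObject;
-- assuming it tags constructors with a "type" field, adding a contact would be
--   {"type": "AddContactParams", "firstname": "Pierre", "lastname": "Dupont"}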
------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Corpus/ 0000775 0000000 0000000 00000000000 14124644201 0030015 5 ustar 00root root 0000000 0000000 Annuaire.hs 0000664 0000000 0000000 00000005751 14124644201 0032044 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Corpus {-|
Module : Gargantext.API.Node.Corpus.Annuaire
Description : New annuaire API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Corpus.Annuaire
where
import Control.Lens hiding (elements)
import Data.Aeson
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Servant
import Servant.Job.Core
import Servant.Job.Types
import Servant.Job.Utils (jsonOptions)
import Web.FormUrlEncoded (FromForm)
import qualified Gargantext.API.Node.Corpus.New.File as NewFile
import Gargantext.API.Admin.Orchestrator.Types hiding (AsyncJobs)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) -- flowAnnuaire
import Gargantext.Database.Admin.Types.Node (AnnuaireId)
import Gargantext.Prelude
type Api = Summary "New Annuaire endpoint"
:> Post '[JSON] AnnuaireId
------------------------------------------------------------------------
------------------------------------------------------------------------
data AnnuaireWithForm = AnnuaireWithForm
{ _wf_filetype :: !NewFile.FileType
, _wf_data :: !Text
, _wf_lang :: !(Maybe Lang)
} deriving (Eq, Show, Generic)
makeLenses ''AnnuaireWithForm
instance FromForm AnnuaireWithForm
instance FromJSON AnnuaireWithForm where
parseJSON = genericParseJSON $ jsonOptions "_wf_"
instance ToJSON AnnuaireWithForm where
toJSON = genericToJSON $ jsonOptions "_wf_"
instance ToSchema AnnuaireWithForm where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
------------------------------------------------------------------------
type AsyncJobs event ctI input output =
AsyncJobsAPI' 'Unsafe 'Safe ctI '[JSON] Maybe event input output
------------------------------------------------------------------------
type AddWithForm = Summary "Add with FormUrlEncoded to annuaire endpoint"
:> "annuaire"
:> Capture "annuaire_id" AnnuaireId
:> "add"
:> "form"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] AnnuaireWithForm JobLog
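-- Route sketch: the type above resolves to
--   POST /annuaire/<annuaire_id>/add/form/async
-- with a form-urlencoded AnnuaireWithForm body; the job's JobLog can then be
-- polled through the usual async-jobs machinery.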
------------------------------------------------------------------------
addToAnnuaireWithForm :: FlowCmdM env err m
=> AnnuaireId
-> AnnuaireWithForm
-> (JobLog -> m ())
-> m JobLog
addToAnnuaireWithForm _cid (AnnuaireWithForm { _wf_filetype }) logStatus = do
printDebug "ft" _wf_filetype
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Corpus/Export.hs0000664 0000000 0000000 00000006264 14124644201 0031642 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Node.Corpus.Export
Description : Get Metrics from Storage (Database like)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main exports of Gargantext:
- corpus
- document and ngrams
- lists
-}
module Gargantext.API.Node.Corpus.Export
where
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import Data.Set (Set)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.HashMap.Strict as HashMap
import Gargantext.API.Node.Corpus.Export.Types
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo')
import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Prelude.Crypto.Hash (hash)
import Gargantext.Core.Types
import Gargantext.Core.NodeStory
import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
import Gargantext.Prelude
--------------------------------------------------
-- | Hashes are ordered by Set
getCorpus :: CorpusId
-> Maybe ListId
-> Maybe NgramsType
-> GargNoServer Corpus
getCorpus cId lId nt' = do
let
nt = case nt' of
Nothing -> NgramsTerms
Just t -> t
ns <- Map.fromList
<$> map (\n -> (_node_id n, n))
<$> selectDocNodes cId
repo <- getRepo' [fromMaybe (panic "[Gargantext.API.Node.Corpus.Export]") lId]
ngs <- getNodeNgrams cId lId nt repo
let -- uniqId is hash computed already for each document imported in database
r = Map.intersectionWith
(\a b -> Document { _d_document = a
, _d_ngrams = Ngrams (Set.toList b) (hash b)
, _d_hash = d_hash a b }
) ns (Map.map (Set.map unNgramsTerm) ngs)
where
d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
, hash b
]
pure $ Corpus { _c_corpus = Map.elems r
, _c_hash = hash $ List.map _d_hash $ Map.elems r }
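-- Note on the hashes built above: each Document gets
--   _d_hash = hash [uniqId of the document, hash of its ngrams set]
-- and the Corpus hash is the hash of the list of all document hashes, so any
-- change in a document or in its ngrams changes the corpus hash.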
getNodeNgrams :: HasNodeError err
=> CorpusId
-> Maybe ListId
-> NgramsType
-> NodeListStory
-> Cmd err (Map NodeId (Set NgramsTerm))
getNodeNgrams cId lId' nt repo = do
lId <- case lId' of
Nothing -> defaultList cId
Just l -> pure l
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
-- TODO HashMap
r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
pure r
-- TODO
-- Exports List
-- Version number of the list
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Corpus/Export/ 0000775 0000000 0000000 00000000000 14124644201 0031276 5 ustar 00root root 0000000 0000000 Types.hs 0000664 0000000 0000000 00000004153 14124644201 0032662 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Corpus/Export {-|
Module : Gargantext.API.Node.Corpus.Export.Types
Description : Types for Gargantext.API.Node.Corpus.Export
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Corpus.Export.Types where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.Core.Types
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Servant
-- Corpus Export
data Corpus =
Corpus { _c_corpus :: [Document]
, _c_hash :: Hash
} deriving (Generic)
-- | Document Export
data Document =
Document { _d_document :: Node HyperdataDocument
, _d_ngrams :: Ngrams
, _d_hash :: Hash
} deriving (Generic)
data Ngrams =
Ngrams { _ng_ngrams :: [Text]
, _ng_hash :: Hash
} deriving (Generic)
type Hash = Text
-------
instance ToSchema Corpus where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
instance ToSchema Document where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
instance ToSchema Ngrams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
-------
instance ToParamSchema Corpus where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema Document where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema Ngrams where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
--------------------------------------------------
type API = Summary "Corpus Export"
:> "export"
:> QueryParam "listId" ListId
:> QueryParam "ngramsType" NgramsType
:> Get '[JSON] Corpus
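-- Example call (sketch): GET .../export?listId=<list_id>&ngramsType=NgramsTerms
-- (the query-string rendering of NgramsType is assumed); when ngramsType is
-- omitted, getCorpus defaults to NgramsTerms.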
$(deriveJSON (unPrefix "_c_") ''Corpus)
$(deriveJSON (unPrefix "_d_") ''Document)
$(deriveJSON (unPrefix "_ng_") ''Ngrams) haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Corpus/New.hs 0000664 0000000 0000000 00000030440 14124644201 0031103 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Node.Corpus.New
Description : New corpus API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
New corpus means either:
- new corpus
- new data in existing corpus
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Corpus.New
where
import Control.Lens hiding (elements, Empty)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Either
import Data.Maybe (fromMaybe)
import Data.Swagger
import Data.Text (Text)
import qualified Data.Text as T
import GHC.Generics (Generic)
import qualified Prelude as Prelude
import Protolude (readFile)
import Servant
import Servant.Job.Utils (jsonOptions)
-- import Servant.Multipart
-- import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.Prelude
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotal)
import Gargantext.API.Node.Corpus.New.File
import Gargantext.API.Node.Corpus.Searx
import Gargantext.API.Node.Corpus.Types
import Gargantext.API.Node.Types
import Gargantext.Core (Lang(..){-, allLangs-})
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import qualified Gargantext.Core.Text.Corpus.API as API
import qualified Gargantext.Core.Text.Corpus.Parsers as Parser (FileFormat(..), parseFormat)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Action.Flow (flowCorpus, getDataText, flowDataText, TermType(..){-, allDataOrigins-})
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Mail (sendMail)
import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), UserId)
import Gargantext.Database.Query.Table.Node (getNodeWith)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Node (node_hyperdata)
import qualified Gargantext.Database.GargDB as GargDB
------------------------------------------------------------------------
{-
data Query = Query { query_query :: Text
, query_node_id :: Int
, query_lang :: Lang
, query_databases :: [DataOrigin]
}
deriving (Eq, Generic)
deriveJSON (unPrefix "query_") 'Query
instance Arbitrary Query where
arbitrary = elements [ Query q n la fs
| q <- ["honeybee* AND collapse"
,"covid 19"
]
, n <- [0..10]
, la <- allLangs
, fs <- take 3 $ repeat allDataOrigins
]
instance ToSchema Query where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "query_")
-}
------------------------------------------------------------------------
{-
type Api = PostApi
:<|> GetApi
type PostApi = Summary "New Corpus endpoint"
:> ReqBody '[JSON] Query
:> Post '[JSON] CorpusId
type GetApi = Get '[JSON] ApiInfo
-}
-- | TODO manage several apis
-- TODO-ACCESS
-- TODO this is only the POST
{-
api :: (FlowCmdM env err m) => UserId -> Query -> m CorpusId
api uid (Query q _ as) = do
cId <- case head as of
Nothing -> flowCorpusSearchInDatabase (UserDBId uid) EN q
Just API.All -> flowCorpusSearchInDatabase (UserDBId uid) EN q
Just a -> do
docs <- liftBase $ API.get a q (Just 1000)
cId' <- flowCorpus (UserDBId uid) (Left q) (Multi EN) [docs]
pure cId'
pure cId
-}
------------------------------------------------
-- TODO use this route for Client implementation
data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
deriving (Generic)
instance Arbitrary ApiInfo where
arbitrary = ApiInfo <$> arbitrary
deriveJSON (unPrefix "") 'ApiInfo
instance ToSchema ApiInfo
info :: FlowCmdM env err m => UserId -> m ApiInfo
info _u = pure $ ApiInfo API.externalAPIs
------------------------------------------------------------------------
------------------------------------------------------------------------
data WithQuery = WithQuery
{ _wq_query :: !Text
, _wq_databases :: !Database
, _wq_datafield :: !(Maybe Datafield)
, _wq_lang :: !Lang
, _wq_node_id :: !Int
, _wq_flowListWith :: !FlowSocialListWith
}
deriving Generic
makeLenses ''WithQuery
instance FromJSON WithQuery where
parseJSON = genericParseJSON $ jsonOptions "_wq_"
instance ToJSON WithQuery where
toJSON = genericToJSON $ jsonOptions "_wq_"
instance ToSchema WithQuery where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wq_")
------------------------------------------------------------------------
type AddWithQuery = Summary "Add with Query to corpus endpoint"
:> "corpus"
:> Capture "corpus_id" CorpusId
:> "query"
:> AsyncJobs JobLog '[JSON] WithQuery JobLog
{-
type AddWithFile = Summary "Add with MultipartData to corpus endpoint"
:> "corpus"
:> Capture "corpus_id" CorpusId
:> "add"
:> "file"
:> MultipartForm Mem (MultipartData Mem)
:> QueryParam "fileType" FileType
:> "async"
:> AsyncJobs JobLog '[JSON] () JobLog
-}
------------------------------------------------------------------------
-- TODO WithQuery also has a corpus id
addToCorpusWithQuery :: FlowCmdM env err m
=> User
-> CorpusId
-> WithQuery
-> Maybe Integer
-> (JobLog -> m ())
-> m JobLog
addToCorpusWithQuery user cid (WithQuery { _wq_query = q
, _wq_databases = dbs
, _wq_datafield = datafield
, _wq_lang = l
, _wq_flowListWith = flw }) maybeLimit logStatus = do
-- TODO ...
logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
, _scst_remaining = Just 3
, _scst_events = Just []
}
printDebug "[addToCorpusWithQuery] (cid, dbs)" (cid, dbs)
printDebug "[addToCorpusWithQuery] datafield" datafield
printDebug "[addToCorpusWithQuery] flowListWith" flw
case datafield of
Just Web -> do
printDebug "[addToCorpusWithQuery] processing web request" datafield
_ <- triggerSearxSearch cid q l
pure JobLog { _scst_succeeded = Just 3
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
_ -> do
-- TODO add cid
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
txts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs]
logStatus JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
cids <- mapM (\txt -> flowDataText user txt (Multi l) cid Nothing) txts
printDebug "corpus id" cids
printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
sendMail user
-- TODO ...
pure JobLog { _scst_succeeded = Just 3
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
:> "corpus"
:> Capture "corpus_id" CorpusId
:> "add"
:> "form"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] NewWithForm JobLog
addToCorpusWithForm :: FlowCmdM env err m
=> User
-> CorpusId
-> NewWithForm
-> (JobLog -> m ())
-> JobLog
-> m JobLog
addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
printDebug "[addToCorpusWithForm] Parsing corpus: " cid
printDebug "[addToCorpusWithForm] fileType" ft
logStatus jobLog
let
parse = case ft of
CSV_HAL -> Parser.parseFormat Parser.CsvHal
CSV -> Parser.parseFormat Parser.CsvGargV3
WOS -> Parser.parseFormat Parser.WOS
PresseRIS -> Parser.parseFormat Parser.RisPresse
-- TODO granularity of the logStatus
eDocs <- liftBase $ parse $ cs d
case eDocs of
Right docs' -> do
let docs = splitEvery 500 $ take 1000000 docs'
printDebug "Parsing corpus finished : " cid
logStatus jobLog2
printDebug "Starting extraction : " cid
-- TODO granularity of the logStatus
_cid' <- flowCorpus user
(Right [cid])
(Multi $ fromMaybe EN l)
Nothing
(map (map toHyperdataDocument) docs)
printDebug "Extraction finished : " cid
printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
sendMail user
logStatus jobLog3
pure $ jobLog3
Left e -> do
printDebug "Error" e
logStatus jobLogE
pure jobLogE
where
jobLog2 = jobLogSuccess jobLog
jobLog3 = jobLogSuccess jobLog2
jobLogE = jobLogFailTotal jobLog
parseCsvGargV3Path :: [Char] -> IO (Either Prelude.String [HyperdataDocument])
parseCsvGargV3Path fp = do
contents <- readFile fp
Parser.parseFormat Parser.CsvGargV3 $ cs contents
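-- Usage sketch (file name illustrative):
--   eDocs <- parseCsvGargV3Path "docs.csv"
--   -- eDocs :: Either Prelude.String [HyperdataDocument]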
{-
addToCorpusWithFile :: FlowCmdM env err m
=> CorpusId
-> MultipartData Mem
-> Maybe FileType
-> (JobLog -> m ())
-> m JobLog
addToCorpusWithFile cid input filetype logStatus = do
logStatus JobLog { _scst_succeeded = Just 10
, _scst_failed = Just 2
, _scst_remaining = Just 138
, _scst_events = Just []
}
printDebug "addToCorpusWithFile" cid
_h <- postUpload cid filetype input
pure JobLog { _scst_succeeded = Just 137
, _scst_failed = Just 13
, _scst_remaining = Just 0
, _scst_events = Just []
}
-}
type AddWithFile = Summary "Add with FileUrlEncoded to corpus endpoint"
:> "corpus"
:> Capture "corpus_id" CorpusId
:> "add"
:> "file"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] NewWithFile JobLog
addToCorpusWithFile :: (HasSettings env, FlowCmdM env err m)
=> User
-> CorpusId
-> NewWithFile
-> (JobLog -> m ())
-> m JobLog
addToCorpusWithFile user cid nwf@(NewWithFile _d _l fName) logStatus = do
printDebug "[addToCorpusWithFile] Uploading file to corpus: " cid
logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
fPath <- GargDB.writeFile nwf
printDebug "[addToCorpusWithFile] File saved as: " fPath
uId <- getUserId user
nIds <- mkNodeWithParent NodeFile (Just cid) uId fName
_ <- case nIds of
[nId] -> do
node <- getNodeWith nId (Proxy :: Proxy HyperdataFile)
let hl = node ^. node_hyperdata
_ <- updateHyperdata nId $ hl { _hff_name = fName
, _hff_path = T.pack fPath }
printDebug "[addToCorpusWithFile] Created node with id: " nId
_ -> pure ()
printDebug "[addToCorpusWithFile] File upload to corpus finished: " cid
printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
sendMail user
pure $ JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Corpus/New/ 0000775 0000000 0000000 00000000000 14124644201 0030546 5 ustar 00root root 0000000 0000000 File.hs 0000664 0000000 0000000 00000006245 14124644201 0031711 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Corpus/New {-|
Module : Gargantext.API.Node.Corpus.New.File
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Corpus.New.File
where
import Control.Lens ((.~), (?~))
import Control.Monad (forM)
import Data.Aeson
import Data.Maybe
import Data.Monoid (mempty)
import Data.Swagger
import Data.Text (Text())
import GHC.Generics (Generic)
import Servant
import Servant.Multipart
import Servant.Swagger.Internal
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Gargantext.Core.Types (TODO)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude -- (Cmd, CmdM)
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (hash)
-------------------------------------------------------------
type Hash = Text
data FileType = CSV
| CSV_HAL
| PresseRIS
| WOS
deriving (Eq, Show, Generic)
instance ToSchema FileType
instance Arbitrary FileType
where
arbitrary = elements [CSV, PresseRIS]
instance ToParamSchema FileType
instance FromJSON FileType
instance ToJSON FileType
instance ToParamSchema (MultipartData Mem) where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance FromHttpApiData FileType
where
parseUrlPiece "CSV" = pure CSV
parseUrlPiece "CSV_HAL" = pure CSV_HAL
parseUrlPiece "PresseRis" = pure PresseRIS
parseUrlPiece _ = pure CSV -- TODO error here
instance (ToParamSchema a, HasSwagger sub) =>
HasSwagger (MultipartForm tag a :> sub) where
-- TODO
toSwagger _ = toSwagger (Proxy :: Proxy sub)
& addParam param
where
param = mempty
& required ?~ True
& schema .~ ParamOther sch
sch = mempty
& in_ .~ ParamFormData
& paramSchema .~ toParamSchema (Proxy :: Proxy a)
type WithUpload' = Summary "Upload file(s) to a corpus"
:> QueryParam "fileType" FileType
:> MultipartForm Mem (MultipartData Mem)
:> Post '[JSON] [Hash]
--postUpload :: NodeId -> Maybe FileType -> GargServer UploadAPI
--postUpload :: NodeId -> GargServer UploadAPI
postUpload :: NodeId
-> Maybe FileType
-> MultipartData Mem
-> Cmd err [Hash]
postUpload _ Nothing _ = panic "fileType is a required parameter"
postUpload _ (Just fileType) multipartData = do
printDebug "File Type: " fileType
is <- liftBase $ do
printDebug "Inputs:" ()
forM (inputs multipartData) $ \input -> do
printDebug "iName " (iName input)
printDebug "iValue " (iValue input)
pure $ iName input
_ <- forM (files multipartData) $ \file -> do
let content = fdPayload file
printDebug "XXX " (fdFileName file)
printDebug "YYY " content
--pure $ cs content
-- is <- inputs multipartData
pure $ map hash is
-------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Corpus/Searx.hs 0000664 0000000 0000000 00000007003 14124644201 0031433 0 ustar 00root root 0000000 0000000 {-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Node.Corpus.Searx where
import Control.Lens (view)
import qualified Data.Aeson as Aeson
import Data.Aeson.TH (deriveJSON)
import qualified Data.Text as T
import GHC.Generics (Generic)
import Network.HTTP.Client
import Network.HTTP.Client.TLS
import qualified Prelude as Prelude
import Protolude (encodeUtf8, Text, Either)
import Gargantext.Prelude
import Gargantext.Prelude.Config
import Gargantext.Core (Lang(..))
import qualified Gargantext.Core.Text.Corpus.API as API
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Prelude (hasConfig)
data SearxResult = SearxResult
{ _sr_url :: Text
, _sr_title :: Text
, _sr_content :: Maybe Text
, _sr_engine :: Text
, _sr_score :: Double
, _sr_category :: Text
, _sr_pretty_url :: Text }
deriving (Show, Eq, Generic)
-- , _sr_parsed_url
-- , _sr_engines
-- , _sr_positions
$(deriveJSON (unPrefix "_sr_") ''SearxResult)
data SearxResponse = SearxResponse
{ _srs_query :: Text
, _srs_number_of_results :: Int
, _srs_results :: [SearxResult] }
deriving (Show, Eq, Generic)
-- , _srs_answers
-- , _srs_corrections
-- , _srs_infoboxes
-- , _srs_suggestions :: [Text]
-- , _srs_unresponsive_engines :: [Text] }
$(deriveJSON (unPrefix "_srs_") ''SearxResponse)
data FetchSearxParams = FetchSearxParams
{ _fsp_language :: Lang
, _fsp_manager :: Manager
, _fsp_pageno :: Int
, _fsp_query :: Text
, _fsp_url :: Text
}
fetchSearxPage :: FetchSearxParams -> IO (Either Prelude.String SearxResponse)
fetchSearxPage (FetchSearxParams { _fsp_language
, _fsp_manager
, _fsp_pageno
, _fsp_query
, _fsp_url }) = do
-- searx search API:
-- https://searx.github.io/searx/dev/search_api.html?highlight=json
req <- parseRequest $ T.unpack _fsp_url
let request = urlEncodedBody
[ --("category_general", "1")
("q", encodeUtf8 _fsp_query)
, ("categories", "news") -- https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/70#note_3976
, ("pageno", encodeUtf8 $ T.pack $ show _fsp_pageno)
--, ("time_range", "None")
, ("language", encodeUtf8 $ T.pack $ show _fsp_language)
, ("format", "json")
] req
res <- httpLbs request _fsp_manager
let dec = Aeson.eitherDecode $ responseBody res :: (Either Prelude.String SearxResponse)
pure dec
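-- Usage sketch (the searx URL is illustrative; triggerSearxSearch below takes
-- it from _gc_frame_searx_url in the config):
--   manager <- newManager tlsManagerSettings
--   res     <- fetchSearxPage $ FetchSearxParams { _fsp_language = EN
--                                                , _fsp_manager  = manager
--                                                , _fsp_pageno   = 1
--                                                , _fsp_query    = "covid 19"
--                                                , _fsp_url      = "https://searx.example/search" }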
triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m)
=> CorpusId
-> API.Query
-> Lang
-> m ()
triggerSearxSearch cid q l = do
printDebug "[triggerSearxSearch] cid" cid
printDebug "[triggerSearxSearch] q" q
printDebug "[triggerSearxSearch] l" l
cfg <- view hasConfig
let surl = _gc_frame_searx_url cfg
printDebug "[triggerSearxSearch] surl" surl
manager <- liftBase $ newManager tlsManagerSettings
res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
, _fsp_manager = manager
, _fsp_pageno = 1
, _fsp_query = q
, _fsp_url = surl }
printDebug "[triggerSearxSearch] res" res
pure ()
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Corpus/Types.hs 0000664 0000000 0000000 00000004053 14124644201 0031457 0 ustar 00root root 0000000 0000000 {-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Node.Corpus.Types where
import Control.Lens hiding (elements, Empty)
import Control.Monad.Fail (fail)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Monoid (mempty)
import Data.Swagger
import Data.Text (Text)
import qualified Data.Text as T
import GHC.Generics (Generic)
import Text.Regex.TDFA ((=~))
import Protolude ((++))
import Gargantext.Prelude
import qualified Gargantext.API.Admin.Orchestrator.Types as T
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (DataOrigin(..))
data Database = Empty
| PubMed
| HAL
| IsTex
| Isidore
deriving (Eq, Show, Generic)
deriveJSON (unPrefix "") ''Database
instance ToSchema Database
database2origin :: Database -> DataOrigin
database2origin Empty = InternalOrigin T.IsTex
database2origin PubMed = ExternalOrigin T.PubMed
database2origin HAL = ExternalOrigin T.HAL
database2origin IsTex = ExternalOrigin T.IsTex
database2origin Isidore = ExternalOrigin T.Isidore
------------------------------------------------------------------------
data Datafield = Gargantext
| External (Maybe Database)
| Web
| Files
deriving (Eq, Show, Generic)
instance FromJSON Datafield where
parseJSON = withText "Datafield" $ \text ->
case text of
"Gargantext" -> pure Gargantext
"Web" -> pure Web
"Files" -> pure Files
v ->
let (preExternal, _, postExternal) = v =~ ("External " :: Text) :: (Text, Text, Text)
in
if preExternal == "" then do
db <- parseJSON $ String postExternal
pure $ External db
else fail $ "Cannot match patterh 'External ' for string " ++ (T.unpack v)
instance ToJSON Datafield where
toJSON (External db) = toJSON $ "External " ++ (show db)
toJSON s = toJSON $ show s
instance ToSchema Datafield where
declareNamedSchema _ = do
return $ NamedSchema (Just "Datafield") $ mempty
& type_ ?~ SwaggerObject
DocumentsFromWriteNodes.hs 0000664 0000000 0000000 00000005010 14124644201 0033604 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node {-|
Module : Gargantext.API.Node.DocumentsFromWriteNodes
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.API.Node.DocumentsFromWriteNodes
where
import Data.Aeson
import Data.Swagger
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Prelude (GargServer)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude
import GHC.Generics (Generic)
import Servant
import Servant.Job.Async (JobFunction(..), serveJobsAPI)
------------------------------------------------------------------------
type API = Summary " Documents from Write nodes."
:> AsyncJobs JobLog '[JSON] Params JobLog
------------------------------------------------------------------------
newtype Params = Params { id :: Int }
deriving (Generic, Show)
instance FromJSON Params where
parseJSON = genericParseJSON defaultOptions
instance ToJSON Params where
toJSON = genericToJSON defaultOptions
instance ToSchema Params
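-- Example payload (sketch): with the plain generic instances above, the job
-- body is simply an object like {"id": 1234}.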
------------------------------------------------------------------------
api :: UserId -> NodeId -> GargServer API
api uId nId =
serveJobsAPI $
JobFunction (\p log'' ->
let
log' x = do
printDebug "documents from write nodes" x
liftBase $ log'' x
in documentsFromWriteNodes uId nId p (liftBase . log')
)
documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
=> UserId
-> NodeId
-> Params
-> (JobLog -> m ())
-> m JobLog
documentsFromWriteNodes uId nId p logStatus = do
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
_ <- printDebug "[documentsFromWriteNodes] inside job, uId" uId
_ <- printDebug "[documentsFromWriteNodes] inside job, nId" nId
_ <- printDebug "[documentsFromWriteNodes] inside job, p" p
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/File.hs 0000664 0000000 0000000 00000010657 14124644201 0027766 0 ustar 00root root 0000000 0000000 {-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-matches #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.File where
import Control.Lens ((^.))
import Data.Swagger
import Data.Text
import GHC.Generics (Generic)
import Servant
import Servant.Job.Async (JobFunction(..), serveJobsAPI)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.MIME.Types as DMT
import qualified Gargantext.Database.GargDB as GargDB
import qualified Network.HTTP.Media as M
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Node.Types
import Gargantext.API.Prelude
import Gargantext.Core.Types (TODO)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Admin.Types.Hyperdata.File
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getNodeWith)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
data RESPONSE deriving Typeable
instance Accept RESPONSE where
contentType _ = "text" M.// "*"
instance MimeRender RESPONSE BSResponse where
mimeRender _ (BSResponse val) = BSL.fromStrict $ val
type FileApi = Summary "File download"
:> "download"
:> Get '[RESPONSE] (Headers '[Servant.Header "Content-Type" Text] BSResponse)
fileApi :: UserId -> NodeId -> GargServer FileApi
fileApi uId nId = fileDownload uId nId
newtype Contents = Contents BS.ByteString
instance GargDB.ReadFile Contents where
readFile' fp = do
c <- BS.readFile fp
pure $ Contents c
newtype BSResponse = BSResponse BS.ByteString
deriving (Generic)
instance ToSchema BSResponse where
declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
fileDownload :: (HasSettings env, FlowCmdM env err m)
=> UserId
-> NodeId
-> m (Headers '[Servant.Header "Content-Type" Text] BSResponse)
fileDownload uId nId = do
printDebug "[fileDownload] uId" uId
printDebug "[fileDownload] nId" nId
node <- getNodeWith nId (Proxy :: Proxy HyperdataFile)
let (HyperdataFile { _hff_name = name'
, _hff_path = path }) = node ^. node_hyperdata
Contents c <- GargDB.readFile $ unpack path
let (mMime, _) = DMT.guessType DMT.defaultmtd False $ unpack name'
mime = case mMime of
Just m -> m
Nothing -> "text/plain"
pure $ addHeader (pack mime) $ BSResponse c
--pure c
-- let settings = embeddedSettings [("", encodeUtf8 c)]
-- Tagged $ staticApp settings
-- let settings = embeddedSettings [("", "hello")]
-- Tagged $ staticApp settings
type FileAsyncApi = Summary "File Async Api"
:> "file"
:> "add"
:> AsyncJobs JobLog '[FormUrlEncoded] NewWithFile JobLog
fileAsyncApi :: UserId -> NodeId -> GargServer FileAsyncApi
fileAsyncApi uId nId =
serveJobsAPI $
JobFunction (\i l ->
let
log' x = do
printDebug "addWithFile" x
liftBase $ l x
in addWithFile uId nId i log')
addWithFile :: (HasSettings env, FlowCmdM env err m)
=> UserId
-> NodeId
-> NewWithFile
-> (JobLog -> m ())
-> m JobLog
addWithFile uId nId nwf@(NewWithFile _d _l fName) logStatus = do
printDebug "[addWithFile] Uploading file: " nId
logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
fPath <- GargDB.writeFile nwf
printDebug "[addWithFile] File saved as: " fPath
nIds <- mkNodeWithParent NodeFile (Just nId) uId fName
_ <- case nIds of
[nId'] -> do
node <- getNodeWith nId' (Proxy :: Proxy HyperdataFile)
let hl = node ^. node_hyperdata
_ <- updateHyperdata nId' $ hl { _hff_name = fName
, _hff_path = pack fPath }
printDebug "[addWithFile] Created node with id: " nId'
_ -> pure ()
printDebug "[addWithFile] File upload finished: " nId
pure $ JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
FrameCalcUpload.hs 0000664 0000000 0000000 00000006213 14124644201 0032003 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node {-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.FrameCalcUpload where
import Control.Lens ((^.))
import Data.Aeson
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.UTF8 as BSU8
import Data.Swagger
import qualified Data.Text as T
import GHC.Generics (Generic)
import Network.HTTP.Client (newManager, httpLbs, parseRequest, responseBody)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant
import Servant.Job.Async
import Web.FormUrlEncoded (FromForm)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Job (jobLogInit, jobLogSuccess, jobLogFail)
import Gargantext.API.Node.Corpus.New (addToCorpusWithForm)
import Gargantext.API.Node.Corpus.New.File (FileType(..))
import Gargantext.API.Node.Types (NewWithForm(..))
import Gargantext.API.Prelude
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
data FrameCalcUpload = FrameCalcUpload ()
deriving (Generic)
instance FromForm FrameCalcUpload
instance FromJSON FrameCalcUpload
instance ToJSON FrameCalcUpload
instance ToSchema FrameCalcUpload
type FrameCalcUploadAPI = Summary " FrameCalc upload"
:> "add"
:> "framecalc"
:> "async"
:> AsyncJobs JobLog '[JSON] FrameCalcUpload JobLog
frameCalcUploadAPI :: UserId -> NodeId -> GargServer FrameCalcUploadAPI
frameCalcUploadAPI uId nId =
serveJobsAPI $
JobFunction (\p logs ->
frameCalcUploadAsync uId nId p (liftBase . logs) (jobLogInit 5)
)
frameCalcUploadAsync :: FlowCmdM env err m
=> UserId
-> NodeId
-> FrameCalcUpload
-> (JobLog -> m ())
-> JobLog
-> m JobLog
frameCalcUploadAsync uId nId _f logStatus jobLog = do
logStatus jobLog
-- printDebug "[frameCalcUploadAsync] uId" uId
-- printDebug "[frameCalcUploadAsync] nId" nId
node <- getNodeWith nId (Proxy :: Proxy HyperdataFrame)
let (HyperdataFrame { _hf_base = base
, _hf_frame_id = frame_id }) = node ^. node_hyperdata
let csvUrl = base <> "/" <> frame_id <> ".csv"
-- printDebug "[frameCalcUploadAsync] csvUrl" csvUrl
res <- liftBase $ do
manager <- newManager tlsManagerSettings
req <- parseRequest $ T.unpack csvUrl
httpLbs req manager
let body = T.pack $ BSU8.toString $ BSL.toStrict $ responseBody res
mCId <- getClosestParentIdByType nId NodeCorpus
-- printDebug "[frameCalcUploadAsync] mCId" mCId
jobLog2 <- case mCId of
Nothing -> pure $ jobLogFail jobLog
Just cId ->
addToCorpusWithForm (RootId (NodeId uId)) cId (NewWithForm CSV body Nothing "calc-upload.csv") logStatus jobLog
pure $ jobLogSuccess jobLog2
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Get.hs 0000664 0000000 0000000 00000004077 14124644201 0027625 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Node.Get
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Polymorphic Get Node API
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.API.Node.Get
where
-- import Gargantext.API.Admin.Types (HasSettings)
-- import Servant.Job.Async (JobFunction(..), serveJobsAPI)
-- import Test.QuickCheck (elements)
-- import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Data.Aeson
import Data.Swagger
import GHC.Generics (Generic)
import Servant
import Test.QuickCheck.Arbitrary
import Gargantext.API.Prelude
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (JSONB{-, getNodeWith-})
import Gargantext.Prelude
------------------------------------------------------------------------
type API a = Summary "Polymorphic Get Node Endpoint"
:> ReqBody '[JSON] GetNodeParams
:> Get '[JSON] (Node a)
------------------------------------------------------------------------
data GetNodeParams = GetNodeParams { node_id :: NodeId
, nodetype :: NodeType
}
deriving (Generic)
----------------------------------------------------------------------
api :: forall proxy a.
( JSONB a
, FromJSON a
, ToJSON a
) => proxy a -> UserId -> NodeId -> GargServer (API a)
api _p _uId _nId (GetNodeParams _nId' _nt) = undefined
------------------------------------------------------------------------
instance FromJSON GetNodeParams where
parseJSON = genericParseJSON (defaultOptions { sumEncoding = ObjectWithSingleField })
instance ToJSON GetNodeParams where
toJSON = genericToJSON (defaultOptions { sumEncoding = ObjectWithSingleField })
instance ToSchema GetNodeParams
instance Arbitrary GetNodeParams where
arbitrary = GetNodeParams <$> arbitrary <*> arbitrary
------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/New.hs 0000664 0000000 0000000 00000006645 14124644201 0027642 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Node.New
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
New = Post maybe change the name
Async new node feature
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.API.Node.New
where
import Control.Lens hiding (elements, Empty)
import Data.Aeson
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Servant
import Servant.Job.Async
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Web.FormUrlEncoded (FromForm)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Prelude
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Node
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.Node.User
import Gargantext.Database.Schema.Node
import Gargantext.Prelude
------------------------------------------------------------------------
data PostNode = PostNode { pn_name :: Text
, pn_typename :: NodeType}
deriving (Generic)
------------------------------------------------------------------------
-- TODO unPrefix "pn_" FromJSON, ToJSON, ToSchema, adapt frontend.
instance FromJSON PostNode
instance ToJSON PostNode
instance ToSchema PostNode
instance FromForm PostNode
instance Arbitrary PostNode where
arbitrary = elements [PostNode "Node test" NodeCorpus]
------------------------------------------------------------------------
postNode :: HasNodeError err
=> UserId
-> NodeId
-> PostNode
-> Cmd err [NodeId]
postNode uId pId (PostNode nodeName nt) = do
nodeUser <- getNodeUser (NodeId uId)
let uId' = nodeUser ^. node_user_id
mkNodeWithParent nt (Just pId) uId' nodeName
------------------------------------------------------------------------
type PostNodeAsync = Summary "Post Node"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] PostNode JobLog
postNodeAsyncAPI :: UserId -> NodeId -> GargServer PostNodeAsync
postNodeAsyncAPI uId nId =
serveJobsAPI $
JobFunction (\p logs -> postNodeAsync uId nId p (liftBase . logs))
------------------------------------------------------------------------
postNodeAsync :: FlowCmdM env err m
=> UserId
-> NodeId
-> PostNode
-> (JobLog -> m ())
-> m JobLog
postNodeAsync uId nId (PostNode nodeName tn) logStatus = do
printDebug "postNodeAsync" nId
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 2
, _scst_events = Just []
}
nodeUser <- getNodeUser (NodeId uId)
-- _ <- threadDelay 1000
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 2
, _scst_events = Just []
}
let uId' = nodeUser ^. node_user_id
_ <- mkNodeWithParent tn (Just nId) uId' nodeName
pure JobLog { _scst_succeeded = Just 3
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Share.hs 0000664 0000000 0000000 00000010462 14124644201 0030143 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Node.Share
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Share
where
import Data.Aeson
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Prelude
import Gargantext.Core.Types.Individu (User(..), arbitraryUsername)
import Gargantext.Database.Action.Share (ShareNodeWith(..))
import Gargantext.Database.Action.Share as DB (shareNodeWith, unPublish)
import Gargantext.Database.Action.User
import Gargantext.Database.Action.User.New
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Tree (findNodesWithType)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Prelude
import qualified Gargantext.Utils.Aeson as GUA
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import qualified Data.List as List
------------------------------------------------------------------------
data ShareNodeParams = ShareTeamParams { username :: Text }
| SharePublicParams { node_id :: NodeId}
deriving (Generic)
------------------------------------------------------------------------
-- TODO unPrefix "pn_" FromJSON, ToJSON, ToSchema, adapt frontend.
instance FromJSON ShareNodeParams where
parseJSON = genericParseJSON (defaultOptions { sumEncoding = GUA.defaultTaggedObject })
instance ToJSON ShareNodeParams where
toJSON = genericToJSON (defaultOptions { sumEncoding = GUA.defaultTaggedObject })
instance ToSchema ShareNodeParams
instance Arbitrary ShareNodeParams where
arbitrary = elements [ ShareTeamParams "user1"
, SharePublicParams (NodeId 1)
]
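-- Example payloads (sketch): the shape depends on GUA.defaultTaggedObject;
-- assuming a "type" tag field, the two cases would look roughly like
--   {"type": "ShareTeamParams",   "username": "user1"}
--   {"type": "SharePublicParams", "node_id": 42}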
------------------------------------------------------------------------
-- TODO permission
-- TODO refactor userId which is used twice
-- TODO change return type for better warning/info/success/error handling on the front
api :: HasNodeError err
=> User
-> NodeId
-> ShareNodeParams
-> CmdR err Int
api userInviting nId (ShareTeamParams user') = do
user <- case guessUserName user' of
Nothing -> pure user'
Just (u,_) -> do
isRegistered <- getUserId' (UserName u)
case isRegistered of
Just _ -> do
printDebug "[G.A.N.Share.api]" ("Team shared with " <> u)
pure u
Nothing -> do
username' <- getUsername userInviting
_ <- case List.elem username' arbitraryUsername of
True -> do
printDebug "[G.A.N.Share.api]" ("Demo users are not allowed to invite" :: Text)
pure ()
False -> do
-- TODO better analysis of the composition of what is shared
children <- findNodesWithType nId [NodeList] [ NodeFolderShared
, NodeTeam
, NodeFolder
, NodeCorpus
]
_ <- case List.null children of
True -> do
printDebug "[G.A.N.Share.api]" ("Invitation is enabled if you share a corpus at least" :: Text)
pure 0
False -> do
printDebug "[G.A.N.Share.api]" ("Your invitation is sent to: " <> user')
newUsers [user']
pure ()
pure u
fromIntegral <$> DB.shareNodeWith (ShareNodeWith_User NodeFolderShared (UserName user)) nId
api _uId nId2 (SharePublicParams nId1) =
fromIntegral <$> DB.shareNodeWith (ShareNodeWith_Node NodeFolderPublic nId1) nId2
------------------------------------------------------------------------
type API = Summary " Share Node with username"
:> ReqBody '[JSON] ShareNodeParams
:> Post '[JSON] Int
------------------------------------------------------------------------
type Unpublish = Summary " Unpublish Node"
:> Capture "node_id" NodeId
:> Put '[JSON] Int
unPublish :: NodeId -> GargServer Unpublish
unPublish n = DB.unPublish n
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Types.hs 0000664 0000000 0000000 00000004310 14124644201 0030200 0 ustar 00root root 0000000 0000000 {-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Node.Types where
import Control.Lens hiding (elements, Empty)
import Data.Aeson
import qualified Data.ByteString as BS
import qualified Data.ByteString.Base64 as BSB64
import Data.Either
import Data.Swagger
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import GHC.Generics (Generic)
import Servant.Job.Utils (jsonOptions)
import Web.FormUrlEncoded (FromForm)
import Gargantext.Core (Lang(..){-, allLangs-})
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Prelude
import qualified Gargantext.Database.GargDB as GargDB
import Gargantext.API.Node.Corpus.New.File (FileType)
-------------------------------------------------------
data NewWithForm = NewWithForm
{ _wf_filetype :: !FileType
, _wf_data :: !Text
, _wf_lang :: !(Maybe Lang)
, _wf_name :: !Text
} deriving (Eq, Show, Generic)
makeLenses ''NewWithForm
instance FromForm NewWithForm
instance FromJSON NewWithForm where
parseJSON = genericParseJSON $ jsonOptions "_wf_"
instance ToJSON NewWithForm where
toJSON = genericToJSON $ jsonOptions "_wf_"
instance ToSchema NewWithForm where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
-------------------------------------------------------
data NewWithFile = NewWithFile
{ _wfi_b64_data :: !Text
, _wfi_lang :: !(Maybe Lang)
, _wfi_name :: !Text
} deriving (Eq, Show, Generic)
makeLenses ''NewWithFile
instance FromForm NewWithFile
instance FromJSON NewWithFile where
parseJSON = genericParseJSON $ jsonOptions "_wfi_"
instance ToJSON NewWithFile where
toJSON = genericToJSON $ jsonOptions "_wfi_"
instance ToSchema NewWithFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wfi_")
instance GargDB.SaveFile NewWithFile where
saveFile' fp (NewWithFile b64d _ _) = do
let eDecoded = BSB64.decode $ TE.encodeUtf8 b64d
case eDecoded of
Left err -> panic $ T.pack $ "Error decoding: " <> err
Right decoded -> BS.writeFile fp decoded
-- BS.writeFile fp $ BSB64.decodeLenient $ TE.encodeUtf8 b64d
--instance GargDB.ReadFile NewWithFile where
-- readFile' = TIO.readFile
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Node/Update.hs 0000664 0000000 0000000 00000014764 14124644201 0030334 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Node.Update
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.API.Node.Update
where
import Control.Lens (view)
import Data.Aeson
import Data.Maybe (Maybe(..))
import Data.Swagger
import GHC.Generics (Generic)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Ngrams.List (reIndexWith)
import Gargantext.API.Prelude (GargServer, simuLogs)
import Gargantext.Core.Methods.Distances (GraphMetric(..))
import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Schema.Node (node_parent_id)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude (Ord, Eq, (<$>), ($), liftBase, (.), printDebug, pure, show, cs, (<>), panic)
import qualified Gargantext.Utils.Aeson as GUA
import Prelude (Enum, Bounded, minBound, maxBound)
import Servant
import Servant.Job.Async (JobFunction(..), serveJobsAPI)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import qualified Data.Set as Set
------------------------------------------------------------------------
type API = Summary " Update node according to NodeType params"
:> AsyncJobs JobLog '[JSON] UpdateNodeParams JobLog
------------------------------------------------------------------------
data UpdateNodeParams = UpdateNodeParamsList { methodList :: !Method }
| UpdateNodeParamsGraph { methodGraph :: !GraphMetric }
| UpdateNodeParamsTexts { methodTexts :: !Granularity }
| UpdateNodeParamsBoard { methodBoard :: !Charts }
| LinkNodeReq { nodeType :: !NodeType, id :: !NodeId }
deriving (Generic)
----------------------------------------------------------------------
data Method = Basic | Advanced | WithModel
deriving (Generic, Eq, Ord, Enum, Bounded)
----------------------------------------------------------------------
data Granularity = NewNgrams | NewTexts | Both
deriving (Generic, Eq, Ord, Enum, Bounded)
----------------------------------------------------------------------
data Charts = Sources | Authors | Institutes | Ngrams | All
deriving (Generic, Eq, Ord, Enum, Bounded)
------------------------------------------------------------------------
api :: UserId -> NodeId -> GargServer API
api uId nId =
serveJobsAPI $
JobFunction (\p log'' ->
let
log' x = do
printDebug "updateNode" x
liftBase $ log'' x
in updateNode uId nId p (liftBase . log')
)
updateNode :: (HasSettings env, FlowCmdM env err m)
=> UserId
-> NodeId
-> UpdateNodeParams
-> (JobLog -> m ())
-> m JobLog
updateNode uId nId (UpdateNodeParamsGraph metric) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
_ <- recomputeGraph uId nId (Just metric)
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
updateNode _uId nid1 (LinkNodeReq nt nid2) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
_ <- case nt of
NodeAnnuaire -> pairing nid2 nid1 Nothing -- defaultList
NodeCorpus -> pairing nid1 nid2 Nothing -- defaultList
_ -> panic $ "[G.API.N.Update.updateNode] NodeType not implemented"
<> cs (show nt)
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 2
, _scst_events = Just []
}
corpusId <- view node_parent_id <$> getNode lId
logStatus JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
_ <- case corpusId of
Just cId -> reIndexWith cId lId NgramsTerms (Set.singleton MapTerm)
Nothing -> pure ()
pure JobLog { _scst_succeeded = Just 3
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
updateNode _uId _nId _p logStatus = do
simuLogs logStatus 10
------------------------------------------------------------------------
-- TODO unPrefix "pn_" FromJSON, ToJSON, ToSchema, adapt frontend.
instance FromJSON UpdateNodeParams where
parseJSON = genericParseJSON (defaultOptions { sumEncoding = GUA.defaultTaggedObject })
instance ToJSON UpdateNodeParams where
toJSON = genericToJSON (defaultOptions { sumEncoding = GUA.defaultTaggedObject })
instance ToSchema UpdateNodeParams
instance Arbitrary UpdateNodeParams where
arbitrary = do
l <- UpdateNodeParamsList <$> arbitrary
g <- UpdateNodeParamsGraph <$> arbitrary
t <- UpdateNodeParamsTexts <$> arbitrary
b <- UpdateNodeParamsBoard <$> arbitrary
elements [l,g,t,b]
instance FromJSON Method
instance ToJSON Method
instance ToSchema Method
instance Arbitrary Method where
arbitrary = elements [ minBound .. maxBound ]
instance FromJSON Granularity
instance ToJSON Granularity
instance ToSchema Granularity
instance Arbitrary Granularity where
arbitrary = elements [ minBound .. maxBound ]
instance FromJSON Charts
instance ToJSON Charts
instance ToSchema Charts
instance Arbitrary Charts where
arbitrary = elements [ minBound .. maxBound ]
------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Prelude.hs 0000664 0000000 0000000 00000010720 14124644201 0027611 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Prelude
Description : Server API main Types
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.API.Prelude
( module Gargantext.API.Prelude
, HasServerError(..)
, serverError
)
where
import Control.Concurrent (threadDelay)
import Control.Exception (Exception)
import Control.Lens (Prism', (#))
import Control.Lens.TH (makePrisms)
import Control.Monad.Error.Class (MonadError(..))
import Control.Monad.Except (ExceptT)
import Control.Monad.Reader (ReaderT)
import Crypto.JOSE.Error as Jose
import Data.Aeson.Types
import Data.Typeable
import Data.Validity
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Admin.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.Types
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error (NodeError(..), HasNodeError(..))
import Gargantext.Database.Query.Tree
import Gargantext.Prelude
import Servant
import Servant.Job.Async
import Servant.Job.Core (HasServerError(..), serverError)
class HasJoseError e where
_JoseError :: Prism' e Jose.Error
joseError :: (MonadError e m, HasJoseError e) => Jose.Error -> m a
joseError = throwError . (_JoseError #)
type EnvC env =
( HasConnectionPool env
, HasSettings env -- TODO rename HasDbSettings
, HasJobEnv env JobLog JobLog
, HasConfig env
, HasNodeStoryEnv env
)
type ErrC err =
( HasNodeError err
, HasInvalidError err
, HasTreeError err
, HasServerError err
, HasJoseError err
, ToJSON err -- TODO this is arguable
, Exception err
)
type GargServerC env err m =
( CmdRandom env err m
, HasNodeStory env err m
, EnvC env
, ErrC err
, MimeRender JSON err
)
type GargServerT env err m api = GargServerC env err m => ServerT api m
type GargServer api = forall env err m. GargServerT env err m api
-- This is the concrete monad. It needs to be used as little as possible.
type GargM env err = ReaderT env (ExceptT err IO)
-- This is the server type using GargM. It needs to be used as little as possible.
-- Instead, prefer GargServer, GargServerT.
type GargServerM env err api = (EnvC env, ErrC err) => ServerT api (GargM env err)
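-- A minimal, hypothetical sketch (illustration only; 'PingAPI' and 'pingServer'
-- do not exist in the codebase) of how these aliases are meant to be used:
-- handlers stay polymorphic via 'GargServer', and only the outermost wiring
-- instantiates the concrete 'GargM' monad.
--
-- > type PingAPI = "ping" :> Get '[JSON] Text
-- >
-- > pingServer :: GargServer PingAPI
-- > pingServer = pure "pong"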
-------------------------------------------------------------------
-- | This Type is needed to prepare the function before the GargServer
type GargNoServer t =
forall env err m. GargNoServer' env err m => m t
type GargNoServer' env err m =
( CmdM env err m
, HasNodeStory env err m
, HasSettings env
, HasNodeError err
)
-------------------------------------------------------------------
data GargError
= GargNodeError NodeError
| GargTreeError TreeError
| GargInvalidError Validation
| GargJoseError Jose.Error
| GargServerError ServerError
deriving (Show, Typeable)
makePrisms ''GargError
instance ToJSON GargError where
toJSON _ = String "SomeGargErrorPleaseReport"
instance Exception GargError
instance HasNodeError GargError where
_NodeError = _GargNodeError
instance HasInvalidError GargError where
_InvalidError = _GargInvalidError
instance HasTreeError GargError where
_TreeError = _GargTreeError
instance HasServerError GargError where
_ServerError = _GargServerError
instance HasJoseError GargError where
_JoseError = _GargJoseError
------------------------------------------------------------------------
-- | Utils
-- | Simulate logs
simuLogs :: MonadBase IO m
=> (JobLog -> m ())
-> Int
-> m JobLog
simuLogs logStatus t = do
_ <- mapM (\n -> simuTask logStatus n t) $ take t [0,1..]
pure $ JobLog { _scst_succeeded = Just t
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
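-- | Emit one intermediate 'JobLog' after a ~5 second delay (10^6 * 5
-- microseconds), reporting @cur@ steps done out of @total@.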
simuTask :: MonadBase IO m
=> (JobLog -> m ())
-> Int
-> Int
-> m ()
simuTask logStatus cur total = do
let m = (10 :: Int) ^ (6 :: Int)
liftBase $ threadDelay (m*5)
let status = JobLog { _scst_succeeded = Just cur
, _scst_failed = Just 0
, _scst_remaining = (-) <$> Just total <*> Just cur
, _scst_events = Just []
}
printDebug "status" status
logStatus status
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Public.hs 0000664 0000000 0000000 00000011602 14124644201 0027427 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Public
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.API.Public
where
import Data.Set (Set)
import Control.Lens ((^?), (^.), _Just)
import Data.Maybe (catMaybes)
import Data.Text (Text)
import Data.List (replicate, null)
import Data.Aeson
import Data.Swagger hiding (title, url)
import GHC.Generics (Generic)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import Gargantext.API.Prelude
import Gargantext.API.Node.File
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Prelude
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Hyperdata.CorpusField
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.NodeNode (selectPublicNodes)
import Gargantext.Core.Utils.DateUtils (utc2year)
import Gargantext.Database.Schema.Node -- (NodePoly(..))
import Gargantext.Prelude
import qualified Gargantext.Utils.Aeson as GUA
------------------------------------------------------------------------
type API = API_Home
:<|> API_Node
api :: Text -> GargServer API
api baseUrl = (api_home baseUrl)
:<|> api_node
-------------------------------------------------------------------------
type API_Home = Summary " Public Home API"
:> Get '[JSON] [PublicData]
api_home :: Text -> GargServer API_Home
api_home baseUrl = catMaybes
<$> map (toPublicData baseUrl)
<$> filterPublicDatas
<$> selectPublic
-------------------------------------------------------------------------
type API_Node = Summary " Public Node API"
:> Capture "node" NodeId
:> "file" :> FileApi
api_node :: NodeId -> GargServer FileApi
api_node nId = do
pubNodes <- publicNodes
-- TODO optimize with SQL
case Set.member nId pubNodes of
False -> panic "Not allowed" -- TODO throwErr
True -> fileApi 0 nId
-------------------------------------------------------------------------
selectPublic :: HasNodeError err
=> Cmd err [( Node HyperdataFolder, Maybe Int)]
selectPublic = selectPublicNodes
-- For tests only
-- pure $ replicate 6 defaultPublicData
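-- | Group the raw (node, maybe child id) rows by node id, merge the child
-- 'NodeId's, and keep only the nodes that end up with at least one child.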
filterPublicDatas :: [(Node HyperdataFolder, Maybe Int)]
-> [(Node HyperdataFolder, [NodeId])]
filterPublicDatas datas =
map (\(n,mi) ->
let mi' = NodeId <$> mi in
( _node_id n, (n, maybe [] (:[]) mi' ))
) datas
& Map.fromListWith (\(n1,i1) (_n2,i2) -> (n1, i1 <> i2))
& Map.filter (not . null . snd)
& Map.elems
publicNodes :: HasNodeError err
=> Cmd err (Set NodeId)
publicNodes = do
candidates <- filterPublicDatas <$> selectPublicNodes
pure $ Set.fromList
$ List.concat
$ map (\(n, ns) -> (_node_id n) : ns) candidates
-- http://localhost:8008/api/v1.0/node/23543/file/download
-- http://localhost:8000/images/Gargantextuel-212x300.jpg
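-- | Build a 'PublicData' card from the first JSON hyperdata field of a public
-- node; returns 'Nothing' when that field cannot be found or read.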
toPublicData :: Text -> (Node HyperdataFolder, [NodeId]) -> Maybe PublicData
toPublicData base (n , mn) = do
title <- (hd ^? (_Just . hf_data . cf_title))
abstract <- (hd ^? (_Just . hf_data . cf_desc ))
img <- (Just $ url' mn) -- "images/Gargantextuel-212x300.jpg"
url <- (Just $ url' mn)
date <- Just (cs $ show $ utc2year (n^.node_date))
database <- (hd ^? (_Just . hf_data . cf_query))
author <- (hd ^? (_Just . hf_data . cf_authors))
pure $ PublicData { .. }
where
hd = head
$ filter (\(HyperdataField cd _ _) -> cd == JSON)
$ n^. (node_hyperdata . hc_fields)
url' :: [NodeId] -> Text
url' mn' = base
<> "/public/"
<> (cs $ show $ (maybe 0 unNodeId $ head mn'))
<> "/file/download"
data PublicData = PublicData
{ title :: Text
, abstract :: Text
, img :: Text
, url :: Text
, date :: Text
, database :: Text
, author :: Text
} | NoData { nodata:: Text}
deriving (Generic)
instance FromJSON PublicData where
parseJSON = genericParseJSON (defaultOptions { sumEncoding = GUA.defaultTaggedObject })
instance ToJSON PublicData where
toJSON = genericToJSON (defaultOptions { sumEncoding = GUA.defaultTaggedObject })
instance ToSchema PublicData
instance Arbitrary PublicData where
arbitrary = elements
$ replicate 6 defaultPublicData
defaultPublicData :: PublicData
defaultPublicData =
PublicData { title = "Title"
, abstract = foldl (<>) "" $ replicate 100 "abstract "
, img = "images/Gargantextuel-212x300.jpg"
, url = "https://.."
, date = "YY/MM/DD"
, database = "database"
, author = "Author" }
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Routes.hs 0000664 0000000 0000000 00000024337 14124644201 0027503 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Routes
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
---------------------------------------------------------------------
module Gargantext.API.Routes
where
---------------------------------------------------------------------
-- import qualified Gargantext.API.Search as Search
import Control.Concurrent (threadDelay)
import Control.Lens (view)
import Data.Text (Text)
import Data.Validity
import Servant
import Servant.Auth as SA
import Servant.Auth.Swagger ()
import Servant.Job.Async
import Servant.Swagger.UI
import qualified Gargantext.API.Ngrams.List as List
import qualified Gargantext.API.Node.Contact as Contact
import qualified Gargantext.API.Node.Corpus.Annuaire as Annuaire
import qualified Gargantext.API.Node.Corpus.Export as Export
import qualified Gargantext.API.Node.Corpus.Export.Types as Export
import qualified Gargantext.API.Node.Corpus.New as New
import qualified Gargantext.API.Public as Public
import Gargantext.API.Admin.Auth.Types (AuthRequest, AuthResponse, AuthenticatedUser(..), PathId(..))
import Gargantext.API.Admin.Auth (withAccess)
import Gargantext.API.Admin.FrontEnd (FrontEndAPI)
import Gargantext.API.Count (CountAPI, count, Query)
import Gargantext.API.Job (jobLogInit)
import Gargantext.API.Ngrams (TableNgramsApi, apiNgramsTableDoc)
import Gargantext.API.Node
import Gargantext.API.Prelude
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Viz.Graph.API
import Gargantext.Database.Prelude (HasConfig(..))
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_max_docs_scrapers)
type GargAPI = "api" :> Summary "API " :> GargAPIVersion
-- | TODO :<|> Summary "Latest API" :> GargAPI'
type GargAPIVersion = "v1.0"
:> Summary "Garg API Version "
:> GargAPI'
type GargVersion = "version"
:> Summary "Backend version"
:> Get '[JSON] Text
type GargAPI' =
-- Auth endpoint
"auth" :> Summary "AUTH API"
:> ReqBody '[JSON] AuthRequest
:> Post '[JSON] AuthResponse
:<|> GargVersion
-- TODO-ACCESS here we want to request a particular header for
-- auth and capabilities.
:<|> GargPrivateAPI
:<|> "public" :> Public.API
type GargPrivateAPI = SA.Auth '[SA.JWT, SA.Cookie] AuthenticatedUser
:> GargPrivateAPI'
type GargAdminAPI
-- Roots endpoint
= "user" :> Summary "First user endpoint"
:> Roots
:<|> "nodes" :> Summary "Nodes endpoint"
:> ReqBody '[JSON] [NodeId] :> NodesAPI
type GargPrivateAPI' =
GargAdminAPI
-- Node endpoint
:<|> "node" :> Summary "Node endpoint"
:> Capture "node_id" NodeId
:> NodeAPI HyperdataAny
-- Corpus endpoints
:<|> "corpus" :> Summary "Corpus endpoint"
:> Capture "corpus_id" CorpusId
:> NodeAPI HyperdataCorpus
:<|> "corpus" :> Summary "Corpus endpoint"
:> Capture "node1_id" NodeId
:> "document"
:> Capture "node2_id" NodeId
:> NodeNodeAPI HyperdataAny
:<|> "corpus" :> Capture "node_id" CorpusId
:> Export.API
-- Annuaire endpoint
{-
:<|> "contact" :> Summary "Contact endpoint"
:> Capture "contact_id" ContactId
:> NodeAPI HyperdataContact
--}
:<|> "annuaire" :> Summary "Annuaire endpoint"
:> Capture "annuaire_id" AnnuaireId
:> NodeAPI HyperdataAnnuaire
:<|> "annuaire" :> Summary "Contact endpoint"
:> Capture "annuaire_id" NodeId
:> Contact.API
-- Document endpoint
:<|> "document" :> Summary "Document endpoint"
:> Capture "doc_id" DocId
:> "ngrams"
:> TableNgramsApi
-- :<|> "counts" :> Stream GET NewLineFraming '[JSON] Count :> CountAPI
-- TODO-SECURITY
:<|> "count" :> Summary "Count endpoint"
:> ReqBody '[JSON] Query
:> CountAPI
-- Corpus endpoint --> TODO rename s/search/filter/g
-- :<|> "search" :> Capture "corpus" NodeId
-- :> (Search.API Search.SearchResult)
-- TODO move to NodeAPI?
:<|> "graph" :> Summary "Graph endpoint"
:> Capture "graph_id" NodeId
:> GraphAPI
-- TODO move to NodeAPI?
-- Tree endpoint
:<|> "tree" :> Summary "Tree endpoint"
:> Capture "tree_id" NodeId
:> TreeAPI
-- :<|> New.Upload
:<|> New.AddWithForm
-- :<|> New.AddWithFile
:<|> New.AddWithQuery
-- :<|> "annuaire" :> Annuaire.AddWithForm
-- :<|> New.AddWithFile
-- :<|> "scraper" :> WithCallbacks ScraperAPI
-- :<|> "new" :> New.Api
-- TODO refactor the 3 routes below
:<|> List.GETAPI
:<|> List.JSONAPI
:<|> List.CSVAPI
{-
:<|> "wait" :> Summary "Wait test"
:> Capture "x" Int
:> WaitAPI -- Get '[JSON] Int
-}
-- /mv//
-- /merge//
-- /rename/
-- :<|> "static"
-- :<|> "list" :> Capture "node_id" Int :> NodeAPI
-- :<|> "ngrams" :> Capture "node_id" Int :> NodeAPI
-- :<|> "auth" :> Capture "node_id" Int :> NodeAPI
---------------------------------------------------------------------
type API = SwaggerAPI
:<|> GargAPI
:<|> FrontEndAPI
-- | API for serving @swagger.json@
type SwaggerAPI = SwaggerSchemaUI "swagger-ui" "swagger.json"
-- | API for serving main operational routes of @gargantext.org@
-- TODO
-- /mv//
-- /merge//
-- /rename/
-- :<|> "static"
-- :<|> "list" :> Capture "node_id" Int :> NodeAPI
-- :<|> "ngrams" :> Capture "node_id" Int :> NodeAPI
-- :<|> "auth" :> Capture "node_id" Int :> NodeAPI
---------------------------------------------------------------------
---------------------------------------------------------------------
-- | Server declarations
-- TODO-SECURITY admin only: withAdmin
-- Question: How do we mark admins?
serverGargAdminAPI :: GargServer GargAdminAPI
serverGargAdminAPI = roots
:<|> nodesAPI
serverPrivateGargAPI' :: AuthenticatedUser -> GargServer GargPrivateAPI'
serverPrivateGargAPI' (AuthenticatedUser (NodeId uid))
= serverGargAdminAPI
:<|> nodeAPI (Proxy :: Proxy HyperdataAny) uid
:<|> nodeAPI (Proxy :: Proxy HyperdataCorpus) uid
:<|> nodeNodeAPI (Proxy :: Proxy HyperdataAny) uid
:<|> Export.getCorpus -- uid
-- :<|> nodeAPI (Proxy :: Proxy HyperdataContact) uid
:<|> nodeAPI (Proxy :: Proxy HyperdataAnnuaire) uid
:<|> Contact.api uid
:<|> withAccess (Proxy :: Proxy TableNgramsApi) Proxy uid
<$> PathNode <*> apiNgramsTableDoc
:<|> count -- TODO: undefined
-- :<|> withAccess (Proxy :: Proxy (Search.API Search.SearchResult)) Proxy uid
-- <$> PathNode <*> Search.api -- TODO: move elsewhere
:<|> withAccess (Proxy :: Proxy GraphAPI) Proxy uid
<$> PathNode <*> graphAPI uid -- TODO: mock
:<|> withAccess (Proxy :: Proxy TreeAPI) Proxy uid
<$> PathNode <*> treeAPI
-- TODO access
:<|> addCorpusWithForm (RootId (NodeId uid))
-- :<|> addCorpusWithFile (RootId (NodeId uid))
:<|> addCorpusWithQuery (RootId (NodeId uid))
-- :<|> addAnnuaireWithForm
-- :<|> New.api uid -- TODO-SECURITY
-- :<|> New.info uid -- TODO-SECURITY
:<|> List.getApi
:<|> List.jsonApi
:<|> List.csvApi
-- :<|> waitAPI
----------------------------------------------------------------------
-- For Tests
type WaitAPI = Get '[JSON] Text
waitAPI :: Int -> GargServer WaitAPI
waitAPI n = do
let
m = (10 :: Int) ^ (6 :: Int)
_ <- liftBase $ threadDelay ( m * n)
pure $ "Waited: " <> (cs $ show n)
----------------------------------------
addCorpusWithQuery :: User -> GargServer New.AddWithQuery
addCorpusWithQuery user cid =
serveJobsAPI $
JobFunction (\q log' -> do
limit <- view $ hasConfig . gc_max_docs_scrapers
New.addToCorpusWithQuery user cid q (Just limit) (liftBase . log')
{- let log' x = do
printDebug "addToCorpusWithQuery" x
liftBase $ log x
-}
)
{-
addWithFile :: GargServer New.AddWithFile
addWithFile cid i f =
serveJobsAPI $
JobFunction (\_i log -> New.addToCorpusWithFile cid i f (liftBase . log))
-}
addCorpusWithForm :: User -> GargServer New.AddWithForm
addCorpusWithForm user cid =
serveJobsAPI $
JobFunction (\i log' ->
let
log'' x = do
printDebug "[addToCorpusWithForm] " x
liftBase $ log' x
in New.addToCorpusWithForm user cid i log'' (jobLogInit 3))
addCorpusWithFile :: User -> GargServer New.AddWithFile
addCorpusWithFile user cid =
serveJobsAPI $
JobFunction (\i log' ->
let
log'' x = do
printDebug "[addToCorpusWithFile]" x
liftBase $ log' x
in New.addToCorpusWithFile user cid i log'')
addAnnuaireWithForm :: GargServer Annuaire.AddWithForm
addAnnuaireWithForm cid =
serveJobsAPI $
JobFunction (\i log' -> Annuaire.addToAnnuaireWithForm cid i (liftBase . log'))
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Search.hs 0000664 0000000 0000000 00000024377 14124644201 0027433 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Search
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Search API part of Gargantext.
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE DeriveAnyClass #-}
module Gargantext.API.Search
where
import Data.Aeson hiding (defaultTaggedObject)
import Data.Maybe (fromMaybe)
import Data.Swagger hiding (fieldLabelModifier, Contact)
import Data.Text (Text)
import Data.Time (UTCTime)
import GHC.Generics (Generic)
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.Utils.Prefix (unPrefixSwagger, unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow.Pairing (isPairedWith)
import Gargantext.Database.Action.Search
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataContact(..), HyperdataDocument(..), ContactWho(..))
import Gargantext.Database.Admin.Types.Hyperdata.Contact (_cw_organization)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Facet
import Gargantext.Prelude
import Gargantext.Utils.Aeson (defaultTaggedObject)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import qualified Data.Text as Text
-----------------------------------------------------------------------
-- TODO-ACCESS: CanSearch? or is it part of CanGetNode
-- TODO-EVENTS: No event, this is a read-only query.
type API results = Summary "Search endpoint"
:> ReqBody '[JSON] SearchQuery
:> QueryParam "offset" Int
:> QueryParam "limit" Int
:> QueryParam "order" OrderBy
:> Post '[JSON] results
-----------------------------------------------------------------------
-- | Api search function
api :: NodeId -> GargServer (API SearchResult)
api nId (SearchQuery q SearchDoc) o l order =
SearchResult <$> SearchResultDoc
<$> map (toRow nId)
<$> searchInCorpus nId False q o l order
api nId (SearchQuery q SearchContact) o l order = do
printDebug "isPairedWith" nId
aIds <- isPairedWith nId NodeAnnuaire
-- TODO if paired with several corpus
case head aIds of
Nothing -> pure $ SearchResult
$ SearchNoResult "[G.A.Search] pair corpus with an Annuaire"
Just aId -> SearchResult
<$> SearchResultContact
<$> map (toRow aId)
<$> searchInCorpusWithContacts nId aId q o l order
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-- | Main Types
-----------------------------------------------------------------------
data SearchType = SearchDoc | SearchContact
deriving (Generic)
instance FromJSON SearchType where
parseJSON = genericParseJSON (defaultOptions { sumEncoding = ObjectWithSingleField })
instance ToJSON SearchType where
toJSON = genericToJSON (defaultOptions { sumEncoding = ObjectWithSingleField })
instance ToSchema SearchType
instance Arbitrary SearchType where
arbitrary = elements [SearchDoc, SearchContact]
-----------------------------------------------------------------------
data SearchQuery =
SearchQuery { query :: ![Text]
, expected :: !SearchType
}
deriving (Generic)
instance FromJSON SearchQuery where
parseJSON = genericParseJSON defaultOptions
instance ToJSON SearchQuery where
toJSON = genericToJSON defaultOptions
instance ToSchema SearchQuery
{-
where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
-}
instance Arbitrary SearchQuery where
arbitrary = elements [SearchQuery ["electrodes"] SearchDoc]
-- arbitrary = elements [SearchQuery "electrodes" 1 ] --SearchDoc]
-----------------------------------------------------------------------
data SearchResult =
SearchResult { result :: !SearchResultTypes}
deriving (Generic)
instance FromJSON SearchResult where
parseJSON = genericParseJSON defaultOptions
instance ToJSON SearchResult where
toJSON = genericToJSON defaultOptions
instance ToSchema SearchResult
{-
where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
-}
instance Arbitrary SearchResult where
arbitrary = SearchResult <$> arbitrary
data SearchResultTypes =
SearchResultDoc { docs :: ![Row] }
| SearchResultContact { contacts :: ![Row] }
| SearchNoResult { message :: !Text }
deriving (Generic)
instance FromJSON SearchResultTypes where
parseJSON = genericParseJSON (defaultOptions { sumEncoding = defaultTaggedObject })
instance ToJSON SearchResultTypes where
toJSON = genericToJSON (defaultOptions { sumEncoding = defaultTaggedObject })
instance Arbitrary SearchResultTypes where
arbitrary = do
srd <- SearchResultDoc <$> arbitrary
src <- SearchResultContact <$> arbitrary
srn <- pure $ SearchNoResult "No result because.."
elements [srd, src, srn]
instance ToSchema SearchResultTypes where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
--------------------------------------------------------------------
data Row =
Document { id :: !NodeId
, created :: !UTCTime
, title :: !Text
, hyperdata :: !HyperdataRow
, category :: !Int
, score :: !Int
}
| Contact { c_id :: !Int
, c_created :: !UTCTime
, c_hyperdata :: !HyperdataRow
, c_score :: !Int
, c_annuaireId :: !NodeId
}
deriving (Generic)
instance FromJSON Row
where
parseJSON = genericParseJSON
( defaultOptions { sumEncoding = defaultTaggedObject } )
instance ToJSON Row
where
toJSON = genericToJSON (defaultOptions { sumEncoding = defaultTaggedObject })
instance Arbitrary Row where
arbitrary = arbitrary
instance ToSchema Row where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
class ToRow a where
toRow :: NodeId -> a -> Row
instance ToRow FacetDoc where
toRow _ (FacetDoc { .. }) =
Document { id = facetDoc_id
, created = facetDoc_created
, title = facetDoc_title
, hyperdata = toHyperdataRow facetDoc_hyperdata
, category = fromMaybe 0 facetDoc_category
, score = round $ fromMaybe 0 facetDoc_score }
-- | TODO rename FacetPaired
type FacetContact = FacetPaired Int UTCTime HyperdataContact Int
instance ToRow FacetContact where
toRow annuaireId (FacetPaired nId utc h s) = Contact nId utc (toHyperdataRow h) s annuaireId
--------------------------------------------------------------------
data HyperdataRow =
HyperdataRowDocument { _hr_abstract :: !Text
, _hr_authors :: !Text
, _hr_bdd :: !Text
, _hr_doi :: !Text
, _hr_institutes :: !Text
, _hr_language_iso2 :: !Text
, _hr_page :: !Int
, _hr_publication_date :: !Text
, _hr_publication_day :: !Int
, _hr_publication_hour :: !Int
, _hr_publication_minute :: !Int
, _hr_publication_month :: !Int
, _hr_publication_second :: !Int
, _hr_publication_year :: !Int
, _hr_source :: !Text
, _hr_title :: !Text
, _hr_url :: !Text
, _hr_uniqId :: !Text
, _hr_uniqIdBdd :: !Text
}
| HyperdataRowContact { _hr_firstname :: !Text
, _hr_lastname :: !Text
, _hr_labs :: !Text
}
deriving (Generic)
instance FromJSON HyperdataRow
where
parseJSON = genericParseJSON
( defaultOptions
{ sumEncoding = defaultTaggedObject
, fieldLabelModifier = unCapitalize . dropPrefix "_hr_"
, omitNothingFields = False
}
)
instance ToJSON HyperdataRow
where
toJSON = genericToJSON
( defaultOptions
{ sumEncoding = defaultTaggedObject
, fieldLabelModifier = unCapitalize . dropPrefix "_hr_"
, omitNothingFields = False
}
)
instance Arbitrary HyperdataRow where
arbitrary = arbitrary
instance ToSchema HyperdataRow where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hr_")
class ToHyperdataRow a where
toHyperdataRow :: a -> HyperdataRow
instance ToHyperdataRow HyperdataDocument where
toHyperdataRow (HyperdataDocument { .. }) =
HyperdataRowDocument
{ _hr_abstract = fromMaybe "" _hd_abstract
, _hr_authors = fromMaybe "" _hd_authors
, _hr_bdd = fromMaybe "" _hd_bdd
, _hr_doi = fromMaybe "" _hd_doi
, _hr_institutes = fromMaybe "" _hd_institutes
, _hr_language_iso2 = fromMaybe "EN" _hd_language_iso2
, _hr_page = fromMaybe 0 _hd_page
, _hr_publication_date = fromMaybe "" _hd_publication_date
, _hr_publication_day = fromMaybe 1 _hd_publication_day
, _hr_publication_hour = fromMaybe 1 _hd_publication_hour
, _hr_publication_minute = fromMaybe 1 _hd_publication_minute
, _hr_publication_month = fromMaybe 1 _hd_publication_month
, _hr_publication_second = fromMaybe 1 _hd_publication_second
, _hr_publication_year = fromMaybe 2020 _hd_publication_year
, _hr_source = fromMaybe "" _hd_source
, _hr_title = fromMaybe "Title" _hd_title
, _hr_url = fromMaybe "" _hd_url
, _hr_uniqId = fromMaybe "" _hd_uniqId
, _hr_uniqIdBdd = fromMaybe "" _hd_uniqIdBdd }
instance ToHyperdataRow HyperdataContact where
toHyperdataRow (HyperdataContact { _hc_who = Just (ContactWho _ fn ln _ _), _hc_where = ou} ) =
HyperdataRowContact (fromMaybe "FirstName" fn) (fromMaybe "LastName" ln) ou'
where
ou' = maybe "CNRS" (Text.intercalate " " . _cw_organization) (head ou)
toHyperdataRow (HyperdataContact {}) =
HyperdataRowContact "FirstName" "LastName" "Labs"
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Server.hs 0000664 0000000 0000000 00000005533 14124644201 0027465 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Server
Description : REST API declaration
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
---------------------------------------------------------------------
module Gargantext.API.Server where
---------------------------------------------------------------------
import Control.Lens ((^.))
import Control.Monad.Except (withExceptT)
import Control.Monad.Reader (runReaderT)
import Data.Text (Text)
import Data.Version (showVersion)
import Servant
import Servant.Swagger.UI (swaggerSchemaUIServer)
import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Paths_gargantext as PG -- cabal magic build module
import qualified Gargantext.API.Public as Public
import Gargantext.API.Admin.Auth.Types (AuthContext)
import Gargantext.API.Admin.Auth (auth)
import Gargantext.API.Admin.FrontEnd (frontEndServer)
import Gargantext.API.Prelude
import Gargantext.API.Routes
import Gargantext.API.Swagger (swaggerDoc)
import Gargantext.API.ThrowAll (serverPrivateGargAPI)
import Gargantext.Database.Query.Table.Node.Error (NodeError(..))
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_url_backend_api)
serverGargAPI :: MimeRender JSON err => Text -> GargServerM env err GargAPI
serverGargAPI baseUrl -- orchestrator
= auth
:<|> gargVersion
:<|> serverPrivateGargAPI
:<|> Public.api baseUrl
-- :<|> orchestrator
where
gargVersion :: GargServer GargVersion
gargVersion = pure (cs $ showVersion PG.version)
-- | Server declarations
server :: forall env. EnvC env => env -> IO (Server API)
server env = do
-- orchestrator <- scrapyOrchestrator env
pure $ swaggerSchemaUIServer swaggerDoc
:<|> hoistServerWithContext
(Proxy :: Proxy GargAPI)
(Proxy :: Proxy AuthContext)
transform
(serverGargAPI (env ^. hasConfig . gc_url_backend_api))
:<|> frontEndServer
where
transform :: forall a. GargM env GargError a -> Handler a
transform = Handler . withExceptT showAsServantErr . (`runReaderT` env)
showAsServantErr :: GargError -> ServerError
showAsServantErr (GargNodeError err@NoListFound) = err404 { errBody = BL8.pack $ show err }
showAsServantErr (GargNodeError err@NoRootFound) = err404 { errBody = BL8.pack $ show err }
showAsServantErr (GargNodeError err@NoCorpusFound) = err404 { errBody = BL8.pack $ show err }
showAsServantErr (GargNodeError err@NoUserFound) = err404 { errBody = BL8.pack $ show err }
showAsServantErr (GargNodeError err@(DoesNotExist _)) = err404 { errBody = BL8.pack $ show err }
showAsServantErr (GargServerError err) = err
showAsServantErr a = err500 { errBody = BL8.pack $ show a }
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Swagger.hs 0000664 0000000 0000000 00000002540 14124644201 0027611 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Swagger
Description : Swagger API generation
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
---------------------------------------------------------------------
module Gargantext.API.Swagger where
---------------------------------------------------------------------
import Control.Lens
import Data.Swagger
import Data.Version (showVersion)
import Servant
import Servant.Swagger
import qualified Paths_gargantext as PG -- cabal magic build module
import Gargantext.API.Routes
import Gargantext.Prelude
-- | Swagger Specifications
swaggerDoc :: Swagger
swaggerDoc = toSwagger (Proxy :: Proxy GargAPI)
& info.title .~ "GarganText"
& info.version .~ (cs $ showVersion PG.version)
-- & info.base_url ?~ (URL "http://gargantext.org/")
& info.description ?~ "REST API specifications"
-- & tags .~ Set.fromList [Tag "Garg" (Just "Main operations") Nothing]
& applyTagsFor (subOperations (Proxy :: Proxy GargAPI)(Proxy :: Proxy GargAPI))
["Gargantext" & description ?~ "Main operations"]
& info.license ?~ ("AGPLV3 (English) and CECILL (French)" & url ?~ URL urlLicence )
where
urlLicence = "https://gitlab.iscpif.fr/gargantext/haskell-gargantext/blob/master/LICENSE"
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/Table.hs 0000664 0000000 0000000 00000013610 14124644201 0027241 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.Table
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-- TODO-ACCESS: CanGetNode
-- TODO-EVENTS: No events as this is a read only query.
Node API
-------------------------------------------------------------------
-- TODO-ACCESS: access by admin only.
-- At first let's just have an isAdmin check.
-- Later: check userId CanDeleteNodes Nothing
-- TODO-EVENTS: DeletedNodes [NodeId]
-- {"tag": "DeletedNodes", "nodes": [Int*]}
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Table
where
import Data.Aeson.TH (deriveJSON)
import Data.Maybe
import Data.Swagger
import Data.Text (Text())
import GHC.Generics (Generic)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Gargantext.API.HashedResponse
import Gargantext.API.Ngrams.Types (TabType(..))
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.Types (Offset, Limit, TableResult(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Action.Learn (FavOrTrash(..), moreLike)
import Gargantext.Database.Action.Search
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude -- (Cmd, CmdM)
import Gargantext.Database.Query.Facet (FacetDoc , runViewDocuments, runCountDocuments, OrderBy(..), runViewAuthorsDoc)
import Gargantext.Prelude
------------------------------------------------------------------------
type TableApi = Summary "Table API"
:> QueryParam "tabType" TabType
:> QueryParam "list" ListId
:> QueryParam "limit" Int
:> QueryParam "offset" Int
:> QueryParam "orderBy" OrderBy
:> QueryParam "query" Text
:> Get '[JSON] (HashedResponse FacetTableResult)
:<|> Summary "Table API (POST)"
:> ReqBody '[JSON] TableQuery
:> Post '[JSON] FacetTableResult
:<|> "hash" :>
Summary "Hash Table"
:> QueryParam "tabType" TabType
:> Get '[JSON] Text
data TableQuery = TableQuery
{ tq_offset :: Int
, tq_limit :: Int
, tq_orderBy :: OrderBy
, tq_view :: TabType
, tq_query :: Text
} deriving (Generic)
type FacetTableResult = TableResult FacetDoc
$(deriveJSON (unPrefix "tq_") ''TableQuery)
instance ToSchema TableQuery where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "tq_")
instance Arbitrary TableQuery where
arbitrary = elements [TableQuery { tq_offset = 0
, tq_limit = 10
, tq_orderBy = DateAsc
, tq_view = Docs
, tq_query = "electrodes" }]
tableApi :: NodeId -> GargServer TableApi
tableApi id' = getTableApi id'
:<|> postTableApi id'
:<|> getTableHashApi id'
getTableApi :: NodeId
-> Maybe TabType
-> Maybe ListId
-> Maybe Int
-> Maybe Int
-> Maybe OrderBy
-> Maybe Text
-> Cmd err (HashedResponse FacetTableResult)
getTableApi cId tabType _mListId mLimit mOffset mOrderBy mQuery = do
printDebug "[getTableApi] mQuery" mQuery
t <- getTable cId tabType mOffset mLimit mOrderBy mQuery
pure $ constructHashedResponse t
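-- | With an empty query, return the plain table for the requested view;
-- otherwise run the query as a full-text search over the documents (or trash).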
postTableApi :: NodeId -> TableQuery -> Cmd err FacetTableResult
postTableApi cId (TableQuery o l order ft "") = getTable cId (Just ft) (Just o) (Just l) (Just order) Nothing
postTableApi cId (TableQuery o l order ft q) = case ft of
Docs -> searchInCorpus' cId False [q] (Just o) (Just l) (Just order)
Trash -> searchInCorpus' cId True [q] (Just o) (Just l) (Just order)
x -> panic $ "not implemented in tableApi " <> (cs $ show x)
getTableHashApi :: NodeId -> Maybe TabType -> Cmd err Text
getTableHashApi cId tabType = do
HashedResponse { hash = h } <- getTableApi cId tabType Nothing Nothing Nothing Nothing Nothing
pure h
searchInCorpus' :: CorpusId
-> Bool
-> [Text]
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Cmd err FacetTableResult
searchInCorpus' cId t q o l order = do
docs <- searchInCorpus cId t q o l order
countAllDocs <- searchCountInCorpus cId t q
pure $ TableResult { tr_docs = docs, tr_count = countAllDocs }
getTable :: NodeId
-> Maybe TabType
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Maybe Text
-> Cmd err FacetTableResult
getTable cId ft o l order query = do
docs <- getTable' cId ft o l order query
docsCount <- runCountDocuments cId (ft == Just Trash) query
pure $ TableResult { tr_docs = docs, tr_count = docsCount }
getTable' :: NodeId
-> Maybe TabType
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Maybe Text
-> Cmd err [FacetDoc]
getTable' cId ft o l order query =
case ft of
(Just Docs) -> runViewDocuments cId False o l order query
(Just Trash) -> runViewDocuments cId True o l order query
(Just MoreFav) -> moreLike cId o l order IsFav
(Just MoreTrash) -> moreLike cId o l order IsTrash
x -> panic $ "not implemented in getTable: " <> (cs $ show x)
getPair :: ContactId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
-> Maybe OrderBy -> Cmd err [FacetDoc]
getPair cId ft o l order =
case ft of
(Just Docs) -> runViewAuthorsDoc cId False o l order
(Just Trash) -> runViewAuthorsDoc cId True o l order
_ -> panic $ "not implemented: get Pairing" <> (cs $ show ft)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/API/ThrowAll.hs 0000664 0000000 0000000 00000003225 14124644201 0027747 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.API.ThrowAll
Description : ThrowAll class and instance
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
module Gargantext.API.ThrowAll where
import Control.Monad.Except (MonadError(..))
import Control.Lens ((#))
import Servant
import Servant.Auth.Server (AuthResult(..))
import Gargantext.Prelude
import Gargantext.API.Prelude (GargServerM, _ServerError)
import Gargantext.API.Routes (GargPrivateAPI, serverPrivateGargAPI')
class ThrowAll' e a | a -> e where
-- | 'throwAll' is a convenience function to throw errors across an entire
-- sub-API
--
--
-- > throwAll err400 :: Handler a :<|> Handler b :<|> Handler c
-- > == throwError err400 :<|> throwError err400 :<|> throwError err400
throwAll' :: e -> a
instance (ThrowAll' e a, ThrowAll' e b) => ThrowAll' e (a :<|> b) where
throwAll' e = throwAll' e :<|> throwAll' e
-- Really this shouldn't be necessary - ((->) a) should be an instance of
-- MonadError, no?
instance {-# OVERLAPPING #-} ThrowAll' e b => ThrowAll' e (a -> b) where
throwAll' e = const $ throwAll' e
instance {-# OVERLAPPABLE #-} (MonadError e m) => ThrowAll' e (m a) where
throwAll' = throwError
serverPrivateGargAPI :: MimeRender JSON err => GargServerM env err GargPrivateAPI
serverPrivateGargAPI (Authenticated auser) = serverPrivateGargAPI' auser
serverPrivateGargAPI _ = throwAll' (_ServerError # err401)
-- Here throwAll' requires a concrete type for the monad.
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core.hs 0000664 0000000 0000000 00000003760 14124644201 0026476 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core
Description : Supported Natural language
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core
where
import Data.Text (Text)
import Data.Aeson
import Data.Either(Either(Left))
import Data.Hashable (Hashable)
import Data.Swagger
import GHC.Generics (Generic)
import Gargantext.Prelude
import Servant.API
------------------------------------------------------------------------
-- | Language of a Text
-- For simplicity, we suppose a text has a homogeneous language
--
-- Next steps: | DE | IT | SP
--
-- - EN == english
-- - FR == french
-- - DE == german (not implemented yet)
-- - IT == italian (not implemented yet)
-- - SP == spanish (not implemented yet)
--
-- ... add your language and help us to implement it (:
-- | All languages supported
-- TODO : DE | SP | CH
data Lang = EN | FR | All
deriving (Show, Eq, Ord, Bounded, Enum, Generic)
instance ToJSON Lang
instance FromJSON Lang
instance ToSchema Lang
instance FromHttpApiData Lang
where
parseUrlPiece "EN" = pure EN
parseUrlPiece "FR" = pure FR
parseUrlPiece "All" = pure All
parseUrlPiece _ = Left "Unexpected value of Lang"
instance Hashable Lang
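-- Illustrative example (doctest-style), relying on the derived 'Enum' and
-- 'Show' instances:
-- >>> allLangs
-- [EN,FR,All]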
allLangs :: [Lang]
allLangs = [minBound ..]
class HasDBid a where
toDBid :: a -> Int
fromDBid :: Int -> a
instance HasDBid Lang where
toDBid All = 0
toDBid FR = 1
toDBid EN = 2
fromDBid 0 = All
fromDBid 1 = FR
fromDBid 2 = EN
fromDBid _ = panic "HasDBid lang, not implemented"
------------------------------------------------------------------------
type Form = Text
type Lem = Text
------------------------------------------------------------------------
data PosTagAlgo = CoreNLP
deriving (Show, Read, Eq, Ord, Generic)
instance Hashable PosTagAlgo
instance HasDBid PosTagAlgo where
toDBid CoreNLP = 1
fromDBid 1 = CoreNLP
fromDBid _ = panic "HasDBid posTagAlgo : Not implemented"
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/ 0000775 0000000 0000000 00000000000 14124644201 0026134 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Ext/ 0000775 0000000 0000000 00000000000 14124644201 0026674 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Ext/IMT.hs 0000664 0000000 0000000 00000011332 14124644201 0027661 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Ext.IMT
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Ext.IMT where
import Data.Either (Either(..))
import Data.Map (Map)
import Data.Text (Text, splitOn)
import qualified Data.Set as S
import qualified Data.List as DL
import qualified Data.Vector as DV
import qualified Data.Map as M
import qualified Prelude as Prelude
import Gargantext.Prelude
import Gargantext.Core.Text.Metrics.Utils as Utils
import Gargantext.Core.Text.Corpus.Parsers.CSV as CSV
data School = School { school_shortName :: Text
, school_longName :: Text
, school_id :: Text
} deriving (Show, Read, Eq)
schools :: [School]
schools = [ School
{ school_shortName = "Mines Albi-Carmaux"
, school_longName = "Mines Albi-Carmaux - École nationale supérieure des Mines d'Albi‐Carmaux"
, school_id = "469216" }
, School
{ school_shortName = "Mines Alès"
, school_longName = "EMA - École des Mines d'Alès"
, school_id = "6279" }
, School
{ school_shortName = "Mines Douai"
, school_longName = "Mines Douai EMD - École des Mines de Douai"
, school_id = "224096" }
, School
{ school_shortName = "Mines Lille"
, school_longName = "Mines Lille - École des Mines de Lille"
, school_id = "144103" }
, School
{ school_shortName = "IMT Lille Douai"
, school_longName = "IMT Lille Douai"
, school_id = "497330" }
, School
{ school_shortName = "Mines Nantes"
, school_longName = "Mines Nantes - Mines Nantes"
, school_id = "84538" }
, School
{ school_shortName = "Télécom Bretagne"
, school_longName = "Télécom Bretagne"
, school_id = "301262" }
, School
{ school_shortName = "IMT Atlantique"
, school_longName = "IMT Atlantique - IMT Atlantique Bretagne-Pays de la Loire"
, school_id = "481355" }
, School
{ school_shortName = "Mines Saint-Étienne"
, school_longName = "Mines Saint-Étienne MSE - École des Mines de Saint-Étienne"
, school_id = "29212" }
, School
{ school_shortName = "Télécom École de Management"
, school_longName = "TEM - Télécom Ecole de Management"
, school_id = "301442" }
, School
{ school_shortName = "IMT Business School"
, school_longName = "IMT Business School"
, school_id = "542824" }
, School
{ school_shortName = "Télécom ParisTech"
, school_longName = "Télécom ParisTech"
, school_id = "300362" }
, School
{ school_shortName = "Télécom SudParis"
, school_longName = "TSP - Télécom SudParis"
, school_id = "352124" }
, School
{ school_shortName = "ARMINES"
, school_longName = "ARMINES"
, school_id = "300362" }
, School
{ school_shortName = "Eurecom"
, school_longName = "Eurecom"
, school_id = "421532" }
, School
{ school_shortName = "Mines ParisTech"
, school_longName = "MINES ParisTech - École nationale supérieure des mines de Paris"
, school_id = "301492" }
]
mapIdSchool :: Map Text Text
mapIdSchool = M.fromList $ Gargantext.Prelude.map
(\(School { school_shortName, school_id }) -> (school_id, school_shortName)) schools
hal_data :: IO (Either Prelude.String (DV.Vector CsvHal))
hal_data = do
r <- CSV.readCsvHal "doc/corpus_imt/Gargantext_Corpus.csv"
pure $ snd <$> r
names :: S.Set Text
names = S.fromList $ Gargantext.Prelude.map (\s -> school_id s) schools
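-- Illustrative examples (doctest-style): a known school id is mapped to its
-- short name, an unknown id is returned unchanged.
-- >>> toSchoolName "469216"
-- "Mines Albi-Carmaux"
-- >>> toSchoolName "not-an-id"
-- "not-an-id"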
toSchoolName :: Text -> Text
toSchoolName t = case M.lookup t mapIdSchool of
Nothing -> t
Just t' -> t'
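-- | Number of 2017 publications per known school, most published first,
-- computed from the institution ids attached to each CSV row.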
publisBySchool :: DV.Vector CsvHal -> [(Maybe Text, Int)]
publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSchool, n))
$ DL.filter (\i -> S.member (fst i) names)
$ DL.reverse
$ DL.sortOn snd
$ M.toList
$ Utils.freq
$ DL.concat
$ DV.toList
$ DV.map (\n -> splitOn ( ", ") (csvHal_instStructId_i n) )
$ DV.filter (\n -> csvHal_publication_year n == 2017) hal_data'
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Ext/IMTUser.hs 0000664 0000000 0000000 00000016260 14124644201 0030525 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Ext.IMTUser
Description : Interface to get IMT users
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
We cannot import the IMT Client API code since it is copyrighted.
A common interface is written here instead.
-}
module Gargantext.Core.Ext.IMTUser -- (deserialiseImtUsersFromFile)
where
import Codec.Serialise
import Data.Csv
import Data.Either
import Data.Maybe (catMaybes)
import Data.Text (Text)
import Data.Vector (Vector)
import GHC.Generics (Generic)
import Gargantext.Core.Text.Corpus.Parsers.CSV
import Gargantext.Database.Admin.Types.Hyperdata.Contact
import Gargantext.Prelude
import System.FilePath.Posix (takeExtension)
import System.IO (FilePath)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Vector as Vector
------------------------------------------------------------------------
readFile_Annuaire :: FilePath -> IO [HyperdataContact]
readFile_Annuaire fp = case takeExtension fp of
".csv" -> readCSVFile_Annuaire fp
".data" -> deserialiseImtUsersFromFile fp
_ -> panic "[G.C.E.I.readFile_Annuaire] extension unknown"
------------------------------------------------------------------------
data IMTUser = IMTUser
{ id :: Maybe Text
, entite :: Maybe Text
, mail :: Maybe Text
, nom :: Maybe Text
, prenom :: Maybe Text
, fonction :: Maybe Text
, fonction2 :: Maybe Text
, tel :: Maybe Text
, fax :: Maybe Text
, service :: Maybe Text
, groupe :: Maybe Text
, entite2 :: Maybe Text
, service2 :: Maybe Text
, groupe2 :: Maybe Text
, bureau :: Maybe Text
, url :: Maybe Text
, pservice :: Maybe Text
, pfonction :: Maybe Text
, afonction :: Maybe Text
, afonction2 :: Maybe Text
, grprech :: Maybe Text
, appellation :: Maybe Text
, lieu :: Maybe Text
, aprecision :: Maybe Text
, atel :: Maybe Text
, sexe :: Maybe Text
, statut :: Maybe Text
, idutilentite :: Maybe Text
, actif :: Maybe Text
, idutilsiecoles :: Maybe Text
, date_modification :: Maybe Text
} deriving (Eq, Show, Generic)
-- | CSV instance
instance FromNamedRecord IMTUser where
parseNamedRecord r = do
id <- r .: "id"
entite <- r .: "entite"
mail <- r .: "mail"
nom <- r .: "nom"
prenom <- r .: "prenom"
fonction <- r .: "fonction"
fonction2 <- r .: "fonction2"
tel <- r .: "tel"
fax <- r .: "fax"
service <- r .: "service"
groupe <- r .: "groupe"
entite2 <- r .: "entite2"
service2 <- r .: "service2"
groupe2 <- r .: "groupe2"
bureau <- r .: "bureau"
url <- r .: "url"
pservice <- r .: "pservice"
pfonction <- r .: "pfonction"
afonction <- r .: "afonction"
afonction2 <- r .: "afonction2"
grprech <- r .: "grprech"
appellation <- r .: "appellation"
lieu <- r .: "lieu"
aprecision <- r .: "aprecision"
atel <- r .: "atel"
sexe <- r .: "sexe"
statut <- r .: "statut"
idutilentite <- r .: "idutilentite"
actif <- r .: "actif"
idutilsiecoles <- r .: "idutilsiecoles"
date_modification <- r .: "date_modification"
pure $ IMTUser {..}
headerCSVannuaire :: Header
headerCSVannuaire =
header ["id","entite","mail","nom","prenom","fonction","fonction2","tel","fax","service","groupe","entite2","service2","groupe2","bureau","url","pservice","pfonction","afonction","afonction2","grprech","appellation","lieu","aprecision","atel","sexe","statut","idutilentite","actif","idutilsiecoles","date_modification"]
readCSVFile_Annuaire :: FilePath -> IO [HyperdataContact]
readCSVFile_Annuaire fp = do
users <- snd <$> readCSVFile_Annuaire' fp
pure $ map imtUser2gargContact $ Vector.toList users
readCSVFile_Annuaire' :: FilePath -> IO (Header, Vector IMTUser)
readCSVFile_Annuaire' = fmap readCsvHalLazyBS' . BL.readFile
where
readCsvHalLazyBS' :: BL.ByteString -> (Header, Vector IMTUser)
readCsvHalLazyBS' bs = case decodeByNameWith csvDecodeOptions bs of
Left e -> panic (cs e)
Right rows -> rows
------------------------------------------------------------------------
-- | Serialization for optimization
instance Serialise IMTUser
deserialiseImtUsersFromFile :: FilePath -> IO [HyperdataContact]
deserialiseImtUsersFromFile filepath = map imtUser2gargContact <$> deserialiseFromFile' filepath
deserialiseFromFile' :: FilePath -> IO [IMTUser]
deserialiseFromFile' filepath = deserialise <$> BL.readFile filepath
------------------------------------------------------------------------
imtUser2gargContact :: IMTUser -> HyperdataContact
--imtUser2gargContact (IMTUser id' entite' mail' nom' prenom' fonction' _fonction2' tel' _fax'
-- service' _groupe' _entite2 _service2 _group2 bureau' url' _pservice' _pfonction' _afonction' _afonction2'
-- _grprech' _appellation' lieu' _aprecision' _atel' _sexe' _statut' _idutilentite'
-- _actif' _idutilsiecoles' date_modification')
-- = HyperdataContact (Just "IMT Annuaire") (Just qui) [ou] ((<>) <$> (fmap (\p -> p <> " ") prenom') <*> nom') entite' date_modification' Nothing Nothing
imtUser2gargContact (IMTUser { id
, entite
, mail
, nom
, prenom
, fonction
, tel
, service
, bureau
, url
, lieu
, date_modification }) =
HyperdataContact { _hc_bdd = Just "IMT Annuaire"
, _hc_who = Just qui
, _hc_where = [ou]
, _hc_title = title
, _hc_source = entite
, _hc_lastValidation = date_modification
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
where
title = (<>) <$> (fmap (\p -> p <> " ") prenom) <*> nom
qui = ContactWho { _cw_id = id
, _cw_firstName = prenom
, _cw_lastName = nom
, _cw_keywords = catMaybes [service]
, _cw_freetags = [] }
ou = ContactWhere { _cw_organization = toList entite
, _cw_labTeamDepts = toList service
, _cw_role = fonction
, _cw_office = bureau
, _cw_country = Just "France"
, _cw_city = lieu
, _cw_touch = contact
, _cw_entry = Nothing
, _cw_exit = Nothing }
contact = Just $ ContactTouch { _ct_mail = mail
, _ct_phone = tel
, _ct_url = url }
-- meta = ContactMetaData (Just "IMT annuaire") date_modification'
toList Nothing = []
toList (Just x) = [x]
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Flow/ 0000775 0000000 0000000 00000000000 14124644201 0027043 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Flow/Ngrams.hs 0000664 0000000 0000000 00000000651 14124644201 0030630 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Flow.Ngrams
Description : Core Flow main fun for Ngrams
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstrainedClassMethods #-}
module Gargantext.Core.Flow.Ngrams where
-- import Gargantext.Core.Text.Terms.WithList (filterWith)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Flow/Types.hs 0000664 0000000 0000000 00000001773 14124644201 0030513 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Flow.Types
Description : Core Flow main Types
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstrainedClassMethods #-}
module Gargantext.Core.Flow.Types where
import Control.Lens
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Schema.Node (node_hash_id)
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)
class UniqId a
where
uniqId :: Lens' a (Maybe Hash)
instance UniqId HyperdataDocument
where
uniqId = hd_uniqId
instance UniqId HyperdataContact
where
uniqId = hc_uniqId
instance UniqId (Node a)
where
uniqId = node_hash_id
{-
data DocumentIdWithNgrams a = DocumentIdWithNgrams
{ documentWithId :: !(Indexed NodeId a)
, documentNgrams :: !(Map Ngrams (Map NgramsType Int))
} deriving (Show)
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Mail.hs 0000664 0000000 0000000 00000010566 14124644201 0027362 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Mail
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO put main configuration variables in gargantext.ini
-}
module Gargantext.Core.Mail
where
import Data.Text (Text, unlines, splitOn)
import Gargantext.Core.Types.Individu
import Gargantext.Prelude
import Gargantext.Prelude.Mail (gargMail, GargMail(..))
import qualified Data.List as List
-- | Tool to put elsewhere
isEmail :: Text -> Bool
isEmail = ((==) 2) . List.length . (splitOn "@")
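-- Examples (the address is considered valid iff splitting on "@" yields exactly two chunks):
--
-- >>> isEmail "alice@example.org"
-- True
-- >>> isEmail "not-an-email"
-- False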
------------------------------------------------------------------------
data SendEmail = SendEmail Bool
type EmailAddress = Text
type Name = Text
type ServerAddress = Text
data MailModel = Invitation { invitation_user :: NewUser GargPassword }
| PassUpdate { passUpdate_user :: NewUser GargPassword }
| MailInfo { mailInfo_username :: Name
, mailInfo_address :: EmailAddress
}
------------------------------------------------------------------------
------------------------------------------------------------------------
mail :: ServerAddress -> MailModel -> IO ()
mail server model = gargMail (GargMail m (Just u) subject body)
where
(m,u) = email_to model
subject = email_subject model
body = emailWith server model
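-- A minimal usage sketch (hypothetical server address and recipient):
--
-- > mail "https://gargantext.org" (MailInfo { mailInfo_username = "alice"
-- >                                         , mailInfo_address  = "alice@example.org" })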
------------------------------------------------------------------------
emailWith :: ServerAddress -> MailModel -> Text
emailWith server model =
unlines $ [ "Hello" ]
<> bodyWith server model
<> email_disclaimer
<> email_signature
------------------------------------------------------------------------
email_to :: MailModel -> (EmailAddress, Name)
email_to (Invitation user) = email_to' user
email_to (PassUpdate user) = email_to' user
email_to (MailInfo u m) = (m, u)
email_to' :: NewUser GargPassword -> (EmailAddress, Name)
email_to' (NewUser u m _) = (m,u)
------------------------------------------------------------------------
bodyWith :: ServerAddress -> MailModel -> [Text]
bodyWith server (Invitation u) = [ "Congratulations, you have been granted a beta user account to test the"
, "new GarganText platform!"
] <> (email_credentials server u)
bodyWith server (PassUpdate u) = [ "Your account password has been updated on the GarganText platform!"
] <> (email_credentials server u)
bodyWith server (MailInfo _ _) = [ "Your last analysis is now complete on the server: " <> server]
------------------------------------------------------------------------
email_subject :: MailModel -> Text
email_subject (Invitation _) = "[GarganText] Invitation"
email_subject (PassUpdate _) = "[GarganText] Update"
email_subject (MailInfo _ _) = "[GarganText] Info"
email_credentials :: ServerAddress -> NewUser GargPassword -> [Text]
email_credentials server (NewUser u _ (GargPassword p)) =
[ ""
, "You can log in to: " <> server
, "Your username is: " <> u
, "Your password is: " <> p
, ""
]
email_disclaimer :: [Text]
email_disclaimer =
[ ""
, "If you log in you agree with the following terms of use:"
, " https://gitlab.iscpif.fr/humanities/tofu/tree/master"
, ""
, ""
, "/!\\ Please note that your account is opened for beta tester only. Hence"
, "we cannot guarantee neither the perenniality nor the stability of the"
, "service at this stage. It is therefore advisable to back up important"
, "data regularly."
, ""
, "/!\\ Gargantext is an academic service supported by ISC-PIF partners."
, "In case of congestion on this service, access to members of the ISC-PIF"
, "partners will be privileged."
, ""
, "Your feedback will be valuable for further development of the platform,"
, "do not hesitate to contact us and to contribute on our forum:"
, ""
, " https://discourse.iscpif.fr/c/gargantext"
, ""
]
email_signature :: [Text]
email_signature =
[ "With our best regards,"
, "-- "
, "The Gargantext Team (CNRS)"
]
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/ 0000775 0000000 0000000 00000000000 14124644201 0027537 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Distances.hs0000664 0000000 0000000 00000003214 14124644201 0032010 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Methods.Distances
Description : Distance management tools
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Strict #-}
module Gargantext.Core.Methods.Distances
where
import Data.Aeson
import Data.Array.Accelerate (Matrix)
import Data.Swagger
import GHC.Generics (Generic)
import Gargantext.Core.Methods.Distances.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Methods.Distances.Accelerate.Distributional (logDistributional)
import Gargantext.Prelude (Ord, Eq, Int, Double)
import Gargantext.Prelude (Show)
import Prelude (Enum, Bounded, minBound, maxBound)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
------------------------------------------------------------------------
data Distance = Conditional | Distributional
deriving (Show)
measure :: Distance -> Matrix Int -> Matrix Double
measure Conditional = measureConditional
measure Distributional = logDistributional
------------------------------------------------------------------------
withMetric :: GraphMetric -> Distance
withMetric Order1 = Conditional
withMetric Order2 = Distributional
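-- A minimal sketch of how the two functions above compose (hypothetical
-- cooccurrence matrix @m :: Matrix Int@):
--
-- > measure (withMetric Order2) m  -- distributional similarity matrix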
------------------------------------------------------------------------
data GraphMetric = Order1 | Order2
deriving (Generic, Eq, Ord, Enum, Bounded, Show)
instance FromJSON GraphMetric
instance ToJSON GraphMetric
instance ToSchema GraphMetric
instance Arbitrary GraphMetric where
arbitrary = elements [ minBound .. maxBound ]
------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Distances/ 0000775 0000000 0000000 00000000000 14124644201 0031454 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Distances/Accelerate/ 0000775 0000000 0000000 00000000000 14124644201 0033425 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Distances/Accelerate/Conditional.hs 0000664 0000000 0000000 00000005525 14124644201 0036233 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Methods.Distances.Accelerate.Conditional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This module aims at implementing distances between terms, context by context,
within the same referential corpus.
The implementation uses the Accelerate library, which enables GPU and CPU computation
(see Gargantext.Core.Methods.Graph.Accelerate).
-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Methods.Distances.Accelerate.Conditional
where
-- import qualified Data.Foldable as P (foldl1)
-- import Debug.Trace (trace)
import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
import qualified Gargantext.Prelude as P
-- * Metrics of proximity
-----------------------------------------------------------------------
-- ** Conditional distance
-- *** Conditional distance (basic)
-- | Conditional distance (basic version)
--
-- 2 main metrics are actually implemented in order to compute the
-- proximity of two terms: conditional and distributional
--
-- Conditional metric is an absolute metric which reflects
-- interactions of 2 terms in the corpus.
measureConditional :: Matrix Int -> Matrix Double
measureConditional m = run $ zipWith (/) m' (matSumCol d m')
where
m' = map fromIntegral (use m)
d = dim m
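-- Usage sketch (assuming @theMatrixInt@, used in the doctests of the Distributional
-- module, is in scope via the Utils import; resulting values not shown here):
--
-- > measureConditional (theMatrixInt 3)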
-- *** Conditional distance (advanced)
-- | Conditional distance (advanced version)
--
-- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called
-- "confidence", is the maximum, between @i@ and @j@, of the probability
-- of seeing @i@ in the same context as @j@ knowing @j@.
--
-- If \[n_i\] (resp. \[n_j\]) is the number of occurrences of @i@ (resp. @j@)
-- in the corpus and \[n_{ij}\] the number of their co-occurrences, we get:
--
-- \[P_c=max(\frac{n_{ij}}{n_i},\frac{n_{ij}}{n_j})\]
conditional' :: Matrix Int -> (Matrix GenericityInclusion, Matrix SpecificityExclusion)
conditional' m = ( run $ ie $ map fromIntegral $ use m
, run $ sg $ map fromIntegral $ use m
)
where
ie :: Acc (Matrix Double) -> Acc (Matrix Double)
ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
sg :: Acc (Matrix Double) -> Acc (Matrix Double)
sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
n :: Exp Double
n = P.fromIntegral r
r :: Dim
r = dim m
xs :: Acc (Matrix Double) -> Acc (Matrix Double)
xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Distances/Accelerate/Distributional.hs 0000664 0000000 0000000 00000020040 14124644201 0036751 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Methods.Distances.Accelerate.Distributional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
* Distributional Distance metric
__Definition :__ The distributional metric is a relative metric which depends on the
selected list; it represents structural equivalence in terms of mutual information.
__Objective :__ We want to compute, with matrix operations, the similarity between term $i$ and term $j$:
distr(i,j)=$\frac{\Sigma_{k \neq i,j} min(\frac{n_{ik}^2}{n_{ii}n_{kk}},\frac{n_{jk}^2}{n_{jj}n_{kk}})}{\Sigma_{k \neq i}\frac{n_{ik}^2}{ n_{ii}n_{kk}}}$
where $n_{ij}$ is the cooccurrence between term $i$ and term $j$
* For a vector V=[$x_1$ ... $x_n$], we note $|V|_1=\Sigma_ix_i$
* operators .* and ./ : cell-by-cell multiplication and division of matrices
* operator * is the matrix multiplication
* Matrix M=[$n_{ij}$]$_{i,j}$
* operator : Diag(M)=[$n_{ii}$]$_i$ (vector)
* Id = identity matrix
* O=[1]$_{i,j}$ (all-ones matrix)
* D(M)=Id .* M
* O * D(M) =[$n_{jj}$]$_{i,j}$
* D(M) * O =[$n_{ii}$]$_{i,j}$
* $V_i=[0~0~0~1~0~0~0]'$ with the 1 at position $i$
* MI=(M ./ O * D(M)) .* (M / D(M) * O )
* distr(i,j)=$\frac{|min(V'_i * (MI-D(MI)),V'_j * (MI-D(MI)))|_1}{|V'_i.(MI-D(MI))|_1}$
[Specifications written by David Chavalarias on Garg v4 shared NodeWrite, team Pyremiel 2020]
-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Methods.Distances.Accelerate.Distributional
where
-- import qualified Data.Foldable as P (foldl1)
-- import Debug.Trace (trace)
import Data.Array.Accelerate as A
import Data.Array.Accelerate.Interpreter (run)
import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import qualified Gargantext.Prelude as P
-- | `distributional m` returns the distributional distance between each
-- pair of terms as a matrix. The argument m is the matrix $[n_{ij}]_{i,j}$
-- where $n_{ij}$ is the cooccurrence count between term $i$ and term $j$.
--
-- ## Basic example with Matrix of size 3:
--
-- >>> theMatrixInt 3
-- Matrix (Z :. 3 :. 3)
-- [ 7, 4, 0,
-- 4, 5, 3,
-- 0, 3, 4]
--
-- >>> distributional $ theMatrixInt 3
-- Matrix (Z :. 3 :. 3)
-- [ 1.0, 0.0, 0.9843749999999999,
-- 0.0, 1.0, 0.0,
-- 1.0, 0.0, 1.0]
--
-- ## Basic example with Matrix of size 4:
--
-- >>> theMatrixInt 4
-- Matrix (Z :. 4 :. 4)
-- [ 4, 1, 2, 1,
-- 1, 4, 0, 0,
-- 2, 0, 3, 3,
-- 1, 0, 3, 3]
--
-- >>> distributional $ theMatrixInt 4
-- Matrix (Z :. 4 :. 4)
-- [ 1.0, 0.0, 0.5714285714285715, 0.8421052631578947,
-- 0.0, 1.0, 1.0, 1.0,
-- 8.333333333333333e-2, 4.6875e-2, 1.0, 0.25,
-- 0.3333333333333333, 5.7692307692307696e-2, 1.0, 1.0]
--
distributional :: Matrix Int -> Matrix Double
distributional m' = run result
where
m = map fromIntegral $ use m'
n = dim m'
diag_m = diag m
d_1 = replicate (constant (Z :. n :. All)) diag_m
d_2 = replicate (constant (Z :. All :. n)) diag_m
mi = (.*) ((./) m d_1) ((./) m d_2)
-- w = (.-) mi d_mi
-- The matrix permutation is taken care of below by directly replicating
-- the matrix mi, making the matrix w unnecessary and saving one step.
w_1 = replicate (constant (Z :. All :. n :. All)) mi
w_2 = replicate (constant (Z :. n :. All :. All)) mi
w' = zipWith min w_1 w_2
-- The matrix ii = [r_{i,j,k}]_{i,j,k} has r_(i,j,k) = 0 if k = i OR k = j
-- and r_(i,j,k) = 1 otherwise (i.e. k /= i AND k /= j).
ii = generate (constant (Z :. n :. n :. n))
(lift1 (\(Z :. i :. j :. k) -> cond ((&&) ((/=) k i) ((/=) k j)) 1 0))
z_1 = sum ((.*) w' ii)
z_2 = sum ((.*) w_1 ii)
result = termDivNan z_1 z_2
logDistributional :: Matrix Int -> Matrix Double
logDistributional m = run
$ diagNull n
$ matMiniMax
$ logDistributional' n m
where
n = dim m
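-- Usage sketch: @distriTest@ at the end of this module wires this function up
-- with the test matrix, e.g.
--
-- > logDistributional (theMatrixInt 100)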
logDistributional' :: Int -> Matrix Int -> Acc (Matrix Double)
logDistributional' n m' = result
where
m = map fromIntegral $ use m'
-- Scalar. Sum of all elements of m.
to = the $ sum (flatten m)
-- Diagonal matrix with the diagonal of m.
d_m = (.*) m (matrixIdentity n)
-- Size n vector. s = [s_i]_i
s = sum ((.-) m d_m)
-- Matrix nxn. Vector s replicated as rows.
s_1 = replicate (constant (Z :. All :. n)) s
-- Matrix nxn. Vector s replicated as columns.
s_2 = replicate (constant (Z :. n :. All)) s
-- Matrix nxn. ss = [s_i * s_j]_{i,j}. Outer product of s with itself.
ss = (.*) s_1 s_2
-- Matrix nxn. mi = [m_{i,j}]_{i,j} where
-- m_{i,j} = 0 if n_{i,j} = 0 or i = j,
-- m_{i,j} = log(to * n_{i,j} / s_{i,j}) otherwise.
mi = (.*) (matrixEye n)
(map (lift1 (\x -> cond (x == 0) 0 (log (x * to)))) ((./) m ss))
-- Tensor nxnxn. Matrix mi replicated along the 2nd axis.
w_1 = replicate (constant (Z :. All :. n :. All)) mi
-- Tensor nxnxn. Matrix mi replicated along the 1st axis.
w_2 = replicate (constant (Z :. n :. All :. All)) mi
-- Tensor nxnxn.
w' = zipWith min w_1 w_2
-- A predicate that is true when the input (i, j, k) satisfy
-- k /= i AND k /= j
k_diff_i_and_j = lift1 (\(Z :. i :. j :. k) -> ((&&) ((/=) k i) ((/=) k j)))
-- Matrix nxn.
sumMin = sum (condOrDefault k_diff_i_and_j 0 w')
-- Matrix nxn. All columns are the same.
sumM = sum (condOrDefault k_diff_i_and_j 0 w_1)
result = termDivNan sumMin sumM
-- The distributional metric P(c) of terms @i@ and @j@ is: \[
-- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
-- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{} MI_{ik}} \]
--
-- Mutual information
-- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
--
-- Number of cooccurrences of @i@ and @j@ in the same context of text
-- \[C{ij}\]
--
-- The expected value of the cooccurrences @i@ and @j@ (given a map list of size @n@)
-- \[E_{ij}^{m} = \frac {S_{i} S_{j}} {N_{m}}\]
--
-- Total cooccurrences of term @i@ given a map list of size @m@
-- \[S_{i} = \sum_{j, j \neq i}^{m} S_{ij}\]
--
-- Total cooccurrences of terms given a map list of size @m@
-- \[N_{m} = \sum_{i}^{m} \sum_{j, j \neq i}^{m} S_{ij}\]
--
distributional'' :: Matrix Int -> Matrix Double
distributional'' m = -- run {- $ matMiniMax -}
run $ diagNull n
$ rIJ n
$ filterWith 0 100
$ filter' 0
$ s_mi
$ map fromIntegral
{- from Int to Double -}
$ use m
{- push matrix in Accelerate type -}
where
_ri :: Acc (Matrix Double) -> Acc (Matrix Double)
_ri mat = mat1 -- zipWith (/) mat1 mat2
where
mat1 = matSumCol n $ zipWith min (_myMin mat) (_myMin $ filterWith 0 100 $ diagNull n $ transpose mat)
_mat2 = total mat
_myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
_myMin = replicate (constant (Z :. n :. All)) . minimum
-- TODO fix NaN
-- Quali TEST: OK
s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
s_mi m' = zipWith (\x y -> log (x / y)) (diagNull n m')
$ zipWith (/) (crossProduct n m') (total m')
-- crossProduct n m'
total :: Acc (Matrix Double) -> Acc (Matrix Double)
total = replicate (constant (Z :. n :. n)) . sum . sum
n :: Dim
n = dim m
rIJ :: (Elt a, Ord a, P.Fractional (Exp a), P.Num a)
=> Dim -> Acc (Matrix a) -> Acc (Matrix a)
rIJ n m = matMiniMax $ divide a b
where
a = sumRowMin n m
b = sumColMin n m
-- * For Tests (to be removed)
-- | Test performance with this matrix
-- TODO : add this in a benchmark folder
distriTest :: Int -> Matrix Double
distriTest n = logDistributional (theMatrixInt n)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Distances/Accelerate/SpeGen.hs 0000664 0000000 0000000 00000010724 14124644201 0035146 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Methods.Distances.Accelerate.SpeGen
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This module aims at implementing distances between terms, context by context,
within the same referential corpus.
The implementation uses the Accelerate library, which enables GPU and CPU computation
(see Gargantext.Core.Methods.Graph.Accelerate).
-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Methods.Distances.Accelerate.SpeGen
where
-- import qualified Data.Foldable as P (foldl1)
-- import Debug.Trace (trace)
import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import qualified Gargantext.Prelude as P
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-- * Specificity and Genericity
{- | Metric Specificity and Genericity: select terms
- let N terms; occurrences of i: \[N_{i}\]
- Cooccurrences of i and j: \[N_{ij}\]
- Probability to get i given j: \[P(i|j)=N_{ij}/N_{j}\]
- Genericity of i: \[Gen(i) = \frac{\sum_{j \neq i} P(i|j)}{N-1}\]
- Specificity of i: \[Spec(i) = \frac{\sum_{j \neq i} P(j|i)}{N-1}\]
- \[Inclusion(i) = Gen(i) + Spec(i)\]
- \[GenericityScore(i) = Gen(i) - Spec(i)\]
- References: Science mapping with asymmetrical paradigmatic proximity
Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted
on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276,
arXiv:0803.2315 [cs.OH]
-}
type GenericityInclusion = Double
type SpecificityExclusion = Double
data SquareMatrix = SymetricMatrix | NonSymetricMatrix
type SymetricMatrix = Matrix
type NonSymetricMatrix = Matrix
incExcSpeGen :: Matrix Int
-> ( Vector GenericityInclusion
, Vector SpecificityExclusion
)
incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
where
run' fun mat = run $ fun $ map fromIntegral $ use mat
-- | Inclusion (i) = Gen(i)+Spec(i)
inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
inclusionExclusion mat = zipWith (+) (pV mat) (pH mat)
-- | Genericity score = Gen(i)- Spec(i)
specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
specificityGenericity mat = zipWith (-) (pV mat) (pH mat)
-- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
pV :: Acc (Matrix Double) -> Acc (Vector Double)
pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
-- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
pH :: Acc (Matrix Double) -> Acc (Vector Double)
pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
cardN :: Exp Double
cardN = constant (P.fromIntegral (dim m) :: Double)
-- | P(i|j) = Nij /N(jj) Probability to get i given j
--p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Matrix e) -> Acc (Matrix e)
p_ij m = zipWith (/) m (n_jj m)
where
n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
n_jj myMat' = backpermute (shape m)
(lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
-> (Z :. j :. j)
)
) myMat'
-- | P(j|i) = Nij /N(ii) Probability to get i given j
-- to test
p_ji :: (Elt e, P.Fractional (Exp e))
=> Acc (Array DIM2 e)
-> Acc (Array DIM2 e)
p_ji = transpose . p_ij
-- | Step to check the result in visual/qualitative tests
incExcSpeGen_proba :: Matrix Int -> Matrix Double
incExcSpeGen_proba m = run' pro m
where
run' fun mat = run $ fun $ map fromIntegral $ use mat
pro mat = p_ji mat
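-- Usage sketch (hypothetical cooccurrence matrix @m :: Matrix Int@):
--
-- > let (genInc, speExc) = incExcSpeGen m   -- one score per term, as two vectors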
{-
-- | Hypothesis to test maybe later (or not)
-- TODO ask accelerate for instances to ease such writings:
p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
p_ m = zipWith (/) m (n_ m)
where
n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
n_ m = backpermute (shape m)
(lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
-> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
)
) m
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Distances/Conditional.hs 0000664 0000000 0000000 00000010101 14124644201 0034175 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Methods.Distances.Conditional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Motivation and definition of the @Conditional@ distance.
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE Strict #-}
module Gargantext.Core.Methods.Distances.Conditional
where
import Data.List (sortOn)
import Data.Map (Map)
import Data.Matrix hiding (identity)
import Gargantext.Core.Viz.Graph.Utils
import Gargantext.Prelude
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Vector as V
------------------------------------------------------------------------
-- | Optimisation issue
toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
toBeOptimized m = proba Col m
------------------------------------------------------------------------
-- | Main Functions
-- Compute the probability from axis
-- x' = x / (sum Col x)
proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m
mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
mapOn a f m = V.foldl' f' m (V.enumFromTo 1 (nOf a m))
where
f' m' c = mapOnly a f c m'
mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
mapOnly Col = mapCol
mapOnly Row = mapRow
mapAll :: (a -> a) -> Matrix a -> Matrix a
mapAll f m = mapOn Col (\_ -> f) m
---------------------------------------------------------------
-- | Compute a distance from axis
-- xs = (sum Col x') - x'
distFromSum :: (Num a, Fractional a)
=> Axis -> Matrix a -> Matrix a
distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
---------------------------------------------------------------
---------------------------------------------------------------
-- | To compute included/excluded or specific/generic scores
opWith :: (Fractional a1, Num a1)
=> (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
where
n = fromIntegral $ nOf Col xs
---------------------------------------------------------------
-------------------------------------------------------
conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
conditional m = filterMat (threshold m') m'
where
------------------------------------------------------------------------
-- | Main Operations
-- x' = x / (sum Col x)
x' = proba Col m
------------------------------------------------------------------------
-- xs = (sum Col x') - x'
xs = distFromSum Col x'
-- ys = (sum Row x') - x'
ys = distFromSum Row x'
------------------------------------------------------------------------
-- | Top included or excluded
ie = opWith (+) xs ys
-- ie = ( xs + ys) / (2 * (x.shape[0] - 1))
-- | Top specific or generic
sg = opWith (-) xs ys
-- sg = ( xs - ys) / (2 * (x.shape[0] - 1))
nodes_kept :: [Int]
nodes_kept = take k' $ S.toList
$ foldl' (\s (n1,n2) -> insert [n1,n2] s) S.empty
$ map fst
$ nodes_included k <> nodes_specific k
nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
nodes_specific n = take n $ sortOn snd $ toListsWithIndex sg
insert as s = foldl' (\s' a -> S.insert a s') s as
k' = 2*k
k = 10
dico_nodes :: Map Int Int
dico_nodes = M.fromList $ zip ([1..] :: [Int]) nodes_kept
--dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
m' = matrix (length nodes_kept)
(length nodes_kept)
(\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
threshold m'' = V.minimum
$ V.map (\cId -> V.maximum $ getCol cId m'')
(V.enumFromTo 1 (nOf Col m'') )
filterMat t m'' = mapAll (\x -> filter' t x) m''
where
filter' t' x = case (x >= t') of
True -> x
False -> 0
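-- Usage sketch (hypothetical cooccurrence matrix @m :: Matrix Double@):
--
-- > conditional m   -- similarity matrix over the ~20 best-ranked terms, entries below the threshold zeroed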
------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Distances/Distributional.hs 0000664 0000000 0000000 00000004260 14124644201 0034727 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Methods.Distances.Distributional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Motivation and definition of the @Distributional@ distance.
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE Strict #-}
module Gargantext.Core.Methods.Distances.Distributional
where
import Data.Matrix hiding (identity)
import qualified Data.Map as M
import Data.Vector (Vector)
import qualified Data.Vector as V
import Gargantext.Prelude
import Gargantext.Core.Viz.Graph.Utils
distributional' :: (Floating a, Ord a) => Matrix a -> [((Int, Int), a)]
distributional' m = filter (\((x,y), d) -> foldl' (&&) True (conditions x y d) ) distriList
where
conditions x y d = [ (x /= y)
, (d > miniMax')
, ((M.lookup (x,y) distriMap) > (M.lookup (y,x) distriMap))
]
distriList = toListsWithIndex distriMatrix
distriMatrix = ri (mi m)
distriMap = M.fromList $ distriList
miniMax' = miniMax distriMatrix
ri :: (Ord a, Fractional a) => Matrix a -> Matrix a
ri m = matrix c r doRi
where
doRi (x,y) = doRi' x y m
doRi' x y mi'' = sumMin x y mi'' / (V.sum $ ax Col x y mi'')
sumMin x y mi' = V.sum $ V.map (\(a,b) -> min a b )
$ V.zip (ax Col x y mi') (ax Row x y mi')
(c,r) = (nOf Col m, nOf Row m)
mi :: (Ord a, Floating a) => Matrix a -> Matrix a
mi m = matrix c r createMat
where
(c,r) = (nOf Col m, nOf Row m)
createMat (x,y) = doMi x y m
doMi x y m' = if x == y then 0 else (max (log (doMi' x y m')) 0 )
doMi' x y m' = (getElem x y m) / ( cross x y m / total m' )
cross x y m' = (V.sum $ ax Col x y m) * (V.sum $ ax Row x y m')
ax :: Axis -> Int -> Int -> Matrix a -> Vector a
ax a i j m = dropAt j' $ axis a i' m
where
i' = div i c + 1
j' = mod r j + 1
(c,r) = (nOf Col m, nOf Row m)
miniMax :: (Ord a) => Matrix a -> a
miniMax m = V.minimum $ V.map (\c -> V.maximum $ getCol c m) (V.enumFromTo 1 (nOf Col m))
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Graph/ 0000775 0000000 0000000 00000000000 14124644201 0030600 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Graph/BAC/ 0000775 0000000 0000000 00000000000 14124644201 0031165 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Graph/BAC/Proxemy.hs 0000664 0000000 0000000 00000103432 14124644201 0033110 0 ustar 00root root 0000000 0000000 {-| Module : Gargantext.Core.Methods.Graph.BAC.Proxemy
Description : Proxemy
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
References:
- Bruno Gaume, Karine Duvignau, Emmanuel Navarro, Yann Desalle, Hintat Cheung, et al.. Skillex: a graph-based lexical score for measuring the semantic efficiency of used verbs by human subjects describing actions. Revue TAL, Association pour le Traitement Automatique des Langues, 2016, Revue TAL : numéro spécial sur Traitement Automatique des Langues et Sciences Cognitives (55-3), 55 (3), ⟨https://www.atala.org/-Cognitive-Issues-in-Natural-⟩. ⟨hal-01320416⟩
- Python implementation [Link]()
-}
module Gargantext.Core.Methods.Graph.BAC.Proxemy
where
--import Debug.SimpleReflect
import Gargantext.Prelude
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.List as List
--import Gargantext.Core.Viz.Graph.IGraph
import Gargantext.Core.Viz.Graph.FGL
type Length = Int
type FalseReflexive = Bool
type NeighborsFilter = Graph_Undirected -> Node -> [Node]
type RmEdge = Bool
confluence :: [(Node,Node)] -> Length -> FalseReflexive -> RmEdge -> Map (Node,Node) Double
confluence ns = similarity_conf (mkGraphUfromEdges ns)
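-- Usage sketch (using the test edge list defined below, with walks of length 3,
-- false-reflexive walks enabled and edge removal disabled, as in the tests at the
-- end of this module):
--
-- > confluence graphTest_data 3 True False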
similarity_conf :: Graph_Undirected -> Length -> FalseReflexive -> RmEdge -> Map (Node,Node) Double
similarity_conf g l fr rm = Map.fromList [ ((x,y), similarity_conf_x_y g (x,y) l fr rm)
| x <- nodes g, y <- nodes g, x < y]
similarity_conf_x_y :: Graph_Undirected -> (Node,Node) -> Length -> FalseReflexive -> RmEdge -> Double
similarity_conf_x_y g (x,y) l r rm_e = similarity
where
similarity :: Double
similarity | denominator == 0 = 0
| otherwise = prox_x_y / denominator
where
denominator = prox_x_y + lim_SC
prox_x_y :: Double
prox_x_y = maybe 0 identity $ Map.lookup y xline
xline :: Map Node Double
xline = prox_markov g [x] l r filterNeighbors'
where
filterNeighbors' | rm_e == True = filterNeighbors
| otherwise = rm_edge_neighbors (x,y)
pair_is_edge :: Bool
pair_is_edge | rm_e == True = False
| otherwise = List.elem y (filterNeighbors g x)
lim_SC :: Double
lim_SC
| denominator == 0 = 0
| otherwise = if pair_is_edge
then (degree g y + 1-1) / denominator
else (degree g y + 1 ) / denominator
where
denominator = if pair_is_edge
then (2 * (ecount g) + (vcount g) - 2)
else (2 * (ecount g) + (vcount g) )
rm_edge_neighbors :: (Node, Node) -> Graph_Undirected -> Node -> [Node]
rm_edge_neighbors (x,y) g n | (n == x && List.elem y all_neighbors) = List.filter (/= y) all_neighbors
| (n == y && List.elem x all_neighbors) = List.filter (/= x) all_neighbors
| otherwise = all_neighbors
where
all_neighbors = filterNeighbors g n
-- | TODO do as a Map instead of [Node] ?
prox_markov :: Graph_Undirected -> [Node] -> Length -> FalseReflexive -> NeighborsFilter -> Map Node Double
prox_markov g ns l r nf = foldl' (\m _ -> spreading g m r nf) ms path
where
path
| l == 0 = []
| l > 0 = [0..l-1]
| otherwise = panic "Gargantext.Core.Methods.Graph.BAC.Proxemy.prox_markov: Length < 0"
-- TODO if ns empty
ms = case List.length ns > 0 of
True -> Map.fromList $ map (\n -> (n, 1 / (fromIntegral $ List.length ns))) ns
_ -> Map.empty
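-- Usage sketch (random-walk distribution after 3 steps, starting from node 0 of
-- the test graph defined below, mirroring the call made in the tests):
--
-- > prox_markov graphTest [0] 3 True filterNeighbors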
spreading :: Graph_Undirected
-> Map Node Double
-> FalseReflexive
-> NeighborsFilter
-> Map Node Double
spreading g ms r nf = Map.fromListWith (+) $ List.concat $ map pvalue (Map.keys ms)
where
-- TODO if list empty ...
-- pvalue' n = [pvalue n] <> map pvalue (neighborhood n)
pvalue n = [(n, pvalue' n)] <> map (\n''->(n'', pvalue' n)) (nf g n)
where
pvalue' n' = (value n') / (fromIntegral $ List.length neighborhood)
value n' = maybe 0 identity $ Map.lookup n' ms
neighborhood = (nf g n) <> (if r then [n] else [])
------------------------------------------------------------------------
-- | Behavior tests
graphTest :: Graph_Undirected
graphTest= mkGraphUfromEdges graphTest_data
graphTest_data :: [(Int,Int)]
graphTest_data = [(0,1),(0,2),(0,4),(0,5),(1,3),(1,8),(2,3),(2,4),(2,5),(2,6),(2,16),(3,4),(3,5),(3,6),(3,18),(4,6),(5,8),(7,8),(7,9),(7,10),(7,13),(8,9),(8,10),(8,11),(8,12),(8,13),(9,12),(9,13),(10,11),(10,17),(11,12),(13,20),(14,16),(14,17),(14,18),(14,20),(15,16),(15,17),(15,18),(15,20),(16,18),(16,20),(17,18),(17,20),(18,19),(18,20),(19,20)]
graphTest_data' :: [(Int,Int)]
graphTest_data' = [(0,1),(0,2),(0,4),(0,5),(1,0),(1,3),(1,8),(2,0),(2,3),(2,4),(2,5),(2,6),(2,16),(3,1),(3,2),(3,4),(3,5),(3,6),(3,18),(4,0),(4,2),(4,3),(4,6),(5,0),(5,2),(5,3),(5,8),(6,2),(6,3),(6,4),(7,8),(7,9),(7,10),(7,13),(8,1),(8,5),(8,7),(8,9),(8,10),(8,11),(8,12),(8,13),(9,7),(9,8),(9,12),(9,13),(10,7),(10,8),(10,11),(10,17),(11,8),(11,10),(11,12),(12,8),(12,9),(12,11),(13,7),(13,8),(13,9),(13,20),(14,16),(14,17),(14,18),(14,20),(15,16),(15,17),(15,18),(15,20),(16,2),(16,14),(16,15),(16,18),(16,20),(17,10),(17,14),(17,15),(17,18),(17,20),(18,3),(18,14),(18,15),(18,16),(18,17),(18,19),(18,20),(19,18),(19,20),(20,13),(20,14),(20,15),(20,16),(20,17),(20,18),(20,19)]
-- | Tests
-- >>> runTest_Confluence_Proxemy
-- (True,True)
runTest_Confluence_Proxemy :: (Bool, Bool)
runTest_Confluence_Proxemy = (runTest_conf_is_ok, runTest_prox_is_ok)
where
runTest_conf_is_ok :: Bool
runTest_conf_is_ok = List.null $ List.filter (\t -> snd t == False)
[ (((x,y)), abs ((look (y,x) test) - (look (y,x) temoin)) < 0.0001)
| y <- nodes graphTest
, x <- nodes graphTest
]
where
test = toMap [(n, [ (y, similarity_conf_x_y graphTest (n,y) 3 True False) | y <- nodes graphTest])
| n <- nodes graphTest
]
temoin = test_confluence_temoin
runTest_prox_is_ok :: Bool
runTest_prox_is_ok = List.null (List.filter (not . List.null) $ map runTest_prox' [0..3])
runTest_prox' :: Node -> [((Node, (Node, Node)), Bool)]
runTest_prox' l = List.filter (\t -> snd t == False)
[ ((l,(x,y)), abs ((look (y,x) test) - (look (y,x) temoin)) < 0.0001)
| y <- nodes graphTest
, x <- nodes graphTest
]
where
test = toMap $ test_proxs_y l
temoin = toMap $ test_prox l
test_proxs_y :: Length -> [(Node, [(Node, Double)])]
test_proxs_y l' = map (\n -> test_proxs_x l' n) (nodes graphTest)
test_proxs_x :: Length -> Node -> (Node, [(Node, Double)])
test_proxs_x l' a = (a, map (\x -> (x, maybe 0 identity $ Map.lookup x (m a))) (nodes graphTest))
where
m x' = prox_markov graphTest [x'] l' True filterNeighbors
toMap = Map.map Map.fromList . Map.fromList
look :: (Node,Node) -> Map Node (Map Node Double) -> Double
look (x,y) m = look' x $ look' y m
where
look' x' m' = maybe (panic "nokey") identity $ Map.lookup x' m'
--prox : longueur balade = 0
test_prox :: Node -> [(Node, [(Node, Double)])]
test_prox 0 = [ (0,[(0,1.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (1,[(0,0.0000),(1,1.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (2,[(0,0.0000),(1,0.0000),(2,1.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (3,[(0,0.0000),(1,0.0000),(2,0.0000),(3,1.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (4,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,1.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (5,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,1.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (6,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,1.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (7,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,1.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (8,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,1.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (9,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,1.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (10,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,1.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (11,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,1.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (12,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,1.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (13,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,1.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (14,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,1.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (15,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,1.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (16,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,1.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (17,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,1.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (18,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,1.0000),(19,0.0000),(20,0.0000)])
, (19,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,1.0000),(20,0.0000)])
, (20,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,1.0000)])
]
--{-
--, longueur balade , 1]),
test_prox 1 = [(0,[(0,0.2000),(1,0.2000),(2,0.2000),(3,0.0000),(4,0.2000),(5,0.2000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (1,[(0,0.2500),(1,0.2500),(2,0.0000),(3,0.2500),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.2500),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (2,[(0,0.1429),(1,0.0000),(2,0.1429),(3,0.1429),(4,0.1429),(5,0.1429),(6,0.1429),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.1429),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (3,[(0,0.0000),(1,0.1429),(2,0.1429),(3,0.1429),(4,0.1429),(5,0.1429),(6,0.1429),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.1429),(19,0.0000),(20,0.0000)])
, (4,[(0,0.2000),(1,0.0000),(2,0.2000),(3,0.2000),(4,0.2000),(5,0.0000),(6,0.2000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (5,[(0,0.2000),(1,0.0000),(2,0.2000),(3,0.2000),(4,0.0000),(5,0.2000),(6,0.0000),(7,0.0000),(8,0.2000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (6,[(0,0.0000),(1,0.0000),(2,0.2500),(3,0.2500),(4,0.2500),(5,0.0000),(6,0.2500),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (7,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.2000),(8,0.2000),(9,0.2000),(10,0.2000),(11,0.0000),(12,0.0000),(13,0.2000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (8,[(0,0.0000),(1,0.1111),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.1111),(6,0.0000),(7,0.1111),(8,0.1111),(9,0.1111),(10,0.1111),(11,0.1111),(12,0.1111),(13,0.1111),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (9,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.2000),(8,0.2000),(9,0.2000),(10,0.0000),(11,0.0000),(12,0.2000),(13,0.2000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (10,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.2000),(8,0.2000),(9,0.0000),(10,0.2000),(11,0.2000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.2000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (11,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.2500),(9,0.0000),(10,0.2500),(11,0.2500),(12,0.2500),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (12,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.2500),(9,0.2500),(10,0.0000),(11,0.2500),(12,0.2500),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (13,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.2000),(8,0.2000),(9,0.2000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.2000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.2000)])
, (14,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.2000),(15,0.0000),(16,0.2000),(17,0.2000),(18,0.2000),(19,0.0000),(20,0.2000)])
, (15,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.2000),(16,0.2000),(17,0.2000),(18,0.2000),(19,0.0000),(20,0.2000)])
, (16,[(0,0.0000),(1,0.0000),(2,0.1667),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.1667),(15,0.1667),(16,0.1667),(17,0.0000),(18,0.1667),(19,0.0000),(20,0.1667)])
, (17,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.1667),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.1667),(15,0.1667),(16,0.0000),(17,0.1667),(18,0.1667),(19,0.0000),(20,0.1667)])
, (18,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.1250),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.1250),(15,0.1250),(16,0.1250),(17,0.1250),(18,0.1250),(19,0.1250),(20,0.1250)])
, (19,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.3333),(19,0.3333),(20,0.3333)])
, (20,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.1250),(14,0.1250),(15,0.1250),(16,0.1250),(17,0.1250),(18,0.1250),(19,0.1250),(20,0.1250)])
]
-- | longueur balade 2
test_prox 2 = [ (0,[(0,0.1986),(1,0.0900),(2,0.1486),(3,0.1586),(4,0.1086),(5,0.1086),(6,0.0686),(7,0.0000),(8,0.0900),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0286),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (1,[(0,0.1125),(1,0.1760),(2,0.0857),(3,0.0982),(4,0.0857),(5,0.1135),(6,0.0357),(7,0.0278),(8,0.0903),(9,0.0278),(10,0.0278),(11,0.0278),(12,0.0278),(13,0.0278),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0357),(19,0.0000),(20,0.0000)])
, (2,[(0,0.1061),(1,0.0490),(2,0.1861),(3,0.1337),(4,0.1337),(5,0.0980),(6,0.1051),(7,0.0000),(8,0.0286),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0238),(15,0.0238),(16,0.0442),(17,0.0000),(18,0.0442),(19,0.0000),(20,0.0238)])
, (3,[(0,0.1133),(1,0.0561),(2,0.1337),(3,0.1872),(4,0.1051),(5,0.0694),(6,0.1051),(7,0.0000),(8,0.0643),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0179),(15,0.0179),(16,0.0383),(17,0.0179),(18,0.0383),(19,0.0179),(20,0.0179)])
, (4,[(0,0.1086),(1,0.0686),(2,0.1871),(3,0.1471),(4,0.1871),(5,0.0971),(6,0.1471),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0286),(17,0.0000),(18,0.0286),(19,0.0000),(20,0.0000)])
, (5,[(0,0.1086),(1,0.0908),(2,0.1371),(3,0.0971),(4,0.0971),(5,0.1594),(6,0.0571),(7,0.0222),(8,0.0622),(9,0.0222),(10,0.0222),(11,0.0222),(12,0.0222),(13,0.0222),(14,0.0000),(15,0.0000),(16,0.0286),(17,0.0000),(18,0.0286),(19,0.0000),(20,0.0000)])
, (6,[(0,0.0857),(1,0.0357),(2,0.1839),(3,0.1839),(4,0.1839),(5,0.0714),(6,0.1839),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0357),(17,0.0000),(18,0.0357),(19,0.0000),(20,0.0000)])
, (7,[(0,0.0000),(1,0.0222),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0222),(6,0.0000),(7,0.1822),(8,0.1822),(9,0.1422),(10,0.1022),(11,0.0622),(12,0.0622),(13,0.1422),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0400),(18,0.0000),(19,0.0000),(20,0.0400)])
, (8,[(0,0.0500),(1,0.0401),(2,0.0222),(3,0.0500),(4,0.0000),(5,0.0346),(6,0.0000),(7,0.1012),(8,0.2068),(9,0.1068),(10,0.0846),(11,0.0901),(12,0.0901),(13,0.0790),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0222),(18,0.0000),(19,0.0000),(20,0.0222)])
, (9,[(0,0.0000),(1,0.0222),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0222),(6,0.0000),(7,0.1422),(8,0.1922),(9,0.1922),(10,0.0622),(11,0.0722),(12,0.1122),(13,0.1422),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0400)])
, (10,[(0,0.0000),(1,0.0222),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0222),(6,0.0000),(7,0.1022),(8,0.1522),(9,0.0622),(10,0.1856),(11,0.1122),(12,0.0722),(13,0.0622),(14,0.0333),(15,0.0333),(16,0.0000),(17,0.0733),(18,0.0333),(19,0.0000),(20,0.0333)])
, (11,[(0,0.0000),(1,0.0278),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0278),(6,0.0000),(7,0.0778),(8,0.2028),(9,0.0903),(10,0.1403),(11,0.2028),(12,0.1528),(13,0.0278),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0500),(18,0.0000),(19,0.0000),(20,0.0000)])
, (12,[(0,0.0000),(1,0.0278),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0278),(6,0.0000),(7,0.0778),(8,0.2028),(9,0.1403),(10,0.0903),(11,0.1528),(12,0.2028),(13,0.0778),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (13,[(0,0.0000),(1,0.0222),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0222),(6,0.0000),(7,0.1422),(8,0.1422),(9,0.1422),(10,0.0622),(11,0.0222),(12,0.0622),(13,0.1672),(14,0.0250),(15,0.0250),(16,0.0250),(17,0.0250),(18,0.0250),(19,0.0250),(20,0.0650)])
, (14,[(0,0.0000),(1,0.0000),(2,0.0333),(3,0.0250),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0333),(11,0.0000),(12,0.0000),(13,0.0250),(14,0.1567),(15,0.1167),(16,0.1233),(17,0.1233),(18,0.1567),(19,0.0500),(20,0.1567)])
, (15,[(0,0.0000),(1,0.0000),(2,0.0333),(3,0.0250),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0333),(11,0.0000),(12,0.0000),(13,0.0250),(14,0.1167),(15,0.1567),(16,0.1233),(17,0.1233),(18,0.1567),(19,0.0500),(20,0.1567)])
, (16,[(0,0.0238),(1,0.0000),(2,0.0516),(3,0.0446),(4,0.0238),(5,0.0238),(6,0.0238),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0208),(14,0.1028),(15,0.1028),(16,0.1599),(17,0.1083),(18,0.1361),(19,0.0417),(20,0.1361)])
, (17,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0208),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0333),(8,0.0333),(9,0.0000),(10,0.0611),(11,0.0333),(12,0.0000),(13,0.0208),(14,0.1028),(15,0.1028),(16,0.1083),(17,0.1694),(18,0.1361),(19,0.0417),(20,0.1361)])
, (18,[(0,0.0000),(1,0.0179),(2,0.0387),(3,0.0335),(4,0.0179),(5,0.0179),(6,0.0179),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0208),(11,0.0000),(12,0.0000),(13,0.0156),(14,0.0979),(15,0.0979),(16,0.1021),(17,0.1021),(18,0.1824),(19,0.0729),(20,0.1646)])
, (19,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0417),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0417),(14,0.0833),(15,0.0833),(16,0.0833),(17,0.0833),(18,0.1944),(19,0.1944),(20,0.1944)])
, (20,[(0,0.0000),(1,0.0000),(2,0.0208),(3,0.0156),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0250),(8,0.0250),(9,0.0250),(10,0.0208),(11,0.0000),(12,0.0000),(13,0.0406),(14,0.0979),(15,0.0979),(16,0.1021),(17,0.1021),(18,0.1646),(19,0.0729),(20,0.1896)])
]
-- | longueur balade 3
test_prox 3 = [ (0,[(0,0.1269),(1,0.0949),(2,0.1489),(3,0.1269),(4,0.1224),(5,0.1153),(6,0.0827),(7,0.0100),(8,0.0542),(9,0.0100),(10,0.0100),(11,0.0100),(12,0.0100),(13,0.0100),(14,0.0048),(15,0.0048),(16,0.0260),(17,0.0000),(18,0.0274),(19,0.0000),(20,0.0048)])
, (1,[(0,0.1186),(1,0.0906),(2,0.0975),(3,0.1235),(4,0.0748),(5,0.0815),(6,0.0523),(7,0.0323),(8,0.1128),(9,0.0336),(10,0.0281),(11,0.0295),(12,0.0295),(13,0.0267),(14,0.0045),(15,0.0045),(16,0.0167),(17,0.0100),(18,0.0185),(19,0.0045),(20,0.0100)])
, (2,[(0,0.1064),(1,0.0557),(2,0.1469),(3,0.1360),(4,0.1199),(5,0.0897),(6,0.0987),(7,0.0032),(8,0.0350),(9,0.0032),(10,0.0032),(11,0.0032),(12,0.0032),(13,0.0062),(14,0.0206),(15,0.0206),(16,0.0520),(17,0.0180),(18,0.0445),(19,0.0085),(20,0.0254)])
, (3,[(0,0.0907),(1,0.0706),(2,0.1360),(3,0.1258),(4,0.1158),(5,0.0895),(6,0.0931),(7,0.0071),(8,0.0351),(9,0.0071),(10,0.0101),(11,0.0071),(12,0.0071),(13,0.0094),(14,0.0199),(15,0.0199),(16,0.0396),(17,0.0171),(18,0.0562),(19,0.0130),(20,0.0295)])
, (4,[(0,0.1224),(1,0.0599),(2,0.1679),(3,0.1621),(4,0.1437),(5,0.0889),(6,0.1220),(7,0.0000),(8,0.0366),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0083),(15,0.0083),(16,0.0351),(17,0.0036),(18,0.0294),(19,0.0036),(20,0.0083)])
, (5,[(0,0.1153),(1,0.0652),(2,0.1255),(3,0.1253),(4,0.0889),(5,0.0940),(6,0.0672),(7,0.0247),(8,0.0904),(9,0.0258),(10,0.0214),(11,0.0225),(12,0.0225),(13,0.0202),(14,0.0083),(15,0.0083),(16,0.0279),(17,0.0080),(18,0.0222),(19,0.0036),(20,0.0128)])
, (6,[(0,0.1034),(1,0.0523),(2,0.1727),(3,0.1630),(4,0.1525),(5,0.0840),(6,0.1353),(7,0.0000),(8,0.0232),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0104),(15,0.0104),(16,0.0367),(17,0.0045),(18,0.0367),(19,0.0045),(20,0.0104)])
, (7,[(0,0.0100),(1,0.0258),(2,0.0044),(3,0.0100),(4,0.0000),(5,0.0247),(6,0.0000),(7,0.1340),(8,0.1751),(9,0.1291),(10,0.0994),(11,0.0718),(12,0.0798),(13,0.1186),(14,0.0117),(15,0.0117),(16,0.0050),(17,0.0321),(18,0.0117),(19,0.0050),(20,0.0401)])
, (8,[(0,0.0301),(1,0.0502),(2,0.0272),(3,0.0273),(4,0.0203),(5,0.0502),(6,0.0103),(7,0.0973),(8,0.1593),(9,0.1029),(10,0.0864),(11,0.0850),(12,0.0894),(13,0.0832),(14,0.0065),(15,0.0065),(16,0.0060),(17,0.0234),(18,0.0136),(19,0.0028),(20,0.0223)])
, (9,[(0,0.0100),(1,0.0269),(2,0.0044),(3,0.0100),(4,0.0000),(5,0.0258),(6,0.0000),(7,0.1291),(8,0.1852),(9,0.1447),(10,0.0803),(11,0.0799),(12,0.1059),(13,0.1217),(14,0.0050),(15,0.0050),(16,0.0050),(17,0.0174),(18,0.0050),(19,0.0050),(20,0.0334)])
, (10,[(0,0.0100),(1,0.0225),(2,0.0044),(3,0.0142),(4,0.0000),(5,0.0214),(6,0.0000),(7,0.0994),(8,0.1555),(9,0.0803),(10,0.1147),(11,0.1001),(12,0.0755),(13,0.0664),(14,0.0272),(15,0.0272),(16,0.0217),(17,0.0710),(18,0.0339),(19,0.0083),(20,0.0463)])
, (11,[(0,0.0125),(1,0.0295),(2,0.0056),(3,0.0125),(4,0.0000),(5,0.0281),(6,0.0000),(7,0.0898),(8,0.1911),(9,0.0999),(10,0.1252),(11,0.1395),(12,0.1295),(13,0.0617),(14,0.0083),(15,0.0083),(16,0.0000),(17,0.0364),(18,0.0083),(19,0.0000),(20,0.0139)])
, (12,[(0,0.0125),(1,0.0295),(2,0.0056),(3,0.0125),(4,0.0000),(5,0.0281),(6,0.0000),(7,0.0998),(8,0.2011),(9,0.1324),(10,0.0943),(11,0.1295),(12,0.1395),(13,0.0817),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0181),(18,0.0000),(19,0.0000),(20,0.0156)])
, (13,[(0,0.0100),(1,0.0214),(2,0.0086),(3,0.0131),(4,0.0000),(5,0.0202),(6,0.0000),(7,0.1186),(8,0.1497),(9,0.1217),(10,0.0664),(11,0.0494),(12,0.0654),(13,0.1143),(14,0.0246),(15,0.0246),(16,0.0254),(17,0.0379),(18,0.0379),(19,0.0196),(20,0.0714)])
, (14,[(0,0.0048),(1,0.0036),(2,0.0289),(3,0.0279),(4,0.0083),(5,0.0083),(6,0.0083),(7,0.0117),(8,0.0117),(9,0.0050),(10,0.0272),(11,0.0067),(12,0.0000),(13,0.0246),(14,0.1116),(15,0.1036),(16,0.1192),(17,0.1211),(18,0.1552),(19,0.0558),(20,0.1566)])
, (15,[(0,0.0048),(1,0.0036),(2,0.0289),(3,0.0279),(4,0.0083),(5,0.0083),(6,0.0083),(7,0.0117),(8,0.0117),(9,0.0050),(10,0.0272),(11,0.0067),(12,0.0000),(13,0.0246),(14,0.1036),(15,0.1116),(16,0.1192),(17,0.1211),(18,0.1552),(19,0.0558),(20,0.1566)])
, (16,[(0,0.0217),(1,0.0111),(2,0.0606),(3,0.0462),(4,0.0292),(5,0.0233),(6,0.0245),(7,0.0042),(8,0.0089),(9,0.0042),(10,0.0181),(11,0.0000),(12,0.0000),(13,0.0212),(14,0.0993),(15,0.0993),(16,0.1092),(17,0.0932),(18,0.1401),(19,0.0479),(20,0.1379)])
, (17,[(0,0.0000),(1,0.0067),(2,0.0210),(3,0.0200),(4,0.0030),(5,0.0067),(6,0.0030),(7,0.0268),(8,0.0351),(9,0.0145),(10,0.0592),(11,0.0243),(12,0.0120),(13,0.0316),(14,0.1009),(15,0.1009),(16,0.0932),(17,0.1156),(18,0.1383),(19,0.0479),(20,0.1395)])
, (18,[(0,0.0171),(1,0.0092),(2,0.0389),(3,0.0492),(4,0.0183),(5,0.0139),(6,0.0183),(7,0.0073),(8,0.0153),(9,0.0031),(10,0.0212),(11,0.0042),(12,0.0000),(13,0.0237),(14,0.0970),(15,0.0970),(16,0.1051),(17,0.1037),(18,0.1457),(19,0.0677),(20,0.1440)])
, (19,[(0,0.0000),(1,0.0060),(2,0.0198),(3,0.0303),(4,0.0060),(5,0.0060),(6,0.0060),(7,0.0083),(8,0.0083),(9,0.0083),(10,0.0139),(11,0.0000),(12,0.0000),(13,0.0326),(14,0.0931),(15,0.0931),(16,0.0958),(17,0.0958),(18,0.1805),(19,0.1134),(20,0.1829)])
, (20,[(0,0.0030),(1,0.0050),(2,0.0222),(3,0.0258),(4,0.0052),(5,0.0080),(6,0.0052),(7,0.0251),(8,0.0251),(9,0.0209),(10,0.0290),(11,0.0069),(12,0.0078),(13,0.0446),(14,0.0979),(15,0.0979),(16,0.1034),(17,0.1046),(18,0.1440),(19,0.0686),(20,0.1499)])
]
test_prox _ = undefined
-- | confluence longueur balade 3
test_confluence_temoin :: Map Node (Map Node Double)
test_confluence_temoin = Map.map Map.fromList $ Map.fromList [(0,[(0,0.7448),(1,0.4844),(2,0.6471),(3,0.6759),(4,0.6297),(5,0.6219),(6,0.7040),(7,0.1870),(8,0.4092),(9,0.1870),(10,0.1870),(11,0.2233),(12,0.2233),(13,0.1870),(14,0.0987),(15,0.0987),(16,0.3325),(17,0.0000),(18,0.2827),(19,0.0000),(20,0.0641)])
, (1,[(0,0.4844),(1,0.7225),(2,0.6158),(3,0.4509),(4,0.6326),(5,0.6521),(6,0.6008),(7,0.4259),(8,0.2441),(9,0.4362),(10,0.3925),(11,0.4587),(12,0.4587),(13,0.3804),(14,0.0931),(15,0.0931),(16,0.2426),(17,0.1611),(18,0.2100),(19,0.1461),(20,0.1259)])
, (2,[(0,0.6471),(1,0.6158),(2,0.7070),(3,0.6569),(4,0.7060),(5,0.5915),(6,0.6918),(7,0.0680),(8,0.3091),(9,0.0680),(10,0.0680),(11,0.0836),(12,0.0836),(13,0.1239),(14,0.3219),(15,0.3219),(16,0.0630),(17,0.2568),(18,0.3901),(19,0.2458),(20,0.2674)])
, (3,[(0,0.6759),(1,0.4509),(2,0.6569),(3,0.6740),(4,0.6865),(5,0.5777),(6,0.6659),(7,0.1411),(8,0.3093),(9,0.1411),(10,0.1888),(11,0.1704),(12,0.1704),(13,0.1774),(14,0.3144),(15,0.3144),(16,0.4317),(17,0.2472),(18,0.0602),(19,0.3320),(20,0.2975)])
, (4,[(0,0.6297),(1,0.6326),(2,0.7060),(3,0.6865),(4,0.7677),(5,0.6716),(6,0.7228),(7,0.0000),(8,0.3185),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.1608),(15,0.1608),(16,0.4020),(17,0.0641),(18,0.2967),(19,0.1204),(20,0.1070)])
, (5,[(0,0.6219),(1,0.6521),(2,0.5915),(3,0.5777),(4,0.6716),(5,0.6837),(6,0.6589),(7,0.3622),(8,0.2324),(9,0.3724),(10,0.3294),(11,0.3925),(12,0.3925),(13,0.3177),(14,0.1608),(15,0.1608),(16,0.3486),(17,0.1332),(18,0.2420),(19,0.1204),(20,0.1552)])
, (6,[(0,0.7040),(1,0.6008),(2,0.6918),(3,0.6659),(4,0.7228),(5,0.6589),(6,0.7955),(7,0.0000),(8,0.2288),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.1933),(15,0.1933),(16,0.4129),(17,0.0788),(18,0.3453),(19,0.1461),(20,0.1302)])
, (7,[(0,0.1870),(1,0.4259),(2,0.0680),(3,0.1411),(4,0.0000),(5,0.3622),(6,0.0000),(7,0.7551),(8,0.6496),(9,0.6811),(10,0.4974),(11,0.6737),(12,0.6964),(13,0.6598),(14,0.2116),(15,0.2116),(16,0.0875),(17,0.3810),(18,0.1436),(19,0.1608),(20,0.3657)])
, (8,[(0,0.4092),(1,0.2441),(2,0.3091),(3,0.3093),(4,0.3185),(5,0.2324),(6,0.2288),(7,0.6496),(8,0.6706),(9,0.6676),(10,0.5937),(11,0.6525),(12,0.6738),(13,0.5855),(14,0.1297),(15,0.1297),(16,0.1024),(17,0.3096),(18,0.1638),(19,0.0962),(20,0.2426)])
, (9,[(0,0.1870),(1,0.4362),(2,0.0680),(3,0.1411),(4,0.0000),(5,0.3724),(6,0.0000),(7,0.6811),(8,0.6676),(9,0.7690),(10,0.6487),(11,0.6967),(12,0.5845),(13,0.6642),(14,0.1031),(15,0.1031),(16,0.0875),(17,0.2506),(18,0.0671),(19,0.1608),(20,0.3247)])
, (10,[(0,0.1870),(1,0.3925),(2,0.0680),(3,0.1888),(4,0.0000),(5,0.3294),(6,0.0000),(7,0.4974),(8,0.5937),(9,0.6487),(10,0.7252),(11,0.5449),(12,0.6845),(13,0.6044),(14,0.3850),(15,0.3850),(16,0.2934),(17,0.0000),(18,0.3276),(19,0.2421),(20,0.3998)])
, (11,[(0,0.2233),(1,0.4587),(2,0.0836),(3,0.1704),(4,0.0000),(5,0.3925),(6,0.0000),(7,0.6737),(8,0.6525),(9,0.6967),(10,0.5449),(11,0.8004),(12,0.6217),(13,0.5866),(14,0.1608),(15,0.1608),(16,0.0000),(17,0.4109),(18,0.1070),(19,0.0000),(20,0.1664)])
, (12,[(0,0.2233),(1,0.4587),(2,0.0836),(3,0.1704),(4,0.0000),(5,0.3925),(6,0.0000),(7,0.6964),(8,0.6738),(9,0.5845),(10,0.6845),(11,0.6217),(12,0.8004),(13,0.6527),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.2571),(18,0.0000),(19,0.0000),(20,0.1827)])
, (13,[(0,0.1870),(1,0.3804),(2,0.1239),(3,0.1774),(4,0.0000),(5,0.3177),(6,0.0000),(7,0.6598),(8,0.5855),(9,0.6642),(10,0.6044),(11,0.5866),(12,0.6527),(13,0.7244),(14,0.3612),(15,0.3612),(16,0.3276),(17,0.4205),(18,0.3528),(19,0.4288),(20,0.0000)])
, (14,[(0,0.0987),(1,0.0931),(2,0.3219),(3,0.3144),(4,0.1608),(5,0.1608),(6,0.1933),(7,0.2116),(8,0.1297),(9,0.1031),(10,0.3850),(11,0.1608),(12,0.0000),(13,0.3612),(14,0.7197),(15,0.7044),(16,0.6289),(17,0.6289),(18,0.6538),(19,0.6816),(20,0.6538)])
, (15,[(0,0.0987),(1,0.0931),(2,0.3219),(3,0.3144),(4,0.1608),(5,0.1608),(6,0.1933),(7,0.2116),(8,0.1297),(9,0.1031),(10,0.3850),(11,0.1608),(12,0.0000),(13,0.3612),(14,0.7044),(15,0.7197),(16,0.6289),(17,0.6289),(18,0.6538),(19,0.6816),(20,0.6538)])
, (16,[(0,0.3325),(1,0.2426),(2,0.0630),(3,0.4317),(4,0.4020),(5,0.3486),(6,0.4129),(7,0.0875),(8,0.1024),(9,0.0875),(10,0.2934),(11,0.0000),(12,0.0000),(13,0.3276),(14,0.6289),(15,0.6289),(16,0.6766),(17,0.6411),(18,0.6290),(19,0.6475),(20,0.6197)])
, (17,[(0,0.0000),(1,0.1611),(2,0.2568),(3,0.2472),(4,0.0641),(5,0.1332),(6,0.0788),(7,0.3810),(8,0.3096),(9,0.2506),(10,0.0000),(11,0.4109),(12,0.2571),(13,0.4205),(14,0.6289),(15,0.6289),(16,0.6411),(17,0.6890),(18,0.6197),(19,0.6475),(20,0.6197)])
, (18,[(0,0.2827),(1,0.2100),(2,0.3901),(3,0.0602),(4,0.2967),(5,0.2420),(6,0.3453),(7,0.1436),(8,0.1638),(9,0.0671),(10,0.3276),(11,0.1070),(12,0.0000),(13,0.3528),(14,0.6538),(15,0.6538),(16,0.6290),(17,0.6197),(18,0.6768),(19,0.6023),(20,0.6536)])
, (19,[(0,0.0000),(1,0.1461),(2,0.2458),(3,0.3320),(4,0.1204),(5,0.1204),(6,0.1461),(7,0.1608),(8,0.0962),(9,0.1608),(10,0.2421),(11,0.0000),(12,0.0000),(13,0.4288),(14,0.6816),(15,0.6816),(16,0.6475),(17,0.6475),(18,0.6023),(19,0.8130),(20,0.6023)])
, (20,[(0,0.0641),(1,0.1259),(2,0.2674),(3,0.2975),(4,0.1070),(5,0.1552),(6,0.1302),(7,0.3657),(8,0.2426),(9,0.3247),(10,0.3998),(11,0.1664),(12,0.1827),(13,0.0000),(14,0.6538),(15,0.6538),(16,0.6197),(17,0.6197),(18,0.6536),(19,0.6023),(20,0.6830)])
]
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Graph/MaxClique.hs
{-|
Module : Gargantext.Core.Viz.Graph.MaxClique
Description : MaxCliques function
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
- Result of the workshop, Pyremiel 2019
- First written by Bruno Gaume in Python (see below for details)
- Then written by Alexandre Delanoë in Haskell (see below for details)
# By Bruno Gaume:
def fast_maximal_cliques(g):
def rec_maximal_cliques(g, subv):
mc = []
if subv == []: # stop condition
return [[]]
else :
for i in range(len(subv)):
newsubv = [j for j in subv[i+1:len(subv)]
if (j in g.neighbors(subv[i]))]
mci = rec_maximal_cliques(g, newsubv)
for x in mci:
x.append(subv[i])
mc.append(x)
return mc
def purge(clust):
clustset = [set(x) for x in clust]
new_clust = []
for i in range(len(clustset)):
ok = True
for j in range(len(clustset)):
if clustset[i].issubset(clustset[j]) and (not (len(clustset[i]) == len(clustset[j])) ):
ok = False
if ok and (not (clustset[i] in new_clust)):
new_clust.append(clustset[i])
return [list(x) for x in new_clust]
# to optimize : rank the vertices on the degrees
subv = [(v.index, v.degree()) for v in g.vs()]
subv.sort(key = lambda z:z[1])
subv = [x for (x, y) in subv]
return purge(rec_maximal_cliques(g, subv))
-}
module Gargantext.Core.Methods.Graph.MaxClique
where
import Data.Maybe (catMaybes)
import Gargantext.Prelude
import Data.Map (Map)
import qualified Data.Map as Map
import Data.List (sortOn, nub, concat)
import Data.Set (Set)
import Data.Set (fromList, toList, isSubsetOf)
import Data.Graph.Inductive hiding (Graph, neighbors, subgraph, (&))
import Gargantext.Core.Viz.Graph.FGL (Graph_Undirected, degree, neighbors, mkGraphUfromEdges)
import Gargantext.Core.Viz.Graph.Tools (cooc2graph',cooc2graph'', Threshold)
import Gargantext.Core.Methods.Distances (Distance)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex)
import Gargantext.Core.Viz.AdaptativePhylo
-- import Debug.Trace (trace)
type Graph = Graph_Undirected
type Neighbor = Node
-- | getMaxCliques
-- TODO choose distance order
getMaxCliques :: Ord a => CliqueFilter -> Distance -> Threshold -> Map (a, a) Int -> [[a]]
getMaxCliques f d t m = map fromIndices $ getMaxCliques' t m'
where
m' = toIndex to m
(to,from) = createIndices m
fromIndices = catMaybes . map (\n -> Map.lookup n from)
getMaxCliques' :: Threshold -> Map (Int, Int) Int -> [[Int]]
getMaxCliques' t' n = maxCliques graph
where
graph = mkGraphUfromEdges (Map.keys n')
-- n' = cooc2graph' d t' n
n' = case f of ByThreshold -> cooc2graph' d t' n
ByNeighbours -> cooc2graph'' d t' n
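-- A minimal usage sketch (illustrative only, not an original doctest). It
-- assumes the 'Conditional' constructor from Gargantext.Core.Methods.Distances
-- and a tiny hypothetical co-occurrence map; ByThreshold keeps the edges whose
-- weight passes the threshold before the clique search.
--
-- >>> let cooc = Map.fromList [(("a","b"), 3), (("b","c"), 2), (("a","c"), 2)]
-- >>> getMaxCliques ByThreshold Conditional 0.0001 cooc
-- [["a","b","c"]]   -- expected shape if all three edges survive the threshold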
maxCliques :: Graph -> [[Node]]
maxCliques g = map (\n -> subMaxCliques g (n:ns)) ns & concat & takeMax
where
ns :: [Node]
ns = sortOn (degree g) $ nodes g
subMaxCliques :: Graph -> [Node] -> [[Node]]
subMaxCliques _ [] = [[]]
subMaxCliques g' (x:xs) = add x $ subMaxCliques g' ns'
where
ns' = [n | n <- xs, elem n $ neighborsOut g' x]
add :: Node -> [[Node]] -> [[Node]]
add n [] = [[n]]
add n (m:ms) = [n:m] <> add n ms
-- | Note: this is the same as:
-- add n ns = map (\m -> n : m) ns
-- (but written with explicit pattern matching and recursion;
-- it is, in effect, a redefinition of map)
-- | Make sure a node is not counted among its own neighbors
-- ("out" means the node itself is excluded)
neighborsOut :: Graph -> Node -> [Node]
neighborsOut g'' n = filter (/= n) $ neighbors g'' n
takeMax :: [[Node]] -> [[Node]]
takeMax = map toList
. purge
. map fromList
. sortOn length
. nub
where
purge :: [Set Node] -> [Set Node]
purge [] = []
purge (x:xs) = x' <> purge xs
where
x' = if all (== False) (map (isSubsetOf x) xs)
then [x]
else []
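-- Illustrative behaviour of 'purge' (a sketch, not an original test): after
-- 'takeMax' has sorted the candidate cliques by length, 'purge' keeps a set
-- only if it is not a subset of a later (larger) one.
--
-- >>> purge (map fromList [[1,2], [1,2,3]])
-- [fromList [1,2,3]]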
------------------------------------------------------------------------
test_graph :: Graph
-- test_graph = mkGraphUfromEdges [(1,1), (2,2), (3,3)]
test_graph = mkGraphUfromEdges [(1,2), (3,3)]
test_graph' :: Graph
test_graph' = mkGraphUfromEdges [(1,2), (3,3), (3,2)]
test_graph'' :: Graph
test_graph'' = mkGraphUfromEdges [(1,2), (2,3), (1,3)]
test_graph''' :: Graph
test_graph''' = mkGraphUfromEdges [ (4,1)
, (4,2)
, (3,1)
, (3,2)
, (2,1)
]
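-- Expected results on the test graphs above (illustrative notes, not original
-- doctests):
--
-- >>> maxCliques test_graph''
-- [[1,2,3]]
--
-- test_graph' should instead yield the two maximal cliques [1,2] and [2,3],
-- since the self-loop on node 3 is discarded by neighborsOut.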
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Matrix/
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Matrix/Accelerate/
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Methods/Matrix/Accelerate/Utils.hs
{-|
Module : Gargantext.Core.Methods.Matrix.Accelerate.Utils
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This module aims at implementing distances of terms, context by context, in
the same referential of the corpus.
Implementation uses the Accelerate library, which enables GPU and CPU computation:
* Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
[Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
* Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
[Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Methods.Matrix.Accelerate.Utils
where
import qualified Data.Foldable as P (foldl1)
import Debug.Trace (trace)
import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
import qualified Gargantext.Prelude as P
-- | Matrix cell by cell multiplication
(.*) :: ( Shape ix
, Slice ix
, Elt a
, P.Num (Exp a)
)
=> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
(.*) = zipWith (*)
(./) :: ( Shape ix
, Slice ix
, Elt a
, P.Num (Exp a)
, P.Fractional (Exp a)
)
=> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
(./) = zipWith (/)
-- | Term by term division where divisions by 0 produce 0 rather than NaN.
termDivNan :: ( Shape ix
, Slice ix
, Elt a
, Eq a
, P.Num (Exp a)
, P.Fractional (Exp a)
)
=> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
termDivNan = zipWith (\i j -> cond ((==) j 0) 0 ((/) i j))
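-- Illustrative example (a sketch, not an original doctest): the cell (0,1)
-- below would be NaN with a plain division, but termDivNan yields 0.
--
-- >>> run $ termDivNan (use $ matrix 2 [1,2,3,4 :: Double]) (use $ matrix 2 [1,0,3,2])
-- Matrix (Z :. 2 :. 2)
--   [ 1.0, 0.0,
--     1.0, 2.0]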
(.-) :: ( Shape ix
, Slice ix
, Elt a
, P.Num (Exp a)
, P.Fractional (Exp a)
)
=> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
(.-) = zipWith (-)
(.+) :: ( Shape ix
, Slice ix
, Elt a
, P.Num (Exp a)
, P.Fractional (Exp a)
)
=> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
(.+) = zipWith (+)
-----------------------------------------------------------------------
matrixOne :: Num a => Dim -> Acc (Matrix a)
matrixOne n' = ones
where
ones = fill (index2 n n) 1
n = constant n'
matrixIdentity :: Num a => Dim -> Acc (Matrix a)
matrixIdentity n' =
let zeros = fill (index2 n n) 0
ones = fill (index1 n) 1
n = constant n'
in
permute const zeros (\(unindex1 -> i) -> index2 i i) ones
matrixEye :: Num a => Dim -> Acc (Matrix a)
matrixEye n' =
let ones = fill (index2 n n) 1
zeros = fill (index1 n) 0
n = constant n'
in
permute const ones (\(unindex1 -> i) -> index2 i i) zeros
diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
diagNull n m = zipWith (*) m (matrixEye n)
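-- Illustrative example (a sketch, not an original doctest): matrixEye n is the
-- complement of the identity pattern (ones everywhere except on the diagonal),
-- so diagNull simply zeroes the diagonal of its argument.
--
-- >>> run $ diagNull 3 (use $ matrix 3 ([1..] :: [Double]))
-- Matrix (Z :. 3 :. 3)
--   [ 0.0, 2.0, 3.0,
--     4.0, 0.0, 6.0,
--     7.0, 8.0, 0.0]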
-- Returns an N-dimensional array with the values of x for the indices where
-- the condition is true, 0 everywhere else.
condOrDefault
:: forall sh a. (Shape sh, Elt a)
=> (Exp sh -> Exp Bool) -> Exp a -> Acc (Array sh a) -> Acc (Array sh a)
condOrDefault theCond def x = permute const zeros filterInd x
where
zeros = fill (shape x) (def)
filterInd ix = (cond (theCond ix)) ix ignore
-----------------------------------------------------------------------
_runExp :: Elt e => Exp e -> e
_runExp e = indexArray (run (unit e)) Z
-----------------------------------------------------------------------
-- | Define a vector
--
-- >>> vector 3
-- Vector (Z :. 3) [0,1,2]
vector :: Elt c => Int -> [c] -> (Array (Z :. Int) c)
vector n l = fromList (Z :. n) l
-- | Define a matrix
--
-- >>> matrix 3 ([1..] :: [Double])
-- Matrix (Z :. 3 :. 3)
-- [ 1.0, 2.0, 3.0,
-- 4.0, 5.0, 6.0,
-- 7.0, 8.0, 9.0]
matrix :: Elt c => Int -> [c] -> Matrix c
matrix n l = fromList (Z :. n :. n) l
-- | Two ways to get the rank (as documentation)
--
-- >>> rank (matrix 3 ([1..] :: [Int]))
-- 2
rank :: (Matrix a) -> Int
rank m = arrayRank $ arrayShape m
-----------------------------------------------------------------------
-- | Dimension of a square Matrix
-- How to force use with SquareMatrix ?
type Dim = Int
-- | Get Dimension of a square Matrix
--
-- >>> dim (matrix 3 ([1..] :: [Int]))
-- 3
dim :: Matrix a -> Dim
dim m = n
where
Z :. _ :. n = arrayShape m
-- indexTail (arrayShape m)
-----------------------------------------------------------------------
-- | Sum of a Matrix by Column
--
-- >>> run $ matSumCol 3 (use $ matrix 3 [1..])
-- Matrix (Z :. 3 :. 3)
-- [ 12.0, 15.0, 18.0,
-- 12.0, 15.0, 18.0,
-- 12.0, 15.0, 18.0]
matSumCol :: (Elt a, P.Num (Exp a)) => Dim -> Acc (Matrix a) -> Acc (Matrix a)
matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
matSumCol' :: (Elt a, P.Num (Exp a)) => Matrix a -> Matrix a
matSumCol' m = run $ matSumCol n m'
where
n = dim m
m' = use m
-- | Proba computes the probability matrix: each cell is divided by the sum of its column
-- if you need the probabilities over the rows instead, just transpose it
--
-- >>> run $ matProba 3 (use $ matrix 3 [1..])
-- Matrix (Z :. 3 :. 3)
-- [ 8.333333333333333e-2, 0.13333333333333333, 0.16666666666666666,
-- 0.3333333333333333, 0.3333333333333333, 0.3333333333333333,
-- 0.5833333333333334, 0.5333333333333333, 0.5]
matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
matProba d mat = zipWith (/) mat (matSumCol d mat)
-- | Diagonal of the matrix
--
-- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
-- Vector (Z :. 3) [1,5,9]
diag :: Elt e
=> Acc (Matrix e)
-> Acc (Vector e)
diag m = backpermute (indexTail (shape m))
(lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int))))
m
-- | Divide by the Diagonal of the matrix
--
-- >>> run $ divByDiag 3 (use $ matrix 3 ([1..] :: [Double]))
-- Matrix (Z :. 3 :. 3)
-- [ 1.0, 0.4, 0.3333333333333333,
-- 4.0, 1.0, 0.6666666666666666,
-- 7.0, 1.6, 1.0]
divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
-----------------------------------------------------------------------
-- | Filters the matrix with the minimum of maximums
--
-- >>> run $ matMiniMax $ use $ matrix 3 [1..]
-- Matrix (Z :. 3 :. 3)
-- [ 0.0, 4.0, 7.0,
-- 0.0, 5.0, 8.0,
-- 0.0, 6.0, 9.0]
matMiniMax :: (Elt a, Ord a, P.Num a)
=> Acc (Matrix a)
-> Acc (Matrix a)
matMiniMax m = filterWith' miniMax' (constant 0) m
where
miniMax' = the $ maximum $ minimum m
-- | Filters the matrix with a constant
--
-- >>> run $ filter' 5 $ use $ matrix 3 [1..]
-- Matrix (Z :. 3 :. 3)
-- [ 0.0, 0.0, 7.0,
-- 0.0, 0.0, 8.0,
-- 0.0, 6.0, 9.0]
filter' :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
filter' t m = filterWith t 0 m
filterWith :: Double -> Double -> Acc (Matrix Double) -> Acc (Matrix Double)
filterWith t v m = map (\x -> ifThenElse (x > (constant t)) x (constant v)) (transpose m)
filterWith' :: (Elt a, Ord a) => Exp a -> Exp a -> Acc (Matrix a) -> Acc (Matrix a)
filterWith' t v m = map (\x -> ifThenElse (x > t) x v) m
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO use Lenses
data Direction = MatCol (Exp Int) | MatRow (Exp Int) | Diag
nullOf :: Num a => Dim -> Direction -> Acc (Matrix a)
nullOf n' dir =
let ones = fill (index2 n n) 1
zeros = fill (index2 n n) 0
n = constant n'
in
permute const ones ( lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
-> case dir of
MatCol m -> (Z :. i :. m)
MatRow m -> (Z :. m :. i)
Diag -> (Z :. i :. i)
)
)
zeros
nullOfWithDiag :: Num a => Dim -> Direction -> Acc (Matrix a)
nullOfWithDiag n dir = zipWith (*) (nullOf n dir) (nullOf n Diag)
divide :: (Elt a, Ord a, P.Fractional (Exp a), P.Num a)
=> Acc (Matrix a) -> Acc (Matrix a) -> Acc (Matrix a)
divide = zipWith divide'
where
divide' a b = ifThenElse (b > (constant 0))
(a / b)
(constant 0)
-- | Numerator
sumRowMin :: (Num a, Ord a) => Dim -> Acc (Matrix a) -> Acc (Matrix a)
sumRowMin n m = {-trace (P.show $ run m') $-} m'
where
m' = reshape (shape m) vs
vs = P.foldl1 (++)
$ P.map (\z -> sumRowMin1 n (constant z) m) [0..n-1]
sumRowMin1 :: (Num a, Ord a) => Dim -> Exp Int -> Acc (Matrix a) -> Acc (Vector a)
sumRowMin1 n x m = trace (P.show (run m,run $ transpose m)) $ m''
where
m'' = sum $ zipWith min (transpose m) m
_m' = zipWith (*) (zipWith (*) (nullOf n (MatCol x)) $ nullOfWithDiag n (MatRow x)) m
-- | Denominator
sumColMin :: (Num a, Ord a) => Dim -> Acc (Matrix a) -> Acc (Matrix a)
sumColMin n m = reshape (shape m) vs
where
vs = P.foldl1 (++)
$ P.map (\z -> sumColMin1 n (constant z) m) [0..n-1]
sumColMin1 :: (Num a) => Dim -> Exp Int -> Acc (Matrix a) -> Acc (Matrix a)
sumColMin1 n x m = zipWith (*) (nullOfWithDiag n (MatCol x)) m
{- | WIP fun with indexes
selfMatrix :: Num a => Dim -> Acc (Matrix a)
selfMatrix n' =
let zeros = fill (index2 n n) 0
ones = fill (index2 n n) 1
n = constant n'
in
permute const ones ( lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
-> -- ifThenElse (i /= j)
-- (Z :. i :. j)
(Z :. i :. i)
)) zeros
selfMatrix' :: (Elt a, P.Num (Exp a)) => Array DIM2 a -> Matrix a
selfMatrix' m' = run $ selfMatrix n
where
n = dim m'
m = use m'
-}
-------------------------------------------------
-------------------------------------------------
crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
crossProduct n m = {-trace (P.show (run m',run m'')) $-} zipWith (*) m' m''
where
m' = cross n m
m'' = transpose $ cross n m
crossT :: Matrix Double -> Matrix Double
crossT = run . transpose . use
crossProduct' :: Matrix Double -> Matrix Double
crossProduct' m = run $ crossProduct n m'
where
n = dim m
m' = use m
runWith :: (Arrays c, Elt a1)
=> (Dim -> Acc (Matrix a1) -> a2 -> Acc c)
-> Matrix a1
-> a2
-> c
runWith f m = run . f (dim m) (use m)
-- | cross
cross :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
cross n mat = diagNull n (matSumCol n $ diagNull n mat)
cross' :: Matrix Double -> Matrix Double
cross' mat = run $ cross n mat'
where
mat' = use mat
n = dim mat
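-- Illustrative example (a sketch, not an original doctest; values computed by
-- hand): 'cross' zeroes the diagonal, sums each column, broadcasts the sums on
-- every row, and zeroes the diagonal again.
--
-- >>> run $ cross 3 (use $ matrix 3 ([1..] :: [Double]))
-- Matrix (Z :. 3 :. 3)
--   [  0.0, 10.0,  9.0,
--     11.0,  0.0,  9.0,
--     11.0, 10.0,  0.0]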
{-
-- | Hypothesis to test maybe later (or not)
-- TODO ask accelerate for instances to ease such writings:
p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
p_ m = zipWith (/) m (n_ m)
where
n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
n_ m = backpermute (shape m)
(lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
-> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
)
) m
-}
theMatrixDouble :: Int -> Matrix Double
theMatrixDouble n = run $ map fromIntegral (use $ theMatrixInt n)
theMatrixInt :: Int -> Matrix Int
theMatrixInt n = matrix n (dataMatrix n)
where
dataMatrix :: Int -> [Int]
dataMatrix x | (P.==) x 2 = [ 1, 1
, 1, 2
]
| (P.==) x 3 = [ 7, 4, 0
, 4, 5, 3
, 0, 3, 4
]
| (P.==) x 4 = [ 4, 1, 2, 1
, 1, 4, 0, 0
, 2, 0, 3, 3
, 1, 0, 3, 3
]
| P.otherwise = P.undefined
{-
theResult :: Int -> Matrix Double
theResult n | (P.==) n 2 = let r = 1.6094379124341003 in [ 0, r, r, 0]
| P.otherwise = [ 1, 1 ]
-}
colMatrix :: Elt e
=> Int -> [e] -> Acc (Array ((Z :. Int) :. Int) e)
colMatrix n ns = replicate (constant (Z :. (n :: Int) :. All)) v
where
v = use $ vector (P.length ns) ns
-----------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/NodeStory.hs
{-|
Module : Gargantext.Core.NodeStory
Description : Node API generation
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO:
- remove
- filter
- load the lists
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
module Gargantext.Core.NodeStory where
-- import Debug.Trace (traceShow)
import Codec.Serialise (serialise, deserialise)
import Codec.Serialise.Class
import Control.Concurrent (MVar(), withMVar, newMVar, modifyMVar_)
import Control.Debounce (mkDebounce, defaultDebounceSettings, debounceFreq, debounceAction)
import Control.Lens (makeLenses, Getter, (^.))
import Control.Monad.Except
import Control.Monad.Reader
import Data.Aeson hiding ((.=), decode)
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import Data.Monoid
import Data.Semigroup
import GHC.Generics (Generic)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (NodeId)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Prelude (CmdM', HasConnectionPool, HasConfig)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Prelude
import System.Directory (renameFile, createDirectoryIfMissing, doesFileExist, removeFile)
import System.IO (FilePath, hClose)
import System.IO.Temp (withTempFile)
import qualified Data.ByteString.Lazy as DBL
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Map.Strict.Patch.Internal as Patch
import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
------------------------------------------------------------------------
data NodeStoryEnv = NodeStoryEnv
{ _nse_var :: !(MVar NodeListStory)
, _nse_saver :: !(IO ())
, _nse_getter :: [NodeId] -> IO (MVar NodeListStory)
--, _nse_cleaner :: !(IO ()) -- every 12 hours: cleans the repos of unused NodeStories
-- , _nse_lock :: !FileLock -- TODO (it depends on the option: if with database or file only)
}
deriving (Generic)
type HasNodeStory env err m = ( CmdM' env err m
, MonadReader env m
, MonadError err m
, HasNodeStoryEnv env
, HasConfig env
, HasConnectionPool env
, HasNodeError err
)
class (HasNodeStoryVar env, HasNodeStorySaver env)
=> HasNodeStoryEnv env where
hasNodeStory :: Getter env NodeStoryEnv
class HasNodeStoryVar env where
hasNodeStoryVar :: Getter env ([NodeId] -> IO (MVar NodeListStory))
class HasNodeStorySaver env where
hasNodeStorySaver :: Getter env (IO ())
------------------------------------------------------------------------
readNodeStoryEnv :: NodeStoryDir -> IO NodeStoryEnv
readNodeStoryEnv nsd = do
mvar <- nodeStoryVar nsd Nothing [0]
saver <- mkNodeStorySaver nsd mvar
pure $ NodeStoryEnv { _nse_var = mvar
, _nse_saver = saver
, _nse_getter = nodeStoryVar nsd (Just mvar) }
------------------------------------------------------------------------
mkNodeStorySaver :: NodeStoryDir -> MVar NodeListStory -> IO (IO ())
mkNodeStorySaver nsd mvns = mkDebounce settings
where
settings = defaultDebounceSettings
{ debounceAction = withMVar mvns (writeNodeStories nsd)
, debounceFreq = 1 * minute
-- , debounceEdge = trailingEdge -- Trigger on the trailing edge
}
minute = 60 * second
second = 10^(6 :: Int)
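-- Usage sketch (illustrative, not part of the original module): the action
-- returned by mkNodeStorySaver is debounced, so calling it after every update
-- of the MVar results in at most one write to disk per minute.
--
-- do saver <- mkNodeStorySaver nsd mvar
--    modifyMVar_ mvar (pure . someUpdate)  -- 'someUpdate' is a hypothetical function
--    saver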
nodeStoryVar :: NodeStoryDir
-> Maybe (MVar NodeListStory)
-> [NodeId]
-> IO (MVar NodeListStory)
nodeStoryVar nsd Nothing ni = nodeStoryIncs nsd Nothing ni >>= newMVar
nodeStoryVar nsd (Just mv) ni = do
_ <- modifyMVar_ mv $ \mv' -> (nodeStoryIncs nsd (Just mv') ni)
pure mv
nodeStoryInc :: NodeStoryDir -> Maybe NodeListStory -> NodeId -> IO NodeListStory
nodeStoryInc nsd (Just ns@(NodeStory nls)) ni = do
case Map.lookup ni nls of
Nothing -> do
(NodeStory nls') <- nodeStoryRead nsd ni
pure $ NodeStory $ Map.union nls nls'
Just _ -> pure ns
nodeStoryInc nsd Nothing ni = nodeStoryRead nsd ni
nodeStoryIncs :: NodeStoryDir
-> Maybe NodeListStory
-> [NodeId]
-> IO NodeListStory
nodeStoryIncs _ Nothing [] = panic "nodeStoryIncs: Empty"
nodeStoryIncs nsd (Just nls) ns = foldM (\m n -> nodeStoryInc nsd (Just m) n) nls ns
nodeStoryIncs nsd Nothing (ni:ns) = do
m <- nodeStoryRead nsd ni
nodeStoryIncs nsd (Just m) ns
nodeStoryDec :: NodeStoryDir
-> NodeListStory
-> NodeId
-> IO NodeListStory
nodeStoryDec nsd ns@(NodeStory nls) ni = do
case Map.lookup ni nls of
Nothing -> do
-- we make sure the corresponding file repo is really removed
_ <- nodeStoryRemove nsd ni
pure ns
Just _ -> do
let ns' = Map.filterWithKey (\k _v -> k /= ni) nls
_ <- nodeStoryRemove nsd ni
pure $ NodeStory ns'
-- | TODO lock
nodeStoryRead :: NodeStoryDir -> NodeId -> IO NodeListStory
nodeStoryRead nsd ni = do
_repoDir <- createDirectoryIfMissing True nsd
let nsp = nodeStoryPath nsd ni
exists <- doesFileExist nsp
if exists
then deserialise <$> DBL.readFile nsp
else pure (initNodeStory ni)
nodeStoryRemove :: NodeStoryDir -> NodeId -> IO ()
nodeStoryRemove nsd ni = do
let nsp = nodeStoryPath nsd ni
exists <- doesFileExist nsp
if exists
then removeFile nsp
else pure ()
nodeStoryRead_test :: NodeStoryDir -> NodeId -> IO (Maybe [ TableNgrams.NgramsType ])
nodeStoryRead_test nsd ni = nodeStoryRead nsd ni >>= \n -> pure
$ fmap Map.keys
$ fmap _a_state
$ Map.lookup ni
$ _unNodeStory n
------------------------------------------------------------------------
type NodeStoryDir = FilePath
writeNodeStories :: NodeStoryDir -> NodeListStory -> IO ()
writeNodeStories fp nls = do
done <- mapM (writeNodeStory fp) $ splitByNode nls
printDebug "[writeNodeStories]" done
pure ()
writeNodeStory :: NodeStoryDir -> (NodeId, NodeListStory) -> IO ()
writeNodeStory rdfp (n, ns) = saverAction' rdfp n ns
splitByNode :: NodeListStory -> [(NodeId, NodeListStory)]
splitByNode (NodeStory m) =
List.map (\(n,a) -> (n, NodeStory $ Map.singleton n a)) $ Map.toList m
saverAction' :: Serialise a => NodeStoryDir -> NodeId -> a -> IO ()
saverAction' repoDir nId a = do
withTempFile repoDir ((cs $ show nId) <> "-tmp-repo.cbor") $ \fp h -> do
printDebug "[repoSaverAction]" fp
DBL.hPut h $ serialise a
hClose h
renameFile fp (nodeStoryPath repoDir nId)
nodeStoryPath :: NodeStoryDir -> NodeId -> FilePath
nodeStoryPath repoDir nId = repoDir <> "/" <> filename
where
filename = "repo" <> "-" <> (cs $ show nId) <> ".cbor"
------------------------------------------------------------------------
-- TODO : repo Migration TODO TESTS
repoMigration :: NodeStoryDir -> NgramsRepo -> IO ()
repoMigration fp r = writeNodeStories fp (repoToNodeListStory r)
repoToNodeListStory :: NgramsRepo -> NodeListStory
repoToNodeListStory (Repo _v s h) = NodeStory $ Map.fromList ns
where
s' = ngramsState_migration s
h' = ngramsStatePatch_migration h
ns = List.map (\(n,ns')
-> (n, let hs = fromMaybe [] (Map.lookup n h') in
Archive { _a_version = List.length hs
, _a_state = ns'
, _a_history = hs }
)
) $ Map.toList s'
ngramsState_migration :: NgramsState
-> Map NodeId NgramsState'
ngramsState_migration ns =
Map.fromListWith (Map.union) $
List.concat $
map (\(nt, nTable)
-> map (\(nid, table)
-> (nid, Map.singleton nt table)
) $ Map.toList nTable
) $ Map.toList ns
ngramsStatePatch_migration :: [NgramsStatePatch]
-> Map NodeId [NgramsStatePatch']
ngramsStatePatch_migration np' = Map.fromListWith (<>)
$ List.concat
$ map toPatch np'
where
toPatch :: NgramsStatePatch -> [(NodeId, [NgramsStatePatch'])]
toPatch p =
List.concat $
map (\(nt, nTable)
-> map (\(nid, table)
-> (nid, [fst $ Patch.singleton nt table])
) $ Patch.toList nTable
) $ Patch.toList p
------------------------------------------------------------------------
{- | Node Story for each NodeType where the Key of the Map is NodeId
TODO : generalize for any NodeType, let's start with NodeList which
is implemented already
-}
data NodeStory s p = NodeStory { _unNodeStory :: Map NodeId (Archive s p) }
deriving (Generic, Show)
instance (FromJSON s, FromJSON p) => FromJSON (NodeStory s p)
instance (ToJSON s, ToJSON p) => ToJSON (NodeStory s p)
instance (Serialise s, Serialise p) => Serialise (NodeStory s p)
data Archive s p = Archive
{ _a_version :: !Version
, _a_state :: !s
, _a_history :: ![p]
-- first patch in the list is the most recent
}
deriving (Generic, Show)
instance (Serialise s, Serialise p) => Serialise (Archive s p)
type NodeListStory = NodeStory NgramsState' NgramsStatePatch'
type NgramsState' = Map TableNgrams.NgramsType NgramsTableMap
type NgramsStatePatch' = PatchMap TableNgrams.NgramsType NgramsTablePatch
instance Serialise NgramsStatePatch'
-- TODO Semigroup instance for unions
-- TODO check this
instance (Semigroup s, Semigroup p) => Semigroup (Archive s p) where
(<>) (Archive { _a_history = p }) (Archive { _a_version = v'
, _a_state = s'
, _a_history = p'}) =
Archive { _a_version = v'
, _a_state = s'
, _a_history = p' <> p }
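-- Illustrative consequence of the instance above (not an original test): the
-- version and state of the right-hand archive win, and the left history is
-- appended after the right one.
--
-- Archive 1 s1 [p1] <> Archive 2 s2 [p2]  ==  Archive 2 s2 [p2, p1]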
instance Monoid (Archive NgramsState' NgramsStatePatch') where
mempty = Archive { _a_version = 0
, _a_state = mempty
, _a_history = [] }
instance (FromJSON s, FromJSON p) => FromJSON (Archive s p) where
parseJSON = genericParseJSON $ unPrefix "_a_"
instance (ToJSON s, ToJSON p) => ToJSON (Archive s p) where
toJSON = genericToJSON $ unPrefix "_a_"
toEncoding = genericToEncoding $ unPrefix "_a_"
------------------------------------------------------------------------
initNodeStory :: Monoid s => NodeId -> NodeStory s p
initNodeStory ni = NodeStory $ Map.singleton ni initArchive
initArchive :: Monoid s => Archive s p
initArchive = Archive { _a_version = 0
, _a_state = mempty
, _a_history = [] }
initNodeListStoryMock :: NodeListStory
initNodeListStoryMock = NodeStory $ Map.singleton nodeListId archive
where
nodeListId = 0
archive = Archive { _a_version = 0
, _a_state = ngramsTableMap
, _a_history = [] }
ngramsTableMap = Map.singleton TableNgrams.NgramsTerms
$ Map.fromList
[ (n ^. ne_ngrams, ngramsElementToRepo n)
| n <- mockTable ^. _NgramsTable
]
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Lenses are at the bottom of the file because Template Haskell would otherwise reorder the execution order
makeLenses ''NodeStoryEnv
makeLenses ''NodeStory
makeLenses ''Archive
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Statistics.hs
{-|
Module : Gargantext.Core.Statistics
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Statistics
where
import Data.Map (Map)
import Gargantext.Prelude
import Numeric.Statistics.PCA (pcaReduceN)
import Data.Array.IArray (Array, listArray, elems)
import qualified Data.Vector.Storable as Vec
import qualified Data.List as List
import qualified Data.Map as Map
data Dimension = Dimension Int
pcaReduceTo :: Ord t
=> Dimension
-> Map t (Vec.Vector Double)
-> Map t (Vec.Vector Double)
pcaReduceTo (Dimension d) m = Map.fromList
$ zip txts
$ elems
$ pcaReduceN m'' d
where
m'' :: Array Int (Vec.Vector Double)
m'' = listArray (1, List.length m') m'
(txts,m') = List.unzip $ Map.toList m
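-- A minimal usage sketch (hypothetical data, not part of the original module):
-- project 3-dimensional vectors, one per term, down to their 2 main components.
--
-- >>> let m = Map.fromList [ ("a", Vec.fromList [1,2,3])
-- >>>                      , ("b", Vec.fromList [2,4,6])
-- >>>                      , ("c", Vec.fromList [1,0,1]) ]
-- >>> Map.map Vec.length (pcaReduceTo (Dimension 2) m)
-- fromList [("a",2),("b",2),("c",2)]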
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text.hs
{-|
Module : Gargantext.Core.Text
Description : Ngrams tools
Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Text gathers terms into units of contexts.
-}
module Gargantext.Core.Text
where
import Data.Text (Text, split)
import Gargantext.Prelude hiding (filter)
import NLP.FullStop (segment)
import qualified Data.Text as DT
-----------------------------------------------------------------
class HasText h
where
hasText :: h -> [Text]
-----------------------------------------------------------------
-- French words to distinguish contexts
newtype Texte = Texte Text
newtype Paragraphe = Paragraphe Text
newtype Phrase = Phrase Text
newtype MultiTerme = MultiTerme Text
newtype Mot = Mot Text
newtype Lettre = Lettre Text
-- | This type synonym seems obvious
type Titre = Phrase
-----------------------------------------------------------------
instance Show Texte where
show (Texte t) = show t
instance Show Paragraphe where
show (Paragraphe p) = show p
instance Show Phrase where
show (Phrase p) = show p
instance Show MultiTerme where
show (MultiTerme mt) = show mt
instance Show Mot where
show (Mot t) = show t
instance Show Lettre where
show (Lettre l) = show l
-----------------------------------------------------------------
class Collage sup inf where
dec :: sup -> [inf]
inc :: [inf] -> sup
instance Collage Texte Paragraphe where
dec (Texte t) = map Paragraphe $ DT.splitOn "\n" t
inc = Texte . DT.intercalate "\n" . map (\(Paragraphe t) -> t)
instance Collage Paragraphe Phrase where
dec (Paragraphe t) = map Phrase $ sentences t
inc = Paragraphe . DT.unwords . map (\(Phrase p) -> p)
instance Collage Phrase MultiTerme where
dec (Phrase t) = map MultiTerme $ DT.words t
inc = Phrase . DT.unwords . map (\(MultiTerme p) -> p)
instance Collage MultiTerme Mot where
dec (MultiTerme mt) = map Mot $ DT.words mt
inc = MultiTerme . DT.intercalate " " . map (\(Mot m) -> m)
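-- Illustrative round trips (sketches, not original doctests):
--
-- >>> dec (Phrase (DT.pack "the cat sat")) :: [MultiTerme]
-- ["the","cat","sat"]
--
-- >>> inc [Mot (DT.pack "big"), Mot (DT.pack "data")] :: MultiTerme
-- "big data"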
-------------------------------------------------------------------
-- Contexts of text
sentences :: Text -> [Text]
sentences txt = map DT.pack $ segment $ DT.unpack txt
sentences' :: Text -> [Text]
sentences' txt = split isCharStop txt
isCharStop :: Char -> Bool
isCharStop c = c `elem` ['.','?','!']
unsentences :: [Text] -> Text
unsentences txts = DT.intercalate " " txts
-- | Ngrams size
size :: Text -> Int
size t = 1 + DT.count " " t
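-- Illustrative examples (sketches, not original doctests):
--
-- >>> size (DT.pack "big data")
-- 2
-- >>> size (DT.pack "artificial general intelligence")
-- 3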
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Context.hs
{-|
Module : Gargantext.Core.Text.Context
Description : How to manage contexts of texts ?
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Tools for managing contexts of text; here is the logic of the main types:
- Term
- Multi-term
- Label
- Sentence
- Corpus
How to split contexts is described in this module.
-}
module Gargantext.Core.Text.Context
where
import Data.Text (Text, pack, unpack)
import Data.String (IsString)
import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
import Gargantext.Core.Text
import Gargantext.Prelude hiding (length)
------------------------------------------------------------------------
type Term = Text
type MultiTerm = [Term]
type Label = MultiTerm
type TermList = [(Label, [MultiTerm])]
type Sentence a = [a] -- or a nominal group
type Corpus a = [Sentence a] -- a list of sentences
-- type ConText a = [Sentence a]
-- type Corpus a = [ConText a]
------------------------------------------------------------------------
-- | Contexts definition to build/unbuild contexts.
data SplitContext = Chars Int | Sentences Int | Paragraphs Int
-- | splitBy contexts of Chars or Sentences or Paragraphs
-- To see some examples at a higher level (sentences and paragraph), see
-- 'Gargantext.Core.Text.Examples.ex_terms'
--
-- >>> splitBy (Chars 0) (pack "abcde")
-- ["a","b","c","d","e"]
--
-- >>> splitBy (Chars 1) (pack "abcde")
-- ["ab","bc","cd","de"]
--
-- >>> splitBy (Chars 2) (pack "abcde")
-- ["abc","bcd","cde"]
splitBy :: SplitContext -> Text -> [Text]
splitBy (Chars n) = map pack . chunkAlong (n+1) 1 . unpack
splitBy (Sentences n) = map unsentences . chunkAlong (n+1) 1 . sentences
splitBy (Paragraphs _) = map unTag . filter isTagText . parseTags
where
unTag :: IsString p => Tag p -> p
unTag (TagText x) = x
unTag _ = ""
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Convert.hs
{-|
Module : Gargantext.Core.Text.Convert
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Format Converter.
-}
{-# LANGUAGE PackageImports #-}
module Gargantext.Core.Text.Convert (risPress2csvWrite)
where
import Data.Either (Either(..))
import qualified Data.Text as T
import System.FilePath (FilePath()) -- , takeExtension)
import Gargantext.Prelude
import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv)
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat(..))
risPress2csvWrite :: FilePath -> IO ()
risPress2csvWrite f = do
eContents <- parseFile RisPresse (f <> ".ris")
case eContents of
Right contents -> writeDocs2Csv (f <> ".csv") contents
Left e -> panic $ "Error: " <> (T.pack e)
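-- Usage sketch (illustrative; the file name is hypothetical): given a press
-- corpus "mycorpus.ris" on disk, the call below parses it and writes
-- "mycorpus.csv" next to it, or panics with the parsing error.
--
-- risPress2csvWrite "mycorpus"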
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/API.hs
{-|
Module : Gargantext.Core.Text.Corpus.API
Description : All crawlers of Gargantext in one file.
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.API
( ExternalAPIs(..)
, Query
, Limit
, get
, externalAPIs
)
where
import Data.Maybe
import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
-- | TODO put in gargantext.init
default_limit :: Maybe Integer
default_limit = Just 10000
-- | Get External API metadata main function
get :: ExternalAPIs
-> Lang
-> Query
-> Maybe Limit
-> IO [HyperdataDocument]
get PubMed _la q _l = PUBMED.get q default_limit -- EN only by default
get HAL la q _l = HAL.get la q default_limit
get IsTex la q _l = ISTEX.get la q default_limit
get Isidore la q _l = ISIDORE.get la (fromIntegral <$> default_limit) (Just q) Nothing
get _ _ _ _ = undefined
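-- Usage sketch (illustrative; the query string is hypothetical). Note that the
-- Limit argument is currently ignored: default_limit is used instead.
--
-- docs <- get PubMed EN "graph theory" (Just 100)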
-- | Some Sugar for the documentation
type Query = PUBMED.Query
type Limit = PUBMED.Limit
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/API/
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/API/Hal.hs
{-|
Module : Gargantext.Core.Text.Corpus.API.Hal
Description : HAL API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.API.Hal
where
import Data.Maybe
import Data.Text (Text, pack, intercalate)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
import qualified HAL as HAL
import qualified HAL.Client as HAL
import qualified HAL.Doc.Corpus as HAL
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
docs <- HAL.getMetadataWith q (Just 0) (fromIntegral <$> ml)
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) docs
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") Just d)
pure $ HyperdataDocument { _hd_bdd = Just "Hal"
, _hd_doi = Just $ pack $ show i
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ intercalate " " t
, _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" aus
, _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" $ affs <> map (cs . show) struct_id
, _hd_source = Just $ maybe "Nothing" identity s
, _hd_abstract = Just $ intercalate " " ab
, _hd_publication_date = fmap (pack . show) utctime
, _hd_publication_year = pub_year
, _hd_publication_month = pub_month
, _hd_publication_day = pub_day
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (pack . show) la }
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/API/Isidore.hs
{-|
Module : Gargantext.Core.Text.Corpus.API.Isidore
Description : To query French Humanities publication database from its API
Copyright : (c) CNRS, 2019-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Core.Text.Corpus.API.Isidore where
import System.FilePath (FilePath())
import Data.Text (Text)
import qualified Data.Text as Text
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import Isidore.Client
import Servant.Client
import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
import qualified Isidore as Isidore
import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv)
import Gargantext.Core.Text.Corpus.Parsers (cleanText)
-- | TODO work with the ServantErr
get :: Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO [HyperdataDocument]
get la l q a = do
let
printErr (DecodeFailure e _) = panic e
printErr e = panic (cs $ show e)
toIsidoreDocs :: Reply -> [IsidoreDoc]
toIsidoreDocs (ReplyOnly r) = [r]
toIsidoreDocs (Replies rs) = rs
iDocs <- either printErr _content <$> Isidore.get l q a
hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
pure hDocs
isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO ()
isidore2csvFile fp la li tq aq = do
hdocs <- get la li tq aq
writeDocs2Csv fp hdocs
isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc l (IsidoreDoc t a d u s as) = do
let
author :: Author -> Text
author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
author (Authors aus) = Text.intercalate ". " $ map author aus
creator2text :: Creator -> Text
creator2text (Creator au) = author au
creator2text (Creators aus') = Text.intercalate ". " $ map author aus'
langText :: LangText -> Text
langText (LangText _l t1) = t1
langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit l (maybe (Just "2019") (Just) d)
pure $ HyperdataDocument (Just "Isidore")
Nothing
u
Nothing
Nothing
Nothing
(Just $ cleanText $ langText t)
(creator2text <$> as)
Nothing
(Just $ maybe "Nothing" identity $ _sourceName <$> s)
(cleanText <$> langText <$> a)
(fmap (Text.pack . show) utcTime)
(pub_year)
(pub_month)
(pub_day)
Nothing
Nothing
Nothing
(Just $ (Text.pack . show) l)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/API/Istex.hs
{-|
Module : Gargantext.Core.Text.Corpus.API.Istex
Description : ISTEX API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.API.Istex
where
import Data.List (concat)
import Data.Maybe
import Data.Text (Text, pack)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
import qualified ISTEX as ISTEX
import qualified ISTEX.Client as ISTEX
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
docs <- ISTEX.getMetadataWith q (fromIntegral <$> ml)
either (panic . pack . show) (toDoc' la) docs
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la docs' = do
--printDebug "ISTEX" (ISTEX._documents_total docs')
mapM (toDoc la) (ISTEX._documents_hits docs')
-- | TODO remove dateSplit here
-- TODO current year as default
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") (Just . pack . show) d)
pure $ HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_doi = Just i
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
, _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" (map ISTEX._author_name a)
, _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a)
, _hd_source = Just $ foldl (\x y -> x <> ", " <> y) "" (catMaybes $ map ISTEX._source_title s)
, _hd_abstract = ab
, _hd_publication_date = fmap (pack . show) utctime
, _hd_publication_year = pub_year
, _hd_publication_month = pub_month
, _hd_publication_day = pub_day
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (pack . show) la }
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
{-|
Module : Gargantext.Core.Text.Corpus.API.Pubmed
Description : Pubmed API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.API.Pubmed
where
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import qualified PUBMED as PubMed
import qualified PUBMED.Parser as PubMedDoc
type Query = Text
type Limit = PubMed.Limit
-- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
get :: Query -> Maybe Limit -> IO [HyperdataDocument]
get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
<$> PubMed.getMetadataWith q l
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
(PubMedDoc.PubMedDate a y m d)
) = HyperdataDocument { _hd_bdd = Just "PubMed"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
, _hd_authors = authors aus
, _hd_institutes = institutes aus
, _hd_source = j
, _hd_abstract = abstract as
, _hd_publication_date = Just $ Text.pack $ show a
, _hd_publication_year = Just $ fromIntegral y
, _hd_publication_month = Just m
, _hd_publication_day = Just d
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (Text.pack . show) l }
where
authors :: Maybe [PubMedDoc.Author] -> Maybe Text
authors aus' = case aus' of
Nothing -> Nothing
Just au -> Just $ (Text.intercalate ", ")
$ catMaybes
$ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
institutes :: Maybe [PubMedDoc.Author] -> Maybe Text
institutes aus' = case aus' of
Nothing -> Nothing
Just au -> Just $ (Text.intercalate ", ")
$ (map (Text.replace ", " " - "))
$ catMaybes
$ map PubMedDoc.affiliation au
abstract :: Maybe [Text] -> Maybe Text
abstract as' = fmap (Text.intercalate ", ") as'
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers.hs
{-|
Module : Gargantext.Core.Text.Corpus.Parsers
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Gargantext analyzes semi-structured text, which must be parsed
before it can be analyzed.
The parsers assume we know the format of the text (the FileFormat data
type), according to which the right parser is chosen among the list of
available parsers.
This module mainly describe how to add a new parser to Gargantext,
please follow the types.
-}
{-# LANGUAGE PackageImports #-}
module Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), clean, parseFile, cleanText, parseFormat)
where
import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Monad (join)
import Data.Attoparsec.ByteString (parseOnly, Parser)
import Data.Either(Either(..))
import Data.Either.Extra (partitionEithers)
import Data.List (concat, lookup)
import Data.Ord()
import Data.String (String())
import Data.String()
import Data.Text (Text)
import Data.Text.Encoding (decodeUtf8)
import Data.Tuple.Extra (both, first, second)
import System.FilePath (FilePath(), takeExtension)
import qualified Data.ByteString as DB
import qualified Data.ByteString.Char8 as DBC
import qualified Data.ByteString.Lazy as DBL
import qualified Data.Map as DM
import qualified Data.Text as DT
import qualified Prelude as Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import Gargantext.Core.Text.Corpus.Parsers.CSV (parseHal, parseHal', parseCsv, parseCsv')
import Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
-- import Gargantext.Core.Text.Learn (detectLangDefault)
import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
import qualified Gargantext.Core.Text.Corpus.Parsers.RIS as RIS
import qualified Gargantext.Core.Text.Corpus.Parsers.WOS as WOS
------------------------------------------------------------------------
type ParseError = String
--type Field = Text
--type Document = DM.Map Field Text
--type FilesParsed = DM.Map FilePath FileParsed
--data FileParsed = FileParsed { _fileParsed_errors :: Maybe ParseError
-- , _fileParsed_result :: [Document]
-- } deriving (Show)
-- | According to the format of the input file,
-- different parsers are available.
data FileFormat = WOS | RIS | RisPresse
| CsvGargV3 | CsvHal
deriving (Show)
-- Implemented (ISI Format)
-- | DOC -- Not Implemented / import Pandoc
-- | ODT -- Not Implemented / import Pandoc
-- | PDF -- Not Implemented / pdftotext and import Pandoc ?
-- | XML -- Not Implemented / see :
parseFormat :: FileFormat -> DB.ByteString -> IO (Either Prelude.String [HyperdataDocument])
parseFormat CsvGargV3 bs = pure $ parseCsv' $ DBL.fromStrict bs
parseFormat CsvHal bs = pure $ parseHal' $ DBL.fromStrict bs
parseFormat RisPresse bs = do
docs <- mapM (toDoc RIS)
<$> snd
<$> enrichWith RisPresse
$ partitionEithers
$ [runParser' RisPresse bs]
pure $ Right docs
parseFormat WOS bs = do
docs <- mapM (toDoc WOS)
<$> snd
<$> enrichWith WOS
$ partitionEithers
$ [runParser' WOS bs]
pure $ Right docs
parseFormat _ _ = undefined
-- | Parse file into documents
-- TODO manage errors here
-- TODO: to debug maybe add the filepath in error message
parseFile :: FileFormat -> FilePath -> IO (Either Prelude.String [HyperdataDocument])
parseFile CsvHal p = parseHal p
parseFile CsvGargV3 p = parseCsv p
parseFile RisPresse p = do
docs <- join $ mapM (toDoc RIS) <$> snd <$> enrichWith RisPresse <$> readFileWith RIS p
pure $ Right docs
parseFile WOS p = do
docs <- join $ mapM (toDoc WOS) <$> snd <$> enrichWith WOS <$> readFileWith WOS p
pure $ Right docs
parseFile ff p = do
docs <- join $ mapM (toDoc ff) <$> snd <$> enrichWith ff <$> readFileWith ff p
pure $ Right docs
toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument
-- TODO use language for RIS
toDoc ff d = do
-- let abstract = lookup "abstract" d
let lang = EN -- maybe EN identity (join $ detectLangDefault <$> (fmap (DT.take 50) abstract))
let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit lang dateToParse
pure $ HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff
, _hd_doi = lookup "doi" d
, _hd_url = lookup "URL" d
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = lookup "title" d
, _hd_authors = Nothing
, _hd_institutes = lookup "authors" d
, _hd_source = lookup "source" d
, _hd_abstract = lookup "abstract" d
, _hd_publication_date = fmap (DT.pack . show) utcTime
, _hd_publication_year = pub_year
, _hd_publication_month = pub_month
, _hd_publication_day = pub_day
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (DT.pack . show) lang }
enrichWith :: FileFormat
-> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]])
enrichWith RisPresse = enrichWith' presseEnrich
enrichWith WOS = enrichWith' (map (first WOS.keys))
enrichWith _ = enrichWith' identity
enrichWith' :: ([(DB.ByteString, DB.ByteString)] -> [(DB.ByteString, DB.ByteString)])
-> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]])
enrichWith' f = second (map both' . map f . concat)
where
both' = map (both decodeUtf8)
readFileWith :: FileFormat -> FilePath
-> IO ([ParseError], [[[(DB.ByteString, DB.ByteString)]]])
readFileWith format path = do
files <- case takeExtension path of
".zip" -> openZip path
_ -> pure <$> clean <$> DB.readFile path
partitionEithers <$> mapConcurrently (runParser format) files
-- | withParser:
-- According to the format of the text, choose the right parser.
-- TODO withParser :: FileFormat -> Parser [Document]
withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
withParser WOS = WOS.parser
withParser RIS = RIS.parser
--withParser ODT = odtParser
--withParser XML = xmlParser
withParser _ = panic "[ERROR] Parser not implemented yet"
runParser :: FileFormat -> DB.ByteString
-> IO (Either String [[(DB.ByteString, DB.ByteString)]])
runParser format text = pure $ runParser' format text
runParser' :: FileFormat -> DB.ByteString
-> (Either String [[(DB.ByteString, DB.ByteString)]])
runParser' format text = parseOnly (withParser format) text
openZip :: FilePath -> IO [DB.ByteString]
openZip fp = do
entries <- withArchive fp (DM.keys <$> getEntries)
bs <- mapConcurrently (\s -> withArchive fp (getEntry s)) entries
pure bs
cleanText :: Text -> Text
cleanText = cs . clean . cs
clean :: DB.ByteString -> DB.ByteString
clean txt = DBC.map clean' txt
where
clean' '’' = '\''
clean' '\r' = ' '
clean' '\t' = ' '
clean' ';' = '.'
clean' c = c
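-- Illustrative example (a sketch, not an original doctest): semicolons become
-- dots and tabs become spaces.
--
-- >>> clean "state;of;the;art\tsurvey"
-- "state.of.the.art survey"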
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.CSV
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
CSV parser for Gargantext corpus files.
-}
module Gargantext.Core.Text.Corpus.Parsers.CSV where
import Control.Applicative
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BL
import Data.Char (ord)
import Data.Csv
import Data.Either (Either(..))
import Data.Maybe (fromMaybe)
import Data.Text (Text, pack, length, intercalate)
import Data.Time.Segment (jour)
import qualified Data.Vector as V
import Data.Vector (Vector)
import GHC.IO (FilePath)
import GHC.Word (Word8)
import qualified Prelude as Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (length)
import Gargantext.Core.Text
import Gargantext.Core.Text.Context
---------------------------------------------------------------
headerCsvGargV3 :: Header
headerCsvGargV3 =
header [ "title"
, "source"
, "publication_year"
, "publication_month"
, "publication_day"
, "abstract"
, "authors"
]
---------------------------------------------------------------
data CsvGargV3 = CsvGargV3
{ d_docId :: !Int
, d_title :: !Text
, d_source :: !Text
, d_publication_year :: !Int
, d_publication_month :: !Int
, d_publication_day :: !Int
, d_abstract :: !Text
, d_authors :: !Text
}
deriving (Show)
---------------------------------------------------------------
-- | Doc 2 HyperdataDocument
toDoc :: CsvGargV3 -> HyperdataDocument
toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
HyperdataDocument { _hd_bdd = Just "CSV"
, _hd_doi = Just . pack . show $ did
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just dt
, _hd_authors = Nothing
, _hd_institutes = Just dau
, _hd_source = Just dab
, _hd_abstract = Nothing
, _hd_publication_date = Nothing
, _hd_publication_year = Just dpy
, _hd_publication_month = Just dpm
, _hd_publication_day = Just dpd
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Nothing }
---------------------------------------------------------------
-- | Types Conversions
toDocs :: Vector CsvDoc -> [CsvGargV3]
toDocs v = V.toList
$ V.zipWith (\nId (CsvDoc { .. }) -- (CsvDoc t s mPy pm pd abst auth)
-> CsvGargV3 { d_docId = nId
, d_title = csv_title
, d_source = csv_source
, d_publication_year = fromMIntOrDec defaultYear csv_publication_year
, d_publication_month = fromMaybe defaultMonth csv_publication_month
, d_publication_day = fromMaybe defaultDay csv_publication_day
, d_abstract = csv_abstract
, d_authors = csv_authors })
(V.enumFromN 1 (V.length v'')) v''
where
v'' = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
seps= (V.fromList [Paragraphs 1, Sentences 3, Chars 3])
---------------------------------------------------------------
fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
fromDocs docs = V.map fromDocs' docs
where
fromDocs' (CsvGargV3 { .. }) = CsvDoc { csv_title = d_title
, csv_source = d_source
, csv_publication_year = Just $ IntOrDec d_publication_year
, csv_publication_month = Just d_publication_month
, csv_publication_day = Just d_publication_day
, csv_abstract = d_abstract
, csv_authors = d_authors }
---------------------------------------------------------------
-- | Split a document into its contexts
-- TODO adapt the size of the paragraph according to the corpus average
splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc m splt doc = let docSize = (length $ csv_abstract doc) in
if docSize > 1000
then
if (mod (round m) docSize) >= 10
then
splitDoc' splt doc
else
V.fromList [doc]
else
V.fromList [doc]
where
splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc' contextSize (CsvDoc { .. }) = V.fromList $ [firstDoc] <> nextDocs
where
firstDoc = CsvDoc { csv_abstract = firstAbstract, .. }
firstAbstract = head' "splitDoc'1" abstracts
nextDocs = map (\txt -> CsvDoc { csv_title = head' "splitDoc'2" $ sentences txt
, csv_abstract = unsentences $ tail' "splitDoc'1" $ sentences txt
, .. }
) (tail' "splitDoc'2" abstracts)
abstracts = (splitBy $ contextSize) csv_abstract
---------------------------------------------------------------
---------------------------------------------------------------
type Mean = Double
docsSize :: Vector CsvDoc -> Mean
docsSize csvDoc = mean ls
where
ls = V.toList $ V.map (fromIntegral . length . csv_abstract) csvDoc
---------------------------------------------------------------
newtype IntOrDec = IntOrDec Int
deriving (Show, Eq, Read)
unIntOrDec :: IntOrDec -> Int
unIntOrDec (IntOrDec i) = i
instance FromField IntOrDec where
parseField s = case runParser (parseField s :: Parser Int) of
Left _err -> IntOrDec <$> Prelude.floor <$> (parseField s :: Parser Double)
Right n -> pure $ IntOrDec n
instance ToField IntOrDec where
toField (IntOrDec i) = toField i
fromMIntOrDec :: Int -> Maybe IntOrDec -> Int
fromMIntOrDec default' mVal = unIntOrDec $ fromMaybe (IntOrDec default') mVal
defaultYear :: Int
defaultYear = 1973
defaultMonth :: Int
defaultMonth = 1
defaultDay :: Int
defaultDay = 1
data CsvDoc = CsvDoc
{ csv_title :: !Text
, csv_source :: !Text
, csv_publication_year :: !(Maybe IntOrDec)
, csv_publication_month :: !(Maybe Int)
, csv_publication_day :: !(Maybe Int)
, csv_abstract :: !Text
, csv_authors :: !Text
}
deriving (Show)
instance FromNamedRecord CsvDoc where
parseNamedRecord r = do
csv_title <- r .: "title" <|> r .: "Title"
csv_source <- r .: "source" <|> r .: "Source"
csv_publication_year <- r .: "publication_year" <|> r .: "Publication Year"
csv_publication_month <- r .: "publication_month" <|> r .: "Publication Month"
csv_publication_day <- r .: "publication_day" <|> r .: "Publication Day"
csv_abstract <- r .: "abstract" <|> r .: "Abstract"
csv_authors <- r .: "authors" <|> r .: "Authors"
pure $ CsvDoc { .. }
instance ToNamedRecord CsvDoc where
toNamedRecord (CsvDoc{ .. }) =
namedRecord [ "title" .= csv_title
, "source" .= csv_source
, "publication_year" .= csv_publication_year
, "publication_month" .= csv_publication_month
, "publication_day" .= csv_publication_day
, "abstract" .= csv_abstract
, "authors" .= csv_authors
]
hyperdataDocument2csvDoc :: HyperdataDocument -> CsvDoc
hyperdataDocument2csvDoc h = CsvDoc { csv_title = m $ _hd_title h
, csv_source = m $ _hd_source h
, csv_publication_year = Just $ IntOrDec $ mI $ _hd_publication_year h
, csv_publication_month = Just $ mI $ _hd_publication_month h
, csv_publication_day = Just $ mI $ _hd_publication_day h
, csv_abstract = m $ _hd_abstract h
, csv_authors = m $ _hd_authors h }
where
m = maybe "" identity
mI = maybe 0 identity
csvDecodeOptions :: DecodeOptions
csvDecodeOptions = defaultDecodeOptions {decDelimiter = delimiter}
csvEncodeOptions :: EncodeOptions
csvEncodeOptions = defaultEncodeOptions {encDelimiter = delimiter}
delimiter :: Word8
delimiter = fromIntegral $ ord '\t'
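-- Input layout sketch (illustrative): despite the .csv name, the corpus file
-- is tab-separated (see 'delimiter' above) and starts with a named header row:
--
--   title <TAB> source <TAB> publication_year <TAB> publication_month <TAB> publication_day <TAB> abstract <TAB> authors
--   A title <TAB> A journal <TAB> 2020 <TAB> 1 <TAB> 1 <TAB> An abstract <TAB> Doe, Jane
--
-- Capitalized headers ("Title", "Source", ...) are accepted as well, as per
-- the FromNamedRecord instance of CsvDoc above.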
------------------------------------------------------------------------
------------------------------------------------------------------------
readCsvOn' :: [CsvDoc -> Text] -> FilePath -> IO (Either Prelude.String [Text])
readCsvOn' fields fp = do
r <- readFile fp
pure $ ( V.toList
. V.map (\l -> intercalate (pack " ") $ map (\field -> field l) fields)
. snd ) <$> r
------------------------------------------------------------------------
readFileLazy :: (FromNamedRecord a) => proxy a -> FilePath -> IO (Either Prelude.String (Header, Vector a))
readFileLazy f = fmap (readByteStringLazy f) . BL.readFile
readFileStrict :: (FromNamedRecord a) => proxy a -> FilePath -> IO (Either Prelude.String (Header, Vector a))
readFileStrict f = fmap (readByteStringStrict f) . BS.readFile
readByteStringLazy :: (FromNamedRecord a) => proxy a -> BL.ByteString -> Either Prelude.String (Header, Vector a)
readByteStringLazy _f bs = decodeByNameWith csvDecodeOptions bs
readByteStringStrict :: (FromNamedRecord a) => proxy a -> BS.ByteString -> Either Prelude.String (Header, Vector a)
readByteStringStrict ff = (readByteStringLazy ff) . BL.fromStrict
------------------------------------------------------------------------
-- | TODO use readFileLazy
readFile :: FilePath -> IO (Either Prelude.String (Header, Vector CsvDoc))
readFile = fmap readCsvLazyBS . BL.readFile
-- | TODO use readByteStringLazy
readCsvLazyBS :: BL.ByteString -> Either Prelude.String (Header, Vector CsvDoc)
readCsvLazyBS bs = decodeByNameWith csvDecodeOptions bs
------------------------------------------------------------------------
-- | TODO use readFileLazy
readCsvHal :: FilePath -> IO (Either Prelude.String (Header, Vector CsvHal))
readCsvHal = fmap readCsvHalLazyBS . BL.readFile
-- | TODO use readByteStringLazy
readCsvHalLazyBS :: BL.ByteString -> Either Prelude.String (Header, Vector CsvHal)
readCsvHalLazyBS bs = decodeByNameWith csvDecodeOptions bs
readCsvHalBSStrict :: BS.ByteString -> Either Prelude.String (Header, Vector CsvHal)
readCsvHalBSStrict = readCsvHalLazyBS . BL.fromStrict
------------------------------------------------------------------------
writeFile :: FilePath -> (Header, Vector CsvDoc) -> IO ()
writeFile fp (h, vs) = BL.writeFile fp $
encodeByNameWith csvEncodeOptions h (V.toList vs)
writeDocs2Csv :: FilePath -> [HyperdataDocument] -> IO ()
writeDocs2Csv fp hs = BL.writeFile fp $ hyperdataDocument2csv hs
hyperdataDocument2csv :: [HyperdataDocument] -> BL.ByteString
hyperdataDocument2csv hs = encodeByNameWith csvEncodeOptions headerCsvGargV3 (map hyperdataDocument2csvDoc hs)
------------------------------------------------------------------------
-- Hal Format
data CsvHal = CsvHal
{ csvHal_title :: !Text
, csvHal_source :: !Text
, csvHal_publication_year :: !Integer
, csvHal_publication_month :: !Int
, csvHal_publication_day :: !Int
, csvHal_abstract :: !Text
, csvHal_authors :: !Text
, csvHal_url :: !Text
, csvHal_isbn_s :: !Text
, csvHal_issue_s :: !Text
, csvHal_journalPublisher_s:: !Text
, csvHal_language_s :: !Text
, csvHal_doiId_s :: !Text
, csvHal_authId_i :: !Text
, csvHal_instStructId_i :: !Text
, csvHal_deptStructId_i :: !Text
, csvHal_labStructId_i :: !Text
, csvHal_rteamStructId_i :: !Text
, csvHal_docType_s :: !Text
}
deriving (Show)
instance FromNamedRecord CsvHal where
parseNamedRecord r = do
csvHal_title <- r .: "title"
csvHal_source <- r .: "source"
csvHal_publication_year <- r .: "publication_year"
csvHal_publication_month <- r .: "publication_month"
csvHal_publication_day <- r .: "publication_day"
csvHal_abstract <- r .: "abstract"
csvHal_authors <- r .: "authors"
csvHal_url <- r .: "url"
csvHal_isbn_s <- r .: "isbn_s"
csvHal_issue_s <- r .: "issue_s"
csvHal_journalPublisher_s <- r .: "journalPublisher_s"
csvHal_language_s <- r .: "language_s"
csvHal_doiId_s <- r .: "doiId_s"
csvHal_authId_i <- r .: "authId_i"
csvHal_instStructId_i <- r .: "instStructId_i"
csvHal_deptStructId_i <- r .: "deptStructId_i"
csvHal_labStructId_i <- r .: "labStructId_i"
csvHal_rteamStructId_i <- r .: "rteamStructId_i"
csvHal_docType_s <- r .: "docType_s"
pure $ CsvHal { .. }
instance ToNamedRecord CsvHal where
--toNamedRecord (CsvHal t s py pm pd abst aut url isbn iss j lang doi auth inst dept lab team doct) =
toNamedRecord (CsvHal { .. }) =
namedRecord [ "title" .= csvHal_title
, "source" .= csvHal_source
, "publication_year" .= csvHal_publication_year
, "publication_month" .= csvHal_publication_month
, "publication_day" .= csvHal_publication_day
, "abstract" .= csvHal_abstract
, "authors" .= csvHal_authors
, "url" .= csvHal_url
, "isbn_s" .= csvHal_isbn_s
, "issue_s" .= csvHal_issue_s
, "journalPublisher_s" .= csvHal_journalPublisher_s
, "language_s" .= csvHal_language_s
, "doiId_s" .= csvHal_doiId_s
, "authId_i" .= csvHal_authId_i
, "instStructId_i" .= csvHal_instStructId_i
, "deptStructId_i" .= csvHal_deptStructId_i
, "labStructId_i" .= csvHal_labStructId_i
, "rteamStructId_i" .= csvHal_rteamStructId_i
, "docType_s" .= csvHal_docType_s
]
csvHal2doc :: CsvHal -> HyperdataDocument
csvHal2doc (CsvHal { .. }) =
HyperdataDocument { _hd_bdd = Just "CsvHal"
, _hd_doi = Just csvHal_doiId_s
, _hd_url = Just csvHal_url
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just csvHal_title
, _hd_authors = Just csvHal_authors
, _hd_institutes = Just csvHal_instStructId_i
, _hd_source = Just csvHal_source
, _hd_abstract = Just csvHal_abstract
, _hd_publication_date = Just $ pack . show $ jour csvHal_publication_year
csvHal_publication_month
csvHal_publication_day
, _hd_publication_year = Just $ fromIntegral csvHal_publication_year
, _hd_publication_month = Just csvHal_publication_month
, _hd_publication_day = Just csvHal_publication_day
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Nothing }
csv2doc :: CsvDoc -> HyperdataDocument
csv2doc (CsvDoc { .. })
= HyperdataDocument { _hd_bdd = Just "CsvHal"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just csv_title
, _hd_authors = Just csv_authors
, _hd_institutes = Nothing
, _hd_source = Just csv_source
, _hd_abstract = Just csv_abstract
, _hd_publication_date = Just $ pack . show $ jour (fromIntegral pubYear)
pubMonth
pubDay
, _hd_publication_year = Just pubYear
, _hd_publication_month = Just pubMonth
, _hd_publication_day = Just pubDay
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Nothing }
where
pubYear = fromMIntOrDec defaultYear csv_publication_year
pubMonth = fromMaybe defaultMonth csv_publication_month
pubDay = fromMaybe defaultDay csv_publication_day
------------------------------------------------------------------------
parseHal :: FilePath -> IO (Either Prelude.String [HyperdataDocument])
parseHal fp = do
r <- readCsvHal fp
pure $ (V.toList . V.map csvHal2doc . snd) <$> r
parseHal' :: BL.ByteString -> Either Prelude.String [HyperdataDocument]
parseHal' bs = (V.toList . V.map csvHal2doc . snd) <$> readCsvHalLazyBS bs
------------------------------------------------------------------------
parseCsv :: FilePath -> IO (Either Prelude.String [HyperdataDocument])
parseCsv fp = do
r <- readFile fp
pure $ (V.toList . V.map csv2doc . snd) <$> r
parseCsv' :: BL.ByteString -> Either Prelude.String [HyperdataDocument]
parseCsv' bs = (V.toList . V.map csv2doc . snd) <$> readCsvLazyBS bs
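-- Usage sketch (the path is hypothetical); parseCsv returns Left with the
-- decoding error message when the file cannot be parsed:
--
-- >>> Right docs <- parseCsv "data/corpus.csv"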
------------------------------------------------------------------------
-- Csv v3 weighted for phylo
data Csv' = Csv'
{ csv'_title :: !Text
, csv'_source :: !Text
, csv'_publication_year :: !Int
, csv'_publication_month :: !Int
, csv'_publication_day :: !Int
, csv'_abstract :: !Text
, csv'_authors :: !Text
, csv'_weight :: !Double } deriving (Show)
instance FromNamedRecord Csv' where
parseNamedRecord r = do
csv'_title <- r .: "title"
csv'_source <- r .: "source"
csv'_publication_year <- r .: "publication_year"
csv'_publication_month <- r .: "publication_month"
csv'_publication_day <- r .: "publication_day"
csv'_abstract <- r .: "abstract"
csv'_authors <- r .: "authors"
csv'_weight <- r .: "weight"
pure $ Csv' { .. }
readWeightedCsv :: FilePath -> IO (Header, Vector Csv')
readWeightedCsv fp =
fmap (\bs ->
case decodeByNameWith csvDecodeOptions bs of
Left e -> panic (pack e)
Right corpus -> corpus
) $ BL.readFile fp
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Date
Description : Some utils to parse dates
Copyright : (c) CNRS 2017-present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
According to the language of the text, parseRaw returns the date as Text:
TODO : Add some tests
import Gargantext.Core.Text.Corpus.Parsers.Date as DGP
DGP.parseRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Core.Text.Corpus.Parsers.Date {-(parse, parseRaw, dateSplit, Year, Month, Day)-} where
import Data.Aeson (toJSON, Value)
import Data.HashMap.Strict as HM hiding (map)
import Data.Text (Text, unpack, splitOn, pack)
import Data.Time (parseTimeOrError, defaultTimeLocale, toGregorian)
import Data.Time.Clock (UTCTime(..), getCurrentTime)
import Data.Time.LocalTime (utc)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Duckling.Api (analyze)
import Duckling.Core (makeLocale, Dimension(Time))
import Duckling.Types (Seal(..))
import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime), Options(..))
import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
import Gargantext.Core (Lang(FR,EN))
import Gargantext.Prelude
import qualified Data.Aeson as Json
import qualified Data.HashSet as HashSet
import qualified Duckling.Core as DC
------------------------------------------------------------------------
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
dateSplit :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
dateSplit _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
dateSplit l (Just txt) = do
utcTime <- parse l txt
let (y, m, d) = split' utcTime
pure (Just utcTime, (Just y, Just m,Just d))
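-- Usage sketch (illustrative; the actual resolution is performed by Duckling
-- at runtime, so the rendering below is indicative only):
--
-- >>> dateSplit EN (Just "October 7, 1979")
-- (Just 1979-10-07 00:00:00 UTC,(Just 1979,Just 10,Just 7))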
split' :: UTCTime -> (Year, Month, Day)
split' utcTime = (fromIntegral y, m, d)
where
(UTCTime day _) = utcTime
(y,m,d) = toGregorian day
type Year = Int
type Month = Int
type Day = Int
------------------------------------------------------------------------
-- | Date Parser
-- Parses dates mentions in full text given the language.
-- >>> parseDate FR (pack "10 avril 1979 à 19H")
-- 1979-04-10 19:00:00 UTC
-- >>> parseDate EN (pack "April 10 1979")
-- 1979-04-10 00:00:00 UTC
parse :: Lang -> Text -> IO UTCTime
parse lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s
type DateFormat = Text
type DateDefault = Text
parseDate' :: DateFormat -> DateDefault -> Lang -> Text -> IO UTCTime
parseDate' format def lang s = do
dateStr' <- parseRaw lang s
let dateStr = unpack $ maybe def identity
$ head $ splitOn "." dateStr'
pure $ parseTimeOrError True defaultTimeLocale (unpack format) dateStr
-- TODO add Paris to the Duckling.Locale Region datatype
-- | To get homogeneity of the languages
-- TODO : put this in a more generic place in the source code
parserLang :: Lang -> DC.Lang
parserLang FR = DC.FR
parserLang EN = DC.EN
parserLang _ = panic "not implemented"
-- | Final Date parser API
-- IO can be avoided here:
-- currentContext :: Lang -> IO Context
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseRaw :: Context -> Text -> SomeErrorHandling Text
-- TODO error handling
parseRaw :: Lang -> Text -> IO Text
parseRaw lang text = do -- case result
maybeResult <- extractValue <$> getTimeValue <$> parseDateWithDuckling lang text (Options True)
case maybeResult of
Just result -> pure result
Nothing -> panic $ "[G.C.T.C.P.D.parseRaw] ERROR" <> (pack . show) lang <> " " <> text
getTimeValue :: [ResolvedToken] -> Value
getTimeValue rt = case head rt of
Nothing -> panic "error"
Just x -> case rval x of
RVal Time t -> toJSON t
_ -> panic "error2"
extractValue :: Value -> Maybe Text
extractValue (Json.Object object) =
case HM.lookup "value" object of
Just (Json.String date) -> Just date
_ -> Nothing
extractValue _ = Nothing
-- | Current Time in DucklingTime format
-- TODO : get local Time in a more generic way
utcToDucklingTime :: UTCTime -> DucklingTime
utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
-- | Local Context which depends on Lang and Time
localContext :: Lang -> DucklingTime -> Context
localContext lang dt = Context {referenceTime = dt, locale = makeLocale (parserLang lang) Nothing}
-- | Date parser with Duckling
parseDateWithDuckling :: Lang -> Text -> Options -> IO [ResolvedToken]
parseDateWithDuckling lang input options = do
contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
-- TODO check/test Options False or True
pure $ analyze input contxt options $ HashSet.fromList [(Seal Time)]
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/Date/Attoparsec.hs
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec
where
import Control.Applicative ((<*))
import Data.Attoparsec.ByteString (Parser, take)
import Data.ByteString (ByteString)
import Data.Tuple.Extra (first)
import Gargantext.Prelude hiding (takeWhile, take)
-------------------------------------------------------------
parserWith :: Parser ByteString -> Parser [(ByteString, ByteString)]
parserWith sep = do
day <- take 2 <* sep
mon <- take 2 <* sep
yea <- take 4
pure $ map (first (\x -> "publication_" <> x))
[ ("day",day)
, ("month", mon)
, ("year", yea)
, ("date", yea <> "-" <> mon <> "-" <> day <> "T0:0:0")
]
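-- Usage sketch (illustrative, run via Data.Attoparsec.ByteString.parseOnly
-- with OverloadedStrings):
--
-- >>> parseOnly (parserWith "/") "21/04/2010"
-- Right [("publication_day","21"),("publication_month","04"),("publication_year","2010"),("publication_date","2010-04-21T0:0:0")]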
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/Date/Parsec.hs
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Date
Description : Some utils to parse dates
Copyright : (c) CNRS 2017-present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
where
import Data.Either (Either)
import Data.Fixed (Fixed (MkFixed))
import Data.String (String)
import Data.Text (Text, unpack)
import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
import Data.Time.Calendar (Day, fromGregorian)
import Gargantext.Prelude
import Prelude (toInteger, (++))
import Text.Parsec.Error (ParseError)
import Text.Parsec.Prim (Stream, ParsecT)
import Text.Parsec.String (Parser)
import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
import Text.XML.HXT.DOM.Util (decimalStringToInt)
import qualified Text.ParserCombinators.Parsec (parse)
-- | Permit to transform a String to an Int in a monadic context
wrapDST :: Monad m => String -> m Int
wrapDST = return . decimalStringToInt
-- | Generic parser which take at least one element not given in argument
many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
many1NoneOf = (many1 . noneOf)
getMultiplicator :: Int -> Int
getMultiplicator a
| 0 >= a = 1
| otherwise = 10 * (getMultiplicator $ div a 10)
-- | Parser for date format y-m-d
parseGregorian :: Parser Day
parseGregorian = do
y <- wrapDST =<< many1NoneOf ['-']
_ <- char '-'
m <- wrapDST =<< many1NoneOf ['-']
_ <- char '-'
d <- wrapDST =<< many1NoneOf ['T']
_ <- char 'T'
return $ fromGregorian (toInteger y) m d
---- | Parser for time format h:m:s
parseTimeOfDay :: Parser TimeOfDay
parseTimeOfDay = do
h <- wrapDST =<< many1NoneOf [':']
_ <- char ':'
m <- wrapDST =<< many1NoneOf [':']
_ <- char ':'
r <- many1NoneOf ['.']
_ <- char '.'
dec <- many1NoneOf ['+', '-']
let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
seconds = nb * 10^(12-l)
return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
-- | Parser for timezone format +hh:mm
parseTimeZone :: Parser TimeZone
parseTimeZone = do
sign <- oneOf ['+', '-']
h <- wrapDST =<< many1NoneOf [':']
_ <- char ':'
m <- wrapDST =<< (many1 $ anyChar)
let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
in return $ TimeZone timeInMinute False "CET"
---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
parseZonedTime :: Parser ZonedTime
parseZonedTime= do
d <- parseGregorian
tod <- parseTimeOfDay
tz <- parseTimeZone
return $ ZonedTime (LocalTime d (tod)) tz
---- | Opposite of toRFC3339
fromRFC3339 :: Text -> Either ParseError ZonedTime
fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
where input = unpack t
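-- Usage sketch (illustrative timestamp): the input must carry a fractional
-- seconds part and an explicit numeric offset, as expected by the parsers
-- above; the rendering of the result is indicative only.
--
-- >>> fromRFC3339 "2010-04-12T19:00:00.000+02:00"
-- Right 2010-04-12 19:00:00 +0200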
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/FrameWrite.hs
module Gargantext.Core.Text.Corpus.Parsers.FrameWrite where
import Control.Applicative ((*>))
import Control.Monad (void)
import Data.Either
import Data.Maybe
import Data.Text hiding (foldl)
import Gargantext.Prelude
import Prelude ((++))
import Text.Parsec hiding (Line)
import Text.Parsec.String
-- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/331
-- title : everything above the first ==
-- Authors : default : anonymous ; except if the following line is encountered ^@@authors: FirstName1, LastName1 ; FirstName2, LastName2 ; etc.
-- date : default : date of last change except if the following line is encountered ^@@date: 2021-09-10
-- source: Name of the root node except if the following line is encountered ^@@source:
-- By default, 1 framawrite node = 1 document. Option for further development: allow a level to be given at generation time for the split within a framawrite node.
--
-- by default: one doc == 1 NodeWrite
-- ## means each ## section will be a new document, with the subsubsection title as its title. Either it carries its own options for author, date, etc., or it inherits the document's options.
sample :: Text
sample =
unlines
[ "title1"
, "title2"
, "=="
, "^@@authors: FirstName1, LastName1; FirstName2, LastName2"
, "^@@date: 2021-09-10"
, "^@@source: someSource"
, "document contents 1"
, "document contents 2"
]
sampleUnordered :: Text
sampleUnordered =
unlines
[ "title1"
, "title2"
, "=="
, "document contents 1"
, "^@@date: 2021-09-10"
, "^@@authors: FirstName1, LastName1; FirstName2, LastName2"
, "^@@source: someSource"
, "document contents 2"
]
-- parseSample = parse documentP "sample" (unpack sample)
-- parseSampleUnordered = parse documentP "sampleUnordered" (unpack sampleUnordered)
parseLinesSample :: Either ParseError Parsed
parseLinesSample = parseLines sample
parseLinesSampleUnordered :: Either ParseError Parsed
parseLinesSampleUnordered = parseLines sampleUnordered
data Author =
Author { firstName :: Text
, lastName :: Text }
deriving (Show)
data Parsed =
Parsed { title :: Text
, authors :: [Author]
, date :: Maybe Text
, source :: Maybe Text
, contents :: Text }
deriving (Show)
emptyParsed :: Parsed
emptyParsed =
Parsed { title = ""
, authors = []
, date = Nothing
, source = Nothing
, contents = "" }
data Line =
LAuthors [Author]
| LContents Text
| LDate Text
| LSource Text
| LTitle Text
deriving (Show)
parseLines :: Text -> Either ParseError Parsed
parseLines text = foldl f emptyParsed <$> lst
where
lst = parse documentLinesP "" (unpack text)
f (Parsed { .. }) (LAuthors as) = Parsed { authors = as, .. }
f (Parsed { .. }) (LContents c) = Parsed { contents = concat [contents, c], .. }
f (Parsed { .. }) (LDate d ) = Parsed { date = Just d, .. }
f (Parsed { .. }) (LSource s ) = Parsed { source = Just s, .. }
f (Parsed { .. }) (LTitle t ) = Parsed { title = t, .. }
documentLinesP :: Parser [Line]
documentLinesP = do
t <- titleP
ls <- lineP `sepBy` newline
pure $ [LTitle $ pack t] ++ ls
lineP :: Parser Line
lineP = do
choice [ try authorsLineP
, try dateLineP
, try sourceLineP
, contentsLineP ]
authorsLineP :: Parser Line
authorsLineP = do
authors <- authorsP
pure $ LAuthors authors
dateLineP :: Parser Line
dateLineP = do
date <- dateP
pure $ LDate $ pack date
sourceLineP :: Parser Line
sourceLineP = do
source <- sourceP
pure $ LSource $ pack source
contentsLineP :: Parser Line
contentsLineP = do
contents <- many (noneOf "\n")
pure $ LContents $ pack contents
--------------------
-- documentP = do
-- t <- titleP
-- a <- optionMaybe authorsP
-- d <- optionMaybe dateP
-- s <- optionMaybe sourceP
-- c <- contentsP
-- pure $ Parsed { title = pack t
-- , authors = fromMaybe [] a
-- , date = pack <$> d
-- , source = pack <$> s
-- , contents = pack c }
titleDelimiterP :: Parser ()
titleDelimiterP = do
_ <- newline
_ <- string "=="
tokenEnd
titleP :: Parser [Char]
titleP = manyTill anyChar (try titleDelimiterP)
authorsPrefixP :: Parser [Char]
authorsPrefixP = do
_ <- string "^@@authors:"
many (char ' ')
authorsP :: Parser [Author]
authorsP = try authorsPrefixP *> sepBy authorP (char ';')
authorP :: Parser Author
authorP = do
fn <- manyTill anyChar (char ',')
_ <- many (char ' ')
--ln <- manyTill anyChar (void (char ';') <|> tokenEnd)
--ln <- manyTill anyChar (tokenEnd)
ln <- many (noneOf "\n")
pure $ Author { firstName = pack fn, lastName = pack ln }
-- manyTill anyChar (void (char '\n') <|> eof)
datePrefixP :: Parser [Char]
datePrefixP = do
_ <- string "^@@date:"
many (char ' ')
dateP :: Parser [Char]
dateP = try datePrefixP
*> many (noneOf "\n")
sourcePrefixP :: Parser [Char]
sourcePrefixP = do
_ <- string "^@@source:"
many (char ' ')
sourceP :: Parser [Char]
sourceP = try sourcePrefixP
*> many (noneOf "\n")
-- contentsP :: Parser String
-- contentsP = many anyChar
tokenEnd :: Parser ()
tokenEnd = void (char '\n') <|> eof
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/GrandDebat.hs
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.GrandDebat
Description : Grand Debat Types
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
_flowCorpusDebat :: FlowCmdM env err m
=> User -> Either CorpusName [CorpusId]
-> Limit -> FilePath
-> m CorpusId
_flowCorpusDebat u n l fp = do
docs <- liftBase ( splitEvery 500
<$> take l
<$> readFile' fp
:: IO [[GD.GrandDebatReference ]]
)
flowCorpus u n (Multi FR) (map (map toHyperdataDocument) docs)
-}
module Gargantext.Core.Text.Corpus.Parsers.GrandDebat
where
import Data.Aeson (ToJSON, FromJSON)
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), ToHyperdataDocument, toHyperdataDocument)
import Gargantext.Prelude
import Gargantext.Database.GargDB
import qualified Data.ByteString.Lazy as DBL
import qualified Data.JsonStream.Parser as P
import qualified Data.Text as Text
data GrandDebatReference = GrandDebatReference
{ id :: !(Maybe Text)
, reference :: !(Maybe Text)
, title :: !(Maybe Text)
, createdAt :: !(Maybe Text)
, publishedAt :: !(Maybe Text)
, updatedAt :: !(Maybe Text)
, trashed :: !(Maybe Bool)
, trashedStatus :: !(Maybe Text)
, authorId :: !(Maybe Text)
, authorType :: !(Maybe Text)
, authorZipCode :: !(Maybe Text)
, responses :: !(Maybe [GrandDebatResponse])
}
deriving (Show, Generic)
data GrandDebatResponse = GrandDebatResponse
{ questionId :: !(Maybe Text)
, questionTitle :: !(Maybe Text)
, value :: !(Maybe Text)
, formattedValue :: !(Maybe Text)
}
deriving (Show, Generic)
instance FromJSON GrandDebatResponse
instance FromJSON GrandDebatReference
instance ToJSON GrandDebatResponse
instance ToJSON GrandDebatReference
instance ToHyperdataDocument GrandDebatReference
where
toHyperdataDocument (GrandDebatReference { id, title, publishedAt, authorType, authorZipCode, responses }) =
HyperdataDocument { _hd_bdd = Just "GrandDebat"
, _hd_doi = id
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = title
, _hd_authors = authorType
, _hd_institutes = authorType
, _hd_source = authorZipCode
, _hd_abstract = toAbstract <$> responses
, _hd_publication_date = publishedAt
, _hd_publication_year = Nothing
, _hd_publication_month = Nothing
, _hd_publication_day = Nothing
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ Text.pack $ show FR }
where
toAbstract = (Text.intercalate " . ") . ((filter (/= "")) . (map toSentence))
toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of
Nothing -> ""
Just r' -> case Text.length r' > 10 of
True -> r'
False -> ""
instance ReadFile [GrandDebatReference]
where
-- | read JSON: the 3 versions below all work, with increasing levels of optimization
--readFile fp = maybe [] identity <$> decode <$> DBL.readFile fp
--readFile fp = either (panic . Text.pack) identity <$> P.eitherDecode <$> DBL.readFile fp
readFile' fp = P.parseLazyByteString (P.arrayOf P.value) <$> DBL.readFile fp
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Isidore
Description : To query French Humanities publication database
Copyright : (c) CNRS, 2019-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO:
- put endpoint in configuration file
- more flexible fields of research
- type database name
- use more ontologies to help building corpora
-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Core.Text.Corpus.Parsers.Isidore where
import Control.Lens hiding (contains)
import Data.ByteString.Lazy (ByteString)
import Data.RDF hiding (triple, Query)
import Data.Text hiding (groupBy, map)
import Database.HSparql.Connection
import Database.HSparql.QueryGenerator
import Gargantext.Core (Lang)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody)
import Prelude (String)
route :: EndPoint
route = "https://isidore.science/sparql/"
selectQueryRaw' :: String -> String -> IO (Response ByteString)
selectQueryRaw' uri q = getWith opts uri
where
opts = defaults & header "Accept" .~ ["application/sparql-results+xml"]
& header "User-Agent" .~ ["gargantext-hsparql-client"]
& param "query" .~ [Data.Text.pack q]
isidoreGet :: Lang -> Int -> Text -> IO (Maybe [HyperdataDocument])
isidoreGet la li q = do
bindingValues <- isidoreGet' li q
case bindingValues of
Nothing -> pure Nothing
Just dv -> pure $ Just $ map (bind2doc la) dv
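-- Usage sketch (illustrative query; this performs a live SPARQL request
-- against the public Isidore endpoint defined in 'route'):
--
-- >>> mDocs <- isidoreGet EN 10 "deep learning"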
isidoreGet' :: Int -> Text -> IO (Maybe [[BindingValue]])
isidoreGet' l q = do
let s = createSelectQuery $ isidoreSelect l q
putStrLn s
r <- selectQueryRaw' route s
putStrLn $ show $ r ^. responseStatus
pure $ structureContent $ r ^. responseBody
-- res <- selectQuery route $ simpleSelect q
-- pure res
isidoreSelect :: Int -> Text -> Query SelectQuery
isidoreSelect lim q = do
-- See Predefined Namespace Prefixes:
-- https://isidore.science/sparql?nsdecl
isidore <- prefix "isidore" (iriRef "http://isidore.science/class/")
rdf <- prefix "rdf" (iriRef "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
dcterms <- prefix "dcterms" (iriRef "http://purl.org/dc/terms/")
dc <- prefix "dc" (iriRef "http://purl.org/dc/elements/1.1/")
--iso <- prefix "fra" (iriRef "http://lexvo.org/id/iso639-3/")
--ore <- prefix "ore" (iriRef "http://www.openarchives.org/ore/terms/")
--bif <- prefix "bif" (iriRef "bif:")
link <- var
title <- var
date <- var
abstract <- var
authors <- var
source <- var
langDoc <- var
publisher <- var
--agg <- var
triple_ link (rdf .:. "type") (isidore .:. "Document")
triple_ link (dcterms .:. "title") title
triple_ link (dcterms .:. "date") date
triple_ link (dcterms .:. "creator") authors
--triple_ link (dcterms .:. "language") langDoc
triple_ link (dc .:. "description") abstract
--triple_ link (ore .:. "isAggregatedBy") agg
--triple_ agg (dcterms .:. "title") title
optional_ $ triple_ link (dcterms .:. "source") source
optional_ $ triple_ link (dcterms .:. "publisher") publisher
-- TODO FIX BUG with (.||.) operator
--filterExpr_ $ (.||.) (contains title q) (contains abstract q)
--filterExpr_ (containsWith authors q) -- (contains abstract q)
--filterExpr_ (containsWith title q) -- (contains abstract q)
--filterExpr_ $ (.||.) (containsWith title q) (contains abstract q)
filterExpr_ (containsWith title q)
-- TODO FIX filter with lang
--filterExpr_ $ langMatches title (str ("fra" :: Text))
--filterExpr_ $ (.==.) langDoc (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
orderNextDesc date
limit_ lim
distinct_
selectVars [link, date, langDoc, authors, source, publisher, title, abstract]
-- | TODO : check if all cases are taken into account
unbound :: Lang -> BindingValue -> Maybe Text
unbound _ Unbound = Nothing
unbound _ (Bound (UNode x)) = Just x
unbound _ (Bound (LNode (TypedL x _))) = Just x
unbound _ (Bound (LNode (PlainL x))) = Just x
unbound l (Bound (LNode (PlainLL x l'))) = if l' == (toLower $ cs $ show l) then Just x else Nothing
unbound _ _ = Nothing
bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
bind2doc l [ link, date, langDoc, authors, _source, publisher, title, abstract ] =
HyperdataDocument { _hd_bdd = Just "Isidore"
, _hd_doi = Nothing
, _hd_url = unbound l link
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = unbound l title
, _hd_authors = unbound l authors
, _hd_institutes = Nothing
, _hd_source = unbound l publisher
, _hd_abstract = unbound l abstract
, _hd_publication_date = unbound l date
, _hd_publication_year = Nothing
, _hd_publication_month = Nothing
, _hd_publication_day = Nothing
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = unbound l langDoc }
bind2doc _ _ = undefined
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Json2Csv
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
JSON parser to export toward the CSV GargV3 format.
(Export from the Patent Database.)
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.Corpus.Parsers.Json2Csv (json2csv, readPatents)
where
import Prelude (read)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.ByteString.Lazy (readFile)
import Data.Text (Text, unpack)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import System.IO (FilePath)
import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
import Data.Vector (fromList)
data Patent = Patent { _patent_title :: Text
, _patent_abstract :: Text
, _patent_year :: Text
, _patent_id :: Text
} deriving (Show)
$(deriveJSON (unPrefix "_patent_") ''Patent)
readPatents :: FilePath -> IO (Maybe [Patent])
readPatents fp = decode <$> readFile fp
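-- Expected input sketch (illustrative, assuming unPrefix drops the "_patent_"
-- prefix from the JSON keys as it does elsewhere in the codebase): a JSON
-- array of patent objects, e.g.
--
--   [ { "title": "A title"
--     , "abstract": "An abstract"
--     , "year": "2007"
--     , "id": "42" } ]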
type FilePathIn = FilePath
type FilePathOut = FilePath
json2csv :: FilePathIn -> FilePathOut -> IO ()
json2csv fin fout = do
patents <- maybe (panic "json2csv error") identity <$> readPatents fin
writeFile fout (headerCsvGargV3, fromList $ map patent2csvDoc patents)
patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc (Patent { .. }) =
CsvDoc { csv_title = _patent_title
, csv_source = "Source"
, csv_publication_year = Just $ read (unpack _patent_year)
, csv_publication_month = Just 1
, csv_publication_day = Just 1
, csv_abstract = _patent_abstract
, csv_authors = "Authors" }
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/RIS.hs
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.RIS
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
RIS is a standardized tag format developed by Research Information
Systems, Incorporated (the format name refers to the company) to enable
citation programs to exchange data.
[More](https://en.wikipedia.org/wiki/RIS_(file_format))
-}
module Gargantext.Core.Text.Corpus.Parsers.RIS (parser, onField, fieldWith, lines) where
import Data.List (lookup)
import Control.Applicative
import Data.Attoparsec.ByteString (Parser, try, takeTill, take, many1)
import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
import Data.ByteString (ByteString, concat)
import Gargantext.Prelude hiding (takeWhile, take)
import qualified Data.List as DL
-------------------------------------------------------------
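-- Input layout sketch (illustrative, following the combinators below): a file
-- is a sequence of notices; each notice starts with a "TY - ..." line, ends
-- with an "ER - ..." line, and every field line is a two-character tag
-- followed by " - " and its value:
--
--   TY - JOUR
--   TI - A title
--   AB - An abstract
--   ER -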
parser :: Parser [[(ByteString, ByteString)]]
parser = do
n <- notice "TY -"
ns <- many1 (notice "\nTY -")
pure $ [n] <> ns
notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
notice s = start *> many (fieldWith field) <* end
where
field :: Parser ByteString
field = "\n" *> take 2 <* " - "
start :: Parser ByteString
start = s *> takeTill isEndOfLine
end :: Parser ByteString
end = "\nER -" *> takeTill isEndOfLine
fieldWith :: Parser ByteString -> Parser (ByteString, ByteString)
fieldWith n = do
name <- n
txt <- takeTill isEndOfLine
txts <- try lines
let txts' = case DL.length txts > 0 of
True -> txts
False -> []
pure (name, concat ([txt] <> txts'))
lines :: Parser [ByteString]
lines = many line
where
line :: Parser ByteString
line = "\n " *> takeTill isEndOfLine
-------------------------------------------------------------
-- Field for First elem of a Tuple, Key for corresponding Map
onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
-> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
onField k f m = m <> ( maybe [] f (lookup k m) )
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/RIS/Presse.hs
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Presse RIS format parser for Europresse Database.
-}
module Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich) where
import Data.List (lookup)
import Data.Tuple.Extra (first, both)
import Data.Attoparsec.ByteString (parseOnly)
import Data.ByteString (ByteString, length)
import Gargantext.Prelude hiding (takeWhile, take, length)
import Gargantext.Core.Text.Corpus.Parsers.RIS (onField)
import Gargantext.Core (Lang(..))
import qualified Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec as Date
presseEnrich :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
presseEnrich = (onField "DA" parseDate)
. (onField "LA" parseLang)
. fixFields
parseDate :: ByteString -> [(ByteString, ByteString)]
parseDate str = either (const []) identity $ parseOnly (Date.parserWith "/") str
parseLang :: ByteString -> [(ByteString, ByteString)]
parseLang "Français" = [(langField, cs $ show FR)]
parseLang "English" = [(langField, cs $ show EN)]
parseLang x = [(langField, x)]
langField :: ByteString
langField = "language"
fixFields :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
fixFields ns = map (first fixFields'') ns
where
-- | Title is sometimes longer than abstract
fixFields'' = case uncurry (>) <$> look'' of
Just True -> fixFields' "abstract" "title"
_ -> fixFields' "title" "abstract"
look'' :: Maybe (Int, Int)
look'' = both length <$> look
look :: Maybe (ByteString,ByteString)
look = (,) <$> lookup "TI" ns <*> lookup "N2" ns
fixFields' :: ByteString -> ByteString
-> ByteString -> ByteString
fixFields' title abstract champs
| champs == "AU" = "authors"
| champs == "TI" = title
| champs == "JF" = "source"
| champs == "DI" = "doi"
| champs == "UR" = "url"
| champs == "N2" = abstract
| otherwise = champs
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/WOS.hs
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.WOS
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Parser for the ISI format exported by the Web Of Science database.
-}
module Gargantext.Core.Text.Corpus.Parsers.WOS (parser, keys) where
import Control.Applicative
import Data.Attoparsec.ByteString (Parser, string, takeTill, take, manyTill, many1)
import Data.Attoparsec.ByteString.Char8 (anyChar, isEndOfLine)
import Data.ByteString (ByteString)
import Data.ByteString.Char8 (pack)
import Gargantext.Core.Text.Corpus.Parsers.RIS (fieldWith)
import Prelude hiding (takeWhile, take, concat, readFile, lines, concat)
-------------------------------------------------------------
-- | wosParser parses ISI format from
-- Web Of Science Database
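-- Input layout sketch (illustrative): everything before the "VR 1.0" version
-- line is skipped; each notice then opens with a "PT " line, field lines are
-- a two-character tag followed by a space, a notice ends with "ER" and the
-- whole file ends with "EF":
--
--   ...export header, skipped...
--   VR 1.0
--   PT J
--   TI A title
--   AB An abstract
--   ER
--   EF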
parser :: Parser [[(ByteString, ByteString)]]
parser = do
-- TODO Warning if version /= 1.0
-- FIXME anyChar (string ..) /= exact string "\nVR 1.0" ?
_ <- manyTill anyChar (string $ pack "\nVR 1.0")
ns <- many1 notice <* (string $ pack "\nEF" )
pure ns
notice :: Parser [(ByteString, ByteString)]
notice = start *> many (fieldWith field) <* end
where
field :: Parser ByteString
field = "\n" *> take 2 <* " "
start :: Parser ByteString
start = "\nPT " *> takeTill isEndOfLine
end :: Parser [Char]
end = manyTill anyChar (string $ pack "\nER\n")
keys :: ByteString -> ByteString
keys champs
| champs == "AF" = "authors"
| champs == "TI" = "title"
| champs == "SO" = "source"
| champs == "DI" = "doi"
| champs == "PD" = "publication_date"
| champs == "AB" = "abstract"
| otherwise = champs
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/Wikimedia.hs
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Wikimedia
Description : Parser for Wikimedia dump
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
@Gargantext.Core.Text.Corpus.Parsers.Wikimedia@:
This module provides a parser for Wikipedia dumps.
It includes an XML parser for Wikipedia's XML
and a wikimedia-to-plaintext converter for the Wikipedia text field.
-}
module Gargantext.Core.Text.Corpus.Parsers.Wikimedia
where
import Control.Monad.Catch
import Data.Conduit
import Data.Either
import Data.Text as T
import Data.XML.Types (Event, Name)
import Gargantext.Prelude
import Text.Pandoc
import Text.XML.Stream.Parse
-- | Use case
-- :{
-- wikimediaFile <- BL.readFile "text.xml"
-- _ <- runConduit $ parseLBS def wikimediaFile
-- .| force "mediawiki required" parseMediawiki
-- .| CL.mapM mediawikiPageToPlain
-- .| CL.mapM_ print
-- :}
-- | A simple "Page" type.
-- For the moment it takes only text and title
-- (since there is no abstract); we will see if other data are relevant.
data Page =
Page { _markupFormat :: MarkupFormat
, _title :: Maybe T.Text
, _text :: Maybe T.Text
}
deriving (Show)
data MarkupFormat = Mediawiki | Plaintext
deriving (Show)
parseRevision :: MonadThrow m => ConduitT Event o m (Maybe T.Text)
parseRevision = tagNoAttr "{http://www.mediawiki.org/xml/export-0.10/}revision" $ do
text <- force "text is missing" $ ignoreExcept "{http://www.mediawiki.org/xml/export-0.10/}text" content
many_ ignoreAnyTreeContent
return text
-- | Utility function that matches everything but the tag given
tagUntil :: Name -> NameMatcher Name
tagUntil name = matching (/= name)
-- | Utility function that consumes everything but the tag given
-- useful because we have to consume all the data.
manyTagsUntil_ :: MonadThrow m => Name -> ConduitT Event o m ()
manyTagsUntil_ n = many_ (ignoreTree (tagUntil n) ignoreAttrs)
manyTagsUntil_' :: MonadThrow m => Name -> ConduitT Event o m ()
manyTagsUntil_' = many_ . ignoreEmptyTag . tagUntil
-- | Utility function that parses nothing but the tag given,
-- useful because we have to consume all the data.
ignoreExcept :: MonadThrow m => Name
-> ConduitT Event o m b
-> ConduitT Event o m (Maybe b)
ignoreExcept name f = do
_ <- manyTagsUntil_ name
tagIgnoreAttrs (matching (== name)) f
-- TODO: remove ignoreExcept to:
-- many ignoreAnyTreeContentUntil "Article"
manyTagsUntil :: MonadThrow m => Name
-> ConduitT Event o m b
-> ConduitT Event o m (Maybe b)
manyTagsUntil name f = do
_ <- manyTagsUntil_ name
tagIgnoreAttrs (matching (== name)) f
parsePage :: MonadThrow m => ConduitT Event o m (Maybe Page)
parsePage =
tagNoAttr "{http://www.mediawiki.org/xml/export-0.10/}page" $ do
title <-
tagNoAttr "{http://www.mediawiki.org/xml/export-0.10/}title" content
_ <- manyTagsUntil_ "{http://www.mediawiki.org/xml/export-0.10/}revision"
revision <-
parseRevision
many_ $ ignoreAnyTreeContent
return $ Page { _markupFormat = Mediawiki
, _title = title
, _text = revision }
parseMediawiki :: MonadThrow m => ConduitT Event Page m (Maybe ())
parseMediawiki =
tagIgnoreAttrs "{http://www.mediawiki.org/xml/export-0.10/}mediawiki"
$ manyYield' parsePage
-- | Convert a Mediawiki Page to a Plaintext Page.
-- Need to wrap the result in IO to parse and to combine it.
mediawikiPageToPlain :: Page -> IO Page
mediawikiPageToPlain page = do
title <- mediaToPlain $ _title page
revision <- mediaToPlain $ _text page
return $ Page { _markupFormat = Plaintext, _title = title, _text = revision }
where mediaToPlain media =
case media of
(Nothing) -> return Nothing
(Just med) -> do
res <- runIO $ do
doc <- readMediaWiki def med
writePlain def doc
case res of
(Left _) -> return Nothing
(Right r) -> return $ Just r
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Corpus/Parsers/XML_hs
-- http://chrisdone.com/posts/fast-haskell-c-parsing-xml
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Learn.hs
{-|
Module : Gargantext.Core.Text.Learn
Description : Language and category detection by learning
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO:
- generalize to ByteString
- stop words (and how to learn them)
- the main type here is String; check whether working on Text would be more efficient
-}
{-# LANGUAGE TypeSynonymInstances #-}
module Gargantext.Core.Text.Learn -- (detectLang, detectLangs, stopList)
where
import Codec.Serialise
import qualified Data.List as DL
import Data.Map.Strict (Map, toList)
import qualified Data.Map.Strict as DM
import GHC.Generics
import Data.String (String)
import Data.Text (Text)
import Data.Text (pack, unpack, toLower)
import Data.Tuple.Extra (both)
import qualified Data.ByteString.Lazy as BSL
import Gargantext.Prelude
import Gargantext.Database.GargDB
import Gargantext.Core (Lang(..), allLangs)
import Gargantext.Core.Text.Terms.Mono (words)
import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
import qualified Gargantext.Core.Text.Samples.FR as FR
import qualified Gargantext.Core.Text.Samples.EN as EN
--import qualified Gargantext.Core.Text.Samples.DE as DE
--import qualified Gargantext.Core.Text.Samples.SP as SP
--import qualified Gargantext.Core.Text.Samples.CH as CH
------------------------------------------------------------------------
data Candidate = Candidate { stop :: Double
, noStop :: Double
} deriving (Show)
------------------------------------------------------------------------
-- * Analyze candidate
type StringSize = Int
type TotalFreq = Int
type Freq = Int
type Word = String
data CatWord a = CatWord a Word
type CatProb a = Map a Double
type Events a = Map a EventBook
------------------------------------------------------------------------
data EventBook = EventBook { events_freq :: Map String Freq
, events_n :: Map StringSize TotalFreq
}
deriving (Show, Generic)
instance Serialise EventBook
instance (Serialise a, Ord a) => SaveFile (Events a) where
saveFile' f d = BSL.writeFile f (serialise d)
instance (Serialise a, Ord a) => ReadFile (Events a) where
readFile' filepath = deserialise <$> BSL.readFile filepath
------------------------------------------------------------------------
detectStopDefault :: Text -> Maybe Bool
detectStopDefault = undefined
detectBool :: [(Bool, Text)] -> Text -> Maybe Bool
detectBool events = detectDefault False events
detectDefault :: Ord a => a -> [(a, Text)] -> Text -> Maybe a
detectDefault = detectDefaultWith identity
detectDefaultWith :: Ord a => (b -> Text) -> a -> [(a, b)] -> b -> Maybe a
detectDefaultWith f d events = detectDefaultWithPriors f ps
where
ps = priorEventsWith f d events
detectDefaultWithPriors :: Ord b => (a -> Text) -> Events b -> a -> Maybe b
detectDefaultWithPriors f priors = detectCat 99 priors . f
priorEventsWith :: Ord a => (t -> Text) -> a -> [(a, t)] -> Events a
priorEventsWith f d e = toEvents d [0..2] 10 es
where
es = map (\(a,b) -> CatWord a (unpack $ toLower $ f b)) e
------------------------------------------------------------------------
detectLangDefault :: Text -> Maybe Lang
detectLangDefault = detectCat 99 eventLang
where
eventLang :: Events Lang
eventLang = toEvents FR [0..2] 10 [ langWord l | l <- allLangs ]
langWord :: Lang -> CatWord Lang
langWord l = CatWord l (textSample l)
textSample :: Lang -> String
textSample EN = EN.textSample
textSample FR = FR.textSample
textSample _ = panic "[G.C.T.L:detectLangDefault] not impl yet"
--textSample DE = DE.textSample
--textSample SP = SP.textSample
--textSample CH = CH.textSample
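-- Usage sketch (illustrative; results depend on the bundled language samples,
-- and only FR and EN are currently wired in):
--
-- >>> detectLangDefault "Le ciel est bleu et la mer est calme aujourd'hui."
-- Just FR
-- >>> detectLangDefault "The quick brown fox jumps over the lazy dog."
-- Just EN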
------------------------------------------------------------------------
detectCat :: Ord a => Int -> Events a -> Text -> Maybe a
detectCat n es = head . map fst . (detectCat' n es) . unpack
where
detectCat' :: Ord a => Int -> Events a -> String -> [(a, Double)]
detectCat' n' es' s = DL.reverse $ DL.sortOn snd
$ toList
$ detectWith n' es' (wordsToBook [0..2] n' s)
detectWith :: Ord a => Int -> Events a -> EventBook -> CatProb a
detectWith n'' el (EventBook mapFreq _) =
DM.unionsWith (+)
$ map DM.fromList
$ map (\(s,m) -> map (\(l,f) -> (l, (fromIntegral m) * f)) $ toPrior n'' s el)
$ filter (\x -> fst x /= " ")
$ DM.toList mapFreq
-- | TODO: monoids (but proba >= 0)
toPrior :: Int -> String -> Events a -> [(a, Double)]
toPrior n'' s el = prior n'' $ pebLang s el
where
pebLang :: String -> Events a -> [(a, (Freq,TotalFreq))]
pebLang st = map (\(l,eb) -> (l, peb st eb)) . DM.toList
peb :: String -> EventBook -> (Freq, TotalFreq)
peb st (EventBook mapFreq mapN) = (fromIntegral a, fromIntegral b)
where
a = maybe 0 identity $ DM.lookup st mapFreq
b = maybe 1 identity $ DM.lookup (length st) mapN
prior :: Int -> [(a, (Freq, TotalFreq))] -> [(a, Double)]
prior i ps = zip ls $ zipWith (\x y -> x^i * y) (map (\(a,_) -> part a (sum $ map fst ps')) ps')
(map (\(a,b) -> a / b) ps')
where
(ls, ps'') = DL.unzip ps
ps' = map (both fromIntegral) ps''
part :: (Eq p, Fractional p) => p -> p -> p
part 0 _ = 0
part _ 0 = 0
part x y = x / y
{-
toProba :: (Eq b, Fractional b, Functor t, Foldable t) =>
t (a, b) -> t (a, b)
toProba xs = map (\(a,b) -> (a, part b total)) xs
where
total = sum $ map snd xs
-}
-- | TODO: monoids
toEvents :: Ord a => a -> [Int] -> Int -> [CatWord a] -> Events a
toEvents e ns n = foldl' (opEvent (+)) (emptyEvent e ns n) . map (toEvent ns n)
where
emptyEvent :: Ord a => a -> [Int] -> Int -> Events a
emptyEvent e' ns' n'= toEvent ns' n' (CatWord e' "")
toEvent :: Ord a => [Int] -> Int -> CatWord a -> Events a
toEvent ns'' n'' (CatWord l txt) = DM.fromList [(l, wordsToBook ns'' n'' txt)]
opEvent :: Ord a => (Freq -> Freq -> Freq) -> Events a -> Events a -> Events a
opEvent f = DM.unionWith (op f)
------------------------------------------------------------------------
emptyEventBook :: [Int] -> Int -> EventBook
emptyEventBook ns n = wordToBook ns n " "
wordsToBook :: [Int] -> Int -> String -> EventBook
wordsToBook ns n txt = foldl' (op (+)) (emptyEventBook ns n) eventsBook
where
ws = map unpack $ words $ pack txt
eventsBook = map (wordToBook ns n) ws
wordToBook :: [Int] -> Int -> Word -> EventBook
wordToBook ns n txt = EventBook ef en
where
chks = allChunks ns n txt
en = DM.fromList $ map (\(n',ns') -> (n', length ns')) $ zip ns chks
ef = foldl' DM.union DM.empty $ map (occurrencesWith identity) chks
op :: (Freq -> Freq -> Freq) -> EventBook -> EventBook -> EventBook
op f (EventBook ef1 en1)
(EventBook ef2 en2) = EventBook (DM.unionWith f ef1 ef2)
(DM.unionWith f en1 en2)
------------------------------------------------------------------------
------------------------------------------------------------------------
allChunks :: [Int] -> Int -> String -> [[String]]
allChunks ns m st = map (\n -> chunks n m st) ns
-- | Chunks is the same function as splitBy in Context but for Strings,
-- not Text (without pack and unpack operations that are not needed).
chunks :: Int -> Int -> String -> [String]
chunks n m = DL.take m . filter (not . all (== ' '))
. chunkAlong (n+1) 1
. DL.concat
. DL.take 1000
. DL.repeat
. blanks
-- | String preparation
blanks :: String -> String
blanks [] = []
blanks xs = [' '] <> xs <> [' ']
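-- | Illustration (approximate; the exact windows depend on 'chunkAlong'):
-- 'blanks' pads the word with spaces, the padded word is repeated, and
-- sliding windows of size (n+1) are kept, dropping all-space windows:
--
-- > chunks 2 4 "cat"  ~  [" ca", "cat", "at ", "t  "]   -- 3-grams
-- > chunks 0 4 "cat"  ~  ["c", "a", "t", "c"]           -- 1-grams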
{-
-- Some previous tests to be removed
--import GHC.Base (Functor)
--import Numeric.Probability.Distribution ((??))
--import qualified Numeric.Probability.Distribution as D
-- | Blocks increase the size of the word to ease computations;
-- some border and unexpected effects can happen and need to be tested
blockOf :: Int -> String -> String
blockOf n = DL.concat . DL.take n . DL.repeat
-- * Make the distributions
makeDist :: [String] -> D.T Double String
makeDist = D.uniform . DL.concat . map (DL.concat . allChunks [0,2] 10)
stopDist :: D.T Double String
stopDist = makeDist $ map show ([0..9]::[Int]) <> EN.stopList
candDist :: D.T Double String
candDist = makeDist candList
------------------------------------------------------------------------
sumProba :: Num a => D.T a String -> [Char] -> a
sumProba ds x = sum $ map ((~?) ds) $ DL.concat $ allChunks [0,2] 10 $ map toLower x
-- | Get the probability according to a distribution
(~?) :: (Num prob, Eq a) => D.T prob a -> a -> prob
(~?) ds x = (==x) ?? ds
------------------------------------------------------------------------
candidate :: [Char] -> Candidate
candidate x = Candidate (sumProba stopDist x) (sumProba candDist x)
------------------------------------------------------------------------
candList :: [String]
candList = [ "france", "alexandre", "mael", "constitution"
, "etats-unis", "associes", "car", "train", "spam"]
--}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List.hs 0000664 0000000 0000000 00000032475 14124644201 0030342 0 ustar 00root root 0000000 0000000 {-|
Module      : Gargantext.Core.Text.List
Description : Tools to build lists
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.List
where
import Control.Lens hiding (both) -- ((^.), view, over, set, (_1), (_2))
import Data.HashMap.Strict (HashMap)
import Data.HashSet (HashSet)
import Data.Map (Map)
import Data.Monoid (mempty)
import Data.Ord (Down(..))
import Data.Set (Set)
import Data.Tuple.Extra (both)
import Gargantext.API.Ngrams.Types (NgramsElement, NgramsTerm(..))
import Gargantext.Core.NodeStory
import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Group
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.List.Social
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.Metrics (scored', Scored(..), scored_speExc, scored_genInc, normalizeGlobal, normalizeLocal, scored_terms)
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Metrics.TFICF (getTficf_withSample)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Database.Prelude (CmdM)
import Gargantext.Database.Query.Table.Ngrams (text2ngrams)
import Gargantext.Database.Query.Table.NgramsPostag (selectLems)
import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Database.Schema.Ngrams (NgramsType(..), Ngrams(..))
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.HashSet as HashSet
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
{-
-- TODO maybe useful for later
isStopTerm :: StopSize -> Text -> Bool
isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x)
where
isStopChar c = not (c `elem` ("- /()%" :: [Char]) || Char.isAlpha c)
-}
-- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists :: ( HasNodeStory env err m
, CmdM env err m
, HasTreeError err
, HasNodeError err
)
=> User
-> UserCorpusId
-> MasterCorpusId
-> Maybe FlowSocialListWith
-> GroupParams
-> m (Map NgramsType [NgramsElement])
buildNgramsLists user uCid mCid mfslw gp = do
ngTerms <- buildNgramsTermsList user uCid mCid mfslw gp (NgramsTerms, MapListSize 350)
othersTerms <- mapM (buildNgramsOthersList user uCid mfslw GroupIdentity)
[ (Authors , MapListSize 9)
, (Sources , MapListSize 9)
, (Institutes, MapListSize 9)
]
pure $ Map.unions $ [ngTerms] <> othersTerms
data MapListSize = MapListSize { unMapListSize :: !Int }
buildNgramsOthersList :: ( HasNodeError err
, CmdM env err m
, HasNodeStory env err m
, HasTreeError err
)
=> User
-> UserCorpusId
-> Maybe FlowSocialListWith
-> GroupParams
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize) = do
allTerms :: HashMap NgramsTerm (Set NodeId) <- getNodesByNgramsUser uCid nt
-- PrivateFirst for first developments since Public NodeMode is not implemented yet
socialLists :: FlowCont NgramsTerm FlowListScores
<- flowSocialList mfslw user nt ( FlowCont HashMap.empty
$ HashMap.fromList
$ List.zip (HashMap.keys allTerms)
(List.cycle [mempty])
)
let
groupedWithList = toGroupedTree {- groupParams -} socialLists allTerms
(stopTerms, tailTerms) = HashMap.partition ((== Just StopTerm) . viewListType)
$ view flc_scores groupedWithList
(mapTerms, tailTerms') = HashMap.partition ((== Just MapTerm) . viewListType) tailTerms
listSize = mapListSize - (List.length mapTerms)
(mapTerms', candiTerms) = both HashMap.fromList
$ List.splitAt listSize
$ List.sortOn (Down . viewScore . snd)
$ HashMap.toList tailTerms'
pure $ Map.fromList [( nt, (toNgramsElement stopTerms)
<> (toNgramsElement mapTerms )
<> (toNgramsElement $ setListType (Just MapTerm ) mapTerms' )
<> (toNgramsElement $ setListType (Just CandidateTerm) candiTerms)
)]
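-- Note on 'buildNgramsOthersList' above (added for clarity): terms already
-- tagged Stop or Map by the social lists keep their tags; the remaining terms
-- are sorted by descending score and the top (mapListSize - #mapTerms) become
-- MapTerm, the rest CandidateTerm.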
getGroupParams :: ( HasNodeError err
, CmdM env err m
, HasNodeStory env err m
, HasTreeError err
)
=> GroupParams -> HashSet Ngrams -> m GroupParams
getGroupParams gp@(GroupWithPosTag l a _m) ng = do
hashMap <- HashMap.fromList <$> selectLems l a (HashSet.toList ng)
-- printDebug "hashMap" hashMap
pure $ over gwl_map (\x -> x <> hashMap) gp
getGroupParams gp _ = pure gp
-- TODO use ListIds
buildNgramsTermsList :: ( HasNodeError err
, CmdM env err m
, HasNodeStory env err m
, HasTreeError err
)
=> User
-> UserCorpusId
-> MasterCorpusId
-> Maybe FlowSocialListWith
-> GroupParams
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
-- Filter 0 With Double
-- Computing global speGen score
printDebug "[buldNgramsTermsList: Sample List] / start" nt
allTerms :: HashMap NgramsTerm Double <- getTficf_withSample uCid mCid nt
printDebug "[buldNgramsTermsList: Sample List / end]" nt
printDebug "[buldNgramsTermsList: Flow Social List / start]" nt
-- PrivateFirst for first developments since Public NodeMode is not implemented yet
socialLists :: FlowCont NgramsTerm FlowListScores
<- flowSocialList mfslw user nt ( FlowCont HashMap.empty
$ HashMap.fromList
$ List.zip (HashMap.keys allTerms)
(List.cycle [mempty])
)
printDebug "[buldNgramsTermsList: Flow Social List / end]" nt
let ngramsKeys = HashMap.keysSet allTerms
groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) ngramsKeys)
let
socialLists_Stemmed = addScoreStem groupParams' ngramsKeys socialLists
--printDebug "socialLists_Stemmed" socialLists_Stemmed
groupedWithList = toGroupedTree socialLists_Stemmed allTerms
(stopTerms, candidateTerms) = HashMap.partition ((== Just StopTerm) . viewListType)
$ HashMap.filter (\g -> (view gts'_score g) > 1)
$ view flc_scores groupedWithList
(groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms
-- printDebug "stopTerms" stopTerms
-- splitting mono-terms and multi-terms to take proportional candidates
-- use a % of the list if too big, or an Int if too small
listSizeGlobal = 2000 :: Double
monoSize = 0.4 :: Double
multSize = 1 - monoSize
splitAt n' ns = both (HashMap.fromListWith (<>))
$ List.splitAt (round $ n' * listSizeGlobal)
$ List.sortOn (viewScore . snd)
$ HashMap.toList ns
(groupedMonoHead, _groupedMonoTail) = splitAt monoSize groupedMono
(groupedMultHead, groupedMultTail) = splitAt multSize groupedMult
-------------------------
-- Filter 1 With Set NodeId and SpeGen
selectedTerms = Set.toList $ hasTerms (groupedMonoHead <> groupedMultHead)
-- TODO remove (and remove HasNodeError instance)
userListId <- defaultList uCid
masterListId <- defaultList mCid
mapTextDocIds <- getNodesByNgramsOnlyUser uCid
[userListId, masterListId]
nt
selectedTerms
let
groupedTreeScores_SetNodeId :: HashMap NgramsTerm (GroupedTreeScores (Set NodeId))
groupedTreeScores_SetNodeId = HashMap.filter (\g -> Set.size (view gts'_score g) > 1) -- removing hapax
$ setScoresWithMap mapTextDocIds (groupedMonoHead <> groupedMultHead)
--printDebug "groupedTreeScores_SetNodeId" groupedTreeScores_SetNodeId
-- Co-occurrences computation
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
let mapCooc = HashMap.filter (>1) -- removing cooc of 1
$ HashMap.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2)
| (t1, s1) <- mapStemNodeIds
, (t2, s2) <- mapStemNodeIds
]
where
mapStemNodeIds = HashMap.toList
$ HashMap.map viewScores
$ groupedTreeScores_SetNodeId
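-- Note (added for clarity): 'mapCooc' counts, for every pair of selected
-- terms, the number of documents containing both (the size of the
-- intersection of their NodeId sets), and keeps only pairs that co-occur
-- more than once.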
let
-- computing scores
mapScores f = HashMap.fromList
$ map (\g -> (view scored_terms g, f g))
$ normalizeGlobal
$ map normalizeLocal
$ scored'
$ Map.fromList -- TODO remove this
$ HashMap.toList mapCooc
let
groupedTreeScores_SpeGen :: HashMap NgramsTerm (GroupedTreeScores (Scored NgramsTerm))
groupedTreeScores_SpeGen = setScoresWithMap (mapScores identity) groupedTreeScores_SetNodeId
let
-- sort / partition / split
-- filter mono/multi again
(monoScored, multScored) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) groupedTreeScores_SpeGen
-- filter with max score
partitionWithMaxScore = HashMap.partition (\g -> (view scored_genInc $ view gts'_score g)
> (view scored_speExc $ view gts'_score g)
)
(monoScoredIncl, monoScoredExcl) = partitionWithMaxScore monoScored
(multScoredIncl, multScoredExcl) = partitionWithMaxScore multScored
-- splitAt
let
-- use a % of the list if too big, or an Int if too small
mapSize = 1000 :: Double
canSize = mapSize * 5 :: Double
inclSize = 0.4 :: Double
exclSize = 1 - inclSize
splitAt' max' n' = (both (HashMap.fromList)) . (List.splitAt (round $ n' * max'))
sortOn f = (List.sortOn (Down . (view (gts'_score . f)) . snd)) . HashMap.toList
monoInc_size n = splitAt' n $ monoSize * inclSize / 2
multExc_size n = splitAt' n $ multSize * exclSize / 2
(mapMonoScoredInclHead, monoScoredInclTail) = monoInc_size mapSize $ (sortOn scored_genInc) monoScoredIncl
(mapMonoScoredExclHead, monoScoredExclTail) = monoInc_size mapSize $ (sortOn scored_speExc) monoScoredExcl
(mapMultScoredInclHead, multScoredInclTail) = multExc_size mapSize $ (sortOn scored_genInc) multScoredIncl
(mapMultScoredExclHead, multScoredExclTail) = multExc_size mapSize $ (sortOn scored_speExc) multScoredExcl
(canMonoScoredIncHead , _) = monoInc_size canSize $ (sortOn scored_genInc) monoScoredInclTail
(canMonoScoredExclHead, _) = monoInc_size canSize $ (sortOn scored_speExc) monoScoredExclTail
(canMulScoredInclHead, _) = multExc_size canSize $ (sortOn scored_genInc) multScoredInclTail
(canMultScoredExclHead, _) = multExc_size canSize $ (sortOn scored_speExc) multScoredExclTail
------------------------------------------------------------
-- Final step: building the typed list
-- Candidate terms need to be filtered
let
maps = setListType (Just MapTerm)
$ mapMonoScoredInclHead
<> mapMonoScoredExclHead
<> mapMultScoredInclHead
<> mapMultScoredExclHead
-- An original way to filter to start with
cands = setListType (Just CandidateTerm)
$ canMonoScoredIncHead
<> canMonoScoredExclHead
<> canMulScoredInclHead
<> canMultScoredExclHead
-- TODO count it too
cands' = setListType (Just CandidateTerm)
{-\$ groupedMonoTail
<>-} groupedMultTail
-- Quick FIX
candNgramsElement = List.take 5000
$ toNgramsElement cands <> toNgramsElement cands'
result = Map.unionsWith (<>)
[ Map.fromList [( nt, toNgramsElement maps
<> toNgramsElement stopTerms
<> candNgramsElement
)]
]
pure result
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/ 0000775 0000000 0000000 00000000000 14124644201 0027773 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Formats/ 0000775 0000000 0000000 00000000000 14124644201 0031406 5 ustar 00root root 0000000 0000000 CSV.hs 0000664 0000000 0000000 00000007271 14124644201 0032325 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Formats {-|
Module : Gargantext.Core.Text.List.Formats.CSV
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
CSV parser for Gargantext term list files.
-}
module Gargantext.Core.Text.List.Formats.CSV where
import Control.Applicative
import Control.Monad (mzero)
import Data.Char (ord)
import Data.Csv
import Data.Either (Either(Left, Right))
import Data.List (null)
import Data.Text (Text, pack)
import Data.Vector (Vector)
import GHC.IO (FilePath)
import Gargantext.Core.Text.Context
import Gargantext.Prelude hiding (length)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as DT
import qualified Data.Vector as V
------------------------------------------------------------------------
csvMapTermList :: FilePath -> IO TermList
csvMapTermList fp = csv2list CsvMap <$> snd <$> fromCsvListFile fp
csv2list :: CsvListType -> Vector CsvList -> TermList
csv2list lt vs = V.toList $ V.map (\(CsvList _ label forms)
-> (DT.words label, [DT.words label] <> (filter (not . null) . map DT.words $ DT.splitOn csvListFormsDelimiter forms)))
$ V.filter (\l -> csvList_status l == lt ) vs
------------------------------------------------------------------------
data CsvListType = CsvMap | CsvStop | CsvCandidate
deriving (Read, Show, Eq)
------------------------------------------------------------------------
-- CSV List Main Configuration
csvListFieldDelimiter :: Char
csvListFieldDelimiter = '\t'
csvListFormsDelimiter :: Text
csvListFormsDelimiter = "|&|"
------------------------------------------------------------------------
data CsvList = CsvList
{ csvList_status :: !CsvListType
, csvList_label :: !Text
, csvList_forms :: !Text
}
deriving (Show)
------------------------------------------------------------------------
instance FromNamedRecord CsvList where
parseNamedRecord r = CsvList <$> r .: "status"
<*> r .: "label"
<*> r .: "forms"
instance ToNamedRecord CsvList where
toNamedRecord (CsvList s l f) =
namedRecord [ "status" .= s
, "label" .= l
, "forms" .= f
]
------------------------------------------------------------------------
instance FromField CsvListType where
parseField "map" = pure CsvMap
parseField "main" = pure CsvCandidate
parseField "stop" = pure CsvStop
parseField _ = mzero
instance ToField CsvListType where
toField CsvMap = "map"
toField CsvCandidate = "main"
toField CsvStop = "stop"
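-- | Expected file layout (a sketch, with <TAB> standing for the actual tab
-- character 'csvListFieldDelimiter'; alternative forms are separated by
-- 'csvListFormsDelimiter'):
--
-- > status<TAB>label<TAB>forms
-- > map<TAB>climate change<TAB>climate changes|&|climatic change
-- > main<TAB>sea level<TAB>sea levels
-- > stop<TAB>thing<TAB>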
------------------------------------------------------------------------
csvDecodeOptions :: DecodeOptions
csvDecodeOptions = (defaultDecodeOptions
{decDelimiter = fromIntegral $ ord csvListFieldDelimiter}
)
csvEncodeOptions :: EncodeOptions
csvEncodeOptions = ( defaultEncodeOptions
{encDelimiter = fromIntegral $ ord csvListFieldDelimiter}
)
------------------------------------------------------------------------
fromCsvListFile :: FilePath -> IO (Header, Vector CsvList)
fromCsvListFile fp = do
csvData <- BL.readFile fp
case decodeByNameWith csvDecodeOptions csvData of
Left e -> panic (pack e)
Right csvList -> pure csvList
------------------------------------------------------------------------
toCsvListFile :: FilePath -> (Header, Vector CsvList) -> IO ()
toCsvListFile fp (h, vs) = BL.writeFile fp $
encodeByNameWith csvEncodeOptions h (V.toList vs)
------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Group.hs 0000664 0000000 0000000 00000004665 14124644201 0031436 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.List.Group
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.List.Group
where
import Control.Lens (view)
import Data.HashMap.Strict (HashMap)
import Data.Maybe (fromMaybe)
import Data.Monoid (Monoid, mempty)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
------------------------------------------------------------------------
toGroupedTree :: (Ord a, Monoid a, HasSize a)
=> FlowCont NgramsTerm FlowListScores
-> HashMap NgramsTerm a
-> FlowCont NgramsTerm (GroupedTreeScores a)
toGroupedTree flc scores =
groupWithScores' flc scoring
where
scoring t = fromMaybe mempty $ HashMap.lookup t scores
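-- | Usage sketch (hypothetical values, mirroring the calls in
-- Gargantext.Core.Text.List; Set and NodeId are not imported by this module,
-- so this is illustration only): social-list scores are combined with a
-- per-term score map, e.g. the set of documents containing each term.
--
-- > exampleTree :: FlowCont NgramsTerm (GroupedTreeScores (Set NodeId))
-- > exampleTree = toGroupedTree socialLists allTerms
-- >   where
-- >     socialLists = FlowCont HashMap.empty
-- >                            (HashMap.fromList [(NgramsTerm "dog", mempty)])
-- >     allTerms    = HashMap.fromList [(NgramsTerm "dog", Set.fromList [NodeId 1])]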
------------------------------------------------------------------------
setScoresWithMap :: (Ord a, Ord b, Monoid b) => HashMap NgramsTerm b
-> HashMap NgramsTerm (GroupedTreeScores a)
-> HashMap NgramsTerm (GroupedTreeScores b)
setScoresWithMap m = setScoresWith (score m)
where
score m' t = case HashMap.lookup t m' of
Nothing -> mempty
Just r -> r
setScoresWith :: (Ord a, Ord b)
=> (NgramsTerm -> b)
-> HashMap NgramsTerm (GroupedTreeScores a)
-> HashMap NgramsTerm (GroupedTreeScores b)
{-
-- | This type-level lenses solution does not work
setScoresWith f = Map.mapWithKey (\k v -> over gts'_children (setScoresWith f)
$ set gts'_score (f k) v
)
-}
setScoresWith f = HashMap.mapWithKey (\k v -> v { _gts'_score = f k
, _gts'_children = setScoresWith f
$ view gts'_children v
}
)
------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Group/ 0000775 0000000 0000000 00000000000 14124644201 0031067 5 ustar 00root root 0000000 0000000 Prelude.hs 0000664 0000000 0000000 00000012150 14124644201 0032743 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Group {-|
Module : Gargantext.Core.Text.List.Group.Prelude
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.List.Group.Prelude
where
import Control.Lens (makeLenses, view, set, over)
import Data.HashMap.Strict (HashMap)
import Data.Maybe (fromMaybe)
import Data.Monoid
import Data.Semigroup
import Data.Set (Set)
import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList)
import Gargantext.Core.Types (ListType(..))
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Set as Set
import Prelude (foldl1)
type Stem = NgramsTerm
------------------------------------------------------------------------
-- | Main types to group with scores while preserving tree dependencies.
-- We therefore need a tree of GroupedTextScores
-- to target the continuation type of the flow (FlowCont Text GroupedTreeScores)
data GroupedTreeScores score =
GroupedTreeScores { _gts'_listType :: !(Maybe ListType)
, _gts'_children :: !(HashMap NgramsTerm (GroupedTreeScores score))
, _gts'_score :: !score
} deriving (Show, Ord, Eq)
instance (Semigroup a) => Semigroup (GroupedTreeScores a) where
(<>) (GroupedTreeScores l1 s1 c1)
(GroupedTreeScores l2 s2 c2)
= GroupedTreeScores (l1 <> l2)
(s1 <> s2)
(c1 <> c2)
instance (Ord score, Monoid score)
=> Monoid (GroupedTreeScores score) where
mempty = GroupedTreeScores mempty mempty mempty
makeLenses 'GroupedTreeScores
------------------------------------------------------------------------
-- | Main Classes
class ViewListType a where
viewListType :: a -> Maybe ListType
class SetListType a where
setListType :: Maybe ListType -> a -> a
------
class Ord b => ViewScore a b | a -> b where
viewScore :: a -> b
class ViewScores a b | a -> b where
viewScores :: a -> b
--------
class ToNgramsElement a where
toNgramsElement :: a -> [NgramsElement]
class HasTerms a where
hasTerms :: a -> Set NgramsTerm
------------------------------------------------------------------------
-- | Instance declarations for (GroupedTreeScores a)
instance ViewListType (GroupedTreeScores a) where
viewListType = view gts'_listType
instance SetListType (GroupedTreeScores a) where
setListType lt g = over gts'_children (setListType lt)
$ set gts'_listType lt g
instance SetListType (HashMap NgramsTerm (GroupedTreeScores a)) where
setListType lt = HashMap.map (set gts'_listType lt)
------
class HasSize a where
hasSize :: a -> Integer
instance HasSize Double where
hasSize = round
instance HasSize (Set a) where
hasSize = fromIntegral . Set.size
instance (HasSize a, Semigroup a) => ViewScore (GroupedTreeScores a) Integer where
viewScore = hasSize . viewScores
instance Semigroup a => ViewScores (GroupedTreeScores a) a where
viewScores g = foldl1 (<>) $ parent : children
where
parent = view gts'_score g
children = map viewScores $ HashMap.elems $ view gts'_children g
------
instance HasTerms (HashMap NgramsTerm (GroupedTreeScores a)) where
hasTerms = Set.unions . (map hasTerms) . HashMap.toList
instance HasTerms (NgramsTerm, GroupedTreeScores a) where
hasTerms (t, g) = Set.singleton t <> children
where
children = Set.unions
$ map hasTerms
$ HashMap.toList
$ view gts'_children g
------
instance ToNgramsElement (HashMap NgramsTerm (GroupedTreeScores a)) where
toNgramsElement = List.concat . (map toNgramsElement) . HashMap.toList
instance ToNgramsElement (NgramsTerm, GroupedTreeScores a) where
toNgramsElement (t, gts) = parent : children
where
parent = mkNgramsElement t
(fromMaybe CandidateTerm $ viewListType gts)
Nothing
(mSetFromList $ HashMap.keys
$ view gts'_children gts
)
children = List.concat
$ map (childrenWith t t)
$ HashMap.toList
$ view gts'_children gts
childrenWith root parent' (t', gts') = parent'' : children'
where
parent'' = mkNgramsElement t'
(fromMaybe CandidateTerm $ viewListType gts')
(Just $ RootParent root parent')
(mSetFromList $ HashMap.keys
$ view gts'_children gts'
)
children' = List.concat
$ map (childrenWith root t' )
$ HashMap.toList
$ view gts'_children gts'
WithScores.hs 0000664 0000000 0000000 00000007253 14124644201 0033445 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Group {-|
Module      : Gargantext.Core.Text.List.Group.WithScores
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.List.Group.WithScores
where
import Control.Lens (view, set, over)
import Data.HashMap.Strict (HashMap)
import Data.Maybe (fromMaybe)
import Data.Monoid (Monoid, mempty)
import Data.Semigroup
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
------------------------------------------------------------------------
-- | Main function
groupWithScores' :: (Eq a, Ord a, Monoid a, HasSize a)
=> FlowCont NgramsTerm FlowListScores
-> (NgramsTerm -> a)
-> FlowCont NgramsTerm (GroupedTreeScores a)
groupWithScores' flc scores = FlowCont groups orphans
where
-- parent/child relation is inherited from social lists
groups = HashMap.filter ((0 <) . viewScore)
$ toGroupedTree'
$ toMapMaybeParent scores
$ (view flc_scores flc <> view flc_cont flc)
-- orphans should have been filtered already, so this becomes empty
orphans = mempty
------------------------------------------------------------------------
toMapMaybeParent :: (Eq a, Ord a, Monoid a)
=> (NgramsTerm -> a)
-> HashMap NgramsTerm FlowListScores
-> HashMap (Maybe Parent) (HashMap NgramsTerm (GroupedTreeScores a))
toMapMaybeParent f = HashMap.fromListWith (<>)
. (map (fromScores'' f))
. HashMap.toList
fromScores'' :: (Eq a, Ord a, Monoid a)
=> (NgramsTerm -> a)
-> (NgramsTerm, FlowListScores)
-> (Maybe Parent, HashMap NgramsTerm (GroupedTreeScores a))
fromScores'' f' (t, fs) = ( maybeParent
, HashMap.fromList [( t, set gts'_score (f' t)
$ set gts'_listType maybeList mempty
)]
)
where
maybeParent = keyWithMaxValue $ view fls_parents fs
maybeList = keyWithMaxValue $ view fls_listType fs
------------------------------------------------------------------------
toGroupedTree' :: Eq a
=> HashMap (Maybe Parent) (HashMap NgramsTerm (GroupedTreeScores a))
-> HashMap Parent (GroupedTreeScores a)
toGroupedTree' m = case HashMap.lookup Nothing m of
Nothing -> mempty
Just m' -> toGroupedTree'' m m'
filterGroupedTree :: (GroupedTreeScores a -> Bool)
-> HashMap Parent (GroupedTreeScores a)
-> HashMap Parent (GroupedTreeScores a)
filterGroupedTree f = HashMap.filter f
toGroupedTree'' :: Eq a => HashMap (Maybe Parent) (HashMap NgramsTerm (GroupedTreeScores a))
-> (HashMap NgramsTerm (GroupedTreeScores a))
-> HashMap Parent (GroupedTreeScores a)
toGroupedTree'' m notEmpty
| notEmpty == mempty = mempty
| otherwise = HashMap.mapWithKey (addGroup m) notEmpty
where
addGroup m' k v = over gts'_children ( (toGroupedTree'' m')
. (HashMap.union ( fromMaybe mempty
$ HashMap.lookup (Just k) m'
)
)
)
v
WithStem.hs 0000664 0000000 0000000 00000011462 14124644201 0033114 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Group {-|
Module : Gargantext.Core.Text.List.Group.WithStem
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
module Gargantext.Core.Text.List.Group.WithStem
where
import Control.Lens (makeLenses)
import Data.HashMap.Strict (HashMap)
import Data.HashSet (HashSet)
import Data.Map (Map)
import Data.Maybe (catMaybes)
import Gargantext.API.Ngrams.Types
import Gargantext.Core (Lang(..), PosTagAlgo(..), Form, Lem)
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Patch
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.HashSet as Set
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict.Patch as PatchMap
import qualified Data.Patch.Class as Patch (Replace(..))
import qualified Data.Text as Text
------------------------------------------------------------------------
addScoreStem :: GroupParams
-> HashSet NgramsTerm
-> FlowCont NgramsTerm FlowListScores
-> FlowCont NgramsTerm FlowListScores
addScoreStem groupParams ngrams fl = foldl' addScorePatch fl
$ stemPatches groupParams ngrams
------------------------------------------------------------------------
-- | Main Types
data StopSize = StopSize {unStopSize :: !Int}
deriving (Eq)
-- | TODO: grouping with 2 terms only is debatable.
-- The main purpose of this is to offer a first grouping
-- option to the user and to get some enriched data to
-- better learn from and improve the algorithm.
-- | Lens instances are at the end of this file
data GroupParams = GroupParams { unGroupParams_lang :: !Lang
, unGroupParams_len :: !Int
, unGroupParams_limit :: !Int
, unGroupParams_stopSize :: !StopSize
}
| GroupIdentity
| GroupWithPosTag { _gwl_lang :: !Lang
, _gwl_algo :: !PosTagAlgo
, _gwl_map :: !(HashMap Form Lem)
}
deriving (Eq)
------------------------------------------------------------------------
groupWith :: GroupParams
-> NgramsTerm
-> NgramsTerm
groupWith GroupIdentity t = identity t
groupWith (GroupParams { unGroupParams_lang = l }) t =
NgramsTerm
$ Text.intercalate " "
$ map (stem l)
-- . take n
$ List.sort
-- \$ Set.toList
-- \$ Set.fromList
-- . (List.filter (\t -> Text.length t > m))
$ Text.splitOn " "
$ Text.replace "-" " "
$ unNgramsTerm t
-- | This grouping is a lemmatization done with the CoreNLP algorithm (or others)
groupWith (GroupWithPosTag { _gwl_map = m }) t =
case HashMap.lookup (unNgramsTerm t) m of
Nothing -> clean t
Just t' -> clean $ NgramsTerm t'
where
clean (NgramsTerm t'') = NgramsTerm $ Text.replace "-" " " t''
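-- | Illustration (a sketch; the exact output depends on the stemmer):
--
-- > groupWith GroupIdentity (NgramsTerm "global warming")
-- >   == NgramsTerm "global warming"
-- > groupWith (GroupParams EN 10 10 (StopSize 3)) (NgramsTerm "warming-effects global")
-- >   -- splits on "-" and " ", sorts and stems each word,
-- >   -- yielding something like: NgramsTerm "effect global warm"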
--------------------------------------------------------------------
stemPatches :: GroupParams
-> HashSet NgramsTerm
-> [(NgramsTerm, NgramsPatch)]
stemPatches groupParams = patches
. Map.fromListWith (<>)
. map (\ng -> ( groupWith groupParams ng
, Set.singleton ng
)
)
. Set.toList
-- | For now all NgramsTerm which have the same stem
-- are grouped together.
-- The parent is taken arbitrarily for now (TODO use a score like occ)
patches :: Map Stem (HashSet NgramsTerm)
-> [(NgramsTerm, NgramsPatch)]
patches = catMaybes . map patch . Map.elems
patch :: HashSet NgramsTerm
-> Maybe (NgramsTerm, NgramsPatch)
patch s = case Set.size s > 1 of
False -> Nothing
True -> do
let ngrams = Set.toList s
parent <- headMay ngrams
let children = List.tail ngrams
pure (parent, toNgramsPatch children)
toNgramsPatch :: [NgramsTerm] -> NgramsPatch
toNgramsPatch children = NgramsPatch children' Patch.Keep
where
children' :: PatchMSet NgramsTerm
children' = PatchMSet
$ fst
$ PatchMap.fromList
$ List.zip children (List.cycle [addPatch])
-- | Instances
makeLenses ''GroupParams
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Learn.hs 0000664 0000000 0000000 00000010203 14124644201 0031364 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.List.Learn
Description : Learn to make lists
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
SVM-based helpers to learn how to make lists.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Core.Text.List.Learn
where
import qualified Data.IntMap as IntMap
import qualified Data.List as List
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.SVM as SVM
import qualified Data.Vector as Vec
import Gargantext.Core
import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Prelude
import Gargantext.Database.GargDB
------------------------------------------------------------------------
train :: Double -> Double -> SVM.Problem -> IO SVM.Model
train x y = (SVM.train (SVM.CSvc x) (SVM.RBF y))
predict :: SVM.Model -> [Vec.Vector Double] -> IO [Double]
predict m vs = mapM (predict' m) vs
where
predict' m' vs' = SVM.predict m' (IntMap.fromList $ (zip [1..]) $ Vec.toList vs')
------------------------------------------------------------------------
trainList :: Double -> Double -> Map ListType [Vec.Vector Double] -> IO SVM.Model
trainList x y = (train x y) . trainList'
where
trainList' :: Map ListType [Vec.Vector Double] -> SVM.Problem
trainList' = mapVec2problem . (Map.mapKeys (fromIntegral . toDBid))
mapVec2problem :: Map Double [Vec.Vector Double] -> SVM.Problem
mapVec2problem = List.concat . (map (\(a,as) -> zip (repeat a) as)) . Map.toList . (Map.map vecs2maps)
vecs2maps :: [Vec.Vector Double] -> [IntMap.IntMap Double]
vecs2maps = map (IntMap.fromList . (zip [1..]) . Vec.toList)
predictList :: Model -> [Vec.Vector Double] -> IO [Maybe ListType]
predictList (ModelSVM m _ _) vs = map (Just . fromDBid . round) <$> predict m vs
------------------------------------------------------------------------
data Model = ModelSVM { modelSVM :: SVM.Model
, param1 :: Maybe Double
, param2 :: Maybe Double
}
--{-
instance SaveFile Model
where
saveFile' fp (ModelSVM m _ _) = SVM.saveModel m fp
instance ReadFile Model
where
readFile' fp = do
m <- SVM.loadModel fp
pure $ ModelSVM m Nothing Nothing
--}
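-- | Usage sketch (hypothetical data): train a model separating MapTerm from
-- StopTerm vectors, then predict the list type of a new vector.
--
-- > exampleLearn :: IO [Maybe ListType]
-- > exampleLearn = do
-- >   let dat = Map.fromList [ (MapTerm , [Vec.fromList [1,2], Vec.fromList [2,3]])
-- >                          , (StopTerm, [Vec.fromList [9,9], Vec.fromList [8,7]])
-- >                          ]
-- >   m <- trainList 1 0.5 dat
-- >   predictList (ModelSVM m (Just 1) (Just 0.5)) [Vec.fromList [1.5, 2.5]]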
------------------------------------------------------------------------
-- | TODO
-- shuffle list
-- split list : train / test
-- grid parameters on best result on test
type Train = Map ListType [Vec.Vector Double]
type Tests = Map ListType [Vec.Vector Double]
type Score = Double
type Param = Double
grid :: (MonadBase IO m)
=> Param -> Param -> Train -> [Tests] -> m (Maybe Model)
grid _ _ _ [] = panic "Gargantext.Core.Text.List.Learn.grid : empty test data"
grid s e tr te = do
let
grid' :: (MonadBase IO m)
=> Double -> Double
-> Train
-> [Tests]
-> m (Score, Model)
grid' x y tr' te' = do
model'' <- liftBase $ trainList x y tr'
let
model' = ModelSVM model'' (Just x) (Just y)
score' :: [(ListType, Maybe ListType)] -> Map (Maybe Bool) Int
score' = occurrencesWith (\(a,b) -> (==) <$> Just a <*> b)
score'' :: Map (Maybe Bool) Int -> Double
score'' m'' = maybe 0 (\t -> (fromIntegral t)/total) (Map.lookup (Just True) m'')
where
total = fromIntegral $ foldl (+) 0 $ Map.elems m''
getScore m t = do
let (res, toGuess) = List.unzip
$ List.concat
$ map (\(k,vs) -> zip (repeat k) vs)
$ Map.toList t
res' <- liftBase $ predictList m toGuess
pure $ score'' $ score' $ List.zip res res'
score <- mapM (getScore model') te'
pure (mean score, model')
r <- head . List.reverse
. (List.sortOn fst)
<$> mapM (\(x,y) -> grid' x y tr te)
[(x,y) | x <- [s..e], y <- [s..e]]
printDebug "GRID SEARCH" (map fst r)
--printDebug "file" fp
--fp <- saveFile (ModelSVM model')
--save best result
pure $ snd <$> r
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Merge.hs 0000664 0000000 0000000 00000002147 14124644201 0031372 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.List.Merge
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.List.Merge
where
import Control.Lens (view)
import Data.Map (Map)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.Prelude
import Data.Map.Strict.Patch hiding (PatchMap)
type List = Map NgramsTerm NgramsRepoElement
type Patch = PatchMap NgramsTerm (Replace (Maybe NgramsRepoElement))
-- Question: which version of the patching increment is the FrontEnd using?
diffList :: Versioned List -> Versioned List -> Versioned Patch
diffList l1 l2 = Versioned (1 + view v_version l1)
(diff (view v_data l1) (view v_data l2))
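-- | Example reading (a sketch): if @l1@ is at version 3 and @l2@ is the edited
-- list, then @diffList l1 l2@ is a 'Versioned' 'Patch' at version 4 whose data
-- replaces, term by term, the old 'NgramsRepoElement's with the new ones.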
-- | TODO
{-
commit :: ListId -> NgramsType -> Versioned Patch -> List -> List
commit = undefined
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Social.hs 0000664 0000000 0000000 00000015304 14124644201 0031544 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.List.Social
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Core.Text.List.Social
where
import Control.Monad (mzero)
import Data.Aeson
import GHC.Generics
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Monoid (mconcat)
import qualified Data.Scientific as Scientific
import Data.Swagger
import qualified Data.Text as T
import qualified Data.Vector as V
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.Text.List.Social.Find
import Gargantext.Core.Text.List.Social.History
import Gargantext.Core.Text.List.Social.Patch
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Types.Individu
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Tree
import Gargantext.Database.Schema.Ngrams
import Gargantext.Prelude
import qualified Prelude as Prelude
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Main parameters
-- | FlowSocialListPriority
-- Sociological assumption: either private or others (public) first
-- This parameter depends on the user's choice
data FlowSocialListWith = FlowSocialListWithPriority { fslw_priority :: FlowSocialListPriority }
| FlowSocialListWithLists { fslw_lists :: [ListId] }
deriving (Show, Generic)
instance FromJSON FlowSocialListWith where
parseJSON (Object v) = do
typ :: T.Text <- v .: "type"
value <- v .:? "value" .!= []
case typ of
"MyListsFirst" -> pure $ FlowSocialListWithPriority { fslw_priority = MySelfFirst }
"OtherListsFirst" -> pure $ FlowSocialListWithPriority { fslw_priority = OthersFirst }
"SelectedLists" -> pure $ FlowSocialListWithLists { fslw_lists = value }
_ -> pure $ FlowSocialListWithPriority { fslw_priority = MySelfFirst }
parseJSON _ = mzero
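-- | Payloads accepted by the parser above (a sketch):
--
-- > {"type": "MyListsFirst"}                      -- FlowSocialListWithPriority MySelfFirst
-- > {"type": "OtherListsFirst"}                   -- FlowSocialListWithPriority OthersFirst
-- > {"type": "SelectedLists", "value": [42, 43]}  -- FlowSocialListWithLists [42, 43]
-- > {"type": "anything else"}                     -- defaults to MySelfFirst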
instance ToJSON FlowSocialListWith where
toJSON (FlowSocialListWithPriority { fslw_priority = MySelfFirst }) =
object [ ("type", String "MyListsFirst") ]
toJSON (FlowSocialListWithPriority { fslw_priority = OthersFirst }) =
object [ ("type", String "ListsFirst") ]
toJSON (FlowSocialListWithLists { fslw_lists = ids }) =
object [ ("type", String "SelectedLists")
, ("value", Array $ V.fromList $ (map (\(NodeId id) -> Number $ Scientific.scientific (Prelude.toInteger id) 1) ids)) ]
instance ToSchema FlowSocialListWith where
declareNamedSchema = genericDeclareNamedSchema defaultSchemaOptions
data FlowSocialListPriority = MySelfFirst | OthersFirst
deriving (Show, Generic)
instance ToSchema FlowSocialListPriority where
declareNamedSchema = genericDeclareNamedSchema defaultSchemaOptions
flowSocialListPriority :: FlowSocialListPriority -> [NodeMode]
flowSocialListPriority MySelfFirst = [Private{-, Shared, Public -}]
flowSocialListPriority OthersFirst = reverse $ flowSocialListPriority MySelfFirst
{-
-- | We keep the parents for all ngrams but terms
keepAllParents :: NgramsType -> KeepAllParents
keepAllParents NgramsTerms = KeepAllParents False
keepAllParents _ = KeepAllParents True
-}
------------------------------------------------------------------------
flowSocialList :: ( HasNodeStory env err m
, CmdM env err m
, HasNodeError err
, HasTreeError err
)
=> Maybe FlowSocialListWith
-> User
-> NgramsType
-> FlowCont NgramsTerm FlowListScores
-> m (FlowCont NgramsTerm FlowListScores)
flowSocialList Nothing u = flowSocialList' MySelfFirst u
flowSocialList (Just (FlowSocialListWithPriority p)) u = flowSocialList' p u
flowSocialList (Just (FlowSocialListWithLists ls)) _ = getHistoryScores ls History_User
flowSocialList' :: ( HasNodeStory env err m
, CmdM env err m
, HasNodeError err
, HasTreeError err
)
=> FlowSocialListPriority
-> User -> NgramsType
-> FlowCont NgramsTerm FlowListScores
-> m (FlowCont NgramsTerm FlowListScores)
flowSocialList' flowPriority user nt flc =
mconcat <$> mapM (flowSocialListByMode' user nt flc)
(flowSocialListPriority flowPriority)
where
flowSocialListByMode' :: ( HasNodeStory env err m
, CmdM env err m
, HasNodeError err
, HasTreeError err
)
=> User -> NgramsType
-> FlowCont NgramsTerm FlowListScores
-> NodeMode
-> m (FlowCont NgramsTerm FlowListScores)
flowSocialListByMode' user' nt' flc' mode =
findListsId user' mode
>>= flowSocialListByModeWith nt' flc'
flowSocialListByModeWith :: ( HasNodeStory env err m
, CmdM env err m
, HasNodeError err
, HasTreeError err
)
=> NgramsType
-> FlowCont NgramsTerm FlowListScores
-> [ListId]
-> m (FlowCont NgramsTerm FlowListScores)
flowSocialListByModeWith nt'' flc'' listes =
getHistoryScores listes History_User nt'' flc''
-----------------------------------------------------------------
getHistoryScores :: ( HasNodeStory env err m
, CmdM env err m
, HasNodeError err
, HasTreeError err
)
=> [ListId]
-> History
-> NgramsType
-> FlowCont NgramsTerm FlowListScores
-> m (FlowCont NgramsTerm FlowListScores)
getHistoryScores lists hist nt fl =
addScorePatches nt lists fl <$> getHistory hist nt lists
getHistory :: ( HasNodeStory env err m
, CmdM env err m
, HasNodeError err
, HasTreeError err
)
=> History
-> NgramsType
-> [ListId]
-> m (Map ListId (Map NgramsType [HashMap NgramsTerm NgramsPatch]))
getHistory hist nt listes =
history hist [nt] listes <$> getRepo' listes
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Social/ 0000775 0000000 0000000 00000000000 14124644201 0031205 5 ustar 00root root 0000000 0000000 Find.hs 0000664 0000000 0000000 00000003604 14124644201 0032345 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Social {-|
Module : Gargantext.Core.Text.List.Social.Find
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.List.Social.Find
where
-- findList imports
import Control.Lens (view)
import Gargantext.Core.Types.Individu
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Tree
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Prelude
------------------------------------------------------------------------
findListsId :: (HasNodeError err, HasTreeError err)
=> User -> NodeMode -> Cmd err [NodeId]
findListsId u mode = do
rootId <- getRootId u
ns <- map (view dt_nodeId) <$> filter ((== nodeTypeId NodeList) . (view dt_typeId))
<$> findNodes' rootId mode
pure ns
-- | TODO not clear enough:
-- | Shared means shared with me, but I am not the owner of it
-- | Private is for all the lists I have created
findNodes' :: (HasTreeError err, HasNodeError err)
=> RootId
-> NodeMode
-> Cmd err [DbTreeNode]
findNodes' r Private = do
pv <- (findNodes r Private $ [NodeFolderPrivate] <> commonNodes)
sh <- (findNodes' r Shared)
pure $ pv <> sh
findNodes' r Shared = findNodes r Shared $ [NodeFolderShared, NodeTeam] <> commonNodes
findNodes' r SharedDirect = findNodes r Shared $ [NodeFolderShared, NodeTeam] <> commonNodes
findNodes' r Public = findNodes r Public $ [NodeFolderPublic ] <> commonNodes
findNodes' r PublicDirect = findNodes r Public $ [NodeFolderPublic ] <> commonNodes
commonNodes:: [NodeType]
commonNodes = [NodeFolder, NodeCorpus, NodeList, NodeFolderShared, NodeTeam]
History.hs 0000664 0000000 0000000 00000004317 14124644201 0033130 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Social {-|
Module : Gargantext.Core.Text.List.Social.History
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.List.Social.History
where
import Control.Lens hiding (cons)
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Map.Strict as Map
-- TODO put this in Prelude
cons :: a -> [a]
cons a = [a]
------------------------------------------------------------------------
-- | History control
data History = History_User
| History_NotUser
| History_All
------------------------------------------------------------------------
-- | Main Function
history :: History
-> [NgramsType]
-> [ListId]
-> NodeStory s NgramsStatePatch'
-> Map ListId (Map NgramsType [HashMap NgramsTerm NgramsPatch])
history History_User t l = clean . (history' t l)
where
clean = Map.map (Map.map List.init)
history History_NotUser t l = clean . (history' t l)
where
clean = Map.map (Map.map last)
last = (maybe [] cons) . lastMay
history _ t l = history' t l
------------------------------------------------------------------------
history' :: [NgramsType]
-> [ListId]
-> NodeStory s NgramsStatePatch'
-> Map ListId (Map NgramsType [HashMap NgramsTerm NgramsPatch])
history' types lists = (Map.map (Map.unionsWith (<>)))
. (Map.map (map (Map.filterWithKey (\k _ -> List.elem k types))))
. (Map.map (map toMap))
. (Map.map (view a_history))
. (Map.filterWithKey (\k _ -> List.elem k lists))
. (view unNodeStory)
where
toMap :: PatchMap NgramsType NgramsTablePatch
-> Map NgramsType [HashMap NgramsTerm NgramsPatch]
toMap m = Map.map (cons . unNgramsTablePatch)
$ unPatchMapToMap m
Patch.hs 0000664 0000000 0000000 00000013232 14124644201 0032522 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Social {-|
Module : Gargantext.Core.Text.List.Social.Patch
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.List.Social.Patch
where
import Control.Lens hiding (cons)
import Data.Map (Map)
import Data.Hashable (Hashable)
import Data.HashMap.Strict (HashMap)
import Data.Monoid
import Data.Semigroup
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Prelude (unMSet, patchMSet_toList)
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Patch.Class as Patch (Replace(..))
addScorePatches :: NgramsType -> [ListId]
-> FlowCont NgramsTerm FlowListScores
-> Map ListId (Map NgramsType [HashMap NgramsTerm NgramsPatch])
-> FlowCont NgramsTerm FlowListScores
addScorePatches nt listes fl repo =
foldl' (addScorePatchesList nt repo) fl listes
addScorePatchesList :: NgramsType
-- -> Map NgramsType (Map ListId [HashMap NgramsTerm NgramsPatch])
-> Map ListId (Map NgramsType [HashMap NgramsTerm NgramsPatch])
-> FlowCont NgramsTerm FlowListScores
-> ListId
-> FlowCont NgramsTerm FlowListScores
addScorePatchesList nt repo fl lid =
foldl' addScorePatch fl patches
where
patches = maybe [] (List.concat . (map HashMap.toList)) patches'
patches' = do
lists <- Map.lookup lid repo
mapPatches <- Map.lookup nt lists
pure mapPatches
addScorePatch :: FlowCont NgramsTerm FlowListScores
-> (NgramsTerm , NgramsPatch)
-> FlowCont NgramsTerm FlowListScores
{- | Case of changing listType only. Patches look like:
This patch move "problem" from MapTerm to CandidateTerm
,fromList [(NgramsTerm {unNgramsTerm = "problem"},NgramsPatch {_patch_children = PatchMSet (PatchMap (fromList [])), _patch_list = Replace {_old = MapTerm, _new = CandidateTerm}})]
This patch move "paper" from MapTerm to StopTerm
,fromList [(NgramsTerm {unNgramsTerm = "paper"},NgramsPatch {_patch_children = PatchMSet (PatchMap (fromList [])), _patch_list = Replace {_old = MapTerm, _new = StopTerm}})]])])]
Children are not modified in this specific case.
-}
-- | The old list gets a -1 score,
-- the new list gets a +1 score,
-- hence other lists stay around a 0 score
addScorePatch fl (t, (NgramsPatch { _patch_children
, _patch_list = Patch.Replace old_list new_list })) =
-- Adding new 'Children' score
addScorePatch fl' (t, NgramsPatch { _patch_children
, _patch_list = Patch.Keep })
where
-- | Adding new 'ListType' score
fl' = fl & flc_scores . at t %~ (score fls_listType old_list (-1))
& flc_scores . at t %~ (score fls_listType new_list ( 1))
& flc_cont %~ (HashMap.delete t)
-- | Patching existing Ngrams with children
addScorePatch fl (p, NgramsPatch { _patch_children
, _patch_list = Patch.Keep }) =
foldl' addChild fl $ patchMSet_toList _patch_children
where
-- | Adding a child
addChild fl' (t, Patch.Replace Nothing (Just _)) = doLink ( 1) p t fl'
-- | Removing a child
addChild fl' (t, Patch.Replace (Just _) Nothing) = doLink (-1) p t fl'
-- | This case should not happen: it does nothing
addChild fl' _ = fl'
-- | Inserting a new Ngrams
addScorePatch fl (t, NgramsReplace { _patch_old = Nothing
, _patch_new = Just nre }) =
childrenScore 1 t (nre ^. nre_children)
$ fl & flc_scores . at t %~ (score fls_listType $ nre ^. nre_list) 1
& flc_cont %~ (HashMap.delete t)
addScorePatch fl (t, NgramsReplace { _patch_old = Just old_nre
, _patch_new = maybe_new_nre }) =
let fl' = childrenScore (-1) t (old_nre ^. nre_children)
$ fl & flc_scores . at t %~ (score fls_listType $ old_nre ^. nre_list) (-1)
& flc_cont %~ (HashMap.delete t)
in case maybe_new_nre of
Nothing -> fl'
Just new_nre -> addScorePatch fl' (t, NgramsReplace { _patch_old = Nothing
, _patch_new = Just new_nre })
addScorePatch fl (_, NgramsReplace { _patch_old = Nothing
, _patch_new = Nothing }) = fl
-------------------------------------------------------------------------------
-- | Utils
childrenScore :: Int
-> NgramsTerm
-> MSet NgramsTerm
-> FlowCont NgramsTerm FlowListScores
-> FlowCont NgramsTerm FlowListScores
childrenScore n parent children' fl =
foldl' add' fl $ unMSet children'
where
add' fl' t = doLink n parent t fl'
------------------------------------------------------------------------
doLink :: (Ord a, Hashable a)
=> Int
-> NgramsTerm
-> a
-> FlowCont a FlowListScores
-> FlowCont a FlowListScores
doLink n parent child fl' = fl' & flc_scores . at child %~ (score fls_parents parent n)
score :: (Monoid a, At m, Semigroup (IxValue m))
=> ((m -> Identity m) -> a -> Identity b)
-> Index m -> IxValue m -> Maybe a -> Maybe b
score field list n m = (Just mempty <> m)
& _Just
. field
. at list
%~ (<> Just n)
------------------------------------------------------------------------
Prelude.hs 0000664 0000000 0000000 00000011206 14124644201 0033062 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/List/Social {-|
Module : Gargantext.Core.Text.List.Social.Prelude
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
------------------------------------------------------------------------
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
------------------------------------------------------------------------
module Gargantext.Core.Text.List.Social.Prelude
where
import Control.Lens
import Data.Map (Map)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Monoid
import Data.Semigroup (Semigroup(..))
import GHC.Generics (Generic)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Prelude
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
import qualified Data.Map.Strict as Map
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Map.Strict.Patch as PatchMap
------------------------------------------------------------------------
type Parent = NgramsTerm
------------------------------------------------------------------------
-- | DataType inspired by continuation Monad (but simpler)
data FlowCont a b =
FlowCont { _flc_scores :: HashMap a b
, _flc_cont :: HashMap a b
}
deriving (Show)
instance (Ord a, Eq b, Hashable a) => Monoid (FlowCont a b) where
mempty = FlowCont mempty mempty
instance (Eq a, Ord a, Eq b, Hashable a) => Semigroup (FlowCont a b) where
(<>) (FlowCont m1 s1)
(FlowCont m2 s2)
= FlowCont (m1 <> m2)
(s1 <> s2)
makeLenses ''FlowCont
-- | Datatype definition
data FlowListScores =
FlowListScores { _fls_listType :: HashMap ListType Int
, _fls_parents :: HashMap Parent Int
-- You can add any score by incrementing this type
-- , _flc_score :: HashMap Score Int
}
deriving (Show, Generic, Eq)
makeLenses ''FlowListScores
-- | Rules to compose 2 FlowListScores values
-- About the shape of the type, for fun:
-- Pascal's triangle, the golden ratio or pi?
-- Question: how to add a score field and derive such a definition
-- without the need to fix it below?
instance Semigroup FlowListScores where
(<>) (FlowListScores p1 l1)
(FlowListScores p2 l2) =
FlowListScores (p1 <> p2)
(l1 <> l2)
instance Monoid FlowListScores where
mempty = FlowListScores HashMap.empty HashMap.empty
------------------------------------------------------------------------
-- | Tools to inherit groupings
------------------------------------------------------------------------
-- | Tools
parentUnionsMerge :: (Ord a, Ord b, Num c, Hashable a, Hashable b)
=> [HashMap a (HashMap b c)]
-> HashMap a (HashMap b c)
parentUnionsMerge = HashMap.unionsWith (HashMap.unionWith (+))
-- This Parent union is specific
-- [Private, Shared, Public]
-- means the following preferences:
-- Private > Shared > Public
-- if data have not been tagged privately, then use the other tags.
-- This union's behavior takes the first key only and ignores the others
parentUnionsExcl :: (Ord a, Hashable a)
=> [HashMap a b]
-> HashMap a b
parentUnionsExcl = HashMap.unions
------------------------------------------------------------------------
-- | Takes the key with the max value if and only if that value > 0
-- If the value is <= 0 then the key is not taken at all
-- This can happen since some scores are non-positive (e.g. removing a child)
-- >>> keyWithMaxValue $ DM.fromList $ zip (['a'..'z'] :: [Char]) ([1,2..]::[Int])
-- Just 'z'
-- >>> keyWithMaxValue $ DM.fromList $ zip (['a'..'z'] :: [Char]) ([-1,-2..]::[Int])
-- Nothing
-- TODO duplicates getMaxFromMap; improve it (looking up the value should not be needed)
-- TODO put in custom Prelude
keyWithMaxValue :: (Ord a, Ord b, Num b, Hashable a)
=> HashMap a b -> Maybe a
keyWithMaxValue m = do
maxKey <- headMay $ HashMap.getKeysOrderedByValueMaxFirst m
maxValue <- HashMap.lookup maxKey m
if maxValue > 0
then pure maxKey
else Nothing
------------------------------------------------------------------------
unPatchMapToHashMap :: (Ord a, Hashable a) => PatchMap a b -> HashMap a b
unPatchMapToHashMap = HashMap.fromList . PatchMap.toList
unPatchMapToMap :: Ord a => PatchMap a b -> Map a b
unPatchMapToMap = Map.fromList . PatchMap.toList
unNgramsTablePatch :: NgramsTablePatch -> HashMap NgramsTerm NgramsPatch
unNgramsTablePatch (NgramsTablePatch p) = unPatchMapToHashMap p
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Metrics.hs 0000664 0000000 0000000 00000007116 14124644201 0031027 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.Metrics
Description : Ngrams metrics (genericity/inclusion, specificity/exclusion).
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Scores are computed from co-occurrence maps (see 'scored' and 'localMetrics').
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.Metrics
where
--import Data.Array.Accelerate ((:.)(..), Z(..))
--import Math.KMeans (kmeans, euclidSq, elements)
import Control.Lens (makeLenses)
import Data.Map (Map)
import Data.Monoid (Monoid, mempty)
import Data.HashMap.Strict (HashMap)
import Data.Semigroup (Semigroup)
import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
import Gargantext.Core.Viz.Graph.Index
import Gargantext.Prelude
import qualified Data.Array.Accelerate as DAA
import qualified Data.Array.Accelerate.Interpreter as DAA
import qualified Data.Map as Map
import qualified Data.Vector as V
import qualified Data.Vector.Storable as Vec
import qualified Data.HashMap.Strict as HashMap
type MapListSize = Int
type InclusionSize = Int
scored :: Ord t => HashMap (t,t) Int -> V.Vector (Scored t)
scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map . Map.fromList . HashMap.toList
where
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
map2scored :: Ord t => Map t (Vec.Vector Double) -> V.Vector (Scored t)
map2scored = V.map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . V.fromList . Map.toList
-- TODO change type with (x,y)
data Scored ts = Scored
{ _scored_terms :: !ts
, _scored_genInc :: !GenericityInclusion
, _scored_speExc :: !SpecificityExclusion
} deriving (Show, Eq, Ord)
instance Monoid a => Monoid (Scored a) where
mempty = Scored mempty mempty mempty
instance Semigroup a => Semigroup (Scored a) where
(<>) (Scored a b c )
(Scored _a' b' c')
= Scored (a {-<> a'-})
(b <> b')
(c <> c')
localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
(Map.toList fi)
scores
where
(ti, fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat Triangle ti m
scores = DAA.toList
$ DAA.run
$ DAA.zip (DAA.use is) (DAA.use ss)
-- TODO Code to be removed below
-- TODO in the textflow we end up needing these indices; it might be
-- better to compute them earlier and pass them around.
scored' :: Ord t => Map (t,t) Int -> [Scored t]
scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) scores
where
(ti, fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat Triangle ti m
scores = DAA.toList
$ DAA.run
$ DAA.zip (DAA.use is) (DAA.use ss)
normalizeGlobal :: [Scored a] -> [Scored a]
normalizeGlobal ss = map (\(Scored t s1 s2)
-> Scored t ((s1 - s1min) / s1max)
((s2 - s2min) / s2max)) ss
where
ss1 = map _scored_genInc ss
ss2 = map _scored_speExc ss
s1min = minimum ss1
s1max = maximum ss1
s2min = minimum ss2
s2max = maximum ss2
normalizeLocal :: Scored a -> Scored a
normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2)
where
log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)
-- | Type Instances
makeLenses 'Scored
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Metrics/ 0000775 0000000 0000000 00000000000 14124644201 0030466 5 ustar 00root root 0000000 0000000 CharByChar.hs 0000664 0000000 0000000 00000006345 14124644201 0032721 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Metrics {-|
Module : Gargantext.Core.Text.Metrics.CharByChar
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
-}
module Gargantext.Core.Text.Metrics.CharByChar (levenshtein
, levenshteinNorm
, damerauLevenshtein
, damerauLevenshteinNorm
, overlap
, jaccard
, hamming
) where
import Data.Text (Text)
import GHC.Real (Ratio)
import qualified Data.Text.Metrics as DTM
import Gargantext.Prelude
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
{- * Example title
-}
-- | This module provides metrics to compare Text,
-- starting as an API re-exporting the main functions of the great lib
-- text-metrics by Mark Karpov
-- | Levenshtein Distance
-- In information theory, Linguistics and computer science,
-- the Levenshtein distance is a string metric for measuring
-- the difference between two sequences.
-- See: https://en.wikipedia.org/wiki/Levenshtein_distance
--
levenshtein :: Text -> Text -> Int
levenshtein = DTM.levenshtein
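-- Illustrative example (not part of the original source): the classic pair
-- "kitten" / "sitting" differs by three single-character edits.
-- >>> levenshtein "kitten" "sitting"
-- 3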
-- | Return normalized Levenshtein distance between two 'Text' values.
-- Result is a non-negative rational number (represented as @'Ratio'
-- 'Data.Numeric.Natural'@), where 0 signifies no similarity between the
-- strings, while 1 means exact match.
--
levenshteinNorm :: Text -> Text -> Ratio Int
levenshteinNorm = DTM.levenshteinNorm
-- | Return Damerau-Levenshtein distance between two 'Text' values. The
-- function works like 'levenshtein', but the collection of allowed
-- operations also includes transposition of two /adjacent/ characters.
-- See also:
-- <https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance>
--
damerauLevenshtein :: Text -> Text -> Int
damerauLevenshtein = DTM.damerauLevenshtein
-- Damerau-Levenshtein distance, normalized.
--
damerauLevenshteinNorm :: Text -> Text -> Ratio Int
damerauLevenshteinNorm = DTM.damerauLevenshteinNorm
-- Treating inputs like sets
-- | Return overlap coefficient for two 'Text' values. Returned value
-- is in the range from 0 (no similarity) to 1 (exact match). Return 1
-- if both 'Text' values are empty.
--
-- See also: <https://en.wikipedia.org/wiki/Overlap_coefficient>.
overlap :: Text -> Text -> Ratio Int
overlap = DTM.overlap
-- | Jaccard distance
-- measures dissimilarity between sample sets
jaccard :: Text -> Text -> Ratio Int
jaccard = DTM.jaccard
-- | Hamming Distance
-- In information theory, the Hamming distance between two strings of
-- equal length is the number of positions at which the corresponding
-- symbols are different. In other words, it measures the minimum number of
-- substitutions required to change one string into the other
-- See: https://en.wikipedia.org/wiki/Hamming_distance
hamming :: Text -> Text -> Maybe Int
hamming = DTM.hamming
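-- Illustrative example (not part of the original source): equal-length inputs
-- give a distance, inputs of different lengths give Nothing.
-- >>> hamming "karolin" "kathrin"
-- Just 3
-- >>> hamming "karol" "karolin"
-- Nothing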
Count.hs 0000664 0000000 0000000 00000013555 14124644201 0032044 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Metrics {-|
Module : Gargantext.Core.Text.Metrics.Count
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Token and occurrence
An occurrence is not necessarily a token. Considering the sentence:
"A rose is a rose is a rose". We may equally correctly state that there
are eight or three words in the sentence. There are, in fact, three word
types in the sentence: "rose", "is" and "a". There are eight word tokens
in a token copy of the line. The line itself is a type. There are not
eight word types in the line. It contains (as stated) only the three
word types, 'a', 'is' and 'rose', each of which is unique. So what do we
call what there are eight of? They are occurrences of words. There are
three occurrences of the word type 'a', two of 'is' and three of 'rose'.
Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrences
-}
module Gargantext.Core.Text.Metrics.Count
where
import Data.Text (Text)
import Control.Arrow (Arrow(..), (***))
import qualified Data.List as List
import qualified Data.Map.Strict as DMS
import Data.Map.Strict ( Map, empty, singleton
, insertWith, unionWith, unionsWith
, mapKeys
)
import Data.Set (Set)
import Data.Text (pack)
------------------------------------------------------------------------
import Gargantext.Prelude
import Gargantext.Core.Types
------------------------------------------------------------------------
type Occ a = Map a Int
type Cooc a = Map (a, a) Int
type FIS a = Map (Set a) Int
data Group = ByStem | ByOntology
type Grouped = Stems
{-
-- >> let testData = ["blue lagoon", "blues lagoon", "red lagoon"]
-- >> map occurrences <$> Prelude.mapM (terms Mono EN)
-- [fromList [(fromList ["blue"],1),(fromList ["lagoon"],1)],fromList [(fromList ["blue"],1),(fromList ["lagoon"],1)],fromList [(fromList ["lagoon"],1),(fromList ["red"],1)]]
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),1)]
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon", "red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon", "red lagoon red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon blues lagoon", "red lagoon red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
----
-}
type Occs = Int
type Coocs = Int
type Threshold = Int
removeApax :: Threshold -> Map ([Text], [Text]) Int -> Map ([Text], [Text]) Int
removeApax t = DMS.filter (> t)
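-- Illustrative example (not part of the original source): with a threshold of 1,
-- hapax pairs (count == 1) are dropped and only pairs seen more than once remain.
--   removeApax 1 (DMS.fromList [((["a"],["b"]), 1), ((["a"],["c"]), 3)])
--     == DMS.fromList [((["a"],["c"]), 3)]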
cooc :: [[Terms]] -> Map ([Text], [Text]) Int
cooc tss = coocOnWithLabel _terms_stem (useLabelPolicy label_policy) tss
where
terms_occs = occurrencesOn _terms_stem (List.concat tss)
label_policy = mkLabelPolicy terms_occs
coocOnWithLabel :: (Ord label, Ord b) => (a -> b) -> (b -> label)
-> [[a]] -> Map (label, label) Coocs
coocOnWithLabel on' policy tss = mapKeys (delta policy) $ coocOn on' tss
where
delta :: Arrow a => a b' c' -> a (b', b') (c', c')
delta f = f *** f
mkLabelPolicy :: Map Grouped (Map Terms Occs) -> Map Grouped [Text]
mkLabelPolicy = DMS.map f where
f = _terms_label . fst . maximumWith snd . DMS.toList
-- TODO use the Foldable instance of Map instead of building a list
useLabelPolicy :: Map Grouped [Text] -> Grouped -> [Text]
useLabelPolicy m g = case DMS.lookup g m of
Just label -> label
Nothing -> panic $ "Label of Grouped not found: " <> (pack $ show g)
-- TODO: use a non-fatal error if this can happen in practice
{-
labelPolicy :: Map Grouped (Map Terms Occs) -> Grouped -> Label
labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList <$> lookup g m of
Just label -> label
Nothing -> panic $ "Label of Grouped not found: " <> (pack $ show g)
-}
coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Int
coocOn f as = DMS.unionsWith (+) $ map (coocOn' f) as
coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Int
coocOn' fun ts = DMS.fromListWith (+) xs
where
ts' = List.nub $ map fun ts
xs = [ ((x, y), 1)
| x <- ts'
, y <- ts'
, x >= y
]
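-- Illustrative example (not part of the original source): with the identity
-- projection, each context yields one count per unordered pair; the guard
-- x >= y keeps a single orientation of each pair, including the diagonal.
--   coocOn' identity ["a", "b"]
--     == DMS.fromList [(("a","a"),1), (("b","a"),1), (("b","b"),1)]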
------------------------------------------------------------------------
coocOnContexts :: (a -> [Text]) -> [[a]] -> Map ([Text], [Text]) Int
coocOnContexts fun = DMS.fromListWith (+) . List.concat . map (coocOnSingleContext fun)
coocOnSingleContext :: (a -> [Text]) -> [a] -> [(([Text], [Text]), Int)]
coocOnSingleContext fun ts = xs
where
ts' = List.nub $ map fun ts
xs = [ ((x, y), 1)
| x <- ts'
, y <- ts'
, x >= y
]
------------------------------------------------------------------------
-- | Compute the grouped occurrences (occ)
occurrences :: [Terms] -> Map Grouped (Map Terms Int)
occurrences = occurrencesOn _terms_stem
occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
occurrencesWith :: (Foldable list, Ord k, Num a) => (b -> k) -> list b -> Map k a
occurrencesWith f xs = foldl' (\x y -> insertWith (+) (f y) 1 x) empty xs
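-- Illustrative example (not part of the original source): counting how many
-- times each projected key appears in a list.
--   occurrencesWith identity ["a", "b", "a"] == DMS.fromList [("a", 2), ("b", 1)]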
-- TODO add groups and filter stops
sumOcc :: Ord a => [Occ a] -> Occ a
sumOcc xs = unionsWith (+) xs
FrequentItemSet.hs 0000664 0000000 0000000 00000013333 14124644201 0034032 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Metrics {-|
Module : Gargantext.Core.Text.Metrics.FrequentItemSet
Description : Ngrams tools
Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Domain Specific Language to manage Frequent Item Set (FIS)
-}
module Gargantext.Core.Text.Metrics.FrequentItemSet
( Fis, Size(..)
, occ_hlcm, cooc_hlcm
, allFis, between
, fisWithSize
, fisWith
, fisWithSizePoly
, fisWithSizePoly2
, fisWithSizePolyMap
, fisWithSizePolyMap'
, module HLCM
)
where
import Data.List (concat, null)
import Data.Map.Strict (Map)
import Data.Maybe (catMaybes)
import Data.Set (Set)
import Gargantext.Prelude
import HLCM
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Vector as V
import Control.Monad (sequence)
data Size = Point Int | Segment Int Int
------------------------------------------------------------------------
-- | Occurrence is Frequent Item Set of size 1
occ_hlcm :: Frequency -> [[Item]] -> [Fis]
occ_hlcm = fisWithSize (Point 1)
-- | Cooccurrence is Frequent Item Set of size 2
cooc_hlcm :: Frequency -> [[Item]] -> [Fis]
cooc_hlcm = fisWithSize (Point 2)
allFis :: Frequency -> [[Item]] -> [Fis]
allFis = fisWith Nothing
------------------------------------------------------------------------
between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
between (x,y) = fisWithSize (Segment x y)
--maximum :: Int -> Frequency -> [[Item]] -> [Fis]
--maximum m = between (0,m)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Data type to type the Frequent Item Set
-- TODO replace List with Set in fisItemSet
-- be careful: this risks changing the HLCM behavior
type Fis = Fis' Item
data Fis' a = Fis' { _fisCount :: Int
, _fisItemSet :: [a]
} deriving (Show)
instance Functor Fis' where
fmap f (Fis' c is) = Fis' c (fmap f is)
-- | Sugar from items to FIS
items2fis :: [Item] -> Maybe Fis
items2fis [] = Nothing
items2fis (i:is) = Just $ Fis' i is
------------------------------------------------------------------------
------------------------------------------------------------------------
fisWithSize :: Size -> Frequency -> [[Item]] -> [Fis]
fisWithSize n f is = case n of
Point n' -> fisWith (Just (\x -> length x == (n'+1) )) f is
Segment a b -> fisWith (Just (\x -> cond a (length x) b)) f is
where
cond a' x b' = a' <= x && x <= b'
--- Filter on Fis and not on [Item]
fisWith :: Maybe ([Item] -> Bool) -> Frequency -> [[Item]] -> [Fis]
fisWith s f is = case filter (not . null) is of
[] -> []
js -> catMaybes $ map items2fis $ filter' $ runLCMmatrix js f
-- drop unMaybe
where
filter' = case s of
Nothing -> identity
Just fun -> filter fun
-- Here the sole purpose of taking the keys as a Set is to tell that we do not
-- want duplicates.
fisWithSizePoly :: Ord a => Size -> Frequency -> Set a -> [[a]] -> [Fis' a]
fisWithSizePoly n f ks = map (fmap fromItem) . fisWithSize n f . map (map toItem)
where
ksv = V.fromList $ Set.toList ks
ksm = Map.fromList . flip zip [0..] $ V.toList ksv
toItem = (ksm Map.!)
fromItem = (ksv V.!)
fisWithSizePoly2 :: Ord a => Size -> Frequency -> [[a]] -> [Fis' a]
fisWithSizePoly2 n f is = fisWithSizePoly n f ks is
where
ks = Set.fromList $ concat is
fisWithSizePolyMap :: Ord a => Size -> Frequency -> [[a]] -> Map (Set a) Int
fisWithSizePolyMap n f is =
Map.fromList $ (\i -> (Set.fromList (_fisItemSet i), _fisCount i)) <$> fisWithSizePoly2 n f is
------------------------------------------------------------------------
------------------------------------------------------------------------
---- Weighted [[Item]]
isSublistOf :: Ord a => [a] -> [a] -> Bool
isSublistOf sub lst = all (\i -> elem i lst) sub
reIndexFis :: Ord a => [([a],(b,c))] -> [Fis' a] -> [(Fis' a,([b],[c]))]
reIndexFis items fis = map (\f ->
let docs = filter (\(lst,_) -> isSublistOf (_fisItemSet f) lst) items
in (f, (map (fst . snd) docs,map (snd . snd) docs))) fis
wsum :: [Maybe Double] -> Maybe Double
wsum lst = fmap sum $ sequence lst
fisWithSizePolyMap' :: Ord a => Size -> Frequency -> [([a], (Maybe Double,[Int]))] -> Map (Set a) (Int, (Maybe Double,[Int]))
fisWithSizePolyMap' n f is = Map.fromList
$ map (\(fis,(ws,sources)) -> (Set.fromList (_fisItemSet fis),(_fisCount fis,(wsum ws,concat sources))))
$ reIndexFis is
$ fisWithSizePoly2 n f (map fst is)
------------------------------------------------------------------------
------------------------------------------------------------------------
--
---- | /!\ indexes are not the same:
--
---- | Index ngrams from Map
----indexNgram :: Ord a => Map a Occ -> Map Index a
----indexNgram m = fromList (zip [1..] (keys m))
--
---- | Index ngrams from Map
----ngramIndex :: Ord a => Map a Occ -> Map a Index
----ngramIndex m = fromList (zip (keys m) [1..])
--
--indexWith :: Ord a => Map a Occ -> [a] -> [Int]
--indexWith m xs = unMaybe $ map (\x -> lookupIndex x m) xs
--
--indexIt :: Ord a => [[a]] -> (Map a Int, [[Int]])
--indexIt xs = (m, is)
-- where
-- m = sumOcc (map occ xs)
-- is = map (indexWith m) xs
--
--list2fis :: Ord a => FIS.Frequency -> [[a]] -> (Map a Int, [FIS.Fis])
--list2fis n xs = (m', fs)
-- where
-- (m, is) = indexIt xs
-- m' = M.filter (>50000) m
-- fs = FIS.all n is
--
--text2fis :: FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
--text2fis n xs = list2fis n (map terms xs)
--
----text2fisWith :: FIS.Size -> FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
----text2fisWith = undefined
--
Hetero.purs 0000664 0000000 0000000 00000005142 14124644201 0032552 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Metrics {-|
Module : Gargantext.Text.Hetero
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Gargantext.Text.Hetero where
import Data.List.Split as S
import Data.Map as M
import Data.Set as S
import Database.PostgreSQL.Simple as PGS
import GHC.Real as R
import Gargantext.Database.Admin.Gargandb
import Gargantext.Database.Admin.Private
import Gargantext.Database.Simple
import Gargantext.Text.Count (occurrences)
import Gargantext.Text.Words (cleanText)
import Opaleye.Internal.Column (Column)
import Opaleye.PGTypes (PGInt4)
--main = do
-- t <- getTextquery
-- print (Prelude.map (heterogeinity . concat) $ S.chunksOf 3 t)
-- heterogeinity on concatenated texts
heterogeinity' :: Int -> Int -> Int -> IO [Integer]
heterogeinity' corpus_id limit x = do
t <- getAbstract corpus_id limit
Prelude.mapM (dicoStruct . occurrences) $ (S.chunksOf x) . cleanText $ concat t
heterogeinity'' :: Int -> Int -> Int -> IO [Integer]
heterogeinity'' corpus_id limit size = do
t <- getAbstract corpus_id limit
Prelude.mapM (dicoStruct . occurrences) $ (S.chunksOf size) . cleanText $ concat t
dicoStruct :: (Integral r, Monad m) => M.Map t r -> m r
dicoStruct dict_occ = do
let keys_size = toInteger $ length $ M.keys dict_occ
let total_occ = sum $ Prelude.map (\(x, y) -> y) $ M.toList dict_occ
return $ div total_occ (fromIntegral keys_size)
-- heterogeinity on a UCT (Unité de Contexte Textuel, i.e. a textual context unit)
heterogeinity :: [Char] -> IO Integer
heterogeinity string = do
let dict_occ = occurrences $ cleanText string
let keys_size = toInteger $ length $ M.keys dict_occ
let total_occ = sum $ Prelude.map (\(x, y) -> y) $ M.toList dict_occ
return $ div total_occ (fromIntegral keys_size)
--computeHeterogeinity
-- :: Fractional t =>
-- Opaleye.Internal.Column.Column Opaleye.PGTypes.PGInt4
-- -> IO (t, Integer, Integer)
computeHeterogeinity corpus_id = do
c <- PGS.connect infoGargandb
t <- getText c (nodeHyperdataText corpus_id)
heterogeinity $ Prelude.concat t
main2 = do
let corpus_ids = [
("ALL", 272927) -- 73
,("Histoire", 1387736) -- 28
,("Sciences Po", 1296892) -- 37
,("Phylosophie", 1170004) -- 20
,("Psychologie", 1345852) -- 37
,("Sociologie", 1246452) -- 42
]
r <- Prelude.mapM computeHeterogeinity $ Prelude.map (\(t,id) -> id) corpus_ids
return r
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Metrics/SpeGen/0000775 0000000 0000000 00000000000 14124644201 0031647 5 ustar 00root root 0000000 0000000 IncExc.hs 0000664 0000000 0000000 00000000615 14124644201 0033277 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Metrics/SpeGen {-|
Module : Gargantext.Core.Text.Metrics.SpeGen.IncExc
Description :
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.Metrics.SpeGen.IncExc
where
{-
data IncExc = Inclusion { unInclusion :: !Double }
| Exclusion { unExclusion :: !Double }
-}
TFICF.hs 0000664 0000000 0000000 00000003572 14124644201 0031605 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Metrics {-|
Module : Gargantext.Core.Text.Metrics.TFICF
Description : TFICF Ngrams tools
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Definition of TFICF : Term Frequency - Inverse of Context Frequency
TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).
-}
module Gargantext.Core.Text.Metrics.TFICF ( TFICF
, TficfContext(..)
, Total(..)
, Count(..)
, tficf
, sortTficf
)
where
import Data.Map.Strict (Map, toList)
import Data.Text (Text)
import Gargantext.Core.Types (Ordering(..))
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Ord as DO (Down(..))
path :: Text
path = "[G.T.Metrics.TFICF]"
type TFICF = Double
data TficfContext n m = TficfInfra n m
| TficfSupra n m
deriving (Show)
data Total = Total {unTotal :: !Double}
data Count = Count {unCount :: !Double}
tficf :: TficfContext Count Total
-> TficfContext Count Total
-> TFICF
tficf (TficfInfra (Count ic) (Total it) )
(TficfSupra (Count sc) (Total st) )
| it >= ic && st >= sc && it <= st = (it/ic) * log (st/sc)
| otherwise = panic
$ "[ERR]"
<> path
<> " Frequency impossible"
tficf _ _ = panic $ "[ERR]" <> path <> "Undefined for these contexts"
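-- Worked example (illustrative, not part of the original source), applying the
-- formula above with the natural logarithm: a term seen 2 times out of 10 in the
-- infra context and 4 times out of 100 in the supra context gives
--   tficf (TficfInfra (Count 2) (Total 10)) (TficfSupra (Count 4) (Total 100))
--     == (10 / 2) * log (100 / 4)  -- ~ 16.09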
sortTficf :: Ordering
-> Map Text Double
-> [(Text, Double)]
sortTficf Down = List.sortOn (DO.Down . snd) . toList
sortTficf Up = List.sortOn snd . toList
Utils.hs 0000664 0000000 0000000 00000002174 14124644201 0032047 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Metrics {-|
Module : Gargantext.Core.Text.Metrics.Utils
Description : Some functions to count.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.Metrics.Utils where
import Gargantext.Prelude
import Data.Map (empty, Map, insertWith, toList)
import qualified Data.List as L
countElem :: (Ord k) => Data.Map.Map k Int -> k -> Data.Map.Map k Int
countElem m e = Data.Map.insertWith (+) e 1 m
freq :: (Ord k) => [k] -> Data.Map.Map k Int
freq = foldl countElem Data.Map.empty
getMaxFromMap :: Ord a => Map a1 a -> [a1]
getMaxFromMap m = go [] Nothing (toList m)
where
go ks _ [] = ks
go ks Nothing ((k,v):rest) = go (k:ks) (Just v) rest
go ks (Just u) ((k,v):rest)
| v < u = go ks (Just u) rest
| v > u = go [k] (Just v) rest
| otherwise = go (k:ks) (Just v) rest
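-- Illustrative example (not part of the original source), with fromList standing
-- for Data.Map.fromList: every key sharing the maximal value is returned, most
-- recently visited first.
--   getMaxFromMap (fromList [("a", 3 :: Int), ("b", 5), ("c", 5)]) == ["c", "b"]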
average :: [Double] -> Double
average x = L.sum x / L.genericLength x
average' :: [Int] -> Double
average' x = (L.sum y) / (L.genericLength y) where
y = L.map fromIntegral x
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Samples/ 0000775 0000000 0000000 00000000000 14124644201 0030464 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Samples/CH.hs 0000664 0000000 0000000 00000002664 14124644201 0031322 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.Samples.CH
Description : Sample of Chinese Text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Source: Wikipedia
Page : text mining
-}
module Gargantext.Core.Text.Samples.CH where
import Data.String (String)
textSample :: String
textSample = "文本挖掘有时也被称为文字探勘、文本数据挖掘等,大致相当于文字分析,一般指文本处理过程中产生高质量的信息。高质量的信息通常通过分类和预测来产生,如模式识别。文本挖掘通常涉及输入文本的处理过程(通常进行分析,同时加上一些衍生语言特征以及消除杂音,随后插入到数据库中) ,产生结构化数据,并最终评价和解释输出。'高品质'的文本挖掘通常是指某种组合的相关性,新颖性和趣味性。典型的文本挖掘方法包括文本分类,文本聚类,概念/实体挖掘,生产精确分类,观点分析,文档摘要和实体关系模型(即,学习已命名实体之间的关系) 。 文本分析包括了信息检索、词典分析来研究词语的频数分布、模式识别、标签 注释、信息抽取,数据挖掘技术包括链接和关联分析、可视化和预测分析。本质上,首要的任务是,通过自然语言处理和分析方法,将文本转化为数据进行分析"
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Samples/DE.hs 0000664 0000000 0000000 00000002204 14124644201 0031306 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.Samples.DE
Description : Sample of German Text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Source: Wikipedia
Page : text mining
-}
module Gargantext.Core.Text.Samples.DE where
import Data.String (String)
textSample :: String
textSample = "Text Mining, seltener auch Textmining, Text Data Mining oder Textual Data Mining, ist ein Bündel von Algorithmus-basierten Analyseverfahren zur Entdeckung von Bedeutungsstrukturen aus un- oder schwachstrukturierten Textdaten. Mit statistischen und linguistischen Mitteln erschließt Text-Mining-Software aus Texten Strukturen, die die Benutzer in die Lage versetzen sollen, Kerninformationen der verarbeiteten Texte schnell zu erkennen. Im Optimalfall liefern Text-Mining-Systeme Informationen, von denen die Benutzer zuvor nicht wissen, ob und dass sie in den verarbeiteten Texten enthalten sind. Bei zielgerichteter Anwendung sind Werkzeuge des Text Mining außerdem in der Lage, Hypothesen zu generieren, diese zu überprüfen und schrittweise zu verfeinern."
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Samples/EN.hs 0000664 0000000 0000000 00000017241 14124644201 0031327 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.Samples.EN
Description : Sample of English Text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Source: Wikipedia
Page : text mining
-}
module Gargantext.Core.Text.Samples.EN where
import Data.String (String)
textSample :: String
textSample = "Text mining, also referred to as text data mining, roughly equivalent to text analytics, is the process of deriving high-quality information from text. High-quality information is typically derived through the devising of patterns and trends through means such as statistical pattern learning. Text mining usually involves the process of structuring the input text (usually parsing, along with the addition of some derived linguistic features and the removal of others, and subsequent insertion into a database), deriving patterns within the structured data, and finally evaluation and interpretation of the output. 'High quality' in text mining usually refers to some combination of relevance, novelty, and interestingness. Typical text mining tasks include text categorization, text clustering, concept/entity extraction, production of granular taxonomies, sentiment analysis, document summarization, and entity relation modeling (i.e., learning relations between named entities). Text analysis involves information retrieval, lexical analysis to study word frequency distributions, pattern recognition, tagging/annotation, information extraction, data mining techniques including link and association analysis, visualization, and predictive analytics. The overarching goal is, essentially, to turn text into data for analysis, via application of natural language processing (NLP) and analytical methods. A typical application is to scan a set of documents written in a natural language and either model the document set for predictive classification purposes or populate a database or search index with the information extracted."
stopList :: [String]
stopList =
["a", "a's", "able", "about", "above", "according", "accordingly"
, "across", "actually", "after", "afterwards", "again", "against"
, "ain't", "all", "allow", "allows", "almost", "alone", "along"
, "already", "also", "although", "always", "am", "among", "amongst", "an"
, "analyze", "and", "another", "any", "anybody", "anyhow", "anyone"
, "anything", "anyway", "anyways", "anywhere", "apart", "appear"
, "apply", "appreciate", "appropriate", "are", "aren't", "around"
, "as", "aside", "ask", "asking", "associated", "at", "available"
, "away", "awfully", "b", "based", "be", "became", "because", "become"
, "becomes", "becoming", "been", "before", "beforehand", "behind"
, "being", "believe", "below", "beside", "besides", "best", "better"
, "between", "beyond", "both", "brief", "but", "by", "c", "c'mon", "c's"
, "came", "can", "can't", "cannot", "cant", "cause", "causes", "certain"
, "certainly", "changes", "clearly", "co", "com", "come", "comes"
, "common", "concerning", "consequently", "consider", "considering"
, "contain", "containing", "contains", "corresponding", "could"
, "couldn't", "course", "currently", "d", "definitely", "described"
, "despite", "detecting", "detects", "did", "didn't", "different", "do"
, "does", "doesn't", "doing", "don't", "done", "down", "downwards"
, "during", "e", "each", "edu", "eg", "eight", "either", "else"
, "elsewhere", "enough", "entirely", "especially", "et", "etc", "even"
, "ever", "every", "everybody", "everyone", "everything", "everywhere"
, "ex", "exactly", "example", "except", "f", "far", "few", "fifth"
, "find", "first", "five", "followed", "following", "follows", "for"
, "former", "formerly", "forth", "four", "from", "further", "furthermore"
, "g", "get", "gets", "getting", "gif", "given", "gives", "go", "goes"
, "going", "gone", "got", "gotten", "greetings", "h", "had", "hadn't"
, "happens", "hardly", "has", "hasn't", "have", "haven't", "having"
, "he", "he'd", "he'll", "he's", "hello", "help", "hence", "her"
, "here", "here's", "hereafter", "hereby", "herein", "hereupon", "hers"
, "herself", "hi", "him", "himself", "his", "hither", "hopefully", "how"
, "how's", "howbeit", "however", "i", "i'd", "i'll", "i'm", "i've"
, "identify", "ie", "if", "ignored", "immediate", "in", "inasmuch"
, "inc", "indeed", "indicate", "indicated", "indicates", "inner"
, "insofar", "instead", "into", "involves", "inward", "is", "isn't"
, "it", "it'd", "it'll", "it's", "its", "itself", "j", "just", "k"
, "keep", "keeps", "kept", "know", "known", "knows", "l", "last"
, "late", "lately", "later", "latter", "latterly", "least", "less"
, "lest", "let", "let's", "like", "liked", "likely", "little", "look"
, "looking", "looks", "ltd", "m", "main", "mainly", "many", "may"
, "maybe", "me", "mean", "meanwhile", "merely", "might", "min", "more"
, "moreover", "most", "mostly", "much", "must", "mustn't", "my", "myself"
, "n", "name", "namely", "nd", "near", "nearly", "necessary", "need"
, "needs", "neither", "never", "nevertheless", "new", "next", "nine"
, "no", "nobody", "non", "none", "noone", "nor", "normally", "not"
, "nothing", "novel", "now", "nowhere", "o", "obviously", "of", "off"
, "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones"
, "only", "onto", "or", "other", "others", "otherwise", "ought", "our"
, "ours", "ourselves", "out", "outside", "over", "overall", "own", "p"
, "particular", "particularly", "per", "perhaps", "placed", "please"
, "plus", "possible", "presents", "presumably", "probably", "provides"
, "q", "que", "quite", "qv", "r", "rather", "rd", "re", "really"
, "reasonably", "regarding", "regardless", "regards", "relatively"
, "respectively", "right", "s", "said", "same", "saw", "say", "saying"
, "says", "sds", "second", "secondly", "see", "seeing", "seem", "seemed"
, "seeming", "seems", "seen", "self", "selves", "sensible", "sent"
, "serious", "seriously", "seven", "several", "shall", "shan't"
, "she", "she'd", "she'll", "she's", "should", "shouldn't", "since"
, "six", "so", "some", "somebody", "somehow", "someone", "something"
, "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry"
, "specified", "specify", "specifying", "still", "sub", "such", "sup"
, "sure", "t", "t's", "take", "taken", "tell", "tends", "th", "than"
, "thank", "thanks", "thanx", "that", "that's", "thats", "the", "their"
, "theirs", "them", "themselves", "then", "thence", "there", "there's"
, "thereafter", "thereby", "therefore", "therein", "theres", "thereupon"
, "these", "they", "they'd", "they'll", "they're", "they've", "think"
, "third", "this", "thorough", "thoroughly", "those", "though", "three"
, "through", "throughout", "thru", "thus", "to", "together", "too"
, "took", "toward", "towards", "tried", "tries", "truly", "try"
, "trying", "twice", "two", "u", "un", "under", "unfortunately"
, "unless", "unlikely", "until", "unto", "up", "upon", "us", "use"
, "used", "useful", "uses", "using", "usually", "uucp", "v", "value"
, "various", "very", "via", "viz", "vs", "w", "want", "wants", "was"
, "wasn't", "way", "we", "we'd", "we'll", "we're", "we've", "welcome"
, "well", "went", "were", "weren't", "what", "what's", "whatever", "when"
, "when's", "whence", "whenever", "where", "where's", "whereafter"
, "whereas", "whereby", "wherein", "whereupon", "wherever", "whether"
, "which", "while", "whither", "who", "who's", "whoever", "whole", "whom"
, "whose", "why", "why's", "will", "willing", "wish", "with", "within"
, "without", "won't", "wonder", "would", "wouldn't", "x", "y", "yes"
, "yet", "you", "you'd", "you'll", "you're", "you've", "your", "yours"
, "yourself", "yourselves", "z", "zero"]
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Samples/FR.hs 0000664 0000000 0000000 00000002340 14124644201 0031326 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.Samples.FR
Description : Sample of French Text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Source: Wikipedia
Page : text mining
-}
module Gargantext.Core.Text.Samples.FR where
import Gargantext.Prelude ((<>))
import Data.String (String)
textSample :: String
textSample = "La fouille de textes ou « l'extraction de connaissances » dans les textes est une spécialisation de la fouille de données et fait partie du domaine de l'intelligence artificielle. Cette technique est souvent désignée sous l'anglicisme text mining. Elle désigne un ensemble de traitements informatiques consistant à extraire des connaissances selon un critère de nouveauté ou de similarité dans des textes produits par des humains pour des humains. Dans la pratique, cela revient à mettre en algorithme un modèle simplifié des théories linguistiques dans des systèmes informatiques d'apprentissage et de statistiques. Les disciplines impliquées sont donc la linguistique calculatoire, l'ingénierie des langues, l'apprentissage artificiel, les statistiques et l'informatique." <> "Je pense donc je suis."
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Samples/SP.hs 0000664 0000000 0000000 00000002077 14124644201 0031350 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.Samples.SP
Description : Sample of Spanish Text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Source: Wikipedia
Page : text mining
-}
module Gargantext.Core.Text.Samples.SP where
import Data.String (String)
textMining :: String
textMining = "La minería de textos se refiere al proceso de derivar información nueva de textos. A comienzos de los años ochenta surgieron los primeros esfuerzos de minería de textos que necesitaban una gran cantidad de esfuerzo humano, pero los avances tecnológicos han permitido que esta área progrese de manera rápida en la última década. La minería de textos es un área multidisciplinar basada en la recuperación de información, minería de datos, aprendizaje automático, estadísticas y la lingüística computacional. Como la mayor parte de la información (más de un 80%) se encuentra actualmente almacenada como texto, se cree que la minería de textos tiene un gran valor comercial."
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Search.hs 0000664 0000000 0000000 00000005175 14124644201 0030631 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.Search
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This search engine is first made to clean a CSV file according to a query.
Starting from this model, a specific Gargantext engine will be made
(using more metrics scores/features).
-}
module Gargantext.Core.Text.Search where
import Data.SearchEngine
import Data.Ix
-- Useful to use stopwords
-- import Data.Set (Set)
-- import qualified Data.Set as Set
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Core.Text.Terms.Mono (monoTexts)
import Gargantext.Core.Text.Terms.Mono.Stem as ST
import Gargantext.Core.Text.Corpus.Parsers.CSV
type DocId = Int
type DocSearchEngine = SearchEngine
CsvGargV3
DocId
DocField
NoFeatures
data DocField = TitleField
| AbstractField
deriving (Eq, Ord, Enum, Bounded, Ix, Show)
initialDocSearchEngine :: DocSearchEngine
initialDocSearchEngine =
initSearchEngine docSearchConfig defaultSearchRankParameters
docSearchConfig :: SearchConfig CsvGargV3 DocId DocField NoFeatures
docSearchConfig =
SearchConfig {
documentKey = d_docId,
extractDocumentTerms = extractTerms,
transformQueryTerm = normaliseQueryToken,
documentFeatureValue = const noFeatures
}
where
extractTerms :: CsvGargV3 -> DocField -> [Text]
extractTerms doc TitleField = monoTexts (d_title doc)
extractTerms doc AbstractField = monoTexts (d_abstract doc)
normaliseQueryToken :: Text -> DocField -> Text
normaliseQueryToken tok =
let tokStem = ST.stem ST.EN
in \field -> case field of
TitleField -> tokStem tok
AbstractField -> tokStem tok
defaultSearchRankParameters :: SearchRankParameters DocField NoFeatures
defaultSearchRankParameters =
SearchRankParameters {
paramK1,
paramB,
paramFieldWeights,
paramFeatureWeights = noFeatures,
paramFeatureFunctions = noFeatures,
paramResultsetSoftLimit = 2000,
paramResultsetHardLimit = 4000,
paramAutosuggestPrefilterLimit = 500,
paramAutosuggestPostfilterLimit = 500
}
where
paramK1 :: Float
paramK1 = 1.5
paramB :: DocField -> Float
paramB TitleField = 0.9
paramB AbstractField = 0.5
paramFieldWeights :: DocField -> Float
paramFieldWeights TitleField = 20
paramFieldWeights AbstractField = 5
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms.hs 0000664 0000000 0000000 00000017143 14124644201 0030514 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.Ngrams
Description : Ngrams definition and tools
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
An @n-gram@ is a contiguous sequence of n items from a given sample of
text. In the Gargantext application the items are words, and n is a
non-negative integer.
Using Latin numerical prefixes, an n-gram of size 1 is referred to as a
"unigram"; size 2 is a "bigram" (or, less commonly, a "digram"); size
3 is a "trigram". English cardinal numbers are sometimes used, e.g.,
"four-gram", "five-gram", and so on.
Source: https://en.wikipedia.org/wiki/Ngrams
TODO
group Ngrams -> Tree
compute occ by node of Tree
group occs according groups
compute cooccurrences
compute graph
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstrainedClassMethods #-}
module Gargantext.Core.Text.Terms
where
import Control.Lens
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Map (Map)
import Data.Text (Text)
import Data.Traversable
import GHC.Base (String)
import GHC.Generics (Generic)
import qualified Data.List as List
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Data.HashMap.Strict as HashMap
import Gargantext.Core
import Gargantext.Core.Text (sentences, HasText(..))
import Gargantext.Core.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken)
import Gargantext.Core.Text.Terms.Mono (monoTerms)
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize)
import Gargantext.Core.Text.Terms.Multi (multiterms)
import Gargantext.Core.Types
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Ngrams (insertNgrams)
import Gargantext.Database.Query.Table.NgramsPostag (NgramsPostag(..), insertNgramsPostag, np_form, np_lem)
import Gargantext.Database.Schema.Ngrams (Ngrams(..), NgramsType(..), ngramsTerms, text2ngrams, NgramsId)
import Gargantext.Prelude
data TermType lang
= Mono { _tt_lang :: !lang }
| Multi { _tt_lang :: !lang }
| MonoMulti { _tt_lang :: !lang }
| Unsupervised { _tt_lang :: !lang
, _tt_windowSize :: !Int
, _tt_ngramsSize :: !Int
, _tt_model :: !(Maybe (Tries Token ()))
}
deriving (Generic)
makeLenses ''TermType
--group :: [Text] -> [Text]
--group = undefined
-- remove Stop Words
-- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------
-- | Sugar to extract terms from text (hiding mapM from the end user).
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms :: TermType Lang -> [Text] -> IO [[Terms]]
extractTerms (Unsupervised {..}) xs = mapM (terms (Unsupervised { _tt_model = Just m', .. })) xs
where
m' = case _tt_model of
Just m''-> m''
Nothing -> newTries _tt_windowSize (Text.intercalate " " xs)
extractTerms termTypeLang xs = mapM (terms termTypeLang) xs
------------------------------------------------------------------------
withLang :: (Foldable t, Functor t, HasText h)
=> TermType Lang
-> t h
-> TermType Lang
withLang (Unsupervised {..}) ns = Unsupervised { _tt_model = m', .. }
where
m' = case _tt_model of
Nothing -> -- trace ("buildTries here" :: String)
Just $ buildTries _tt_ngramsSize
$ fmap toToken
$ uniText
$ Text.intercalate " . "
$ List.concat
$ map hasText ns
just_m -> just_m
withLang l _ = l
------------------------------------------------------------------------
data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
| EnrichedNgrams { unEnrichedNgrams :: NgramsPostag }
deriving (Eq, Ord, Generic, Show)
instance Hashable ExtractedNgrams
class ExtractNgramsT h
where
extractNgramsT :: HasText h
=> TermType Lang
-> h
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
------------------------------------------------------------------------
enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
enrichedTerms l pa po (Terms ng1 ng2) =
NgramsPostag l pa po form lem
where
form = text2ngrams $ Text.intercalate " " ng1
lem = text2ngrams $ Text.intercalate " " $ Set.toList ng2
------------------------------------------------------------------------
cleanNgrams :: Int -> Ngrams -> Ngrams
cleanNgrams s ng
| Text.length (ng ^. ngramsTerms) < s = ng
| otherwise = text2ngrams (Text.take s (ng ^. ngramsTerms))
cleanExtractedNgrams :: Int -> ExtractedNgrams -> ExtractedNgrams
cleanExtractedNgrams s (SimpleNgrams ng) = SimpleNgrams $ (cleanNgrams s) ng
cleanExtractedNgrams s (EnrichedNgrams ng) = EnrichedNgrams $ over np_form (cleanNgrams s)
$ over np_lem (cleanNgrams s) ng
extracted2ngrams :: ExtractedNgrams -> Ngrams
extracted2ngrams (SimpleNgrams ng) = ng
extracted2ngrams (EnrichedNgrams ng) = view np_form ng
---------------------------
insertExtractedNgrams :: [ ExtractedNgrams ] -> Cmd err (HashMap Text NgramsId)
insertExtractedNgrams ngs = do
let (s, e) = List.partition isSimpleNgrams ngs
m1 <- insertNgrams (map unSimpleNgrams s)
--printDebug "others" m1
m2 <- insertNgramsPostag (map unEnrichedNgrams e)
--printDebug "terms" m2
let result = HashMap.union m1 m2
pure result
isSimpleNgrams :: ExtractedNgrams -> Bool
isSimpleNgrams (SimpleNgrams _) = True
isSimpleNgrams _ = False
------------------------------------------------------------------------
-- | Terms from Text
-- Mono : mono terms
-- Multi : multi terms
-- MonoMulti : mono and multi
-- TODO : multi terms should exclude mono (intersection is not empty yet)
terms :: TermType Lang -> Text -> IO [Terms]
terms (Mono lang) txt = pure $ monoTerms lang txt
terms (Multi lang) txt = multiterms lang txt
terms (MonoMulti lang) txt = terms (Multi lang) txt
terms (Unsupervised { .. }) txt = termsUnsupervised (Unsupervised { _tt_model = Just m', .. }) txt
where
m' = maybe (newTries _tt_ngramsSize txt) identity _tt_model
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
------------------------------------------------------------------------
-- | Unsupervised ngrams extraction
-- language agnostic extraction
-- TODO: remove IO
-- TODO: newtype BlockText
type WindowSize = Int
type MinNgramSize = Int
termsUnsupervised :: TermType Lang -> Text -> IO [Terms]
termsUnsupervised (Unsupervised l n s m) =
pure
. map (text2term l)
. List.nub
. (List.filter (\l' -> List.length l' >= s))
. List.concat
. mainEleveWith (maybe (panic "no model") identity m) n
. uniText
termsUnsupervised _ = undefined
newTries :: Int -> Text -> Tries Token ()
newTries n t = buildTries n (fmap toToken $ uniText t)
-- | TODO removing long terms > 24
uniText :: Text -> [[Text]]
uniText = map (List.filter (not . isPunctuation))
. map tokenize
. sentences -- TODO get sentences according to lang
. Text.toLower
text2term :: Lang -> [Text] -> Terms
text2term _ [] = Terms [] Set.empty
text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
isPunctuation :: Text -> Bool
isPunctuation x = List.elem x $ (Text.pack . pure)
<$> ("!?(),;.:" :: String)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/ 0000775 0000000 0000000 00000000000 14124644201 0030152 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Eleve.hs 0000664 0000000 0000000 00000047476 14124644201 0031570 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.Terms.Eleve
Description : Unsupervized Word segmentation
Copyright : (c) CNRS, 2019-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
# Implementation of Unsupervized Word Segmentation
References:
- Python implementation (Korantin August, Emmanuel Navarro):
[EleVe](https://github.com/kodexlab/eleve.git)
- Unsupervized Word Segmentation:the case for Mandarin Chinese Pierre
Magistry, Benoît Sagot, Alpage, INRIA & Univ. Paris 7, Proceedings of
the 50th Annual Meeting of the Association for Computational Linguistics
, pages 383–387. [PDF](https://www.aclweb.org/anthology/P12-2075)
Notes for current implementation:
- TODO extract longer ngrams (see paper above, viterbi algo can be used)
- TODO AD TEST: prop (Node c _e f) = c == Map.size f
- AD: Real ngrams extraction test
from Gargantext.Core.Text.Terms import extractTermsUnsupervised
docs <- runCmdRepl $ selectDocs 1004
extractTermsUnsupervised 3 $ DT.intercalate " "
$ catMaybes
$ Gargantext.map _hyperdataDocument_abstract docs
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Core.Text.Terms.Eleve where
-- import Debug.Trace (trace)
-- import Debug.SimpleReflect
import Control.Lens hiding (levels, children)
import Control.Monad (forM_)
import qualified Data.List as L
import Data.Monoid
import Data.Text (Text)
import qualified Data.Text as T
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import qualified Data.Map as Map
import Gargantext.Prelude hiding (cs)
import qualified Data.Tree as Tree
import Data.Tree (Tree)
import qualified Prelude as P (putStrLn, logBase, isNaN, RealFloat)
nan :: Floating e => e
nan = 0 / 0
noNaNs :: P.RealFloat e => [e] -> [e]
noNaNs = filter (not . P.isNaN)
updateIfDefined :: P.RealFloat e => e -> e -> e
updateIfDefined e0 e | P.isNaN e = e0
| otherwise = e
sim :: Entropy e => e -> e -> Bool
sim x y = x == y || (P.isNaN x && P.isNaN y)
subst :: Entropy e => (e, e) -> e -> e
subst (src, dst) x | sim src x = dst
| otherwise = x
------------------------------------------------------------------------
-- | TODO: Show Instance only used for debugging
type Entropy e =
( Fractional e
, Floating e
, P.RealFloat e
, Show e
)
------------------------------------------------------------------------
-- | Example and tests for development
data I e = I
{ _info_entropy :: e
, _info_entropy_var :: e
, _info_autonomy :: e
}
instance Show e => Show (I e) where
show (I e ev a) = show (e, ev, a)
makeLenses ''I
type ModEntropy i o e = (e -> e) -> i -> o
set_autonomy :: Entropy e => ModEntropy (I e) (I e) e
set_autonomy fe i = i & info_autonomy .~ fe (i ^. info_entropy_var)
set_entropy_var :: Entropy e => Setter e (I e) e e
set_entropy_var f e = (\ev -> I e ev nan) <$> f e
data StartStop = Start | Stop
deriving (Ord, Eq, Show)
data Token = NonTerminal Text
| Terminal StartStop
deriving (Ord, Eq, Show)
isTerminal :: Token -> Bool
isTerminal (Terminal _) = True
isTerminal (NonTerminal _) = False
nonTerminals :: [Token] -> [Text]
nonTerminals ts = [nt | NonTerminal nt <- ts]
parseToken :: Text -> Token
parseToken "" = Terminal Start
parseToken "" = Terminal Stop
parseToken t = NonTerminal t
toToken :: [Text] -> [Token]
toToken xs = Terminal Start : (NonTerminal <$> xs) <> [Terminal Stop]
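-- Illustrative example (not part of the original source): a tokenized sentence
-- is wrapped with the Start/Stop terminals before being inserted into the tries.
-- >>> toToken ["new", "york"]
-- [Terminal Start,NonTerminal "new",NonTerminal "york",Terminal Stop]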
printToken :: Token -> Text
printToken = f
where
f (NonTerminal x) = x
f (Terminal Start) = ""
f (Terminal Stop) = ""
------------------------------------------------------------------------
data Trie k e
= Node { _node_count :: Int
, _node_entropy :: e
, _node_children :: Map k (Trie k e)
}
| Leaf { _node_count :: Int }
deriving (Show)
makeLenses ''Trie
insertTrie :: Ord k => [k] -> Trie k () -> Trie k ()
insertTrie [] n = n { _node_count = _node_count n +1}
insertTrie (x:xs) (Leaf c) = mkTrie (c+1) $ Map.singleton x $ insertTrie xs emptyTrie
insertTrie (x:xs) (Node c _e children) = mkTrie (c+1) $ Map.alter f x children
where
f = Just . insertTrie xs . fromMaybe emptyTrie
-- emptyTrie :: (Ord k, Monoid e) => Trie k e
-- emptyTrie = Node 0 mempty mempty
emptyTrie :: Trie k e
emptyTrie = Leaf 0
mkTrie :: Monoid e => Int -> Map k (Trie k e) -> Trie k e
mkTrie c children
| Map.null children = Leaf c
| otherwise = Node c mempty children
-----------------------------
-- | Trie to Tree since Tree as nice print function
toTree :: k -> Trie k e -> Tree (k,Int,Maybe e)
toTree k (Leaf c) = Tree.Node (k, c, Nothing) []
toTree k (Node c e cs) = Tree.Node (k, c, Just e) (map (uncurry toTree) $ Map.toList cs)
------------------------------------------------------------------------
------------------------------------------------------------------------
normalizeLevel :: Entropy e => e -> e -> e -> e
normalizeLevel m v e = (e - m) / v
{- Unused
nodeChildren :: Trie k e -> Map k (Trie k e)
nodeChildren (Node _ _ cs) = cs
nodeChildren (Leaf _) = Map.empty
-}
chunkAlongEleve :: Int -> [a] -> [[a]]
chunkAlongEleve n xs = L.take n <$> L.tails xs
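-- Illustrative example (not part of the original source): every suffix is kept,
-- truncated to the window size n, so shorter chunks appear towards the end.
-- >>> chunkAlongEleve 2 ["a", "b", "c"]
-- [["a","b"],["b","c"],["c"],[]]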
data Direction = Backward | Forward
buildTrie :: Direction -> Int -> [[Token]] -> Trie Token ()
buildTrie d n sentences
= L.foldr insertTrie emptyTrie
. L.concat
$ ( filter (/= [Terminal (term d)])
. chunkAlongEleve (n + 1)
. order d
)
<$> sentences
where
order Forward = identity
order Backward = reverse
term Forward = Stop
term Backward = Start
class IsTrie trie where
entropyTrie :: Entropy e => (k -> Bool) -> trie k () -> trie k e
nodeEntropy :: Entropy e => Getting e i e -> trie k i -> e
nodeChild :: Ord k => k -> trie k e -> trie k e
findTrie :: Ord k => [k] -> trie k e -> trie k e
printTrie :: (Show i, Entropy e) => Getting e i e -> trie Token i -> IO ()
evTrie :: Entropy e => Getting e i e -> Setter i o e e -> trie k i -> trie k o
normalizeEntropy :: Entropy e
=> Getting e i e -> ModEntropy i o e
-> trie k i -> trie k o
instance IsTrie Trie where
entropyTrie _ (Leaf c) = Leaf c
entropyTrie pred (Node c () children) = Node c e (map (entropyTrie pred) children)
where
children' = Map.toList children
sum_count = sum $ _node_count . snd <$> children'
e | sum_count == 0 = nan
| otherwise = sum $ f <$> children'
f (k, child) = if pred k then chc * P.logBase 2 (fromIntegral c)
else - chc * P.logBase 2 chc
where
chc = fromIntegral (_node_count child) / fromIntegral c
nodeEntropy inE (Node _ e _) = e ^. inE
nodeEntropy _ (Leaf _) = nan
nodeChild k (Node _ _ cs) = fromMaybe emptyTrie (Map.lookup k cs)
nodeChild _ (Leaf _) = emptyTrie
findTrie ks t = L.foldl (flip nodeChild) t ks
printTrie inE t = do
P.putStrLn . Tree.drawTree
. fmap show
$ toTree (NonTerminal "") t
P.putStrLn " Levels:"
forM_ (normalizationLevels inE t) $ \level ->
P.putStrLn $ " " <> show level
evTrie inE setEV = go nan
where
go _ (Leaf c) = Leaf c
go e0 (Node c i children) = Node c (i & setEV .~ ev e0 e1) $ go e1 <$> children
where e1 = i ^. inE
ev 0 0 = nan
ev i0 i1 = i1 - i0
normalizeEntropy inE modE t = go (modE identity) (normalizationLevels inE t) t
where
go _ _ (Leaf c) = Leaf c
go _ [] _ = panic "normalizeEntropy' empty levels"
go f ((m, v, _) : ess) (Node c i children)
= Node c (f i) $ go (modE $ normalizeLevel m v) ess <$> children
------------------------------------------------------------------------
levels :: Trie k e -> [[Trie k e]]
levels = L.takeWhile (not . L.null) . L.iterate (L.concatMap subForest) . pure
where
subForest :: Trie k e -> [Trie k e]
subForest (Leaf _) = []
subForest (Node _ _ children) = Map.elems children
entropyLevels :: Entropy e => Getting e i e -> Trie k i -> [[e]]
entropyLevels inE = fmap (noNaNs . map (nodeEntropy inE)) . L.tail . levels
normalizationLevels :: Entropy e => Getting e i e -> Trie k i -> [(e, e, Int)]
normalizationLevels inE = fmap f . entropyLevels inE
where
f es = (mean es, deviation es, length es)
------------------------------------------------------------------------
data Tries k e = Tries
{ _fwd :: Trie k e
, _bwd :: Trie k e
}
makeLenses ''Tries
buildTries :: Int -> [[Token]] -> Tries Token ()
buildTries n sentences = Tries
{ _fwd = buildTrie Forward n sentences
, _bwd = buildTrie Backward n sentences
}
instance IsTrie Tries where
nodeEntropy inE (Tries f b) = mean [nodeEntropy inE f, nodeEntropy inE b]
findTrie ks (Tries f b) = Tries (findTrie ks f) (findTrie (reverse ks) b)
nodeChild = onTries . nodeChild
entropyTrie = onTries . entropyTrie
evTrie inE setEV = onTries $ evTrie inE setEV
normalizeEntropy inE = onTries . normalizeEntropy inE
printTrie inE (Tries f b) = do
P.putStrLn "Forward:"
printTrie inE f
P.putStrLn ""
P.putStrLn "Backward:"
printTrie inE b
onTries :: (Trie k i -> Trie k o) -> Tries k i -> Tries k o
onTries h (Tries f b) = Tries (h f) (h b)
------------------------------------------------------------------------
mayCons :: [a] -> [[a]] -> [[a]]
mayCons [] xss = xss
mayCons xs xss = xs : xss
{-
split :: (IsTrie trie, Entropy e) => Lens' i e -> trie Token i -> [Token] -> [[Token]]
split _ _ [] = []
split inE t (Terminal Start:xs) = split inE t xs
split inE t (x0:xs0) = go [x0] xs0
where
go pref [] = [pref]
go pref (Terminal Stop:_) = [pref]
go _ (Terminal Start:_) = panic "split impossible"
go pref (x:xs) =
-- trace (show (if acc then "ACC" else "CUT", (prefx, epxt), if acc then ">" else "<=", ((pref, ept), "+", ([x], ext)))) $
if acc
then go prefx xs
else mayCons pref $ go [x] xs
where
prefx = pref <> [x]
pt = findTrie pref t
pxt = findTrie prefx t
xt = findTrie [x] t
ept = ne pt
-- ^ entropy of the current prefix
ext = ne xt
-- ^ entropy of [x]
epxt = ne pxt
-- ^ entropy of the current prefix plus x
acc = P.isNaN ept || P.isNaN ext || not (P.isNaN epxt) -- && (epxt > mean [ept, ext])
-- aut(["in","this","paper"]) > aut(["in","this"]) + aut(["paper"])
ne = nodeEntropy inE
-}
split :: Entropy e => Int -> Lens' i e -> Tries Token i -> [Token] -> [[Text]]
split _ _ _ [] = []
split _ _ _ [t] = pure <$> nonTerminals [t]
split n inE t ts = nonTerminals pref `mayCons` split n inE t (drop (length pref) ts)
where
pref = maximumWith (\ks -> nodeEntropy inE $ findTrie ks t)
(L.tail . L.inits . take n $ ts)
{-
split :: Entropy e => Lens' i e -> Tries Token i -> [Token] -> [[Token]]
split inE t0 ts =
maximumWith (sum . map $ nodeAutonomy inE t0) (all the splits of ts)
-}
------------------------------------------------------------------------
mainEleve :: Int -> [[Text]] -> [[[Text]]]
mainEleve n x = mainEleve' n x x
mainEleve' :: Int -> [[Text]] -> [[Text]] -> [[[Text]]]
mainEleve' n x y = mainEleveWith x' n y
where
x' = buildTries n (fmap toToken x)
-- (fmap toToken i) is computed twice, since mainEleveWith is computing it too
-- | This function should take the longest possible chain of:
-- mainEleve'' n x y = maxChainSizeOf [ mainEleve' n x y
-- , mainEleve' n x x
-- , mainEleve' n y y
-- ]
mainEleve'' :: Int -> [[Text]] -> [[Text]] -> [[[Text]]]
mainEleve'' = undefined
mainEleveWith :: Tries Token () -> Int -> [[Text]] -> [[[Text]]]
mainEleveWith m n i = fmap (split n info_autonomy t) (fmap toToken i)
where
t :: Tries Token (I Double)
t = normalizeEntropy info_entropy_var set_autonomy
$ evTrie identity set_entropy_var
$ entropyTrie isTerminal m
------------------------------------------------------------------------
type Checks e = [(Text, Int, e, e, e, e, e, e, e, e, e)]
testEleve :: e ~ Double => Bool -> Int -> [Text] -> Checks e -> IO Bool
testEleve debug n output checks = do
let
res = split (1 + n) info_autonomy nt <$> input
when debug $ do
P.putStrLn . show $ (printToken <$>) <$> input
P.putStrLn ""
printTrie info_entropy nt
P.putStrLn ""
P.putStrLn "Splitting:"
P.putStrLn $ show res
forM_ checks checker
pure $ expected == res
where
out = T.words <$> output
expected = fmap (T.splitOn "-") <$> out
input = toToken . (T.splitOn "-" =<<) <$> out
nt :: Tries Token (I Double)
nt = normalizeEntropy info_entropy_var set_autonomy
. evTrie identity set_entropy_var
. entropyTrie isTerminal
$ buildTries n input
check f msg ref my =
if f ref my
then P.putStrLn $ " \ESC[32mPASS\ESC[m " <> msg <> " " <> show ref
else P.putStrLn $ " \ESC[31mFAIL\ESC[m " <> msg <> " ref=" <> show ref <> " my=" <> show my
checker (ngram, count, entropy, ev, autonomy, fwd_entropy, fwd_ev, fwd_autonomy, bwd_entropy, bwd_ev, bwd_autonomy) = do
let ns = parseToken <$> T.words ngram
nt' = findTrie ns nt
P.putStrLn $ " " <> T.unpack ngram <> ":"
check (==) "count" count (_node_count (_fwd nt'))
check sim "entropy" entropy (nodeEntropy info_entropy nt' )
check sim "ev" ev (nodeEntropy info_entropy_var nt' )
check sim "autonomy" autonomy (nodeEntropy info_autonomy nt' )
check sim "fwd_entropy" fwd_entropy (nodeEntropy info_entropy (_fwd nt'))
check sim "fwd_ev" fwd_ev (nodeEntropy info_entropy_var (_fwd nt'))
check sim "fwd_autonomy" fwd_autonomy (nodeEntropy info_autonomy (_fwd nt'))
check sim "bwd_entropy" bwd_entropy (nodeEntropy info_entropy (_bwd nt'))
check sim "bwd_ev" bwd_ev (nodeEntropy info_entropy_var (_bwd nt'))
check sim "bwd_autonomy" bwd_autonomy (nodeEntropy info_autonomy (_bwd nt'))
-- | TODO real data is a list of tokenized sentences
example0, example1, example2, example3, example4, example5, example6, example7, example8, example9 :: [Text]
example0 = ["New-York is New-York and New-York"]
example1 = ["to-be or not to-be"]
example2 = ["to-be-or not to-be-or NOT to-be and"]
example3 = example0 <> example0
-- > TEST: Should not have York New in the trie
example4 = ["a-b-c-d e a-b-c-d f"]
example5 = ["a-b-c-d-e f a-b-c-d-e g a-b-c-d-e"]
example6 = ["le-petit chat"
,"le-petit chien"
,"le-petit rat"
,"le gros rat"
]
example7 = ["a-b d", "a-c e", "a-c", "a-b", "a-b", "a-c", "a-c", "a-b"]
-- example8 = ["z f", "z", "z", "z"] <> example7
example8 = ["z", "z", "z", "z"] <> example7 <> example7 <> example7
example9 = (T.replace "z" "a") <$> example8
--example8 = ["a-b d", "a-c e", "a f", "a-c g", "a-b h", "a i", "a j", "a-b k", "a-c l", "a-c m", "a n", "a-b o"]
checks0, checks2, checks7, checks8, checks9 :: Checks Double
checks0 =
-- [(token, count, entropy, ev, autonomy, fwd_entropy, fwd_ev, fwd_autonomy, bwd_entropy, bwd_ev, bwd_autonomy)]
[ ("", 1, nan, nan, nan, 0.0, -2.113283334294875, -0.5000000000000002, nan, nan, nan)
, ("New", 3, 0.792481250360578, -1.3208020839342969, 0.7499999999999999, 0.0, -2.113283334294875, -0.5000000000000002, 1.584962500721156, -0.5283208335737188, 2.0)
, ("York", 3, 0.792481250360578, -1.3208020839342969, 0.7499999999999999, 1.584962500721156, -0.5283208335737188, 2.0, 0.0, -2.113283334294875, -0.5000000000000002)
, ("is", 1, 0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002)
, ("and", 1, 0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002)
, ("", 0, nan, nan, nan, nan, nan, nan, 0.0, -2.113283334294875, -0.5000000000000002)
, (" New", 1, nan, nan, nan, 0.0, nan, nan, nan, nan, nan)
, ("New York", 3, 1.584962500721156, 1.584962500721156, 1.414213562373095, 1.584962500721156, 1.584962500721156, 1.4142135623730947, 1.584962500721156, 1.584962500721156, 1.4142135623730951)
, ("York is", 1, 0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865476, 0.0, nan, nan)
, ("is New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865474)
, ("York and", 1, 0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865476, 0.0, nan, nan)
, ("and New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865474)
, ("York ", 1, nan, nan, nan, nan, nan, nan, 0.0, nan, nan)
, (" New York", 1, nan, nan, nan, 0.0, nan, nan, nan, nan, nan)
, ("New York is", 1, 0, nan, nan, 0.0, -1.584962500721156, nan, 0.0, nan, nan)
, ("York is New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, nan, nan)
, ("is New York", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, nan)
, ("New York and", 1, 0, nan, nan, 0.0, -1.584962500721156, nan, 0.0, nan, nan)
, ("York and New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, nan, nan)
, ("and New York", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, nan)
, ("New York ", 1, nan, nan, nan, nan, nan, nan, 0.0, nan, nan)
]
checks2 = []
{-
[("to be", 3, 1.2516291673878228, 1.2516291673878228, 1.5535694744293167, nan, 0.9182958340544896)
,("be or", 2, 0.5, nan, nan, nan, 1.0)
,("or not", 1, 0.0, nan, nan, nan, 0.0)
,("not to", 1, 0.0, nan, nan, nan, 0.0)
,("or NOT", 1, 0.0, nan, nan, nan, 0.0)
,("NOT to", 1, 0.0, nan, nan, nan, 0.0)
,("be and", 1, 0.0, nan, nan, nan, 0.0)
]
-}
checks7 =
[ ("a b", 4, 2, 1.5, 1.0106455960380136, 2, 1, 0.7302967433402215, 2, 2, 1.2909944487358056)
, ("a c", 4, 2, 1.5, 1.0106455960380136, 2, 1, 0.7302967433402215, 2, 2, 1.2909944487358056)
, ("a", 8, 2, -0.7139421727208477, 0.9315597394596105, 1, -1.7139421727208477, 0.1695158759052029, 3, 0.2860578272791523, 1.693603603014018)
]
checks8 =
[ ("a b", 4, 2, 1.5, 1.2384061243840367, 2, 1, 0.9190418024406298, 2, 2, 1.5577704463274435)
, ("a c", 4, 2, 1.5, 1.2384061243840367, 2, 1, 0.9190418024406298, 2, 2, 1.5577704463274435)
, ("a", 8, 2, -1.1151193576322829, 0.8012882295122719, 1, -2.115119357632283, 1.1025957503820932e-2, 3, -0.11511935763228287, 1.5915505015207227)
, ("z", 4, 2, -1.1151193576322829, 0.9576679529201777, 2, -1.1151193576322829, 1.0906240295212841, 2, -1.1151193576322829, 0.8247118763190712)
]
checks9 =
[ ("a b", 4, 2, 0.8741854163060885, 0.9234576822288185, 2, -0.25162916738782304, 0.2891449181301934, 2, 2, 1.5577704463274435)
, ("a c", 4, 2, 0.8741854163060885, 0.9234576822288185, 2, -0.25162916738782304, 0.2891449181301934, 2, 2, 1.5577704463274435)
, ("a", 12, 2.91829583405449, 3.763498724462999e-2, 1.518835832034022, 2.251629167387823, -0.6290316794220367, 1.2162041043595873, 3.5849625007211565, 0.7043016539112967, 1.8214675597084569)
]
runTestsEleve :: Bool -> IO ()
runTestsEleve doChecks =
forM_
[("example0", 3, example0, checks0)
,("example0", 2, example0, [])
,("example1", 2, example1, [])
,("example2", 3, example2, checks2)
,("example3", 2, example3, [])
,("example4", 4, example4, [])
,("example5", 5, example5, [])
,("example6", 2, example6, [])
,("example7", 2, example7, checks7)
,("example8", 2, example8, checks8)
,("example9", 2, example9, checks9)
]
(\(name, n, ex, checks) -> do
P.putStrLn $ name <> " " <> show n
b <- testEleve False n ex (if doChecks then checks else [])
P.putStrLn $ " splitting: " <> if b then "PASS" else "FAIL"
)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Mono.hs 0000664 0000000 0000000 00000002466 14124644201 0031426 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.Terms.Mono
Description : Mono Terms module
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mono-terms are n-grams where n == 1.
-}
module Gargantext.Core.Text.Terms.Mono (monoTerms, monoTexts, monoTextsBySentence, words)
where
import Prelude (String)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.List as L
import qualified Data.Set as S
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
--import Data.Char (isAlphaNum, isSpace)
-- | TODO remove Num ?
--isGram c = isAlphaNum c
words :: Text -> [Text]
words = monoTexts
-- | Sentence split separators
isSep :: Char -> Bool
isSep = (`elem` (",.:;?!(){}[]\"\'" :: String))
monoTerms :: Lang -> Text -> [Terms]
monoTerms l txt = map (monoText2term l) $ monoTexts txt
monoTexts :: Text -> [Text]
monoTexts = L.concat . monoTextsBySentence
-- | TODO use text2term only
monoText2term :: Lang -> Text -> Terms
monoText2term lang txt = Terms [txt] (S.singleton $ stem lang txt)
monoTextsBySentence :: Text -> [[Text]]
monoTextsBySentence = map T.words
. T.split isSep
. T.toLower
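-- | Illustrative behaviour (an assumption, not a checked doctest): the text is
-- lowercased, split on sentence separators, then split on whitespace.
--
-- > monoTextsBySentence "Hello World. How are you"
-- > -- [["hello","world"],["how","are","you"]]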
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Mono/ 0000775 0000000 0000000 00000000000 14124644201 0031062 5 ustar 00root root 0000000 0000000 Stem.hs 0000664 0000000 0000000 00000003456 14124644201 0032257 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Mono {-|
Module : Gargantext.Core.Text.Terms.Mono.Stem
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
In linguistic morphology and information retrieval, stemming is the
process of reducing inflected (or sometimes derived) words to their word
stem, base or root form—generally a written word form. The @stem@ need
not be identical to the morphological root of the word; it is usually
sufficient that related words map to the same stem, even if this stem is
not in itself a valid root.
Source : https://en.wikipedia.org/wiki/Stemming
-}
module Gargantext.Core.Text.Terms.Mono.Stem (stem, Lang(..))
where
import Data.Text (Text)
import qualified Data.Text as DT
import qualified NLP.Stemmer as N
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
-- (stem, Stemmer(..))
--import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
--import Language.Aspell.Options (ACOption(..))
-- | Stemmer
-- A stemmer for English, for example, should identify the string "cats"
-- (and possibly "catlike", "catty" etc.) as based on the root "cat".
-- and
-- "stems", "stemmer", "stemming", "stemmed" as based on "stem". A stemming
-- algorithm reduces the words "fishing", "fished", and "fisher" to the
-- root word, "fish". On the other hand, "argue", "argued", "argues",
-- "arguing", and "argus" reduce to the stem "argu" (illustrating the
-- case where the stem is not itself a word or root) but "argument" and
-- "arguments" reduce to the stem "argument".
stem :: Lang -> Text -> Text
stem lang = DT.pack . N.stem lang' . DT.unpack
where
lang' = case lang of
EN -> N.English
FR -> N.French
_ -> panic $ DT.pack "not implemented yet"
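-- | Illustrative usage (an assumption; exact outputs depend on the underlying
-- stemmer version):
--
-- > stem EN "cats"    -- "cat"
-- > stem EN "fishing" -- "fish"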
Stem/ 0000775 0000000 0000000 00000000000 14124644201 0031713 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Mono En.hs 0000664 0000000 0000000 00000016022 14124644201 0032612 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Mono/Stem {-|
Module : Gargantext.Core.Text.Terms.Mono.Stem.En
Description : Porter algorithm implementation in pure Haskell
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Adapted from:
- source: https://hackage.haskell.org/package/porter
- [Char] -> [Text]
- adding type signatures
- fixing unhandled cases
-}
module Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
where
import Control.Monad
import Data.Either
import Data.Maybe
import Data.Text (Text(), pack, unpack)
import Data.List hiding (map, head)
import Gargantext.Prelude
vowels :: [Char]
vowels = ['a','e','i','o','u']
isConsonant :: [Char] -> Int -> Bool
isConsonant str i
| c `elem` vowels = False
| c == 'y' = i == 0 || isVowel str (i - 1)
| otherwise = True
where
c = str !! i
isVowel :: [Char] -> Int -> Bool
isVowel = (not .) . isConsonant
byIndex :: Foldable t1 => (t1 a -> [Int] -> t2) -> t1 a -> t2
byIndex fun str = fun str [0..length str - 1]
containsVowel :: [Char] -> Bool
containsVowel = byIndex (any . isVowel)
-- | /!\ unsafe fromJust
measure :: [Char] -> Int
measure = length . filter not . init . (True:)
. map fromJust . map head
. group . byIndex (map . isConsonant)
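-- | The measure m counts the VC (vowel-consonant) sequences of a word, as in
-- the Porter paper. Illustrative values (an assumption, standard Porter
-- examples):
--
-- > measure "tree"    -- 0
-- > measure "trouble" -- 1
-- > measure "private" -- 2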
endsWithDouble :: [Char] -> Bool
endsWithDouble = startsWithDouble . reverse
where
startsWithDouble l = case l of
(x:y:_) -> x == y && x `notElem` vowels
_ -> False
cvc :: [Char] -> Bool
cvc word | length word < 3 = False
| otherwise = isConsonant word lastIndex &&
isVowel word (lastIndex - 1) &&
isConsonant word (lastIndex - 2) &&
last word `notElem` ['w','x','y']
where lastIndex = length word - 1
statefulReplace :: Eq a => ([a] -> Bool)
-> [a] -> [a] -> [a]
-> Maybe (Data.Either.Either [a] [a])
statefulReplace predicate str end replacement
| end `isSuffixOf` str = Just replaced
| otherwise = Nothing
where
part = take (length str - length end) str
replaced | predicate part = Right (part ++ replacement)
| otherwise = Left str
replaceEnd :: Eq a => ([a] -> Bool) -> [a] -> [a] -> [a] -> Maybe [a]
replaceEnd predicate str end replacement = do
result <- statefulReplace predicate str end replacement
return (either identity identity result)
findStem
:: (Foldable t, Functor t, Eq a) =>
([a] -> Bool) -> [a] -> t ([a], [a]) -> Maybe [a]
findStem f word pairs = msum $ map (uncurry (replaceEnd f word)) pairs
measureGT :: Int -> [Char] -> Bool
measureGT = flip ((>) . measure)
step1a :: [Char] -> [Char]
step1a word = fromMaybe word result
where
result = findStem (const True) word suffixes
suffixes = [("sses", "ss"), ("ies", "i"), ("ss", "ss"), ("s", "")]
beforeStep1b :: [Char] -> Either [Char] [Char]
beforeStep1b word = fromMaybe (Left word) result
where
cond23 x = do { v <- x; either (const Nothing) (return . Right) v }
cond1 x = do { v <- x; return (Left v) }
result =
cond1 (replaceEnd (measureGT 0) word "eed" "ee") `mplus`
cond23 (statefulReplace containsVowel word "ed" "" ) `mplus`
cond23 (statefulReplace containsVowel word "ing" "" )
afterStep1b :: [Char] -> [Char]
afterStep1b word = fromMaybe word result
where
double = endsWithDouble word && not (any ((`isSuffixOf` word) . return) ['l','s','z'])
mEq1AndCvc = measure word == 1 && cvc word
iif cond val = if cond then Just val else Nothing
result = findStem (const True) word [("at", "ate"), ("bl", "ble"), ("iz", "ize")]
`mplus` iif double (init word)
`mplus` iif mEq1AndCvc (word ++ "e")
step1b :: [Char] -> [Char]
step1b = either identity afterStep1b . beforeStep1b
step1c :: [Char] -> [Char]
step1c word = fromMaybe word result
where result = replaceEnd containsVowel word "y" "i"
step1 :: [Char] -> [Char]
step1 = step1c . step1b . step1a
step2 :: [Char] -> [Char]
step2 word = fromMaybe word result
where
result = findStem (measureGT 0) word
[ ("ational", "ate" )
, ("tional", "tion")
, ("enci", "ence")
, ("anci", "ance")
, ("izer", "ize" )
, ("bli", "ble" )
, ("alli", "al" )
, ("entli", "ent" )
, ("eli", "e" )
, ("ousli", "ous" )
, ("ization", "ize" )
, ("ation", "ate" )
, ("ator", "ate" )
, ("alism", "al" )
, ("iveness", "ive" )
, ("fulness", "ful" )
, ("ousness", "ous" )
, ("aliti", "al" )
, ("iviti", "ive" )
, ("biliti", "ble" )
, ("logi", "log" ) ]
step3 :: [Char] -> [Char]
step3 word = fromMaybe word result
where
result = findStem (measureGT 0) word
[ ("icate", "ic")
, ("ative", "" )
, ("alize", "al")
, ("iciti", "ic")
, ("ical" , "ic")
, ("ful" , "" )
, ("ness" , "" ) ]
step4 :: [Char] -> [Char]
step4 word = fromMaybe word result
where
gt1andST str = (measureGT 1) str && any ((`isSuffixOf` str) . return) ['s','t']
findGT1 = findStem (measureGT 1) word . map (flip (,) "")
result = (findGT1 ["al", "ance", "ence", "er", "ic", "able", "ible", "ant", "ement", "ment", "ent"]) `mplus`
(findStem gt1andST word [("ion","")]) `mplus`
(findGT1 ["ou", "ism", "ate", "iti", "ous", "ive", "ize"])
step5a :: [Char] -> [Char]
step5a word = fromMaybe word result
where
test str = (measureGT 1 str) || ((measure str == 1) && (not $ cvc str))
result = replaceEnd test word "e" ""
step5b :: [Char] -> [Char]
step5b word = fromMaybe word result
where
cond s = last s == 'l' && measureGT 1 s
result = replaceEnd cond word "l" ""
step5 :: [Char] -> [Char]
step5 = step5b . step5a
allSteps :: [Char] -> [Char]
allSteps = step5 . step4 . step3 . step2 . step1
stemIt :: Text -> Text
stemIt s = pack (stem' $ unpack s)
stem' :: [Char] -> [Char]
stem' s | length s < 3 = s
| otherwise = allSteps s
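-- | Illustrative results (an assumption; the reference Porter algorithm gives
-- these stems, and this port may differ on edge cases):
--
-- > stemIt "caresses"   -- "caress"
-- > stemIt "ponies"     -- "poni"
-- > stemIt "relational" -- "relat"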
--fixpoint :: Eq t => (t -> t) -> t -> t
--fixpoint f x = let fx = f x in
-- if fx == x
-- then x
-- else fixpoint f fx
--
--fixstem :: [Char] -> [Char]
--fixstem = fixpoint stem'
{-
main :: IO ()
main = do
content <- readFile "input.txt"
writeFile "output.txt" $ unlines $ map stem $ lines content
-}
Token.hs 0000664 0000000 0000000 00000002055 14124644201 0032421 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Mono {-|
Module : Gargantext.Core.Text.Terms.Mono.Token
Description : Tokens and tokenizing a text
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
In computer science, lexical analysis, lexing or tokenization is the
process of converting a sequence of characters (such as in a computer
program or web page) into a sequence of tokens (strings with an assigned
and thus identified meaning).
Source: https://en.wikipedia.org/wiki/Tokenize
-}
module Gargantext.Core.Text.Terms.Mono.Token (tokenize)
where
import Data.Text (Text)
import qualified Gargantext.Core.Text.Terms.Mono.Token.En as En
-- | Contexts depend on the lang
--import Gargantext.Core (Lang(..))
type Token = Text
-- >>> tokenize "A rose is a rose is a rose."
-- ["A","rose","is","a","rose","is","a","rose", "."]
tokenize :: Text -> [Token]
tokenize = En.tokenize
--data Context = Letter | Word | Sentence | Line | Paragraph
--
--tokenize' :: Lang -> Context -> [Token]
--tokenize' = undefined
--
Token/ 0000775 0000000 0000000 00000000000 14124644201 0032063 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Mono En.hs 0000664 0000000 0000000 00000012246 14124644201 0032766 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Mono/Token {-|
Module : Gargantext.Core.Text.Terms.Mono.Token.En
Description : Tokenizer main functions
Copyright : (c) Grzegorz Chrupała first, after: CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
First inspired from https://bitbucket.org/gchrupala/lingo/overview
-}
module Gargantext.Core.Text.Terms.Mono.Token.En
( EitherList(..)
, Tokenizer
, tokenize
, defaultTokenizer
, whitespace
, uris
, punctuation
, finalPunctuation
, initialPunctuation
, allPunctuation
, contractions
, negatives
)
where
import qualified Data.Char as Char
import Data.Maybe
import Control.Monad
import Control.Applicative (Applicative)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Either
import Gargantext.Prelude
-- | A Tokenizer is a function which takes a Text and returns a list of Eithers
-- (wrapped in a newtype). Right Texts will be passed on for processing
-- to the tokenizers further down the pipeline.
-- Left Texts will be passed through the pipeline unchanged.
-- Use a Left Text in a tokenizer to protect certain tokens from further
-- processing (e.g. see the 'uris' tokenizer).
-- You can define your own custom tokenizer pipelines by chaining tokenizers together:
---
-- > myTokenizer :: Tokenizer
-- > myTokenizer = whitespace >=> allPunctuation
-- examples :: [Text]
-- examples =
-- ["This shouldn't happen."
-- ,"Some 'quoted' stuff"
-- ,"This is a URL: http://example.org."
-- ,"How about an email@example.com"
-- ,"ReferenceError #1065 broke my debugger!"
-- ,"I would've gone."
-- ,"They've been there."
-- ,"Hyphen-words"
-- ,"Yes/No questions"
-- ]
type Tokenizer = Text -> EitherList Text Text
-- | The EitherList is a newtype-wrapped list of Eithers.
newtype EitherList a b = E { unE :: [Either a b] }
-- | Split string into words using the default tokenizer pipeline
tokenize :: Text -> [Text]
tokenize = run defaultTokenizer
-- | Run a tokenizer
run :: Tokenizer -> (Text -> [Text])
run f = \txt -> map T.copy $ (map unwrap . unE . f) txt
defaultTokenizer :: Tokenizer
defaultTokenizer = whitespace
>=> uris
>=> punctuation
>=> contractions
>=> negatives
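-- | Illustrative run of the default pipeline (an assumption, not a checked
-- doctest): punctuation is split off, contractions and negatives are frozen.
--
-- > tokenize "They've been there, haven't they?"
-- > -- ["They","'ve","been","there",",","haven","n't","they","?"]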
-- | Detect common uris and freeze them
uris :: Tokenizer
uris x | isUri x = E [Left x]
| True = E [Right x]
where isUri u = any (`T.isPrefixOf` u) ["http://","ftp://","mailto:","https://"]
-- | Split off initial and final punctuation
punctuation :: Tokenizer
punctuation = finalPunctuation >=> initialPunctuation
--hyphens :: Tokenizer
--hyphens xs = E [Right w | w <- T.split (=='-') xs ]
-- | Split off word-final punctuation
finalPunctuation :: Tokenizer
finalPunctuation x = E $ filter (not . T.null . unwrap) res
where
res :: [Either Text Text]
res = case T.span Char.isPunctuation (T.reverse x) of
(ps, w) | T.null ps -> [ Right $ T.reverse w ]
| otherwise -> [ Right $ T.reverse w
, Right $ T.reverse ps]
-- ([],w) -> [Right . T.reverse $ w]
-- (ps,w) -> [Right . T.reverse $ w, Right . T.reverse $ ps]
-- | Split off word-initial punctuation
initialPunctuation :: Tokenizer
initialPunctuation x = E $ filter (not . T.null . unwrap) $
case T.span Char.isPunctuation x of
(ps,w) | T.null ps -> [ Right w ]
| otherwise -> [ Right ps
, Right w ]
-- | Split tokens on transitions between punctuation and
-- non-punctuation characters. This tokenizer is not included in
-- defaultTokenizer pipeline because dealing with word-internal
-- punctuation is quite application specific.
allPunctuation :: Tokenizer
allPunctuation = E . map Right
. T.groupBy (\a b -> Char.isPunctuation a == Char.isPunctuation b)
-- | Split words ending in n't, and freeze n't
negatives :: Tokenizer
negatives x | "n't" `T.isSuffixOf` x = E [ Right . T.reverse . T.drop 3 . T.reverse $ x
, Left "n't" ]
| True = E [ Right x ]
-- | Split common contractions off and freeze them.
-- | Currently deals with: 'm, 's, 'd, 've, 'll
contractions :: Tokenizer
contractions x = case catMaybes . map (splitSuffix x) $ cts of
[] -> return x
((w,s):_) -> E [ Right w,Left s]
where cts = ["'m","'s","'d","'ve","'ll"]
splitSuffix w sfx =
let w' = T.reverse w
len = T.length sfx
in if sfx `T.isSuffixOf` w
then Just (T.take (T.length w - len) w, T.reverse . T.take len $ w')
else Nothing
-- | Split string on whitespace. This is just a wrapper for Data.Text.words
whitespace :: Tokenizer
whitespace xs = E [Right w | w <- T.words xs ]
instance Monad (EitherList a) where
return x = E [Right x]
E xs >>= f = E $ concatMap (either (return . Left) (unE . f)) xs
instance Applicative (EitherList a) where
pure x = return x
f <*> x = f `ap` x
instance Functor (EitherList a) where
fmap f (E xs) = E $ (fmap . fmap) f xs
unwrap :: Either a a -> a
unwrap (Left x) = x
unwrap (Right x) = x
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Multi.hs 0000664 0000000 0000000 00000003635 14124644201 0031607 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Text.Terms.Multi
Description : Multi Terms module
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Multi-terms are ngrams where n > 1.
-}
module Gargantext.Core.Text.Terms.Multi (multiterms, multiterms_rake)
where
import Data.Text hiding (map, group, filter, concat)
import Data.List (concat)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Core.Types
import Gargantext.Core.Text.Terms.Multi.PosTagging
import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En
import qualified Gargantext.Core.Text.Terms.Multi.Lang.Fr as Fr
import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake)
-------------------------------------------------------------------
-- To be removed
multiterms :: Lang -> Text -> IO [Terms]
multiterms = multiterms' tokenTag2terms
multiterms' :: (TokenTag -> a) -> Lang -> Text -> IO [a]
multiterms' f lang txt = concat
<$> map (map f)
<$> map (filter (\t -> _my_token_pos t == Just NP))
<$> tokenTags lang txt
-------------------------------------------------------------------
tokenTag2terms :: TokenTag -> Terms
tokenTag2terms (TokenTag ws t _ _) = Terms ws t
tokenTags :: Lang -> Text -> IO [[TokenTag]]
tokenTags lang s = map (groupTokens lang) <$> tokenTags' lang s
tokenTags' :: Lang -> Text -> IO [[TokenTag]]
tokenTags' lang t = map tokens2tokensTags
<$> map _sentenceTokens
<$> _sentences
<$> corenlp lang t
---- | This function analyses and groups (or not) ngrams according to
---- specific grammars of each language.
groupTokens :: Lang -> [TokenTag] -> [TokenTag]
groupTokens EN = En.groupTokens
groupTokens FR = Fr.groupTokens
groupTokens _ = panic $ pack "groupTokens :: Lang not implemented yet"
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Multi/ 0000775 0000000 0000000 00000000000 14124644201 0031244 5 ustar 00root root 0000000 0000000 Group.hs 0000664 0000000 0000000 00000001767 14124644201 0032630 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Multi {-|
Module : Gargantext.Core.Text.Terms.Multi.Group
Description : Grammar rules to group POS-tagged tokens.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Rule-based grammars are computed in this module in order to
group the tokens into extracted terms.
-}
module Gargantext.Core.Text.Terms.Multi.Group (group2)
where
import Gargantext.Core.Types
import Gargantext.Prelude
-- | FIXME p1 and p2 not really taken into account
group2 :: POS -> POS -> [TokenTag] -> [TokenTag]
group2 p1 p2 (x@(TokenTag _ _ (Just p1') _):y@(TokenTag _ _ (Just p2') _):z) =
if (p1 == p1') && (p2 == p2')
then group2 p1 p2 (x<>y : z)
else (x : group2 p1 p2 (y:z))
group2 p1 p2 (x@(TokenTag _ _ Nothing _):y) = (x: group2 p1 p2 y)
group2 _ _ [x@(TokenTag _ _ (Just _) _)] = [x]
group2 p1 p2 (x@(TokenTag _ _ (Just _) _):y@(TokenTag _ _ Nothing _):z) = (x : group2 p1 p2 (y:z))
group2 _ _ [] = []
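-- | Illustrative sketch (an assumption; the TokenTag values below are
-- hypothetical): two consecutive tokens tagged NP are merged into a single
-- multi-word token.
--
-- > let np w = TokenTag [w] mempty (Just NP) Nothing
-- > group2 NP NP [np "climate", np "change"]
-- > -- one TokenTag with _my_token_word == ["climate","change"]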
Lang/ 0000775 0000000 0000000 00000000000 14124644201 0032046 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Multi En.hs 0000664 0000000 0000000 00000002755 14124644201 0032755 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Multi/Lang {-|
Module : Gargantext.Core.Text.Terms.Multi.Lang.En
Description : English Grammar rules to group postag tokens.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Rule-based grammars are computed in this english module in order to group
the tokens into extracted terms.
-}
module Gargantext.Core.Text.Terms.Multi.Lang.En (groupTokens)
where
import Gargantext.Prelude
import Gargantext.Core.Types
import Gargantext.Core.Text.Terms.Multi.Group
------------------------------------------------------------------------
-- | Rule grammar to group tokens
groupTokens :: [TokenTag] -> [TokenTag]
groupTokens [] = []
groupTokens ntags = group2 NP NP
$ group2 NP VB
-- $ group2 NP IN
$ group2 IN DT
-- $ group2 VB NP
$ group2 JJ NP
$ group2 JJ JJ
$ group2 JJ CC
$ ntags
------------------------------------------------------------------------
--groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
--groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
--groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs) = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
--
--groupNgrams (x:xs) = (x:(groupNgrams xs))
Fr.hs 0000664 0000000 0000000 00000002353 14124644201 0032754 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Multi/Lang {-|
Module : Gargantext.Core.Text.Terms.Multi.Lang.Fr
Description : French Grammar rules to group postag tokens.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This @group@ function groups ngrams horizontally within their sentence
context, according to grammars specific to each language. The English POS
tag JJ (adjective) corresponds to ADJ in French.
-}
module Gargantext.Core.Text.Terms.Multi.Lang.Fr (groupTokens)
where
import Gargantext.Prelude
import Gargantext.Core.Types
import Gargantext.Core.Text.Terms.Multi.Group (group2)
groupTokens :: [TokenTag] -> [TokenTag]
groupTokens [] = []
groupTokens ntags = group2 NP NP
$ group2 NP VB
-- group2 NP IN
-- group2 IN DT
$ group2 VB NP
$ group2 JJ NP
$ group2 NP JJ
$ group2 JJ JJ
-- group2 JJ CC
$ ntags
------------------------------------------------------------------------
-- TODO
--groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
--groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
PosTagging.hs 0000664 0000000 0000000 00000013167 14124644201 0033573 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Multi {-|
Module : Gargantext.Core.Text.Terms.Multi.PosTagging
Description : PosTagging module using Stanford java REST API
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
In corpus linguistics, part-of-speech tagging (POS tagging or PoS
tagging or POST), also called grammatical tagging or word-category
disambiguation, is the process of marking up a word in a text (corpus)
as corresponding to a particular part of speech,[1] based on both its
definition and its context—i.e., its relationship with adjacent and
related words in a phrase, sentence, or paragraph. A simplified form of
this is commonly taught to school-age children, in the identification of
words as nouns, verbs, adjectives, adverbs, etc.
Source: https://en.wikipedia.org/wiki/Part-of-speech_tagging
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.Core.Text.Terms.Multi.PosTagging
where
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.ByteString.Lazy.Internal (ByteString)
import Data.Set (fromList)
import Data.Text (Text, splitOn, pack, toLower)
import GHC.Generics
import Gargantext.Core (Lang(..))
import Gargantext.Core.Types
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import Network.HTTP.Simple
------------------------------------------------------------------------
------------------------------------------------------------------------
data Token = Token { _tokenIndex :: Int
, _tokenWord :: Text
, _tokenOriginalText :: Text
, _tokenLemma :: Text
, _tokenCharacterOffsetBegin :: Int
, _tokenCharacterOffsetEnd :: Int
, _tokenPos :: Maybe POS
, _tokenNer :: Maybe NER
, _tokenBefore :: Maybe Text
, _tokenAfter :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_token") ''Token)
------------------------------------------------------------------------
------------------------------------------------------------------------
tokens2tokensTags :: [Token] -> [TokenTag]
tokens2tokensTags ts = filter' $ map tokenTag ts
------------------------------------------------------------------------
tokenTag :: Token -> TokenTag
tokenTag (Token _ _ w l _ _ p n _ _) = TokenTag w' l' p n
where
w' = split w
l' = fromList (split l)
split = splitOn (pack " ") . toLower
filter' :: [TokenTag] -> [TokenTag]
filter' xs = filter isNgrams xs
where
isNgrams (TokenTag _ _ p n) = isJust p || isJust n
------------------------------------------------------------------------
data Sentence = Sentence { _sentenceIndex :: Int
, _sentenceTokens :: [Token]
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_sentence") ''Sentence)
data Properties = Properties { _propertiesAnnotators :: Text
, _propertiesOutputFormat :: Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_properties") ''Properties)
data PosSentences = PosSentences { _sentences :: [Sentence]}
deriving (Show, Generic)
$(deriveJSON (unPrefix "_") ''PosSentences)
-- request =
-- "fr" : {
-- "tokenize.language" : "fr",
-- "pos.model" : "edu/stanford/nlp/models/pos-tagger/french/french.tagger",
-- "parse.model" : "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz",
-- // dependency parser
-- "depparse.model" : "edu/stanford/nlp/models/parser/nndep/UD_French.gz",
-- "depparse.language" : "french",
-- "ner.model": DATA_ROOT+"/eunews.fr.crf.gz",
-- "ssplit.newlineIsSentenceBreak": "always"
-- },
--
corenlp' :: ( FromJSON a
, ConvertibleStrings p ByteString
)
=> Lang -> p -> IO (Response a)
corenlp' lang txt = do
let properties = case lang of
EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR -> "{\"annotators\": \"tokenize,ssplit,pos,lemma,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
_ -> panic $ pack "not implemented yet"
url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
let request = setRequestBodyLBS (cs txt) url
httpJSON request
corenlpRaw :: Lang -> Text -> IO Value
corenlpRaw lang txt = do
response <- corenlp' lang txt
pure (getResponseBody response)
corenlp :: Lang -> Text -> IO PosSentences
corenlp lang txt = do
response <- corenlp' lang txt
pure (getResponseBody response)
-- | parseWith
-- Part Of Speech example
-- parseWith _tokenPos "Hello world."
-- == [[("``","``"),("Hello","UH"),("world","NN"),(".","."),("''","''")]]
-- Named Entity Recognition example
-- parseWith _tokenNer "Hello world of Peter."
-- [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]]
tokenWith :: (Token -> t) -> Lang -> Text -> IO [[(Text, t)]]
tokenWith f lang s = map (map (\t -> (_tokenWord t, f t)))
<$> map _sentenceTokens
<$> _sentences
<$> corenlp lang s
RAKE.hs 0000664 0000000 0000000 00000003200 14124644201 0032236 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms/Multi {-|
Module : Gargantext.Core.Text.Terms.Multi.RAKE
Description : Rapid automatic keyword extraction (RAKE)
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Personal notes for the integration of RAKE in Gargantext.
The RAKE algorithm is a simple, rapid and effective keyword-extraction
algorithm which is very sensitive to the quality of the stop word list.
Indeed, the very first step uses the stop word list to cut the text into
candidate keywords. The context is the sentence level, at which occurrences
and co-occurrences are computed; their ratio gives the metric of a single
word, and the metric of a multi-word keyword is the sum of the metrics of
its words.
Finally, the metric favours longer keywords, so it strongly depends on the
quality of the cut, which in turn depends on the quality of the stop word
list.
As a consequence, to improve the effectiveness of the RAKE algorithm, I am
wondering whether some Bayesian features could be added to improve the
quality of the stop word list over time.
-}
module Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake, select, hardStopList)
where
import Data.Text (Text)
import NLP.RAKE.Text
import Gargantext.Core.Text.Samples.EN (stopList)
import Gargantext.Prelude
select :: Double -> [a] -> [a]
select part ns = take n ns
where
n = round $ part * (fromIntegral $ length ns)
multiterms_rake :: Text -> [WordScore]
multiterms_rake = candidates hardStopList
defaultNosplit
defaultNolist . pSplitter
-- | StopList
hardStopList :: StopwordsMap
hardStopList = mkStopwordsStr stopList
WithList.hs 0000664 0000000 0000000 00000006717 14124644201 0032211 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Text/Terms {-|
Module : Gargantext.Core.Text.Terms.WithList
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# LANGUAGE BangPatterns #-}
module Gargantext.Core.Text.Terms.WithList where
import Data.List (null)
import Data.Ord
import Data.Text (Text, concat, unwords)
import Gargantext.Prelude
import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.Mono (monoTextsBySentence)
import Prelude (error)
import qualified Data.Algorithms.KMP as KMP
import qualified Data.IntMap.Strict as IntMap
import qualified Data.List as List
------------------------------------------------------------------------
data Pattern = Pattern
{ _pat_table :: !(KMP.Table Text)
, _pat_length :: !Int
, _pat_terms :: ![Text]
}
type Patterns = [Pattern]
------------------------------------------------------------------------
replaceTerms :: Patterns -> [Text] -> [[Text]]
replaceTerms pats terms = go 0
where
terms_len = length terms
go ix | ix >= terms_len = []
| otherwise =
case IntMap.lookup ix m of
Nothing -> go (ix + 1)
Just (len, term) ->
term : go (ix + len)
merge (len1, lab1) (len2, lab2) =
if len2 < len1 then (len1, lab1) else (len2, lab2)
m =
IntMap.fromListWith merge
[ (ix, (len, term))
| Pattern pat len term <- pats, ix <- KMP.match pat terms ]
buildPatterns :: TermList -> Patterns
buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
where
buildPattern (label, alts) = map f (label : alts)
where
f alt | "" `elem` alt = error "buildPatterns: ERR1"
| null alt = error "buildPatterns: ERR2"
| otherwise =
Pattern (KMP.build alt) (length alt) label
--(Terms label $ Set.empty) -- TODO check stems
--------------------------------------------------------------------------
-- Utils
type BlockText = Text
type MatchedText = Text
termsInText :: Patterns -> BlockText -> [MatchedText]
termsInText pats txt = List.nub
$ List.concat
$ map (map unwords)
$ extractTermsWithList pats txt
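-- | Illustrative sketch reusing the hypothetical termList of the doctest below
-- (an assumption, not a checked doctest): matched multi-word terms are
-- returned deduplicated, as joined Text.
--
-- > termsInText (buildPatterns [(["chat blanc"], [["chat","blanc"]])])
-- >             "Le chat blanc et le chat blanc"
-- > -- ["chat blanc"]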
--------------------------------------------------------------------------
extractTermsWithList :: Patterns -> Text -> Corpus [Text]
extractTermsWithList pats = map (replaceTerms pats) . monoTextsBySentence
-- | Extract terms
-- >>> let termList = [(["chat blanc"], [["chat","blanc"]])] :: TermList
-- >>> extractTermsWithList' (buildPatterns termList) "Le chat blanc"
-- ["chat blanc"]
extractTermsWithList' :: Patterns -> Text -> [Text]
extractTermsWithList' pats = map (concat . map concat . replaceTerms pats)
. monoTextsBySentence
--------------------------------------------------------------------------
{- | Not used
filterWith :: TermList
-> (a -> Text)
-> [a]
-> [(a, [Text])]
filterWith termList f xs = filterWith' termList f zip xs
filterWith' :: TermList
-> (a -> Text)
-> ([a] -> [[Text]] -> [b])
-> [a]
-> [b]
filterWith' termList f f' xs = f' xs
$ map (extractTermsWithList' pats)
$ map f xs
where
pats = buildPatterns termList
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Types.hs 0000664 0000000 0000000 00000013050 14124644201 0027573 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Types
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
------------------------------------------------------------------------
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, module Gargantext.Database.Admin.Types.Node
, Term, Terms(..)
, TokenTag(..), POS(..), NER(..)
, Label, Stems
, HasInvalidError(..), assertValid
, Name
, TableResult(..), NodeTableResult
, Ordering(..)
, Typed(..), withType , unTyped
, TODO(..)
) where
import Control.Lens (Prism', (#), makeLenses, over)
import Control.Monad.Except (MonadError(throwError))
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Hashable (Hashable)
import Data.Maybe
import Data.Monoid
import Data.Semigroup
import Data.Set (Set, empty)
import Data.Swagger (ToParamSchema)
import Data.Swagger (ToSchema(..))
import Data.Text (Text, unpack)
import Data.Validity
import GHC.Generics
import Gargantext.Core.Types.Main
import Gargantext.Core.Utils.Prefix (unPrefix, wellNamedSchema)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
------------------------------------------------------------------------
data Ordering = Down | Up
deriving (Enum, Show, Eq, Bounded)
------------------------------------------------------------------------
type Name = Text
type Term = Text
type Stems = Set Text
type Label = [Text]
data Terms = Terms { _terms_label :: Label
, _terms_stem :: Stems
} deriving (Ord)
instance Eq Terms where
(==) (Terms _ s1) (Terms _ s2) = s1 == s2
------------------------------------------------------------------------
data Tag = POS | NER
deriving (Show, Eq)
------------------------------------------------------------------------
data POS = NP
| JJ | VB
| CC | IN | DT
| NoPos
deriving (Show, Generic, Eq, Ord)
------------------------------------------------------------------------
instance FromJSON POS where
parseJSON = withText "String" (\x -> pure (pos $ unpack x))
where
pos :: [Char] -> POS
pos "NP" = NP
pos "NN" = NP
pos "NC" = NP
pos "NNS" = NP
pos "NNP" = NP
pos "JJ" = JJ
pos "ADJ" = JJ
pos "VB" = VB
pos "VBN" = VB
pos "VBG" = VB
pos "CC" = CC
pos "IN" = IN
pos "DT" = DT
-- French specific
pos "P" = IN
pos _ = NoPos
instance ToJSON POS
instance Hashable POS
------------------------------------------------------------------------
data NER = PERSON | ORGANIZATION | LOCATION | NoNER
deriving (Show, Generic)
------------------------------------------------------------------------
instance FromJSON NER where
parseJSON = withText "String" (\x -> pure (ner $ unpack x))
where
ner :: [Char] -> NER
ner "PERSON" = PERSON
ner "ORGANIZATION" = ORGANIZATION
ner "LOCATION" = LOCATION
ner _ = NoNER
instance ToJSON NER
data TokenTag = TokenTag { _my_token_word :: [Text]
, _my_token_lemma :: Set Text
, _my_token_pos :: Maybe POS
, _my_token_ner :: Maybe NER
} deriving (Show)
instance Semigroup TokenTag where
(<>) (TokenTag w1 s1 p1 n1) (TokenTag w2 s2 p2 _) = TokenTag (w1 <> w2) (s1 <> s2) p3 n1
where
p3 = case (p1,p2) of
(Just JJ, Just NP) -> Just NP
(Just VB, Just NP) -> Just NP
_ -> p1
instance Monoid TokenTag where
mempty = TokenTag [] empty Nothing Nothing
mconcat = foldl mappend mempty
-- mappend t1 t2 = (<>) t1 t2
class HasInvalidError e where
_InvalidError :: Prism' e Validation
assertValid :: (MonadError e m, HasInvalidError e) => Validation -> m ()
assertValid v = when (not $ validationIsValid v) $ throwError $ _InvalidError # v
-- assertValid :: MonadBase IO m => Validation -> m ()
-- assertValid v = when (not $ validationIsValid v) $ fail $ show v
-- | NodeTableResult (Table computations)
type NodeTableResult a = TableResult (Node a)
data TableResult a = TableResult { tr_count :: Int
, tr_docs :: [a]
} deriving (Generic)
$(deriveJSON (unPrefix "tr_") ''TableResult)
instance (Typeable a, ToSchema a) => ToSchema (TableResult a) where
declareNamedSchema = wellNamedSchema "tr_"
instance Arbitrary a => Arbitrary (TableResult a) where
arbitrary = TableResult <$> arbitrary <*> arbitrary
----------------------------------------------------------------------------
data Typed a b =
Typed { _withType :: a
, _unTyped :: b
}
deriving (Generic, Show, Eq, Ord)
makeLenses ''Typed
instance Functor (Typed a) where
fmap = over unTyped
----------------------------------------------------------------------------
-- TO BE removed
data TODO = TODO
deriving (Generic)
instance ToSchema TODO where
instance ToParamSchema TODO where
----------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Types/ 0000775 0000000 0000000 00000000000 14124644201 0027240 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Types/Individu.hs 0000664 0000000 0000000 00000004250 14124644201 0031350 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Types.Individu
Description : Short description
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Individu definitions
-}
module Gargantext.Core.Types.Individu
where
import Data.Aeson
import Control.Monad.IO.Class (MonadIO)
import GHC.Generics (Generic)
import Data.Swagger
import Data.Text (Text, pack, reverse)
import Gargantext.Database.Admin.Types.Node (NodeId, UserId)
import Gargantext.Prelude hiding (reverse)
import qualified Gargantext.Prelude.Crypto.Auth as Auth
-- FIXME UserName used twice
data User = UserDBId UserId | UserName Text | RootId NodeId | UserPublic
deriving (Eq)
type Username = Text
type HashPassword = Auth.PasswordHash Auth.Argon2
newtype GargPassword = GargPassword Text
deriving (Generic)
instance Show GargPassword where
show (GargPassword _) = "*GargPassword*"
instance ToJSON GargPassword
instance FromJSON GargPassword
instance ToSchema GargPassword
type Email = Text
type UsernameMaster = Username
type UsernameSimple = Username
data NewUser a = NewUser { _nu_username :: Username
, _nu_email :: Email
, _nu_password :: a
}
deriving (Show)
arbitraryUsername :: [Username]
arbitraryUsername = {- ["gargantua"] <> -} users
where
users = zipWith (\a b -> a <> (pack . show) b)
(repeat "user") ([1..20]::[Int])
arbitraryPassword :: [GargPassword]
arbitraryPassword = map (\u -> GargPassword (reverse u)) arbitraryUsername
-----------------------------------------------------------
toUserHash :: MonadIO m
=> NewUser GargPassword
-> m (NewUser HashPassword)
toUserHash (NewUser u m (GargPassword p)) = do
h <- Auth.createPasswordHash p
pure $ NewUser u m h
-- TODO remove
arbitraryUsersHash :: MonadIO m
=> m [NewUser HashPassword]
arbitraryUsersHash = mapM toUserHash arbitraryNewUsers
arbitraryNewUsers :: [NewUser GargPassword]
arbitraryNewUsers = map (\u -> NewUser u (u <> "@gargantext.org") (GargPassword $ reverse u))
arbitraryUsername
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Types/Main.hs 0000664 0000000 0000000 00000012450 14124644201 0030462 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Types.Main
Description : Short description
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
-----------------------------------------------------------------------
module Gargantext.Core.Types.Main where
------------------------------------------------------------------------
import Data.Aeson (FromJSON, ToJSON)
import Data.Aeson.TH (deriveJSON)
import Data.Either (Either(..))
import Data.Hashable (Hashable)
import Data.Map (fromList, lookup)
import Data.Maybe (fromMaybe)
import Data.Semigroup (Semigroup(..))
import Data.Swagger
import Data.Text (Text, unpack)
import GHC.Generics (Generic)
import Gargantext.Core
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Admin.Types.Node -- (NodeType(..), Node, Hyperdata(..))
import Gargantext.Prelude
import Servant.API (FromHttpApiData(..))
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Text.Read (read)
type CorpusName = Text
------------------------------------------------------------------------
data NodeTree = NodeTree { _nt_name :: Text
, _nt_type :: NodeType
, _nt_id :: NodeId
} deriving (Show, Read, Generic)
$(deriveJSON (unPrefix "_nt_") ''NodeTree)
instance ToSchema NodeTree where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nt_")
------------------------------------------------------------------------
--data Classification = Favorites | MyClassifcation
type TypeId = Int
-- TODO multiple ListType declaration, remove it
-- data ListType = CandidateTerm | StopTerm | MapTerm
data ListType = CandidateTerm | StopTerm | MapTerm
deriving (Generic, Eq, Ord, Show, Read, Enum, Bounded)
instance ToJSON ListType
instance FromJSON ListType
instance ToSchema ListType
instance ToParamSchema ListType
instance Arbitrary ListType where
arbitrary = elements [minBound..maxBound]
instance Hashable ListType
instance Semigroup ListType
where
MapTerm <> _ = MapTerm
_ <> MapTerm = MapTerm
StopTerm <> _ = StopTerm
_ <> StopTerm = StopTerm
_ <> _ = CandidateTerm
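-- | The Semigroup above encodes the precedence MapTerm > StopTerm > CandidateTerm,
-- e.g. (illustrative, an assumption rather than a checked doctest):
--
-- > CandidateTerm <> MapTerm  -- MapTerm
-- > StopTerm <> CandidateTerm -- StopTerm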
instance FromHttpApiData ListType where
parseUrlPiece = Right . read . unpack
type ListTypeId = Int
instance HasDBid ListType where
toDBid = listTypeId
fromDBid = (fromMaybe (panic "Instance HasDBid fromDBid ListType")) . fromListTypeId
-- FIXME Candidate: 0 and Stop : 1
listTypeId :: ListType -> ListTypeId
listTypeId StopTerm = 0
listTypeId CandidateTerm = 1
listTypeId MapTerm = 2
fromListTypeId :: ListTypeId -> Maybe ListType
fromListTypeId i = lookup i
$ fromList
[ (listTypeId l, l)
| l <- [StopTerm, CandidateTerm, MapTerm]
]
-- data Metrics = Occurrences | Cooccurrences | Specclusion | Genclusion | Cvalue
-- | TfidfCorpus | TfidfGlobal | TirankLocal | TirankGlobal
-- | Community Manager Use Case
-- | Favorites Node enable Swap Node with some synonyms for clarity
-- | Then a Node can be a List which has some synonyms
-- | Then a Node can be a Score which has some synonyms
-- Queries
type Limit = Int
type Offset = Int
type IsTrash = Bool
------------------------------------------------------------------------
-- The whole database is structured as a hierarchical Tree
data Tree a = TreeN { _tn_node :: a, _tn_children :: [Tree a] }
deriving (Show, Read, Eq, Generic, Ord)
$(deriveJSON (unPrefix "_tn_") ''Tree)
instance (Typeable a, ToSchema a) => ToSchema (Tree a) where
declareNamedSchema = wellNamedSchema "_tn_"
instance Arbitrary (Tree NodeTree) where
arbitrary = elements [userTree, userTree]
-- data Tree a = NodeT a [Tree a]
-- same as Data.Tree
leafT :: a -> Tree a
leafT x = TreeN x []
------------------------------------------------------------------------
-- Garg Network is a network of all Garg nodes
--gargNetwork = undefined
-- | Garg Node is Database Schema Typed as specification
-- gargNode gathers all the Nodes of all users on one Node
gargNode :: [Tree NodeTree]
gargNode = [userTree]
-- | User Tree simplified
userTree :: Tree NodeTree
userTree = TreeN (NodeTree "user name" NodeUser 1) [annuaireTree, projectTree]
-- | Project Tree
projectTree :: Tree NodeTree
projectTree = TreeN (NodeTree "Project CNRS/IMT" NodeFolder 2) [corpusTree 10 "A", corpusTree 20 "B"]
-- | Corpus Tree
annuaireTree :: Tree NodeTree
annuaireTree = (leafT $ NodeTree "Annuaire" NodeAnnuaire 41)
corpusTree :: NodeId -> Text -> Tree NodeTree
corpusTree nId t = TreeN (NodeTree ("Corpus " <> t) NodeCorpus nId) ( [ leafT $ NodeTree "Dashboard" NodeDashboard (nId +1)
, leafT $ NodeTree "Graph" NodeGraph (nId +2)
]
-- <> [ leafT $ NodeTree "My lists" Lists 5]
-- <> [ leafT (NodeTree "Metrics A" Metrics 6) ]
-- <> [ leafT (NodeTree "Class A" Classification 7)]
)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Types/Phylo.hs 0000664 0000000 0000000 00000010571 14124644201 0030673 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Types.Phylo
Description : Main Types for Phylomemy
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Specifications of Phylomemy format.
Phylomemy can be described as a Temporal Graph with different scale of
granularity of group of ngrams (terms and multi-terms).
The main type is Phylo which is synonym of Phylomemy (only difference is
the number of chars).
Phylomemy was first described in Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns in science evolution—the rise and fall of scientific fields. PloS one 8, e54847.
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Types.Phylo where
import Control.Lens (makeLenses)
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Text (Text)
import Data.Time.Clock.POSIX (POSIXTime)
import GHC.Generics (Generic)
import Gargantext.Prelude
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
------------------------------------------------------------------------
-- | Phylo datatype descriptor of a phylomemy
-- Duration : time Segment of the whole phylomemy in UTCTime format (start,end)
-- Ngrams : list of all (possible) terms contained in the phylomemy (with their id)
-- Steps : list of all steps to build the phylomemy
data Phylo = Phylo { _phylo_Duration :: (Start, End)
, _phylo_Ngrams :: [Ngram]
, _phylo_Periods :: [PhyloPeriod]
} deriving (Generic)
-- | UTCTime in seconds since UNIX epoch
type Start = POSIXTime
type End = POSIXTime
-- | Indexed Ngram
type Ngram = (NgramId, Text)
type NgramId = Int
-- | PhyloStep : steps of phylomemy on temporal axis
-- Period: tuple (start date, end date) of the step of the phylomemy
-- Levels: levels of granularity
data PhyloPeriod = PhyloPeriod { _phylo_PeriodId :: PhyloPeriodId
, _phylo_PeriodLevels :: [PhyloLevel]
} deriving (Generic)
type PhyloPeriodId = (Start, End)
-- | PhyloLevel : levels of phylomemy on level axis
-- Levels description:
-- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
-- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
-- Level 1: First level of clustering
-- Level N: Nth level of clustering
data PhyloLevel = PhyloLevel { _phylo_LevelId :: PhyloLevelId
, _phylo_LevelGroups :: [PhyloGroup]
} deriving (Generic)
type PhyloLevelId = (PhyloPeriodId, Int)
-- | PhyloGroup : group of ngrams at each level and step
-- Label : maybe has a label as text
-- Ngrams: set of terms that build the group
-- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
-- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
data PhyloGroup = PhyloGroup { _phylo_GroupId :: PhyloGroupId
, _phylo_GroupLabel :: Maybe Text
, _phylo_GroupNgrams :: [NgramId]
, _phylo_GroupPeriodParents :: [Edge]
, _phylo_GroupPeriodChilds :: [Edge]
, _phylo_GroupLevelParents :: [Edge]
, _phylo_GroupLevelChilds :: [Edge]
} deriving (Generic)
type PhyloGroupId = (PhyloLevelId, Int)
type Edge = (PhyloGroupId, Weight)
type Weight = Double
-- | Lenses
makeLenses ''Phylo
makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel
makeLenses ''PhyloGroup
-- | JSON instances
$(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
$(deriveJSON (unPrefix "_phylo_Period" ) ''PhyloPeriod )
$(deriveJSON (unPrefix "_phylo_Level" ) ''PhyloLevel )
$(deriveJSON (unPrefix "_phylo_Group" ) ''PhyloGroup )
-- | ToSchema instances
instance ToSchema Phylo where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
instance ToSchema PhyloPeriod where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_Period")
instance ToSchema PhyloLevel where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_Level")
instance ToSchema PhyloGroup where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_Group")
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Utils.hs 0000664 0000000 0000000 00000001333 14124644201 0027570 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Utils
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Gargantext.Core.Utils (
-- module Gargantext.Utils.Chronos
module Gargantext.Core.Utils.Prefix
, something
) where
import Data.Maybe
import Data.Monoid
-- import Gargantext.Utils.Chronos
import Gargantext.Core.Utils.Prefix
something :: Monoid a => Maybe a -> a
something Nothing = mempty
something (Just a) = a
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Utils/ 0000775 0000000 0000000 00000000000 14124644201 0027234 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Utils/Count.purs 0000664 0000000 0000000 00000004541 14124644201 0031243 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Utils.Count
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
Inspired by Gabriel Gonzalez's "beautiful folds" talk.
-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE BangPatterns #-}
module Gargantext.Core.Utils.Count (head, last, all, any, sum, product, length)
where
import Data.Functor
import Control.Applicative
import qualified Data.Foldable
import Data.Monoid
import Control.Lens (Getting, foldMapOf)
import Gargantext.Prelude hiding (head, sum, length)
data Fold i o = forall m . Monoid m => Fold (i -> m) (m -> o)
instance Functor (Fold i) where
fmap k (Fold tally summarize) = Fold tally (k . summarize)
instance Applicative (Fold i) where
pure o = Fold (\_ -> ()) (\_ -> o)
Fold tallyF summarizeF <*> Fold tallyX summarizeX = Fold tally summarize
where
tally i = (tallyF i, tallyX i)
summarize (nF, nX) = summarizeF nF (summarizeX nX)
focus :: (forall m . Monoid m => Getting m b a) -> Fold a o -> Fold b o
focus lens (Fold tally summarize) = Fold (foldMapOf lens tally) summarize
fold :: Fold i o -> [i] -> o
fold (Fold tally summarize) is = summarize (reduce (map tally is))
where
reduce = Data.Foldable.foldl' (<>) mempty
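-- Illustrative sketch (not part of the original source): because Fold is an
-- Applicative, several summaries can be combined and computed in a single
-- pass over the input list, e.g. in GHCi with this module loaded:
-- >>> fold ((,) <$> sum <*> length) [1, 2, 3 :: Int]
-- (6,3)
-- >>> fold average [1, 2, 3, 4 :: Double]
-- 2.5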
--
head :: Fold a (Maybe a)
head = Fold (First . Just) getFirst
last :: Fold a (Maybe a)
last = Fold (Last . Just) getLast
--
all :: (a -> Bool) -> Fold a Bool
all predicate = Fold (All . predicate) getAll
any :: (a -> Bool) -> Fold a Bool
any predicate = Fold (Any . predicate) getAny
--
sum :: Num n => Fold n n
sum = Fold Sum getSum
product :: Num n => Fold n n
product = Fold Product getProduct
length :: Num n => Fold i n
length = Fold (\_ -> Sum 1) getSum
-- | Optimized average function (/!\ still needs testing)
data Average a = Average { numerator :: !a, denominator :: !Int }
instance Num a => Semigroup (Average a) where
  Average xL nL <> Average xR nR = Average (xL + xR) (nL + nR)
instance Num a => Monoid (Average a) where
  mempty  = Average 0 0
  mappend = (<>)
average :: Fractional a => Fold a a
average = Fold tally summarize
where
tally x = Average x 1
summarize (Average numerator denominator) =
numerator / fromIntegral denominator
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Utils/DateUtils.hs 0000664 0000000 0000000 00000001656 14124644201 0031476 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Utils.DateUtils
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Utils.DateUtils where
import Gargantext.Prelude
import Data.Time (UTCTime, toGregorian, utctDay)
--
--readInt :: IO [Char] -> IO Int
--readInt = readLn
--
--readBool :: IO [Char] -> IO Bool
--readBool = readLn
utc2gregorian :: UTCTime -> (Integer, Int, Int)
utc2gregorian date = toGregorian $ utctDay date
gregorian2year :: (Integer, Int, Int) -> Integer
gregorian2year (y, _m, _d) = y
utc2year :: UTCTime -> Integer
utc2year date = gregorian2year $ utc2gregorian date
averageLength :: Fractional a => [[a1]] -> a
averageLength l = fromIntegral (sum (map length l)) / fromIntegral (length l)
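-- Illustrative sketch (not part of the original source):
-- averageLength [[1,2],[3,4,5,6]] == 3.0
-- (6 elements in total, spread over 2 inner lists)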
--main :: IO ()
--main = do
-- c <- getCurrentTime
-- print c -- $ toYear $ toGregorian $ utctDay c
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Utils/Prefix.hs 0000664 0000000 0000000 00000004061 14124644201 0031026 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Utils.Prefix
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Gargantext.Core.Utils.Prefix
( module Gargantext.Core.Utils.Prefix
, wellNamedSchema
) where
import Prelude
import Data.Aeson (Value, defaultOptions, parseJSON)
import Data.Aeson.TH (Options, fieldLabelModifier, omitNothingFields, sumEncoding, SumEncoding(UntaggedValue))
import Data.Aeson.Types (Parser)
import Data.Char (toLower)
import Data.Swagger.SchemaOptions (SchemaOptions, fromAesonOptions)
import Servant.Job.Utils (wellNamedSchema)
import Text.Read (readMaybe)
-- | Aeson Options that remove the prefix from fields
unPrefix :: String -> Options
unPrefix prefix = defaultOptions
{ fieldLabelModifier = unCapitalize . dropPrefix prefix
, omitNothingFields = True
}
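-- Illustrative sketch (hypothetical record, not part of the original source):
-- given
--   data Status = Status { _status_code :: Int, _status_label :: Maybe Text }
--   $(deriveJSON (unPrefix "_status_") ''Status)
-- the generated JSON keys are "code" and "label" (prefix dropped, first
-- letter lower-cased) and Nothing fields are omitted, since
-- omitNothingFields is True.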
unPrefixUntagged :: String -> Options
unPrefixUntagged prefix = (unPrefix prefix)
{ sumEncoding = UntaggedValue }
unPrefixSwagger :: String -> SchemaOptions
unPrefixSwagger = fromAesonOptions . unPrefix
-- | Lower case leading character
unCapitalize :: String -> String
unCapitalize [] = []
unCapitalize (c:cs) = toLower c : cs
--unCapitalize cs = map toLower cs
-- | Remove given prefix
dropPrefix :: String -> String -> String
dropPrefix prefix input = go prefix input
where
go pre [] = error $ conStringual $ "prefix leftover: " <> pre
go [] (c:cs) = c : cs
go (p:preRest) (c:cRest)
| p == c = go preRest cRest
| otherwise = error $ conStringual $ "not equal: " <> (p:preRest) <> " " <> (c:cRest)
conStringual msg = "dropPrefix: " <> msg <> ". " <> prefix <> " " <> input
parseJSONFromString :: (Read a) => Value -> Parser a
parseJSONFromString v = do
numString <- parseJSON v
case readMaybe (numString :: String) of
Nothing -> fail $ "Invalid number for TransactionID: " ++ show v -- TODO error message too specific
Just n -> return n
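-- Illustrative sketch (not part of the original source): applied to the JSON
-- string "42", parseJSONFromString first parses it as a Haskell String and
-- then reads it with readMaybe, so it succeeds as e.g. an Int; a non-parsable
-- payload such as the JSON string "abc" makes the Parser fail.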
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz.hs 0000664 0000000 0000000 00000000422 14124644201 0027236 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz
Description : Viz tools
Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Visualisation of text stats
-}
module Gargantext.Core.Viz
where
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/ 0000775 0000000 0000000 00000000000 14124644201 0026704 5 ustar 00root root 0000000 0000000 AdaptativePhylo.hs 0000664 0000000 0000000 00000034372 14124644201 0032270 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz {-|
Module : Gargantext.Core.Viz.AdaptativePhylo
Description : Phylomemy definitions and types.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Specifications of Phylomemy export format.
A phylomemy can be described as a temporal graph with different scales of
granularity of groups of ngrams (terms and multi-terms).
The main type is Phylo, which is a synonym of Phylomemy (the only difference
being the number of characters).
References:
Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
in science evolution — the rise and fall of scientific fields. PloS
one 8, e54847.
-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.AdaptativePhylo where
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Text (Text, pack)
import Data.Vector (Vector)
import Data.Map (Map)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import GHC.Generics
import GHC.IO (FilePath)
import Control.DeepSeq (NFData)
import Control.Lens (makeLenses)
import qualified Data.Text.Lazy as TextLazy
----------------
-- | Config | --
----------------
data CorpusParser =
Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int}
| Csv' {_csv'_limit :: Int}
deriving (Show,Generic,Eq)
data SeaElevation =
Constante
{ _cons_start :: Double
, _cons_step :: Double }
| Adaptative
{ _adap_granularity :: Double }
deriving (Show,Generic,Eq)
data Proximity =
WeightedLogJaccard
{ _wlj_sensibility :: Double
{-
-- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
-}
}
| WeightedLogSim
{ _wlj_sensibility :: Double
{-
-- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
-}
}
| Hamming
deriving (Show,Generic,Eq)
data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
data Synchrony =
ByProximityThreshold
{ _bpt_threshold :: Double
, _bpt_sensibility :: Double
, _bpt_scope :: SynchronyScope
, _bpt_strategy :: SynchronyStrategy }
| ByProximityDistribution
{ _bpd_sensibility :: Double
, _bpd_strategy :: SynchronyStrategy }
deriving (Show,Generic,Eq)
data TimeUnit =
Year
{ _year_period :: Int
, _year_step :: Int
, _year_matchingFrame :: Int }
| Month
{ _month_period :: Int
, _month_step :: Int
, _month_matchingFrame :: Int }
| Week
{ _week_period :: Int
, _week_step :: Int
, _week_matchingFrame :: Int }
| Day
{ _day_period :: Int
, _day_step :: Int
, _day_matchingFrame :: Int }
deriving (Show,Generic,Eq)
data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
data Clique =
Fis
{ _fis_support :: Int
, _fis_size :: Int }
| MaxClique
{ _mcl_size :: Int
, _mcl_threshold :: Double
, _mcl_filter :: CliqueFilter }
deriving (Show,Generic,Eq)
data Quality =
Quality { _qua_granularity :: Double
, _qua_minBranch :: Int }
deriving (Show,Generic,Eq)
data Config =
Config { corpusPath :: FilePath
, listPath :: FilePath
, outputPath :: FilePath
, corpusParser :: CorpusParser
, phyloName :: Text
, phyloLevel :: Int
, phyloProximity :: Proximity
, seaElevation :: SeaElevation
, findAncestors :: Bool
, phyloSynchrony :: Synchrony
, phyloQuality :: Quality
, timeUnit :: TimeUnit
, clique :: Clique
, exportLabel :: [PhyloLabel]
, exportSort :: Sort
, exportFilter :: [Filter]
} deriving (Show,Generic,Eq)
defaultConfig :: Config
defaultConfig =
Config { corpusPath = ""
, listPath = ""
, outputPath = ""
, corpusParser = Csv 1000
, phyloName = pack "Default Phylo"
, phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10
, seaElevation = Constante 0.1 0.1
, findAncestors = True
, phyloSynchrony = ByProximityThreshold 0.1 10 SiblingBranches MergeAllGroups
, phyloQuality = Quality 0 1
, timeUnit = Year 3 1 5
, clique = MaxClique 0 3 ByNeighbours
, exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy
, exportFilter = [ByBranchSize 2]
}
instance FromJSON Config
instance ToJSON Config
instance FromJSON CorpusParser
instance ToJSON CorpusParser
instance FromJSON Proximity
instance ToJSON Proximity
instance FromJSON SeaElevation
instance ToJSON SeaElevation
instance FromJSON TimeUnit
instance ToJSON TimeUnit
instance FromJSON CliqueFilter
instance ToJSON CliqueFilter
instance FromJSON Clique
instance ToJSON Clique
instance FromJSON PhyloLabel
instance ToJSON PhyloLabel
instance FromJSON Tagger
instance ToJSON Tagger
instance FromJSON Sort
instance ToJSON Sort
instance FromJSON Order
instance ToJSON Order
instance FromJSON Filter
instance ToJSON Filter
instance FromJSON SynchronyScope
instance ToJSON SynchronyScope
instance FromJSON SynchronyStrategy
instance ToJSON SynchronyStrategy
instance FromJSON Synchrony
instance ToJSON Synchrony
instance FromJSON Quality
instance ToJSON Quality
-- | Software parameters
data Software =
Software { _software_name :: Text
, _software_version :: Text
} deriving (Generic, Show, Eq)
defaultSoftware :: Software
defaultSoftware =
Software { _software_name = pack "Gargantext"
, _software_version = pack "v4" }
-- | Global parameters of a Phylo
data PhyloParam =
PhyloParam { _phyloParam_version :: Text
, _phyloParam_software :: Software
, _phyloParam_config :: Config
} deriving (Generic, Show, Eq)
defaultPhyloParam :: PhyloParam
defaultPhyloParam =
PhyloParam { _phyloParam_version = pack "v2.adaptative"
, _phyloParam_software = defaultSoftware
, _phyloParam_config = defaultConfig }
------------------
-- | Document | --
------------------
-- | Date : a simple Int
type Date = Int
-- | Ngrams : a contiguous sequence of n terms
type Ngrams = Text
-- Document : a piece of Text linked to a Date
-- date = computational date; date' = original string date yyyy-mm-dd
data Document = Document
{ date :: Date
, date' :: Text
, text :: [Ngrams]
, weight :: Maybe Double
, sources :: [Text]
} deriving (Eq,Show,Generic,NFData)
--------------------
-- | Foundation | --
--------------------
-- | The Foundations of a Phylo created from a given TermList
data PhyloFoundations = PhyloFoundations
{ _foundations_roots :: !(Vector Ngrams)
, _foundations_mapList :: TermList
} deriving (Generic, Show, Eq)
data PhyloSources = PhyloSources
{ _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
---------------------------
-- | Co-occurrence Matrix | --
---------------------------
-- | Cooc : a co-occurrence matrix between two ngrams
type Cooc = Map (Int,Int) Double
-------------------
-- | Phylomemy | --
-------------------
-- | Phylo datatype of a phylomemy
-- foundations : the foundations of the phylo
-- timeCooc : a Map of co-occurrences by minimal unit of time (e.g. by year)
-- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
-- param : the parameters of the phylomemy (with the user's configuration)
-- periods : the temporal steps of a phylomemy
data Phylo =
Phylo { _phylo_foundations :: PhyloFoundations
, _phylo_sources :: PhyloSources
, _phylo_timeCooc :: !(Map Date Cooc)
, _phylo_timeDocs :: !(Map Date Double)
, _phylo_termFreq :: !(Map Int Double)
, _phylo_lastTermFreq :: !(Map Int Double)
, _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_param :: PhyloParam
, _phylo_periods :: Map PhyloPeriodId PhyloPeriod
}
deriving (Generic, Show, Eq)
-- | PhyloPeriodId : the id of a given period
type PhyloPeriodId = (Date,Date)
-- | PhyloPeriod : steps of a phylomemy on a temporal axis
-- id: tuple (start date, end date) of the temporal step of the phylomemy
-- levels: levels of granularity
data PhyloPeriod =
PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
, _phylo_periodPeriod' :: (Text,Text)
, _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
} deriving (Generic, Show, Eq)
-- | Level : a level of clustering
type Level = Int
-- | PhyloLevelId : the id of a level of clustering in a given period
type PhyloLevelId = (PhyloPeriodId,Level)
-- | PhyloLevel : levels of phylomemy on a synchronic axis
-- Levels description:
-- Level 0: The foundations and the base of the phylo
-- Level 1: First level of clustering (the Fis)
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data PhyloLevel =
PhyloLevel { _phylo_levelPeriod :: (Date,Date)
, _phylo_levelPeriod' :: (Text,Text)
, _phylo_levelLevel :: Level
, _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
}
deriving (Generic, Show, Eq)
type PhyloGroupId = (PhyloLevelId, Int)
-- | BranchId : (a level, a sequence of branch index)
-- the sequence is a path of heritage from the most to the less specific branch
type PhyloBranchId = (Level, [Int])
-- | PhyloGroup : group of ngrams at each level and period
data PhyloGroup =
PhyloGroup { _phylo_groupPeriod :: (Date,Date)
, _phylo_groupPeriod' :: (Text,Text)
, _phylo_groupLevel :: Level
, _phylo_groupIndex :: Int
, _phylo_groupLabel :: Text
, _phylo_groupSupport :: Support
, _phylo_groupWeight :: Maybe Double
, _phylo_groupSources :: [Int]
, _phylo_groupNgrams :: [Int]
, _phylo_groupCooc :: !(Cooc)
, _phylo_groupBranchId :: PhyloBranchId
, _phylo_groupMeta :: Map Text [Double]
, _phylo_groupLevelParents :: [Pointer]
, _phylo_groupLevelChilds :: [Pointer]
, _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer]
, _phylo_groupAncestors :: [Pointer]
}
deriving (Generic, Show, Eq, NFData)
-- | Weight : a generic measure that can be associated with an Id
type Weight = Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type Pointer = (PhyloGroupId, Weight)
data Filiation = ToParents | ToChilds deriving (Generic, Show)
data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
----------------------
-- | Phylo Clique | --
----------------------
-- | Support : Number of Documents where a Clique occurs
type Support = Int
data PhyloClique = PhyloClique
{ _phyloClique_nodes :: [Int]
, _phyloClique_support :: Support
, _phyloClique_period :: (Date,Date)
, _phyloClique_weight :: Maybe Double
, _phyloClique_sources :: [Int]
} deriving (Generic,NFData,Show,Eq)
----------------
-- | Export | --
----------------
type DotId = TextLazy.Text
data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
data Order = Asc | Desc deriving (Show,Generic,Eq)
data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
data PhyloLabel =
BranchLabel
{ _branch_labelTagger :: Tagger
, _branch_labelSize :: Int }
| GroupLabel
{ _group_labelTagger :: Tagger
, _group_labelSize :: Int }
deriving (Show,Generic,Eq)
data PhyloBranch =
PhyloBranch
{ _branch_id :: PhyloBranchId
, _branch_canonId :: [Int]
, _branch_seaLevel :: [Double]
, _branch_x :: Double
, _branch_y :: Double
, _branch_w :: Double
, _branch_t :: Double
, _branch_label :: Text
, _branch_meta :: Map Text [Double]
} deriving (Generic, Show, Eq)
data PhyloExport =
PhyloExport
{ _export_groups :: [PhyloGroup]
, _export_branches :: [PhyloBranch]
} deriving (Generic, Show)
----------------
-- | Lenses | --
----------------
makeLenses ''Config
makeLenses ''Proximity
makeLenses ''SeaElevation
makeLenses ''Quality
makeLenses ''Clique
makeLenses ''PhyloLabel
makeLenses ''TimeUnit
makeLenses ''PhyloFoundations
makeLenses ''PhyloClique
makeLenses ''Phylo
makeLenses ''PhyloPeriod
makeLenses ''PhyloLevel
makeLenses ''PhyloGroup
makeLenses ''PhyloParam
makeLenses ''PhyloExport
makeLenses ''PhyloBranch
------------------------
-- | JSON instances | --
------------------------
instance FromJSON Phylo
instance ToJSON Phylo
instance FromJSON PhyloSources
instance ToJSON PhyloSources
instance FromJSON PhyloParam
instance ToJSON PhyloParam
instance FromJSON PhyloPeriod
instance ToJSON PhyloPeriod
instance FromJSON PhyloLevel
instance ToJSON PhyloLevel
instance FromJSON Software
instance ToJSON Software
instance FromJSON PhyloGroup
instance ToJSON PhyloGroup
$(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Chart.hs 0000664 0000000 0000000 00000005460 14124644201 0030306 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Chart
Description : Graph utils
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Chart
where
import Data.List (sortOn)
import Data.Map (toList)
import qualified Data.List as List
import Data.Maybe (catMaybes)
import qualified Data.Vector as V
import Gargantext.Core.Types.Main
import Gargantext.Database.Admin.Config
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Database.Query.Table.NodeNode (selectDocsDates)
import Gargantext.Database.Schema.Node
import Gargantext.Prelude
import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
-- Pie Chart
import Gargantext.API.Ngrams.NgramsTree
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Metrics.NgramsByNode
import Gargantext.Database.Schema.Ngrams
import Gargantext.Core.Viz.Types
import qualified Data.HashMap.Strict as HashMap
histoData :: CorpusId -> Cmd err Histo
histoData cId = do
dates <- selectDocsDates cId
let (ls, css) = V.unzip
$ V.fromList
$ sortOn fst -- TODO Vector.sortOn
$ toList
$ occurrencesWith identity dates
pure (Histo ls css)
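-- Illustrative sketch (assumption, not part of the original source): assuming
-- occurrencesWith identity counts how many documents fall on each date label,
-- e.g. ["2019","2019","2020"] ~> fromList [("2019",2),("2020",1)], the sorted
-- Map is then unzipped into parallel vectors of labels and counts for Histo.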
chartData :: FlowCmdM env err m
=> CorpusId -> NgramsType -> ListType
-> m Histo
chartData cId nt lt = do
ls' <- selectNodesWithUsername NodeList userMaster
ls <- map (_node_id) <$> getListsWithParentId cId
ts <- mapTermListRoot ls nt <$> getRepo' ls
let
dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ HashMap.toList dico
group dico' x = case HashMap.lookup x dico' of
Nothing -> x
Just x' -> maybe x identity x'
(_total,mapTerms) <- countNodesByNgramsWith (group dico)
<$> getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms
let (dates, count) = V.unzip $ fmap (\(NgramsTerm t,(d,_)) -> (t, d)) $ V.fromList $ HashMap.toList mapTerms
pure (Histo dates (round <$> count))
treeData :: FlowCmdM env err m
=> CorpusId -> NgramsType -> ListType
-> m (V.Vector NgramsTree)
treeData cId nt lt = do
ls' <- selectNodesWithUsername NodeList userMaster
ls <- map (_node_id) <$> getListsWithParentId cId
ts <- mapTermListRoot ls nt <$> getRepo' ls
let
dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ HashMap.toList dico
cs' <- getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms
m <- getListNgrams ls nt
pure $ V.fromList $ toTree lt cs' m
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Graph.hs 0000664 0000000 0000000 00000026366 14124644201 0030316 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Graph
Description : Graph utils
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Graph
where
import Data.ByteString.Lazy as DBL (readFile, writeFile)
import Data.Text (pack)
import GHC.IO (FilePath)
import qualified Data.Aeson as DA
import qualified Data.Text as T
import qualified Text.Read as T
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Methods.Distances (GraphMetric)
import Gargantext.Prelude
data TypeNode = Terms | Unknown
deriving (Show, Generic)
instance ToJSON TypeNode
instance FromJSON TypeNode
instance ToSchema TypeNode
data Attributes = Attributes { clust_default :: Int }
deriving (Show, Generic)
$(deriveJSON (unPrefix "") ''Attributes)
instance ToSchema Attributes
data Node = Node { node_size :: Int
, node_type :: TypeNode -- TODO NgramsType | Person
, node_id :: Text -- TODO NgramId
, node_label :: Text
, node_x_coord :: Double
, node_y_coord :: Double
, node_attributes :: Attributes
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "node_") ''Node)
instance ToSchema Node where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "node_")
data Edge = Edge { edge_source :: Text
, edge_target :: Text
, edge_weight :: Double
, edge_confluence :: Double
, edge_id :: Text
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "edge_") ''Edge)
instance ToSchema Edge where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "edge_")
---------------------------------------------------------------
data LegendField = LegendField { _lf_id :: Int
, _lf_color :: Text
, _lf_label :: Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_lf_") ''LegendField)
instance ToSchema LegendField where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lf_")
makeLenses ''LegendField
---------------------------------------------------------------
type Version = Int
data ListForGraph =
ListForGraph { _lfg_listId :: ListId
, _lfg_version :: Version
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_lfg_") ''ListForGraph)
instance ToSchema ListForGraph where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lfg_")
makeLenses ''ListForGraph
--
data GraphMetadata =
GraphMetadata { _gm_title :: Text -- title of the graph
, _gm_metric :: GraphMetric
, _gm_corpusId :: [NodeId] -- we can map with different corpus
, _gm_legend :: [LegendField] -- legend of the Graph
, _gm_list :: ListForGraph
, _gm_startForceAtlas :: Bool
-- , _gm_version :: Int
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "_gm_") ''GraphMetadata)
instance ToSchema GraphMetadata where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_gm_")
makeLenses ''GraphMetadata
data Graph = Graph { _graph_nodes :: [Node]
, _graph_edges :: [Edge]
, _graph_metadata :: Maybe GraphMetadata
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "_graph_") ''Graph)
makeLenses ''Graph
instance ToSchema Graph where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_graph_")
-- | Instances for the mock
instance Arbitrary Graph where
arbitrary = elements $ [defaultGraph]
defaultGraph :: Graph
defaultGraph = Graph {_graph_nodes = [Node {node_x_coord=0, node_y_coord=0, node_size = 4, node_type = Terms, node_id = pack "0", node_label = pack "animal", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 3, node_type = Terms, node_id = pack "1", node_label = pack "bird", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "2", node_label = pack "boy", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "3", node_label = pack "dog", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "4", node_label = pack "girl", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 4, node_type = Terms, node_id = pack "5", node_label = pack "human body", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 3, node_type = Terms, node_id = pack "6", node_label = pack "object", node_attributes = Attributes {clust_default = 2}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "7", node_label = pack "pen", node_attributes = Attributes {clust_default = 2}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "8", node_label = pack "table", node_attributes = Attributes {clust_default = 2}}], _graph_edges = [Edge {edge_source = pack "0", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "0"},Edge {edge_source = pack "1", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "1"},Edge {edge_source = pack "1", edge_target = pack "1", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "2"},Edge {edge_source = pack "2", edge_target = pack "2", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "3"},Edge {edge_source = pack "2", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "4"},Edge {edge_source = pack "3", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "5"},Edge {edge_source = pack "3", edge_target = pack "1", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "6"},Edge {edge_source = pack "3", edge_target = pack "3", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "7"},Edge {edge_source = pack "4", edge_target = pack "4", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "8"},Edge {edge_source = pack "4", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "9"},Edge {edge_source = pack "5", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "10"},Edge {edge_source = pack "6", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "11"},Edge {edge_source = pack "7", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "12"},Edge {edge_source = pack "7", edge_target = pack "7", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "13"},Edge {edge_source = pack "8", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "14"},Edge {edge_source = pack "8", edge_target = pack "7", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "15"},Edge {edge_source = pack "8", edge_target = pack "8", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "16"}], _graph_metadata = Nothing}
-----------------------------------------------------------
-- V3 Gargantext Version
data AttributesV3 = AttributesV3 { cl :: Int }
deriving (Show, Generic)
$(deriveJSON (unPrefix "") ''AttributesV3)
data NodeV3 = NodeV3 { no_id :: Int
, no_at :: AttributesV3
, no_s :: Int
, no_lb :: Text
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "no_") ''NodeV3)
data EdgeV3 = EdgeV3 { eo_s :: Int
, eo_t :: Int
, eo_w :: Text
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "eo_") ''EdgeV3)
data GraphV3 = GraphV3 { go_links :: [EdgeV3]
, go_nodes :: [NodeV3]
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "go_") ''GraphV3)
-----------------------------------------------------------
data Camera = Camera { _camera_ratio :: Double
, _camera_x :: Double
, _camera_y :: Double }
deriving (Show, Generic)
$(deriveJSON (unPrefix "_camera_") ''Camera)
makeLenses ''Camera
instance ToSchema Camera where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_camera_")
-----------------------------------------------------------
data HyperdataGraph =
HyperdataGraph { _hyperdataGraph :: !(Maybe Graph)
, _hyperdataCamera :: !(Maybe Camera)
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_") ''HyperdataGraph)
instance ToSchema HyperdataGraph where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
defaultHyperdataGraph :: HyperdataGraph
defaultHyperdataGraph = HyperdataGraph Nothing Nothing
instance Hyperdata HyperdataGraph
makeLenses ''HyperdataGraph
instance FromField HyperdataGraph
where
fromField = fromField'
instance QueryRunnerColumnDefault PGJsonb HyperdataGraph
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
-----------------------------------------------------------
-- This type is used to return graph via API
-- hyperdataGraphAPI field is not a Maybe anymore – graph is always computed
data HyperdataGraphAPI =
HyperdataGraphAPI { _hyperdataAPIGraph :: Graph
, _hyperdataAPICamera :: !(Maybe Camera)
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_hyperdataAPI") ''HyperdataGraphAPI)
instance ToSchema HyperdataGraphAPI where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hyperdataAPI")
makeLenses ''HyperdataGraphAPI
instance FromField HyperdataGraphAPI
where
fromField = fromField'
-----------------------------------------------------------
graphV3ToGraph :: GraphV3 -> Graph
graphV3ToGraph (GraphV3 links nodes) = Graph (map nodeV32node nodes) (zipWith linkV32edge [1..] links) Nothing
where
nodeV32node :: NodeV3 -> Node
nodeV32node (NodeV3 no_id' (AttributesV3 cl') no_s' no_lb')
= Node no_s' Terms (cs $ show no_id') no_lb' 0 0 (Attributes cl')
linkV32edge :: Int -> EdgeV3 -> Edge
linkV32edge n (EdgeV3 eo_s' eo_t' eo_w') = Edge (cs $ show eo_s')
(cs $ show eo_t')
((T.read $ T.unpack eo_w') :: Double)
0.5
(cs $ show n)
graphV3ToGraphWithFiles :: FilePath -> FilePath -> IO ()
graphV3ToGraphWithFiles g1 g2 = do
-- GraphV3 <- IO Fichier
graph <- DBL.readFile g1
let newGraph = case DA.decode graph :: Maybe GraphV3 of
Nothing -> panic (T.pack "no graph")
Just new -> new
DBL.writeFile g2 (DA.encode $ graphV3ToGraph newGraph)
readGraphFromJson :: MonadBase IO m => FilePath -> m (Maybe Graph)
readGraphFromJson fp = do
graph <- liftBase $ DBL.readFile fp
pure $ DA.decode graph
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Graph/ 0000775 0000000 0000000 00000000000 14124644201 0027745 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Graph/API.hs 0000664 0000000 0000000 00000025536 14124644201 0030725 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Graph.API
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE OverloadedLists #-} -- allows to write Map and HashMap as lists
{-# LANGUAGE TypeOperators #-}
module Gargantext.Core.Viz.Graph.API
where
import Control.Lens (set, (^.), _Just, (^?), at)
import Data.Aeson
import Data.Maybe (fromMaybe)
import Data.Swagger
import Data.Text hiding (head)
import Debug.Trace (trace)
import GHC.Generics (Generic)
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Prelude
import Gargantext.Core.Methods.Distances (Distance(..), GraphMetric(..), withMetric)
import Gargantext.Core.NodeStory
import Gargantext.Core.Types.Main
import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.GEXF ()
import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph)
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Query.Table.Node.User (getNodeUser)
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Ngrams
import Gargantext.Prelude
import Servant
import Servant.Job.Async
import Servant.XML
import qualified Data.HashMap.Strict as HashMap
------------------------------------------------------------------------
-- | There is no Delete specific API for Graph since it can be deleted
-- as simple Node.
type GraphAPI = Get '[JSON] HyperdataGraphAPI
:<|> "async" :> GraphAsyncAPI
:<|> "clone"
:> ReqBody '[JSON] HyperdataGraphAPI
:> Post '[JSON] NodeId
:<|> "gexf" :> Get '[XML] (Headers '[Servant.Header "Content-Disposition" Text] Graph)
:<|> "versions" :> GraphVersionsAPI
data GraphVersions =
GraphVersions { gv_graph :: Maybe Int
, gv_repo :: Int
}
deriving (Show, Generic)
instance ToJSON GraphVersions
instance ToSchema GraphVersions
graphAPI :: UserId -> NodeId -> GargServer GraphAPI
graphAPI u n = getGraph u n
:<|> graphAsync u n
:<|> graphClone u n
:<|> getGraphGexf u n
:<|> graphVersionsAPI u n
------------------------------------------------------------------------
getGraph :: UserId -> NodeId -> GargNoServer HyperdataGraphAPI
getGraph _uId nId = do
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
let
graph = nodeGraph ^. node_hyperdata . hyperdataGraph
camera = nodeGraph ^. node_hyperdata . hyperdataCamera
cId = maybe (panic "[G.V.G.API] Node has no parent")
identity
$ nodeGraph ^. node_parent_id
listId <- defaultList cId
repo <- getRepo' [listId]
-- TODO Distance in Graph params
case graph of
Nothing -> do
let defaultMetric = Order1
graph' <- computeGraph cId (withMetric defaultMetric) NgramsTerms repo
mt <- defaultGraphMetadata cId "Title" repo defaultMetric
let
graph'' = set graph_metadata (Just mt) graph'
hg = HyperdataGraphAPI graph'' camera
-- _ <- updateHyperdata nId hg
_ <- updateHyperdata nId (HyperdataGraph (Just graph'') camera)
pure $ trace "[G.V.G.API] Graph empty, computing" hg
Just graph' -> pure $ trace "[G.V.G.API] Graph exists, returning" $
HyperdataGraphAPI graph' camera
recomputeGraph :: UserId -> NodeId -> Maybe GraphMetric -> GargNoServer Graph
recomputeGraph _uId nId maybeDistance = do
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
let
graph = nodeGraph ^. node_hyperdata . hyperdataGraph
camera = nodeGraph ^. node_hyperdata . hyperdataCamera
graphMetadata = graph ^? _Just . graph_metadata . _Just
listVersion = graph ^? _Just . graph_metadata . _Just . gm_list . lfg_version
graphMetric = case maybeDistance of
Nothing -> graph ^? _Just . graph_metadata . _Just . gm_metric
_ -> maybeDistance
let
cId = maybe (panic "[G.V.G.API.recomputeGraph] Node has no parent")
identity
$ nodeGraph ^. node_parent_id
similarity = case graphMetric of
Nothing -> withMetric Order1
Just m -> withMetric m
listId <- defaultList cId
repo <- getRepo' [listId]
let v = repo ^. unNodeStory . at listId . _Just . a_version
case graph of
Nothing -> do
graph' <- computeGraph cId similarity NgramsTerms repo
mt <- defaultGraphMetadata cId "Title" repo (fromMaybe Order1 maybeDistance)
let graph'' = set graph_metadata (Just mt) graph'
_ <- updateHyperdata nId (HyperdataGraph (Just graph'') camera)
pure $ trace "[G.V.G.API.recomputeGraph] Graph empty, computed" graph''
Just graph' -> if listVersion == Just v
then pure graph'
else do
graph'' <- computeGraph cId similarity NgramsTerms repo
let graph''' = set graph_metadata graphMetadata graph''
_ <- updateHyperdata nId (HyperdataGraph (Just graph''') camera)
pure $ trace "[G.V.G.API] Graph exists, recomputing" graph'''
-- TODO use Database Monad only here ?
computeGraph :: HasNodeError err
=> CorpusId
-> Distance
-> NgramsType
-> NodeListStory
-> Cmd err Graph
computeGraph cId d nt repo = do
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot MapTerm
$ mapTermListRoot [lId] nt repo
myCooc <- HashMap.filter (>2) -- Removing the hapax (ngrams with 1 cooc)
<$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
-- printDebug "myCooc" myCooc
-- saveAsFileDebug "debug/my-cooc" myCooc
graph <- liftBase $ cooc2graphWith Spinglass d 0 myCooc
-- saveAsFileDebug "debug/graph" graph
pure graph
defaultGraphMetadata :: HasNodeError err
=> CorpusId
-> Text
-> NodeListStory
-> GraphMetric
-> Cmd err GraphMetadata
defaultGraphMetadata cId t repo gm = do
lId <- defaultList cId
pure $ GraphMetadata {
_gm_title = t
, _gm_metric = gm
, _gm_corpusId = [cId]
, _gm_legend = [
LegendField 1 "#FFF" "Cluster1"
, LegendField 2 "#FFF" "Cluster2"
, LegendField 3 "#FFF" "Cluster3"
, LegendField 4 "#FFF" "Cluster4"
]
, _gm_list = (ListForGraph lId (repo ^. unNodeStory . at lId . _Just . a_version))
, _gm_startForceAtlas = True
}
-- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10])
------------------------------------------------------------
type GraphAsyncAPI = Summary "Recompute graph"
:> "recompute"
:> AsyncJobsAPI JobLog () JobLog
graphAsync :: UserId -> NodeId -> GargServer GraphAsyncAPI
graphAsync u n =
serveJobsAPI $
JobFunction (\_ log' -> graphRecompute u n (liftBase . log'))
graphRecompute :: UserId
-> NodeId
-> (JobLog -> GargNoServer ())
-> GargNoServer JobLog
graphRecompute u n logStatus = do
logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
_g <- trace (show u) $ recomputeGraph u n Nothing
pure JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
------------------------------------------------------------
type GraphVersionsAPI = Summary "Graph versions"
:> Get '[JSON] GraphVersions
:<|> Summary "Recompute graph version"
:> Post '[JSON] Graph
graphVersionsAPI :: UserId -> NodeId -> GargServer GraphVersionsAPI
graphVersionsAPI u n =
graphVersions 0 n
:<|> recomputeVersions u n
graphVersions :: Int -> NodeId -> GargNoServer GraphVersions
graphVersions n nId = do
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
let
graph = nodeGraph
^. node_hyperdata
. hyperdataGraph
listVersion = graph
^? _Just
. graph_metadata
. _Just
. gm_list
. lfg_version
mcId <- getClosestParentIdByType nId NodeCorpus
let cId = maybe (panic "[G.V.G.API] Node has no parent") identity mcId
maybeListId <- defaultListMaybe cId
case maybeListId of
Nothing -> if n <= 2
then graphVersions (n+1) cId
else panic "[G.V.G.API] list not found after iterations"
Just listId -> do
repo <- getRepo' [listId]
let v = repo ^. unNodeStory . at listId . _Just . a_version
printDebug "graphVersions" v
pure $ GraphVersions { gv_graph = listVersion
, gv_repo = v }
recomputeVersions :: UserId -> NodeId -> GargNoServer Graph
recomputeVersions uId nId = recomputeGraph uId nId Nothing
------------------------------------------------------------
graphClone :: UserId
-> NodeId
-> HyperdataGraphAPI
-> GargNoServer NodeId
graphClone uId pId (HyperdataGraphAPI { _hyperdataAPIGraph = graph
, _hyperdataAPICamera = camera }) = do
let nodeType = NodeGraph
nodeUser <- getNodeUser (NodeId uId)
nodeParent <- getNodeWith pId (Proxy :: Proxy HyperdataGraph)
let uId' = nodeUser ^. node_user_id
nIds <- mkNodeWithParent nodeType (Just pId) uId' $ nodeParent ^. node_name
case nIds of
[] -> pure pId
(nId:_) -> do
let graphP = graph
let graphP' = set (graph_metadata . _Just . gm_startForceAtlas) False graphP
_ <- updateHyperdata nId (HyperdataGraph (Just graphP') camera)
pure nId
------------------------------------------------------------
getGraphGexf :: UserId
-> NodeId
-> GargNoServer (Headers '[Servant.Header "Content-Disposition" Text] Graph)
getGraphGexf uId nId = do
HyperdataGraphAPI { _hyperdataAPIGraph = graph } <- getGraph uId nId
pure $ addHeader "attachment; filename=graph.gexf" graph
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Graph/Bridgeness.hs 0000664 0000000 0000000 00000005660 14124644201 0032316 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Graph.Bridgeness
Description : Bridgeness filter
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Given a graph with partitions (from the Louvain algorithm), Bridgeness
uniformly filters inter-community links.
TODO rewrite Bridgeness with "equivalence structurale" metrics (Confluence)
TODO use Map LouvainNodeId (Map LouvainNodeId)
-}
module Gargantext.Core.Viz.Graph.Bridgeness -- (bridgeness)
where
import Data.List (concat, sortOn)
import Data.Map (Map, fromListWith, lookup, toList, mapWithKey, elems)
import Data.Maybe (catMaybes)
import Data.Ord (Down(..))
import Gargantext.Prelude
import qualified Data.Map as DM
import Gargantext.Core.Viz.Graph.Tools.IGraph (ClusterNode(..))
----------------------------------------------------------------------
type Partitions a = Map (Int, Int) Double -> IO [a]
----------------------------------------------------------------------
class ToComId a where
nodeId2comId :: a -> (NodeId,CommunityId)
type NodeId = Int
type CommunityId = Int
----------------------------------------------------------------------
instance ToComId ClusterNode where
nodeId2comId (ClusterNode i1 i2) = (i1, i2)
----------------------------------------------------------------------
----------------------------------------------------------------------
type Bridgeness = Double
bridgeness :: ToComId a => Bridgeness
-> [a]
-> Map (NodeId, NodeId) Double
-> Map (NodeId, NodeId) Double
bridgeness = bridgeness' nodeId2comId
bridgeness' :: (a -> (Int, Int))
-> Bridgeness
-> [a]
-> Map (Int, Int) Double
-> Map (Int, Int) Double
bridgeness' f b ns = DM.fromList
. concat
. DM.elems
. filterComs b
. groupEdges (DM.fromList $ map f ns)
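-- Illustrative sketch (reading of groupEdges and filterComs below, not part
-- of the original source): edges are grouped by the communities of their two
-- endpoints (edges whose endpoints are missing from the partition map are
-- dropped); groups where both communities coincide are kept as-is, while for
-- each pair of distinct communities only the single heaviest edge survives
-- (take 1 . sortOn (Down . snd)), which is the uniform filtering of
-- inter-community links described in the module header.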
groupEdges :: (Ord a, Ord b1)
=> Map b1 a
-> Map (b1, b1) b2
-> Map (a, a) [((b1, b1), b2)]
groupEdges m = fromListWith (<>)
. catMaybes
. map (\((n1,n2), d)
-> let
n1n2_m = (,) <$> lookup n1 m <*> lookup n2 m
n1n2_d = Just [((n1,n2),d)]
in (,) <$> n1n2_m <*> n1n2_d
)
. toList
-- | TODO : sortOn Confluence
filterComs :: (Ord n1, Eq n2)
=> p
-> Map (n2, n2) [(a3, n1)]
-> Map (n2, n2) [(a3, n1)]
filterComs _b m = DM.filter (\n -> length n > 0) $ mapWithKey filter' m
where
filter' (c1,c2) a
| c1 == c2 = a
-- TODO use n here
| otherwise = take 1 $ sortOn (Down . snd) a
where
_n :: Int
_n = round $ 100 * a' / t
a'= fromIntegral $ length a
t :: Double
t = fromIntegral $ length $ concat $ elems m
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Graph/FGL.hs 0000664 0000000 0000000 00000004014 14124644201 0030710 0 ustar 00root root 0000000 0000000 {-| Module : Gargantext.Core.Viz.Graph.FGL
Description : FGL main functions used in Garg
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main FGL funs/types to ease portability with IGraph.
-}
{-# LANGUAGE ConstraintKinds #-}
module Gargantext.Core.Viz.Graph.FGL where
import Gargantext.Prelude
import qualified Data.Graph.Inductive as FGL
import Data.List as List
------------------------------------------------------------------
-- | Main Types
type Graph_Undirected = FGL.Gr () ()
type Graph_Directed = FGL.Gr () ()
type Graph = FGL.Graph
type Node = FGL.Node -- Int
type Edge = FGL.Edge -- (Int, Int)
------------------------------------------------------------------
-- | Main Functions
mkGraph :: [Node] -> [Edge] -> Graph_Undirected
mkGraph = FGL.mkUGraph
neighbors :: Graph gr => gr a b -> Node -> [Node]
neighbors = FGL.neighbors
-- | TODO bug: if graph is undirected, we need to filter
-- nub . (map (\(n1,n2) -> if n1 < n2 then (n1,n2) else (n2,n1))) . FGL.edges
edges :: Graph gr => gr a b -> [Edge]
edges = FGL.edges
nodes :: Graph gr => gr a b -> [Node]
nodes = FGL.nodes
------------------------------------------------------------------------
-- | Graph Tools
filterNeighbors :: Graph_Undirected -> Node -> [Node]
filterNeighbors g n = List.nub $ neighbors g n
-- Q: why not D.G.I.deg ? (Int as result)
degree :: Graph_Undirected -> Node -> Double
degree g n = fromIntegral $ List.length (filterNeighbors g n)
vcount :: Graph_Undirected -> Double
vcount = fromIntegral . List.length . List.nub . nodes
-- | TODO tests, optim and use IGraph library, fix IO ?
ecount :: Graph_Undirected -> Double
ecount = fromIntegral . List.length . List.nub . edges
------------------------------------------------------------------
-- | Main sugared functions
mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected
mkGraphUfromEdges es = mkGraph ns es
where
ns = List.nub (a <> b)
where
(a, b) = List.unzip es
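-- Illustrative sketch (not part of the original source):
-- mkGraphUfromEdges [(1,2),(2,3)] builds an unlabelled graph over the nodes
-- [1,2,3]; on that graph, degree g 2 == 2.0 and vcount g == 3.0.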
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Graph/GEXF.hs 0000664 0000000 0000000 00000004772 14124644201 0031044 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Graph.GEXF
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE OverloadedLists #-} -- allows to write Map and HashMap as lists
{-# LANGUAGE TypeOperators #-}
module Gargantext.Core.Viz.Graph.GEXF
where
import Gargantext.Prelude
import Gargantext.Core.Viz.Graph
import qualified Data.HashMap.Lazy as HashMap
import qualified Gargantext.Prelude as P
import qualified Gargantext.Core.Viz.Graph as G
import qualified Xmlbf as Xmlbf
-- Converts to GEXF format
-- See https://gephi.org/gexf/format/
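-- Illustrative sketch of the document produced by the instance below
-- (values are examples, not part of the original source):
--   <gexf xmlns="http://www.gexf.net/1.2draft" version="1.2">
--     <meta lastmodifieddate="2020-03-13">
--       <creator>Gargantext.org</creator>
--       <description>Gargantext gexf file</description>
--     </meta>
--     <graph mode="static" defaultedgetype="directed">
--       <nodes><node id="0" label="animal"/> ... </nodes>
--       <edges><edge id="0" source="0" target="1"/> ... </edges>
--     </graph>
--   </gexf>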
instance Xmlbf.ToXml Graph where
toXml (Graph { _graph_nodes = graphNodes
, _graph_edges = graphEdges }) = root graphNodes graphEdges
where
root :: [G.Node] -> [G.Edge] -> [Xmlbf.Node]
root gn ge =
Xmlbf.element "gexf" params $ meta <> (graph gn ge)
where
params = HashMap.fromList [ ("xmlns", "http://www.gexf.net/1.2draft")
, ("version", "1.2") ]
meta = Xmlbf.element "meta" params $ creator <> desc
where
params = HashMap.fromList [ ("lastmodifieddate", "2020-03-13") ]
creator = Xmlbf.element "creator" HashMap.empty $ Xmlbf.text "Gargantext.org"
desc = Xmlbf.element "description" HashMap.empty $ Xmlbf.text "Gargantext gexf file"
graph :: [G.Node] -> [G.Edge] -> [Xmlbf.Node]
graph gn ge = Xmlbf.element "graph" params $ (nodes gn) <> (edges ge)
where
params = HashMap.fromList [ ("mode", "static")
, ("defaultedgetype", "directed") ]
nodes :: [G.Node] -> [Xmlbf.Node]
nodes gn = Xmlbf.element "nodes" HashMap.empty $ P.concatMap node' gn
node' :: G.Node -> [Xmlbf.Node]
node' (G.Node { node_id = nId, node_label = l }) =
Xmlbf.element "node" params []
where
params = HashMap.fromList [ ("id", nId)
, ("label", l) ]
edges :: [G.Edge] -> [Xmlbf.Node]
edges gn = Xmlbf.element "edges" HashMap.empty $ P.concatMap edge gn
edge :: G.Edge -> [Xmlbf.Node]
edge (G.Edge { edge_id = eId, edge_source = es, edge_target = et }) =
Xmlbf.element "edge" params []
where
params = HashMap.fromList [ ("id", eId)
, ("source", es)
, ("target", et) ]
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Graph/Index.hs 0000664 0000000 0000000 00000010222 14124644201 0031345 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Graph.Index
Description : Tools to compute distances from co-occurrences
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Basically @score@ takes an Accelerate function as first input, a Map
of co-occurrences as second input and outputs a Map, handling the
indexing automatically.
TODO:
--cooc2fgl :: Ord t, Integral n => Map (t, t) n -> Graph
--fgl2json
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.Core.Viz.Graph.Index
where
import qualified Data.Array.Accelerate as A
import qualified Data.Array.Accelerate.Interpreter as A
import Data.Array.Accelerate (Matrix, Elt, Shape, (:.)(..), Z(..))
import Data.Maybe (fromMaybe)
import Data.Set (Set)
import qualified Data.Set as S
import Data.Map (Map)
import qualified Data.Map.Strict as M
-- import Data.Vector (Vector)
import Gargantext.Prelude
type Index = Int
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
score :: (Ord t) => MatrixShape
-> (A.Matrix Int -> A.Matrix Double)
-> Map (t, t) Int
-> Map (t, t) Double
score s f m = fromIndex fromI . mat2map . f $ cooc2mat s toI m
where
(toI, fromI) = createIndices m
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
cooc2mat :: Ord t => MatrixShape -> Map t Index -> Map (t, t) Int -> Matrix Int
cooc2mat sym ti m = map2mat sym 0 n idx
where
n = M.size ti
idx = toIndex ti m -- it is important to make sure that toIndex is ran only once.
data MatrixShape = Triangle | Square
map2mat :: Elt a => MatrixShape -> a -> Int -> Map (Index, Index) a -> Matrix a
map2mat sym def n m = A.fromFunction shape getData
where
getData = (\(Z :. x :. y) ->
case sym of
Triangle -> fromMaybe def (M.lookup (x,y) m)
Square -> fromMaybe (fromMaybe def $ M.lookup (y,x) m)
$ M.lookup (x, y) m
)
shape = (Z :. n :. n)
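-- Illustrative sketch (not part of the original source): map2mat fills a
-- dense n x n Accelerate matrix from a sparse Map of (row, col) entries.
-- With Triangle only the exact (x, y) key is consulted; with Square a missing
-- (x, y) falls back to (y, x), which symmetrises an upper-triangular
-- co-occurrence map.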
mat2map :: (Elt a, Shape (Z :. Index)) =>
A.Array (Z :. Index :. Index) a -> Map (Index, Index) a
mat2map m = M.fromList . map f . A.toList . A.run . A.indexed $ A.use m
where
-- Z :. _ :. n = A.arrayShape m
f ((Z :. i :. j), x) = ((i, j), x)
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
toIndex :: Ord t
=> Map t Index
-> Map (t,t) a
-> Map (Index,Index) a
toIndex = indexConversion
fromIndex :: Ord t => Map Index t -> Map (Index, Index) a -> Map (t,t) a
fromIndex ni ns = indexConversion ni ns
indexConversion :: (Ord b, Ord k) => Map k b -> Map (k,k) a -> Map (b, b) a
indexConversion index ms = M.fromList
$ map (\((k1,k2),c) -> ( ((M.!) index k1, (M.!) index k2), c))
(M.toList ms)
---------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--fromIndex' :: Ord t => Vector t -> Map (Index, Index) a -> Map (t,t) a
--fromIndex' vi ns = undefined
-- TODO: returing a Vector should be faster than a Map
-- createIndices' :: Ord t => Map (t, t) b -> (Map t Index, Vector t)
-- createIndices' = undefined
createIndices :: Ord t => Map (t, t) b -> (Map t Index, Map Index t)
createIndices = set2indices . map2set
where
map2set :: Ord t => Map (t, t) a -> Set t
map2set cs' = foldl' (\s ((t1,t2),_) -> insert [t1,t2] s ) S.empty (M.toList cs')
where
insert as s = foldl' (\s' t -> S.insert t s') s as
set2indices :: Ord t => Set t -> (Map t Index, Map Index t)
set2indices s = (M.fromList toIndex', M.fromList fromIndex')
where
fromIndex' = zip [0..] xs
toIndex' = zip xs [0..]
xs = S.toList s
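-- Illustrative sketch (not part of the original source):
-- createIndices (M.fromList [(("cat","dog"), 1)])
--   == ( M.fromList [("cat",0), ("dog",1)]
--      , M.fromList [(0,"cat"), (1,"dog")] )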
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Graph/Legend.hs 0000664 0000000 0000000 00000001744 14124644201 0031505 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Graph.Legend
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Graph.Legend
where
{-
import Data.Ord (Down(..))
import Gargantext.Prelude
import Data.Map (Map, fromListWith, lookup, toList, mapWithKey, elems)
import qualified Data.Map as DM
import Data.Maybe (catMaybes)
import Data.List (concat, sortOn)
import Gargantext.Core.Viz.Graph.Louvain (LouvainNodeId, CommunityId, comId2nodeId)
[LouvainNode] -> Map CommunityId LouvainNodeId
[(CommunityId, [LouvainNodeId])]
sort by length LouvainNodeIds
Cooc -> DGI.Graph
sort [LouvainNodeId]
subgraph with [LouvainNodeId]
-> prendre le noeud le mieux connecté (degree to start with)
Map NodeId Label
-> map [LouvainNodeId] -> [(CommunityId, take 3 [Label])]
use specGen incExc score to order the labels
take 7 [(CommunityId, take 3 [Label])]
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Graph/Tools.hs 0000664 0000000 0000000 00000031670 14124644201 0031410 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Graph.Tools
Description : Tools to build Graph
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Core.Viz.Graph.Tools
where
-- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Text (Text)
import Debug.Trace (trace)
import GHC.Float (sin, cos)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Methods.Distances (Distance(..), measure)
import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
import Gargantext.Core.Statistics
import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, ClusterNode)
import Gargantext.Prelude
import IGraph.Random -- (Gen(..))
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector.Storable as Vec
import qualified IGraph as Igraph
import qualified IGraph.Algorithms.Layout as Layout
-------------------------------------------------------------
defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
defaultClustering = spinglass 1
-------------------------------------------------------------
type Threshold = Double
cooc2graph' :: Ord t => Distance
-> Double
-> Map (t, t) Int
-> Map (Index, Index) Double
cooc2graph' distance threshold myCooc
= Map.filter (> threshold)
$ mat2map
$ measure distance
$ case distance of
Conditional -> map2mat Triangle 0 tiSize
Distributional -> map2mat Square 0 tiSize
$ Map.filter (> 1) myCooc'
where
(ti, _) = createIndices myCooc
tiSize = Map.size ti
myCooc' = toIndex ti myCooc
data PartitionMethod = Louvain | Spinglass
-- | coocurrences graph computation
cooc2graphWith :: PartitionMethod
-> Distance
-> Threshold
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graphWith Louvain = undefined -- TODO use IGraph bindings
cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
cooc2graph'' :: Ord t => Distance
-> Double
-> Map (t, t) Int
-> Map (Index, Index) Double
cooc2graph'' distance threshold myCooc = neighbourMap
where
(ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc
matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc
neighbourMap = filterByNeighbours threshold
$ mat2map distanceMat
-- Quentin
filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
filterByNeighbours threshold distanceMap = filteredMap
where
indexes :: [Index]
indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
filteredMap :: Map (Index, Index) Double
filteredMap = Map.fromList
$ List.concat
$ map (\idx ->
let selected = List.reverse
$ List.sortOn snd
$ Map.toList
$ Map.filter (> 0)
$ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
in List.take (round threshold) selected
) indexes
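-- Illustrative example for filterByNeighbours above: the threshold acts as a
-- per-node neighbour count, so with threshold = 2 each source node keeps at
-- most its 2 strongest outgoing similarities:
--   filterByNeighbours 2 (Map.fromList [((0,1),0.9),((0,2),0.5),((0,3),0.1),((1,2),0.7)])
--     == Map.fromList [((0,1),0.9),((0,2),0.5),((1,2),0.7)]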
doDistanceMap :: Distance
-> Threshold
-> HashMap (NgramsTerm, NgramsTerm) Int
-> (Map (Int,Int) Double, Map (Index, Index) Int, Map NgramsTerm Index)
doDistanceMap distance threshold myCooc = (distanceMap, myCooc', ti)
where
-- TODO remove below
theMatrix = Map.fromList
$ HashMap.toList myCooc
(ti, _) = createIndices theMatrix
tiSize = Map.size ti
myCooc' = toIndex ti theMatrix
matCooc = case distance of -- Shape of the Matrix
Conditional -> map2mat Triangle 0 tiSize
Distributional -> map2mat Square 0 tiSize
$ case distance of -- Removing the Diagonal ?
Conditional -> Map.filterWithKey (\(a,b) _ -> a /= b)
Distributional -> identity
$ Map.filter (>1) myCooc'
similarities = measure distance matCooc
links = round (let n :: Double = fromIntegral tiSize in n * log n)
distanceMap = Map.fromList $ List.take links
$ List.sortOn snd
$ Map.toList
$ case distance of
Conditional -> Map.filter (> threshold)
Distributional -> Map.filter (> 0)
$ mat2map similarities
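-- The link budget above grows as n * ln n ('log' is the natural logarithm):
-- e.g. with tiSize = 1000 terms, at most round (1000 * log 1000) = 6908
-- similarities are kept.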
cooc2graphWith' :: ToComId a
=> Partitions a
-> Distance
-> Threshold
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graphWith' doPartitions distance threshold myCooc = do
let
(distanceMap, myCooc', ti) = doDistanceMap distance threshold myCooc
nodesApprox :: Int
nodesApprox = n'
where
(as, bs) = List.unzip $ Map.keys distanceMap
n' = Set.size $ Set.fromList $ as <> bs
ClustersParams rivers _level = clustersParams nodesApprox
{- -- Debug
saveAsFileDebug "debug/distanceMap" distanceMap
printDebug "similarities" similarities
-}
partitions <- if (Map.size distanceMap > 0)
then doPartitions distanceMap
else panic "Text.Flow: DistanceMap is empty"
let
-- bridgeness' = distanceMap
bridgeness' = trace ("Rivers: " <> show rivers)
$ bridgeness rivers partitions distanceMap
confluence' = confluence (Map.keys bridgeness') 3 True False
pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
myCooc' bridgeness' confluence' partitions
------------------------------------------------------------------------
------------------------------------------------------------------------
data ClustersParams = ClustersParams { bridgness :: Double
, louvain :: Text
} deriving (Show)
clustersParams :: Int -> ClustersParams
clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y
{- where
y | x < 100 = "0.000001"
| x < 350 = "0.000001"
| x < 500 = "0.000001"
| x < 1000 = "0.000001"
| otherwise = "1"
-}
----------------------------------------------------------
-- | From data to Graph
data2graph :: ToComId a
=> [(Text, Int)]
-> Map (Int, Int) Int
-> Map (Int, Int) Double
-> Map (Int, Int) Double
-> [a]
-> Graph
data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing
where
community_id_by_node_id = Map.fromList $ map nodeId2comId partitions
nodes = map (setCoord ForceAtlas labels bridge)
[ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
, node_type = Terms -- or Unknown
, node_id = cs (show n)
, node_label = l
, node_x_coord = 0
, node_y_coord = 0
, node_attributes =
Attributes { clust_default = maybe 0 identity
(Map.lookup n community_id_by_node_id) } }
)
| (l, n) <- labels
, Set.member n $ Set.fromList
$ List.concat
$ map (\((s,t),d) -> if d > 0 && s /=t then [s,t] else [])
$ Map.toList bridge
]
edges = [ Edge { edge_source = cs (show s)
, edge_target = cs (show t)
, edge_weight = d
, edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
-- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
, edge_id = cs (show i)
}
| (i, ((s,t), d)) <- zip ([0..]::[Integer] )
(Map.toList bridge)
, s /= t, d > 0
]
------------------------------------------------------------------------
data Layout = KamadaKawai | ACP | ForceAtlas
setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
where
(x,y) = f i
-- | ACP
setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
setCoord l labels m (n,node) = node { node_x_coord = x
, node_y_coord = y
}
where
(x,y) = getCoord l labels m n
getCoord :: Ord a
=> Layout
-> [(a, Int)]
-> Map (Int, Int) Double
-> Int
-> (Double, Double)
getCoord KamadaKawai _ _m _n = undefined -- layout m n
getCoord ForceAtlas _ _ n = (sin d, cos d)
where
d = fromIntegral n
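-- ForceAtlas only needs an initial position: nodes are simply spread on the
-- unit circle, using the node id (in radians) as the angle.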
getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
$ Map.lookup n
$ pcaReduceTo (Dimension 2)
$ mapArray labels m
where
to2d :: Vec.Vector Double -> (Double, Double)
to2d v = (x',y')
where
ds = take 2 $ Vec.toList v
x' = head' "to2d" ds
y' = last' "to2d" ds
mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
where
ns = map snd items
toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
------------------------------------------------------------------------
-- | KamadaKawai Layout
-- TODO TEST: check labels, nodeId and coordinates
layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
where
coord :: (Map Int (Double,Double))
coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
--p = Layout.defaultLGL
p = Layout.kamadaKawai
g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
-- Debug
{-
-- measure logDistributional
dataDebug = map2mat Square (0::Int) 19 dataBug'
dataBug' :: Map (Int, Int) Int
dataBug' = Map.fromList [((0,0),28),((0,1),8),((0,2),6),((0,3),2),((0,5),4),((0,6),4),((0,7),2),((0,9),7),((0,10),4),((0,13),4),((0,14),2),((0,15),5),((0,16),8),((0,17),3),((1,1),28),((1,2),6),((1,3),7),((1,4),5),((1,5),7),((1,6),5),((1,7),2),((1,9),6),((1,10),7),((1,11),5),((1,13),6),((1,15),6),((1,16),14),((1,18),4),((2,2),39),((2,3),5),((2,4),4),((2,5),3),((2,6),4),((2,7),4),((2,8),3),((2,9),17),((2,10),4),((2,11),8),((2,12),2),((2,13),15),((2,14),4),((2,15),5),((2,16),21),((2,18),4),((3,3),48),((3,4),10),((3,5),7),((3,6),3),((3,7),7),((3,8),6),((3,9),12),((3,10),9),((3,11),8),((3,12),5),((3,13),15),((3,14),5),((3,15),9),((3,16),17),((3,18),4),((4,4),33),((4,5),2),((4,6),5),((4,7),7),((4,8),4),((4,9),6),((4,10),12),((4,11),8),((4,12),3),((4,13),16),((4,14),4),((4,15),4),((4,16),5),((4,17),2),((4,18),12),((5,5),27),((5,6),2),((5,8),3),((5,9),12),((5,10),6),((5,11),9),((5,13),4),((5,14),2),((5,15),7),((5,16),11),((5,18),4),((6,6),34),((6,7),4),((6,8),3),((6,9),12),((6,10),8),((6,11),2),((6,12),5),((6,13),6),((6,14),6),((6,15),5),((6,16),22),((6,17),8),((6,18),4),((7,7),27),((7,8),2),((7,9),6),((7,10),2),((7,11),4),((7,13),13),((7,15),2),((7,16),8),((7,17),6),((7,18),4),((8,8),30),((8,9),9),((8,10),6),((8,11),9),((8,12),6),((8,13),3),((8,14),3),((8,15),4),((8,16),15),((8,17),3),((8,18),5),((9,9),69),((9,10),9),((9,11),22),((9,12),15),((9,13),18),((9,14),10),((9,15),14),((9,16),48),((9,17),6),((9,18),9),((10,10),39),((10,11),15),((10,12),5),((10,13),11),((10,14),2),((10,15),4),((10,16),19),((10,17),3),((10,18),11),((11,11),48),((11,12),9),((11,13),20),((11,14),2),((11,15),13),((11,16),29),((11,18),13),((12,12),30),((12,13),4),((12,15),5),((12,16),16),((12,17),6),((12,18),2),((13,13),65),((13,14),10),((13,15),14),((13,16),23),((13,17),6),((13,18),10),((14,14),25),((14,16),9),((14,17),3),((14,18),3),((15,15),38),((15,16),17),((15,18),4),((16,16),99),((16,17),11),((16,18),14),((17,17),29),((18,18),23)]
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Graph/Tools/ 0000775 0000000 0000000 00000000000 14124644201 0031045 5 ustar 00root root 0000000 0000000
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Graph/Tools/IGraph.hs 0000664 0000000 0000000 00000006612 14124644201 0032501 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Graph.Tools.IGraph
Description : Tools to build Graph
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Reference:
* Gábor Csárdi, Tamás Nepusz: The igraph software package for complex network research. InterJournal Complex Systems, 1695, 2006.
-}
module Gargantext.Core.Viz.Graph.Tools.IGraph
where
import Data.Serialize
import Data.Singletons (SingI)
import IGraph hiding (mkGraph, neighbors, edges, nodes, Node, Graph)
import Protolude
import Gargantext.Core.Viz.Graph.Index
import qualified Data.List as List
import qualified IGraph as IG
import qualified IGraph.Algorithms.Clique as IG
import qualified IGraph.Algorithms.Community as IG
import qualified IGraph.Algorithms.Structure as IG
import qualified IGraph.Random as IG
import qualified Data.Map as Map
------------------------------------------------------------------
-- | Main Types
type Graph_Undirected = IG.Graph 'U () ()
type Graph_Directed = IG.Graph 'D () ()
type Node = IG.Node
type Graph = IG.Graph
------------------------------------------------------------------
-- | Main Graph management Functions
neighbors :: IG.Graph d v e -> IG.Node -> [IG.Node]
neighbors = IG.neighbors
edges :: IG.Graph d v e -> [Edge]
edges = IG.edges
nodes :: IG.Graph d v e -> [IG.Node]
nodes = IG.nodes
------------------------------------------------------------------
-- | Partitions
maximalCliques :: IG.Graph d v e -> [[Int]]
maximalCliques g = IG.maximalCliques g (min',max')
where
min' = 0
max' = 0
------------------------------------------------------------------
type Seed = Int
spinglass :: Seed -> Map (Int, Int) Double -> IO [ClusterNode]
spinglass s g = toClusterNode
<$> map catMaybes
<$> map (map (\n -> Map.lookup n fromI))
<$> partitions_spinglass' s g'''
where
g' = toIndex toI g
g'' = mkGraphUfromEdges (Map.keys g')
g''' = case IG.isConnected g'' of
True -> g''
False -> case head (IG.decompose g'') of
Nothing -> panic "[G.C.V.G.T.Igraph: not connected graph]"
Just g'''' -> g''''
(toI, fromI) = createIndices g
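-- Note: spinglass requires a connected graph, so when the input graph is
-- disconnected only its first connected component (head of 'decompose') is
-- clustered; nodes outside that component are ignored.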
-- | Tools to analyze graphs
partitions_spinglass' :: (Serialize v, Serialize e)
=> Seed -> IG.Graph 'U v e -> IO [[Int]]
partitions_spinglass' s g = do
gen <- IG.withSeed s pure
IG.findCommunity g Nothing Nothing IG.spinglass gen
data ClusterNode = ClusterNode { cl_node_id :: Int
, cl_community_id :: Int
}
toClusterNode :: [[Int]] -> [ClusterNode]
toClusterNode ns = List.concat
$ map (\(cId, ns') -> map (\n -> ClusterNode n cId) ns')
$ List.zip [1..] ns
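-- For instance, toClusterNode [[3,5],[7]] yields
-- [ClusterNode 3 1, ClusterNode 5 1, ClusterNode 7 2]
-- (community ids start at 1).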
------------------------------------------------------------------
mkGraph :: (SingI d, Ord v,
Serialize v, Serialize e) =>
[v] -> [LEdge e] -> IG.Graph d v e
mkGraph = IG.mkGraph
------------------------------------------------------------------
mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected
mkGraphUfromEdges es = mkGraph (List.replicate n ()) $ zip es $ repeat ()
where
(a,b) = List.unzip es
n = List.length (List.nub $ a <> b)
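-- Note (assumption): the n nodes created here are indexed 0..n-1, so the edge
-- endpoints are expected to be dense indices (as produced by createIndices /
-- toIndex); sparse ids would reference nodes outside the created range.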
{-
mkGraphDfromEdges :: [(Int, Int)] -> Graph_Directed
mkGraphDfromEdges = undefined
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Graph/Utils.hs 0000664 0000000 0000000 00000003331 14124644201 0031401 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Graph.Utils
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
These functions are used with Data.Matrix and Data.Vector only.
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE Strict #-}
module Gargantext.Core.Viz.Graph.Utils
where
import Data.Matrix hiding (identity)
import Data.Vector (Vector)
import qualified Data.Vector as V
import qualified Data.List as L
import Gargantext.Prelude
------------------------------------------------------------------------
-- | Some utils to build the matrix from cooccurrence results
-- | For tests only, to be removed
-- m1 :: Matrix Double
-- m1 = fromList 300 300 [1..]
------------------------------------------------------------------------
------------------------------------------------------------------------
data Axis = Col | Row
------------------------------------------------------------------------
-- | Matrix functions
type AxisId = Int
-- Data.Vector.Additions
dropAt :: Int -> Vector a -> Vector a
dropAt n v = debut <> (V.tail fin)
where
debut = V.take n v
fin = V.drop n v
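-- For instance, dropAt 1 (V.fromList [10,20,30]) == V.fromList [10,30].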
total :: Num a => Matrix a -> a
total m = V.sum $ V.map (\c -> V.sum (getCol c m)) (V.enumFromTo 1 (nOf Col m))
nOf :: Axis -> Matrix a -> Int
nOf Row = nrows
nOf Col = ncols
axis :: Axis -> AxisId -> Matrix a -> Vector a
axis Col = getCol
axis Row = getRow
toListsWithIndex :: Matrix a -> [((Int, Int), a)]
toListsWithIndex m = concat' $ zip [1..] $ map (\c -> zip [1..] c) $ toLists m
where
concat' :: [(Int, [(Int, a)])] -> [((Int, Int), a)]
concat' xs = L.concat $ map (\(x, ys) -> map (\(y, a) -> ((x,y), a)) ys ) xs
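-- For instance, toListsWithIndex (fromLists [[1,2],[3,4]])
--   == [((1,1),1),((1,2),2),((2,1),3),((2,2),4)]
-- (indices are 1-based, row-major).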
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/LegacyPhylo.hs 0000664 0000000 0000000 00000042350 14124644201 0031464 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Phylo
Description : Phylomemy definitions and types.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Specifications of Phylomemy export format.
Phylomemy can be described as a Temporal Graph with different scales of
granularity of groups of ngrams (terms and multi-terms).
The main type is Phylo, which is a synonym of Phylomemy (the only difference is
the number of characters).
References:
Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
in science evolution — the rise and fall of scientific fields. PloS
one 8, e54847.
-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.LegacyPhylo where
import Control.DeepSeq
import Control.Lens (makeLenses)
import Data.Aeson.TH (deriveJSON,defaultOptions)
import Data.Map (Map)
import Data.Set (Set)
import Data.Swagger
import Data.Text (Text)
import Data.Vector (Vector)
import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
--------------------
-- | PhyloParam | --
--------------------
-- | Global parameters of a Phylo
data PhyloParam =
PhyloParam { _phyloParam_version :: !Text -- Double ?
, _phyloParam_software :: !Software
, _phyloParam_query :: !PhyloQueryBuild
} deriving (Generic, Show, Eq)
-- | Software parameters
data Software =
Software { _software_name :: !Text
, _software_version :: !Text
} deriving (Generic, Show, Eq)
---------------
-- | Phylo | --
---------------
-- | Phylo datatype of a phylomemy
-- Duration : time Segment of the whole Phylo
-- Foundations : vector of all the Ngrams contained in a Phylo (built from a list of actants)
-- Periods : list of all the periods of a Phylo
data Phylo =
Phylo { _phylo_duration :: !(Start, End)
, _phylo_foundations :: !PhyloFoundations
, _phylo_periods :: [PhyloPeriod]
, _phylo_docsByYears :: !(Map Date Double)
, _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
, _phylo_fis :: !(Map (Date,Date) [PhyloFis])
, _phylo_param :: !PhyloParam
}
deriving (Generic, Show, Eq)
-- | The foundations of a phylomemy created from a given TermList
data PhyloFoundations =
PhyloFoundations { _phylo_foundationsRoots :: !(Vector Ngrams)
, _phylo_foundationsTermsList :: !TermList
} deriving (Generic, Show, Eq)
-- | Date : a simple Integer
type Date = Int
-- | UTCTime in seconds since UNIX epoch
-- type Start = POSIXTime
-- type End = POSIXTime
type Start = Date
type End = Date
---------------------
-- | PhyloPeriod | --
---------------------
-- | PhyloStep : steps of phylomemy on temporal axis
-- Period: tuple (start date, end date) of the step of the phylomemy
-- Levels: levels of granularity
data PhyloPeriod =
PhyloPeriod { _phylo_periodId :: !PhyloPeriodId
, _phylo_periodLevels :: ![PhyloLevel]
}
deriving (Generic, Show, Eq)
--------------------
-- | PhyloLevel | --
--------------------
-- | PhyloLevel : levels of phylomemy on level axis
-- Levels description:
-- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
-- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
-- Level 1: First level of clustering
-- Level N: Nth level of clustering
data PhyloLevel =
PhyloLevel { _phylo_levelId :: !PhyloLevelId
, _phylo_levelGroups :: ![PhyloGroup]
}
deriving (Generic, Show, Eq)
--------------------
-- | PhyloGroup | --
--------------------
-- | PhyloGroup : group of ngrams at each level and step
-- Label : maybe has a label as text
-- Ngrams: set of terms that build the group
-- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
-- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
-- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
-- Pointers are directed links from Self to any PhyloGroup (/= Self ?)
data PhyloGroup =
PhyloGroup { _phylo_groupId :: !PhyloGroupId
, _phylo_groupLabel :: !Text
, _phylo_groupNgrams :: ![Int]
, _phylo_groupNgramsMeta :: !(Map Text [Double])
, _phylo_groupMeta :: !(Map Text Double)
, _phylo_groupBranchId :: !(Maybe PhyloBranchId)
, _phylo_groupCooc :: !(Map (Int,Int) Double)
, _phylo_groupPeriodParents :: ![Pointer]
, _phylo_groupPeriodChilds :: ![Pointer]
, _phylo_groupLevelParents :: ![Pointer]
, _phylo_groupLevelChilds :: ![Pointer]
}
deriving (Generic, NFData, Show, Eq, Ord)
-- instance NFData PhyloGroup
-- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
type Level = Int
-- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
type Index = Int
type PhyloPeriodId = (Start, End)
type PhyloLevelId = (PhyloPeriodId, Level)
type PhyloGroupId = (PhyloLevelId, Index)
type PhyloBranchId = (Level, Index)
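-- For instance, (((2000,2003),1),4) :: PhyloGroupId identifies the group of
-- index 4 at level 1 of the 2000-2003 period.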
-- | Weight : A generic measure that can be associated with an Id
type Weight = Double
-- | Pointer : A weighted link to a given PhyloGroup
type Pointer = (PhyloGroupId, Weight)
-- | Ngrams : a contiguous sequence of n terms
type Ngrams = Text
--------------------
-- | Aggregates | --
--------------------
-- | Document : a piece of Text linked to a Date
data Document = Document
{ date :: !Date
, text :: ![Ngrams]
} deriving (Show,Generic,NFData)
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams
-- | Support : Number of Documents where a Clique occurs
type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
data PhyloFis = PhyloFis
{ _phyloFis_clique :: !Clique
, _phyloFis_support :: !Support
, _phyloFis_period :: !(Date,Date)
} deriving (Generic,NFData,Show,Eq)
-- | A list of clustered PhyloGroup
type PhyloCluster = [PhyloGroup]
-- | A PhyloGroup in a Graph
type GroupNode = PhyloGroup
-- | A weighted link between two PhyloGroups in a Graph
type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
-- | The association as a Graph between a list of Nodes and a list of Edges
type GroupGraph = ([GroupNode],[GroupEdge])
---------------
-- | Error | --
---------------
data PhyloError = LevelDoesNotExist
| LevelUnassigned
deriving (Show)
-----------------
-- | Cluster | --
-----------------
-- | Cluster constructors
data Cluster = Fis FisParams
| RelatedComponents RCParams
| Louvain LouvainParams
deriving (Generic, Show, Eq, Read)
-- | Parameters for Fis clustering
data FisParams = FisParams
{ _fis_keepMinorFis :: !Bool
, _fis_minSupport :: !Support
, _fis_minSize :: !Int
} deriving (Generic, Show, Eq, Read)
-- | Parameters for RelatedComponents clustering
data RCParams = RCParams
{ _rc_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
-- | Parameters for Louvain clustering
data LouvainParams = LouvainParams
{ _louvain_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
-------------------
-- | Proximity | --
-------------------
-- | Proximity constructors
data Proximity = WeightedLogJaccard WLJParams
| WeightedLogSim WLJParams
| Hamming HammingParams
| Filiation
deriving (Generic, Show, Eq, Read)
-- | Parameters for WeightedLogJaccard and WeightedLogSim proximity
data WLJParams = WLJParams
{ _wlj_threshold :: !Double
, _wlj_sensibility :: !Double
} deriving (Generic, Show, Eq, Read)
-- | Parameters for Hamming proximity
data HammingParams = HammingParams
{ _hamming_threshold :: !Double } deriving (Generic, Show, Eq, Read)
----------------
-- | Filter | --
----------------
-- | Filter constructors
data Filter = LonelyBranch LBParams
| SizeBranch SBParams
deriving (Generic, Show, Eq)
-- | Parameters for LonelyBranch filter
data LBParams = LBParams
{ _lb_periodsInf :: !Int
, _lb_periodsSup :: !Int
, _lb_minNodes :: !Int } deriving (Generic, Show, Eq)
-- | Parameters for SizeBranch filter
data SBParams = SBParams
{ _sb_minSize :: !Int } deriving (Generic, Show, Eq)
----------------
-- | Metric | --
----------------
-- | Metric constructors
data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
----------------
-- | Tagger | --
----------------
-- | Tagger constructors
data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
| GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
--------------
-- | Sort | --
--------------
-- | Sort constructors
data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
data Order = Asc | Desc deriving (Generic, Show, Read)
--------------------
-- | PhyloQuery | --
--------------------
-- | A Phyloquery describes a phylomemic reconstruction
data PhyloQueryBuild = PhyloQueryBuild
{ _q_phyloTitle :: !Text
, _q_phyloDesc :: !Text
-- Grain and Steps for the PhyloPeriods
, _q_periodGrain :: !Int
, _q_periodSteps :: !Int
-- Clustering method for building the contextual unit of Phylo (ie: level 1)
, _q_contextualUnit :: !Cluster
, _q_contextualUnitMetrics :: ![Metric]
, _q_contextualUnitFilters :: ![Filter]
-- Inter-temporal matching method of the Phylo
, _q_interTemporalMatching :: !Proximity
, _q_interTemporalMatchingFrame :: !Int
, _q_interTemporalMatchingFrameTh :: !Double
, _q_reBranchThr :: !Double
, _q_reBranchNth :: !Int
-- Last level of reconstruction
, _q_nthLevel :: !Level
-- Clustering method used from level 1 to nthLevel
, _q_nthCluster :: !Cluster
} deriving (Generic, Show, Eq)
-- | To choose the Phylo edge you want to export : --> <-- <--> <=>
data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
-------------------
-- | PhyloView | --
-------------------
-- | A PhyloView is the output type of a Phylo
data PhyloView = PhyloView
{ _pv_param :: !PhyloParam
, _pv_title :: !Text
, _pv_description :: !Text
, _pv_filiation :: !Filiation
, _pv_level :: !Level
, _pv_periods :: ![PhyloPeriodId]
, _pv_metrics :: !(Map Text [Double])
, _pv_branches :: ![PhyloBranch]
, _pv_nodes :: ![PhyloNode]
, _pv_edges :: ![PhyloEdge]
} deriving (Generic, Show)
-- | A phyloview is made of PhyloBranches, edges and nodes
data PhyloBranch = PhyloBranch
{ _pb_id :: !PhyloBranchId
, _pb_peak :: !Text
, _pb_metrics :: !(Map Text [Double])
} deriving (Generic, Show)
data PhyloEdge = PhyloEdge
{ _pe_source :: !PhyloGroupId
, _pe_target :: !PhyloGroupId
, _pe_type :: !EdgeType
, _pe_weight :: !Weight
} deriving (Generic, Show)
data PhyloNode = PhyloNode
{ _pn_id :: !PhyloGroupId
, _pn_bid :: !(Maybe PhyloBranchId)
, _pn_label :: !Text
, _pn_idx :: ![Int]
, _pn_ngrams :: !(Maybe [Ngrams])
, _pn_metrics :: !(Map Text [Double])
, _pn_cooc :: !(Map (Int,Int) Double)
, _pn_parents :: !(Maybe [PhyloGroupId])
, _pn_childs :: ![PhyloNode]
} deriving (Generic, Show)
------------------------
-- | PhyloQueryView | --
------------------------
data ExportMode = Json | Dot | Svg
deriving (Generic, Show, Read)
data DisplayMode = Flat | Nested
deriving (Generic, Show, Read)
-- | A PhyloQueryView describes a Phylo as an output view
data PhyloQueryView = PhyloQueryView
{ _qv_lvl :: !Level
-- Does the PhyloGraph contain ascendant, descendant or a complete Filiation? (Complete is redundant: it merges both, taking the max)
, _qv_filiation :: !Filiation
-- Does the PhyloGraph contain some levelChilds? How deep must it go?
, _qv_levelChilds :: !Bool
, _qv_levelChildsDepth :: !Level
-- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
-- Firstly the metrics, then the filters and the taggers
, _qv_metrics :: ![Metric]
, _qv_filters :: ![Filter]
, _qv_taggers :: ![Tagger]
-- An asc or desc sort to apply to the PhyloGraph
, _qv_sort :: !(Maybe (Sort,Order))
-- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
, _qv_export :: !ExportMode
, _qv_display :: !DisplayMode
, _qv_verbose :: !Bool
}
----------------
-- | Lenses | --
----------------
makeLenses ''PhyloParam
makeLenses ''Software
--
makeLenses ''Phylo
makeLenses ''PhyloFoundations
makeLenses ''PhyloGroup
makeLenses ''PhyloLevel
makeLenses ''PhyloPeriod
makeLenses ''PhyloFis
--
makeLenses ''Proximity
makeLenses ''Cluster
makeLenses ''Filter
--
makeLenses ''PhyloQueryBuild
makeLenses ''PhyloQueryView
--
makeLenses ''PhyloView
makeLenses ''PhyloBranch
makeLenses ''PhyloNode
makeLenses ''PhyloEdge
------------------------
-- | JSON instances | --
------------------------
$(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
$(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
$(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
$(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
$(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
$(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
--
$(deriveJSON (unPrefix "_software_" ) ''Software )
$(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
--
$(deriveJSON defaultOptions ''Filter )
$(deriveJSON defaultOptions ''Metric )
$(deriveJSON defaultOptions ''Cluster )
$(deriveJSON defaultOptions ''Proximity )
--
$(deriveJSON (unPrefix "_fis_" ) ''FisParams )
$(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
$(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
$(deriveJSON (unPrefix "_rc_" ) ''RCParams )
$(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
--
$(deriveJSON (unPrefix "_lb_" ) ''LBParams )
$(deriveJSON (unPrefix "_sb_" ) ''SBParams )
--
$(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
$(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
$(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
$(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
$(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
$(deriveJSON defaultOptions ''Filiation )
$(deriveJSON defaultOptions ''EdgeType )
---------------------------
-- | Swagger instances | --
---------------------------
instance ToSchema Phylo where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
instance ToSchema PhyloFoundations where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
instance ToSchema PhyloPeriod where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
instance ToSchema PhyloLevel where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
instance ToSchema PhyloGroup where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
instance ToSchema PhyloFis where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
instance ToSchema Software where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
instance ToSchema PhyloParam where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
instance ToSchema Filter
instance ToSchema Metric
instance ToSchema Cluster
instance ToSchema Proximity where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
instance ToSchema FisParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
instance ToSchema HammingParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
instance ToSchema LouvainParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
instance ToSchema RCParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
instance ToSchema WLJParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
instance ToSchema LBParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
instance ToSchema SBParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
instance ToSchema PhyloQueryBuild where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
instance ToSchema PhyloView where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
instance ToSchema PhyloBranch where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
instance ToSchema PhyloEdge where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
instance ToSchema PhyloNode where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
instance ToSchema Filiation
instance ToSchema EdgeType
----------------------------
-- | TODO XML instances | --
----------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Phylo/ 0000775 0000000 0000000 00000000000 14124644201 0027777 5 ustar 00root root 0000000 0000000
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Phylo/Legacy/ 0000775 0000000 0000000 00000000000 14124644201 0031203 5 ustar 00root root 0000000 0000000
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Phylo/Legacy/LegacyAPI.hs 0000664 0000000 0000000 00000013251 14124644201 0033220 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Phylo.API
Description : Phylo API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE OverloadedLists #-} -- allows to write Map and HashMap as lists
{-# LANGUAGE TypeOperators #-}
module Gargantext.Core.Viz.Phylo.Legacy.LegacyAPI
where
-- import Data.Maybe (fromMaybe)
-- import Control.Lens ((^.))
--import Control.Monad.Reader (ask)
import qualified Data.ByteString as DB
import qualified Data.ByteString.Lazy as DBL
import Data.Swagger
import Network.HTTP.Media ((//), (/:))
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Web.HttpApiData (readTextData)
import Gargantext.API.Prelude
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
import Gargantext.Database.Query.Table.Node (insertNodes, node)
-- import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.Core.Viz.LegacyPhylo
import Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
-- import Gargantext.Core.Viz.Phylo.Example
import Gargantext.Core.Types (TODO(..))
------------------------------------------------------------------------
type PhyloAPI = Summary "Phylo API"
:> GetPhylo
-- :<|> PutPhylo
:<|> PostPhylo
phyloAPI :: PhyloId -> UserId -> GargServer PhyloAPI
phyloAPI n u = getPhylo n
:<|> postPhylo n u
-- :<|> putPhylo n
-- :<|> deletePhylo n
newtype SVG = SVG DB.ByteString
instance ToSchema SVG
where
declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
instance Show SVG where
show (SVG a) = show a
instance Accept SVG where
contentType _ = "image" // "svg+xml" /: ("charset", "utf-8")
instance Show a => MimeRender PlainText a where
mimeRender _ val = cs ("" <> show val)
instance MimeRender SVG SVG where
mimeRender _ (SVG s) = DBL.fromStrict s
------------------------------------------------------------------------
type GetPhylo = QueryParam "listId" ListId
:> QueryParam "level" Level
:> QueryParam "minSizeBranch" MinSizeBranch
{- :> QueryParam "filiation" Filiation
:> QueryParam "childs" Bool
:> QueryParam "depth" Level
:> QueryParam "metrics" [Metric]
:> QueryParam "periodsInf" Int
:> QueryParam "periodsSup" Int
:> QueryParam "minNodes" Int
:> QueryParam "taggers" [Tagger]
:> QueryParam "sort" Sort
:> QueryParam "order" Order
:> QueryParam "export" ExportMode
:> QueryParam "display" DisplayMode
:> QueryParam "verbose" Bool
-}
:> Get '[SVG] SVG
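-- Illustrative request shape (the actual route prefix and the phylo node
-- capture are defined upstream of this API type):
--   GET <phylo-node-route>?listId=<listId>&level=2&minSizeBranch=2
-- answers with an SVG rendering of the phylomemy.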
-- | TODO
-- Add real text processing
-- Fix Filter parameters
getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo _ _lId _ _ = undefined
-- getPhylo phId _lId l msb = do
-- phNode <- getNodeWith phId (Proxy :: Proxy HyperdataPhylo)
-- let
-- level = fromMaybe 2 l
-- branc = fromMaybe 2 msb
-- maybePhylo = phNode ^. (node_hyperdata . hp_data)
-- p <- liftBase $ viewPhylo2Svg
-- $ viewPhylo level branc
-- $ fromMaybe phyloFromQuery maybePhylo
-- pure (SVG p)
------------------------------------------------------------------------
type PostPhylo = QueryParam "listId" ListId
-- :> ReqBody '[JSON] PhyloQueryBuild
:> (Post '[JSON] NodeId)
postPhylo :: CorpusId -> UserId -> GargServer PostPhylo
postPhylo corpusId userId _lId = do
-- TODO get Reader settings
-- s <- ask
-- let
-- _vrs = Just ("1" :: Text)
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
phy <- flowPhylo corpusId -- params
phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
pure $ NodeId (fromIntegral phyloId)
------------------------------------------------------------------------
-- | DELETE Phylo == delete a node
------------------------------------------------------------------------
------------------------------------------------------------------------
{-
type PutPhylo = (Put '[JSON] Phylo )
--putPhylo :: PhyloId -> Maybe ListId -> PhyloQueryBuild -> Phylo
putPhylo :: PhyloId -> GargServer PutPhylo
putPhylo = undefined
-}
-- | Instances
-- instance Arbitrary Phylo where arbitrary = elements [phylo]
instance Arbitrary PhyloGroup where arbitrary = elements []
-- instance Arbitrary PhyloView where arbitrary = elements [phyloView]
instance FromHttpApiData DisplayMode where parseUrlPiece = readTextData
instance FromHttpApiData ExportMode where parseUrlPiece = readTextData
instance FromHttpApiData Filiation where parseUrlPiece = readTextData
instance FromHttpApiData Metric where parseUrlPiece = readTextData
instance FromHttpApiData Order where parseUrlPiece = readTextData
instance FromHttpApiData Sort where parseUrlPiece = readTextData
instance FromHttpApiData Tagger where parseUrlPiece = readTextData
instance FromHttpApiData [Metric] where parseUrlPiece = readTextData
instance FromHttpApiData [Tagger] where parseUrlPiece = readTextData
instance ToParamSchema DisplayMode
instance ToParamSchema ExportMode
instance ToParamSchema Filiation
instance ToParamSchema Tagger
instance ToParamSchema Metric
instance ToParamSchema Order
instance ToParamSchema Sort
instance ToSchema Order
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs 0000664 0000000 0000000 00000007730 14124644201 0033500 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Phylo.Main
Description : Phylomemy Main
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
where
-- import Data.GraphViz
-- import qualified Data.ByteString as DB
import qualified Data.List as List
import Data.Maybe
import Data.Text (Text)
import Debug.Trace (trace)
import GHC.IO (FilePath)
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types
import Gargantext.Database.Admin.Types.Node
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Database.Query.Table.Node(defaultList)
import Gargantext.Prelude
import Gargantext.Database.Action.Flow.Types
import Gargantext.Core.Viz.LegacyPhylo hiding (Svg, Dot)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Query.Table.NodeNode (selectDocs)
import Gargantext.Core.Types
import Gargantext.Core (HasDBid)
-- import Gargantext.Core.Viz.Phylo.LevelMaker (toPhylo)
-- import Gargantext.Core.Viz.Phylo.Tools
-- import Gargantext.Core.Viz.Phylo.View.Export
-- import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Set as Set
import qualified Data.Text as Text
type MinSizeBranch = Int
flowPhylo :: (FlowCmdM env err m, HasDBid NodeType)
=> CorpusId
-> m Phylo
flowPhylo cId = do
list <- defaultList cId
termList <- HashMap.toList <$> getTermsWith (Text.words . unNgramsTerm) [list] NgramsTerms (Set.singleton MapTerm)
docs' <- catMaybes
<$> map (\h -> (,) <$> _hd_publication_year h
<*> _hd_abstract h
)
<$> selectDocs cId
let
patterns = buildPatterns termList
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
filterTerms patterns' (y,d) = (y,termsInText patterns' d)
docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs'
--liftBase $ flowPhylo' (List.sortOn date docs) termList l m fp
pure $ buildPhylo (List.sortOn date docs) termList
-- TODO SortedList Document
flowPhylo' :: [Document] -> TermList -- ^Build
-> Level -> MinSizeBranch -- ^View
-> FilePath
-> IO FilePath
flowPhylo' corpus terms l m fp = do
let
phylo = buildPhylo corpus terms
phVie = viewPhylo l m phylo
writePhylo fp phVie
defaultQuery :: PhyloQueryBuild
defaultQuery = undefined
-- defaultQuery = defaultQueryBuild'
-- "Default Title"
-- "Default Description"
buildPhylo :: [Document] -> TermList -> Phylo
buildPhylo = trace (show defaultQuery) $ buildPhylo' defaultQuery
buildPhylo' :: PhyloQueryBuild -> [Document] -> TermList -> Phylo
buildPhylo' _ _ _ = undefined
-- buildPhylo' q corpus termList = toPhylo q corpus termList Map.empty
-- refactor 2021
-- queryView :: Level -> MinSizeBranch -> PhyloQueryView
-- queryView level _minSizeBranch = PhyloQueryView level Merge False 2
-- [BranchAge]
-- []
-- -- [SizeBranch $ SBParams minSizeBranch]
-- [BranchPeakFreq,GroupLabelCooc]
-- (Just (ByBranchAge,Asc))
-- Json Flat True
queryView :: Level -> MinSizeBranch -> PhyloQueryView
queryView _level _minSizeBranch = undefined
viewPhylo :: Level -> MinSizeBranch -> Phylo -> PhyloView
viewPhylo _l _b _phylo = undefined
-- viewPhylo l b phylo = toPhyloView (queryView l b) phylo
writePhylo :: FilePath -> PhyloView -> IO FilePath
writePhylo _fp _phview = undefined
-- writePhylo fp phview = runGraphviz (viewToDot phview) Svg fp
-- refactor 2021
-- viewPhylo2Svg :: PhyloView -> IO DB.ByteString
-- viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Phylo/PhyloExample.hs 0000664 0000000 0000000 00000021062 14124644201 0032664 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Core.Viz.Phylo.PhyloExample
Description : Phylomemy example based on history of Cleopatre.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-- | Cesar and Cleopatra
-- | An example phylomemy
-- | French without accents
-}
module Gargantext.Core.Viz.Phylo.PhyloExample where
import Data.List (sortOn, nub, sort)
import Data.Map (Map)
import Data.Text (Text, toLower)
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.Mono (monoTexts)
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.PhyloMaker
import Gargantext.Core.Viz.Phylo.PhyloExport
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching)
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Control.Lens
import Data.GraphViz.Types.Generalised (DotGraph)
import qualified Data.Vector as Vector
---------------------------------
-- | STEP 5 | -- Export the phylo
---------------------------------
phyloExport :: IO ()
phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = toPhyloExport phylo2
--------------------------------------------------
-- | STEP 4 | -- Process the synchronic clustering
--------------------------------------------------
phylo2 :: Phylo
phylo2 = synchronicClustering $ toHorizon phylo1
-----------------------------------------------
-- | STEP 3 | -- Build the Level 1 of the Phylo
-----------------------------------------------
phylo1 :: Phylo
phylo1 = case (getSeaElevation phyloBase) of
Constante s g -> constanteTemporalMatching s g
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative s -> adaptativeTemporalMatching s
$ toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique phyloBase
---------------------------------------------
-- | STEP 2 | -- Build the cliques
---------------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique]
phyloClique = toPhyloClique phyloBase docsByPeriods
docsByPeriods :: Map (Date,Date) [Document]
docsByPeriods = groupDocsByPeriod date periods docs
--------------------------------------------
-- | STEP 1 | -- Init the Base of the Phylo
--------------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase docs mapList config
phyloCooc :: Map Date Cooc
phyloCooc = docsToTimeScaleCooc docs (foundations ^. foundations_roots)
periods :: [(Date,Date)]
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit config) (getTimeStep $ timeUnit config)
nbDocsByYear :: Map Date Double
nbDocsByYear = docsToTimeScaleNb docs
config :: Config
config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloLevel = 2
, exportFilter = [ByBranchSize 0]
, clique = MaxClique 0 15 ByNeighbours }
docs :: [Document]
docs = map (\(d,t)
-> Document d
""
(filter (\n -> isRoots n (foundations ^. foundations_roots)) $ monoTexts t)
Nothing
[]
) corpus
foundations :: PhyloFoundations
foundations = PhyloFoundations (Vector.fromList $ map toLower actants) mapList
--------------------------------------------
-- | STEP 0 | -- Let's start with an example
--------------------------------------------
mapList :: TermList
mapList = map (\a -> ([toLower a],[])) actants
actants :: [Ngrams]
actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV", "Ptolemee-X", "Berenice-III"
, "Marc-Antoine", "Cesar" , "Antoine" , "Octave" , "Rome"
, "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus", "Caesar-III", "Aurelia-Cotta", "Pisae", "Pline"]
corpus :: [(Date, Text)]
corpus = sortOn fst [
(-101,"La tutelle Cesar Caesar-III de sa mère lui étant difficile à endurer, en septembre 101 av. J.-C., Ptolemee-X la fait assassiner et peut enfin régner presque seul puisqu'il partage le pouvoir avec son épouse Berenice-III Cléopâtre Philopator."),
(-99,"Caesar-III est questeur en 99 av. J.-C. ou 98 av. J.-C., et préteur en 92 av. J.-C.."),
(-100,"Caius Julius Caesar-IV — dit Jules Cesar Ptolemee-X — naît vers 100 av. J.-C., il est le fils de Caius Julius Caesar-III et de Aurelia-Cotta"),
(-85,"Caesar-III meurt à Pisae de cause naturelle en 85 av. J.-C. : selon Pline l'Ancien, il décède brusquement en mettant ses chaussures"),
(-53,"Aurelia-Cotta décède peu de temps avant le meurtre de Clodius Pulcher, vers 53 av. J.-C."),
(-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."),
(-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."),
(-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"),
(-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."),
(-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."),
(-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."),
(-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."),
(-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."),
(-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"),
(-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."),
(-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"),
(-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"),
(-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."),
(-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."),
(-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."),
(-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."),
(-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")] PhyloExport.hs 0000664 0000000 0000000 00000102336 14124644201 0032556 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Phylo {-|
Module : Gargantext.Core.Viz.Phylo.PhyloExport
Description : Exportation module of a Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeSynonymInstances #-}
module Gargantext.Core.Viz.Phylo.PhyloExport where
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
import Data.Vector (Vector)
import Prelude (writeFile)
import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.GraphViz hiding (DotGraph, Order)
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
import Data.GraphViz.Types.Monadic
import Data.Text.Lazy (fromStrict, pack, unpack)
import System.FilePath
import Debug.Trace (trace)
import qualified Data.Text as Text
import qualified Data.Vector as Vector
import qualified Data.Text.Lazy as Lazy
import qualified Data.GraphViz.Attributes.HTML as H
--------------------
-- | Dot export | --
--------------------
dotToFile :: FilePath -> DotGraph DotId -> IO ()
dotToFile filePath dotG = writeFile filePath $ dotToString dotG
dotToString :: DotGraph DotId -> [Char]
dotToString dotG = unpack (printDotGraph dotG)
dynamicToColor :: Double -> H.Attribute
dynamicToColor d
| d == 0 = H.BGColor (toColor LightCoral)
| d == 1 = H.BGColor (toColor Khaki)
| d == 2 = H.BGColor (toColor SkyBlue)
| otherwise = H.Color (toColor Black)
pickLabelColor :: [Double] -> H.Attribute
pickLabelColor lst
| elem 0 lst = dynamicToColor 0
| elem 2 lst = dynamicToColor 2
| elem 1 lst = dynamicToColor 1
| otherwise = dynamicToColor 3
toDotLabel :: Text.Text -> Label
toDotLabel lbl = StrLabel $ fromStrict lbl
toAttr :: AttributeName -> Lazy.Text -> CustomAttribute
toAttr k v = customAttribute k v
metaToAttr :: Map Text.Text [Double] -> [CustomAttribute]
metaToAttr meta = map (\(k,v) -> toAttr (fromStrict k) $ (pack . unwords) $ map show v) $ toList meta
groupIdToDotId :: PhyloGroupId -> DotId
groupIdToDotId (((d,d'),lvl),idx) = (fromStrict . Text.pack) $ ("group" <> (show d) <> (show d') <> (show lvl) <> (show idx))
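-- e.g. groupIdToDotId (((2000,2003),1),4) == "group2000200314"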
branchIdToDotId :: PhyloBranchId -> DotId
branchIdToDotId bId = (fromStrict . Text.pack) $ ("branch" <> show (snd bId))
periodIdToDotId :: PhyloPeriodId -> DotId
periodIdToDotId prd = (fromStrict . Text.pack) $ ("period" <> show (fst prd) <> show (snd prd))
groupToTable :: Vector Ngrams -> PhyloGroup -> H.Label
groupToTable fdt g = H.Table H.HTable
{ H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
, H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
, H.tableRows = [header]
<> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
<> ( map ngramsToRow $ splitEvery 4
$ reverse $ sortOn (snd . snd)
$ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
$ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
where
--------------------------------------
ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
--------------------------------------
header :: H.Row
header =
H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
$ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
<> (fromStrict " ( ")
<> (pack $ show (fst $ g ^. phylo_groupPeriod))
<> (fromStrict " , ")
<> (pack $ show (snd $ g ^. phylo_groupPeriod))
<> (fromStrict " ) ")
<> (pack $ show (getGroupId g)))]]
--------------------------------------
branchToDotNode :: PhyloBranch -> Int -> Dot DotId
branchToDotNode b bId =
node (branchIdToDotId $ b ^. branch_id)
([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
<> (metaToAttr $ b ^. branch_meta)
<> [ toAttr "nodeType" "branch"
, toAttr "bId" (pack $ show bId)
, toAttr "branchId" (pack $ unwords (map show $ snd $ b ^. branch_id))
, toAttr "branch_x" (fromStrict $ Text.pack $ (show $ b ^. branch_x))
, toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
, toAttr "label" (pack $ show $ b ^. branch_label)
])
periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
periodToDotNode prd prd' =
node (periodIdToDotId prd)
([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
<> [ toAttr "nodeType" "period"
, toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
, toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
, toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
, toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
groupToDotNode fdt g bId =
node (groupIdToDotId $ getGroupId g)
([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
<> [ toAttr "nodeType" "group"
, toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
, toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
, toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
, toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
, toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
, toAttr "bId" (pack $ show bId)
, toAttr "support" (pack $ show (g ^. phylo_groupSupport))
, toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
, toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
, toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
, toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
, toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
, toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
])
toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
toDotEdge source target lbl edgeType = edge source target
(case edgeType of
GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl)]
BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl)]
PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
mergePointers groups =
let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
in unionWith (\w w' -> max w w') toChilds toParents
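-- mergePointers turns the directed child/parent pointers of every group into a
-- single link map, keeping the larger weight when the same link appears in
-- both directions.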
mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
mergeAncestors groups = concat
$ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
$ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
toBid :: PhyloGroup -> [PhyloBranch] -> Int
toBid g bs =
let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
in fromJust $ elemIndex b' bs
exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
exportToDot phylo export =
trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
<> show(length $ export ^. export_groups) <> " groups and "
<> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
<> "##########################") $
digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
{- 1) init the dot graph -}
graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
<> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
, Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]]
{-- home made attributes -}
<> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
])
{- toAttr (fromStrict k) $ (pack . unwords) $ map show v -}
{-- 2) create a layer for the branches labels -}
subgraph (Str "Branches peaks") $ do
-- graphAttrs [Rank SameRank]
{-
-- 3) group the branches by hierarchy
-- mapM (\branches ->
-- subgraph (Str "Branches clade") $ do
-- graphAttrs [Rank SameRank]
-- -- 4) create a node for each branch
-- mapM branchToDotNode branches
-- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
-}
mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
{-- 5) create a layer for each period -}
_ <- mapM (\period ->
subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
graphAttrs [Rank SameRank]
periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod')
{-- 6) create a node for each group -}
mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
) $ phylo ^. phylo_periods
{-- 7) create the edges between a branch and its first groups -}
_ <- mapM (\(bId,groups) ->
mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
)
$ toList
$ map (\groups -> head' "toDot"
$ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
$ sortOn (fst . _phylo_groupPeriod) groups)
$ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
{- 8) create the edges between the groups -}
_ <- mapM (\((k,k'),v) ->
toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
) $ (toList . mergePointers) $ export ^. export_groups
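{- 9) create the edges between the groups and their ancestors -}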
_ <- mapM (\((k,k'),v) ->
toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
) $ mergeAncestors $ export ^. export_groups
-- 10) create the edges between the periods
_ <- mapM (\(prd,prd') ->
toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
{- 11) create the edges between the branches
-- _ <- mapM (\(bId,bId') ->
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToProximity bId bId'
-- (getThresholdInit $ phyloProximity $ getConfig phylo)
-- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
-- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
-}
graphAttrs [Rank SameRank]
----------------
-- | Filter | --
----------------
filterByBranchSize :: Double -> PhyloExport -> PhyloExport
filterByBranchSize thr export =
let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
in export & export_branches .~ (fst splited)
& export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
processFilters filters qua export =
foldl (\export' f -> case f of
ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
else filterByBranchSize thr export'
) export filters
--------------
-- | Sort | --
--------------
branchToIso :: [PhyloBranch] -> [PhyloBranch]
branchToIso branches =
let steps = map sum
$ inits
$ map (\(b,x) -> b ^. branch_y + 0.05 - x)
$ zip branches
$ ([0] ++ (map (\(b,b') ->
let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
in
if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
then (b' ^. branch_seaLevel) !! (lmin - 1)
else (b' ^. branch_seaLevel) !! (idx - 1)
) $ listToSeq branches))
in map (\(x,b) -> b & branch_x .~ x)
$ zip steps branches
branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
branchToIso' start step branches =
let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
$ inits
$ ([0] ++ (map (\(b,b') ->
let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
in 1 - start - step * root) $ listToSeq branches))
in map (\(x,b) -> b & branch_x .~ x)
$ zip bx branches
sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
sortByHierarchy depth branches =
if (length branches == 1)
then branches
else concat
$ map (\branches' ->
let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
++ (sortByHierarchy (depth + 1) (snd partitions)))
$ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
$ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
sortByBirthDate :: Order -> PhyloExport -> PhyloExport
sortByBirthDate order export =
let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
branches' = case order of
Asc -> branches
Desc -> reverse branches
in export & export_branches .~ branches'
processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
processSort sort' elev export = case sort' of
ByBirthDate o -> sortByBirthDate o export
ByHierarchy -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
$ sortByHierarchy 0 (export ^. export_branches))
-----------------
-- | Metrics | --
-----------------
-- | Return the conditional probability of i knowing j
conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
conditional m i j = (findWithDefault 0 (i,j) m)
/ (m ! (j,j))
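-- A minimal usage sketch (hypothetical cooc counts, assuming Data.Map's fromList); the
-- result is cooc(0,1) / cooc(1,1):
-- >>> let m = fromList [((0,0),4),((0,1),1),((1,0),1),((1,1),2)] :: Map (Int,Int) Double
-- >>> conditional m 0 1
-- 0.5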
-- | Return the genericity score of a given ngram
genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
- (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
-- | Return the specificity score of a given ngram
specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
- (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Return the inclusion score of a given ngram
inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
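-- Note (derived from the three scores above): writing A = sum over j in l of (conditional m i j)
-- and B = sum over j in l of (conditional m j i), genericity = (A - B) / (|l| + 1),
-- specificity = (B - A) / (|l| + 1) and inclusion = (A + B) / (|l| + 1), so
-- genericity m l i == negate (specificity m l i) for any cooc matrix m.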
ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
ngramsMetrics phylo export =
over ( export_groups
. traverse )
(\g -> g & phylo_groupMeta %~ insert "genericity"
(map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "specificity"
(map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "inclusion"
(map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "frequence"
(map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
) export
branchDating :: PhyloExport -> PhyloExport
branchDating export =
over ( export_branches
. traverse )
(\b ->
let groups = sortOn fst
$ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
then acc ++ [g ^. phylo_groupPeriod]
else acc ) [] $ export ^. export_groups
periods = nub groups
birth = fst $ head' "birth" groups
age = (snd $ last' "age" groups) - birth
in b & branch_meta %~ insert "birth" [fromIntegral birth]
& branch_meta %~ insert "age" [fromIntegral age]
& branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
processMetrics :: Phylo -> PhyloExport -> PhyloExport
processMetrics phylo export = ngramsMetrics phylo
$ branchDating export
-----------------
-- | Taggers | --
-----------------
nk :: Int -> [[Int]] -> Int
nk n groups = sum
$ map (\g -> if (elem n g)
then 1
else 0) groups
tf :: Int -> [[Int]] -> Double
tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
idf :: Int -> [[Int]] -> Double
idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
findTfIdf :: [[Int]] -> [(Int,Double)]
findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
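-- A worked sketch on hypothetical term-index groups [[0,1],[0,2],[0]]:
--   nk 0 = 3, tf 0 = 3/5, idf 0 = log (3/3) = 0   -> tf-idf of term 0 is 0
--   nk 1 = 1, tf 1 = 1/5, idf 1 = log 3 ~ 1.10    -> tf-idf of term 1 ~ 0.22
-- so findTfIdf ranks the rarer terms 1 and 2 above the ubiquitous term 0.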
findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
findEmergences groups freq =
let ngrams = map _phylo_groupNgrams groups
dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
$ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
in reverse $ sortOn snd
$ map (\(n,_) -> if (member n freq)
then (n,freq ! n)
else (n,0)) emerging
mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
mostEmergentTfIdf nth freq foundations export =
over ( export_branches
. traverse )
(\b ->
let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
tfidf = findTfIdf (map _phylo_groupNgrams groups)
emergences = findEmergences groups freq
selected = if (null emergences)
then map fst $ take nth tfidf
else [fst $ head' "mostEmergentTfIdf" emergences]
++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
in b & branch_label .~ (ngramsToLabel foundations selected)) export
getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
$ take nth
$ reverse
$ sortOn snd $ zip [0..] meta
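-- e.g. (hypothetical values) getNthMostMeta 2 [0.1,0.9,0.5] [7,8,9] == [8,9],
-- the two ngrams carrying the highest meta scores, in decreasing order of score.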
mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
mostInclusive nth foundations export =
over ( export_branches
. traverse )
(\b ->
let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
lbl = ngramsToLabel foundations $ getNthMostMeta nth inc ngrams
in b & branch_label .~ lbl ) export
mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
mostEmergentInclusive nth foundations export =
over ( export_groups
. traverse )
(\g ->
let lbl = ngramsToLabel foundations
$ take nth
$ map (\(_,(_,idx)) -> idx)
$ concat
$ map (\groups -> sortOn (fst . snd) groups)
$ groupBy ((==) `on` fst) $ reverse $ sortOn fst
$ zip ((g ^. phylo_groupMeta) ! "inclusion")
$ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
in g & phylo_groupLabel .~ lbl ) export
processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
processLabels labels foundations freq export =
foldl (\export' label ->
case label of
GroupLabel tagger nth ->
case tagger of
MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
_ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
BranchLabel tagger nth ->
case tagger of
MostInclusive -> mostInclusive nth foundations export'
MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
_ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
------------------
-- | Dynamics | --
------------------
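-- Summary of the dynamics codes computed by toDynamics below (cases are tried top-down):
--   2 = decrease  : the group's period ends at the term's last occurrence, before the global end
--   0 = emerging  : the group's period starts at the term's first occurrence
--   1 = emergence : the term appears in none of the group's parents (same branch, earlier periods)
--   3 = otherwise : stable term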
toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
toDynamics n parents g m =
let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
{- decrease -}
then 2
else if ((fst prd) == (fst $ m ! n))
{- emerging -}
then 0
else if isNew
{- emergence -}
then 1
else 3
where
--------------------------------------
isNew :: Bool
isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
processDynamics :: [PhyloGroup] -> [PhyloGroup]
processDynamics groups =
map (\g ->
let parents = filter (\g' -> (g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
&& ((fst $ g ^. phylo_groupPeriod) > (fst $ g' ^. phylo_groupPeriod))) groups
in g & phylo_groupMeta %~ insert "dynamics" (map (\n -> toDynamics n parents g mapNgrams) $ g ^. phylo_groupNgrams) ) groups
where
--------------------------------------
mapNgrams :: Map Int (Date,Date)
mapNgrams = map (\dates ->
let dates' = sort dates
in (head' "dynamics" dates', last' "dynamics" dates'))
$ fromListWith (++)
$ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
$ (g ^. phylo_groupNgrams))) [] groups
-----------------
-- | horizon | --
-----------------
getGroupThr :: Double -> PhyloGroup -> Double
getGroupThr step g =
let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
breaks = (g ^. phylo_groupMeta) ! "breaks"
in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
toAncestor nbDocs diago proximity step candidates ego =
let curr = ego ^. phylo_groupAncestors
in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
$ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
$ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
$ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
headsToAncestors nbDocs diago proximity step heads acc =
if (null heads)
then acc
else
let ego = head' "headsToAncestors" heads
heads' = tail' "headsToAncestors" heads
in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
toHorizon :: Phylo -> Phylo
toHorizon phylo =
let phyloAncestor = updatePhyloGroups
level
(fromList $ map (\g -> (getGroupId g, g))
$ concat
$ tracePhyloAncestors newGroups) phylo
reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
in updatePhyloGroups level reBranched phylo
where
-- | 1) for each period
periods :: [PhyloPeriodId]
periods = getPeriodIds phylo
-- --
level :: Level
level = getLastLevel phylo
-- --
frame :: Int
frame = getTimeFrame $ timeUnit $ getConfig phylo
-- | 2) find ancestors between groups without parents
mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd ->
let groups = getGroupsFromLevelPeriods level [prd] phylo
childs = getPreviousChildIds level frame prd periods phylo
-- maybe add a better filter for non isolated ancestors
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
$ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
proximity = (phyloProximity $ getConfig phylo)
step = case getSeaElevation phylo of
Constante _ s -> s
Adaptative _ -> undefined
-- in headsToAncestors nbDocs diago proximity heads groups []
in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
$ headsToAncestors nbDocs diago proximity step heads []
) periods
-- | 3) process this task concurrently
newGroups :: [[PhyloGroup]]
newGroups = mapGroups `using` parList rdeepseq
--------------------------------------
getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
getPreviousChildIds lvl frame curr prds phylo =
concat $ map ((map fst) . _phylo_groupPeriodChilds)
$ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
---------------------
-- | phyloExport | --
---------------------
toPhyloExport :: Phylo -> DotGraph DotId
toPhyloExport phylo = exportToDot phylo
$ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
$ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
$ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
$ processMetrics phylo export
where
export :: PhyloExport
export = PhyloExport groups branches
--------------------------------------
branches :: [PhyloBranch]
branches = map (\g ->
let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
breaks = (g ^. phylo_groupMeta) ! "breaks"
canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
in PhyloBranch (g ^. phylo_groupBranchId)
canonId
seaLvl
0
(last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
0
0
"" empty)
$ map (\gs -> head' "export" gs)
$ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
$ sortOn (\g -> g ^. phylo_groupBranchId) groups
--------------------------------------
groups :: [PhyloGroup]
groups = traceExportGroups
$ processDynamics
$ getGroupsFromLevel (phyloLevel $ getConfig phylo)
$ tracePhyloInfo phylo
-- $ toHorizon phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
traceExportBranches branches = trace ("\n"
<> "-- | Export " <> show(length branches) <> " branches") branches
tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
tracePhyloInfo :: Phylo -> Phylo
tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = "
<> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
<> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
) phylo
traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
traceExportGroups groups = trace ("\n" <> "-- | Export "
<> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
<> show(length groups) <> " groups and "
<> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
) groups
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
{-|
Module : Gargantext.Core.Viz.Phylo.PhyloMaker
Description : Maker engine for rebuilding a Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Vector (Vector)
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Control.DeepSeq (NFData)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Debug.Trace (trace)
import Control.Lens hiding (Level)
import qualified Data.Vector as Vector
import qualified Data.Set as Set
------------------
-- | To Phylo | --
------------------
{-
-- TODO AD
data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
| PhyloN { _phylo'_phylo1 :: Phylo}
toPhylo' :: Phylo' -> [Document] -> TermList -> Config -> Phylo
toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo
-}
toPhylo :: Phylo -> Phylo
toPhylo phyloStep = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1))
$ traceToPhylo (phyloLevel $ getConfig phyloStep) $
if (phyloLevel $ getConfig phyloStep) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloLevel $ getConfig phyloStep)]
else phylo1
where
--------------------------------------
phyloAncestors :: Phylo
phyloAncestors =
if (findAncestors $ getConfig phyloStep)
then toHorizon phylo1
else phylo1
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 phyloStep
-- > AD to db here
--------------------------------------
--------------------
-- | To Phylo 1 | --
--------------------
toGroupsProxi :: Level -> Phylo -> Phylo
toGroupsProxi lvl phylo =
let proximity = phyloProximity $ getConfig phylo
groupsProxi = foldlWithKey (\acc pId pds ->
-- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
$ elems
$ view ( phylo_periodLevels
. traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) pds
next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods)
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromLevelPeriods lvl next phylo
docs = filterDocs (phylo ^. phylo_timeDocs) ([pId] ++ next)
diagos = filterDiago (phylo ^. phylo_timeCooc) ([pId] ++ next)
-- 2) compute the pairs in parallel
pairs = map (\(id,ngrams) ->
map (\(id',ngrams') ->
let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id'])
diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id'])
in ((id,id'),toProximity nbDocs diago proximity ngrams ngrams' ngrams')
) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets
) egos
pairs' = pairs `using` parList rdeepseq
in acc ++ (concat pairs')
) [] $ phylo ^. phylo_periods
in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi)
appendGroups :: (a -> PhyloPeriodId -> (Text,Text) -> Level -> Int -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse)
(\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel)
then
let pId = phyloLvl ^. phylo_levelPeriod
pId' = phyloLvl ^. phylo_levelPeriod'
phyloCUnit = m ! pId
in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups)
, f obj pId pId' lvl (length groups)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloCUnit)
else
phyloLvl )
phylo
cliqueToGroup :: PhyloClique -> PhyloPeriodId -> (Text,Text) -> Level -> Int -> [Cooc] -> PhyloGroup
cliqueToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
(fis ^. phyloClique_support)
(fis ^. phyloClique_weight)
(fis ^. phyloClique_sources)
(fis ^. phyloClique_nodes)
(ngramsToCooc (fis ^. phyloClique_nodes) coocs)
(1,[0]) -- branchid (lvl,[path in the branching tree])
(fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] []
toPhylo1 :: Phylo -> Phylo
toPhylo1 phyloStep = case (getSeaElevation phyloStep) of
Constante start gap -> constanteTemporalMatching start gap phyloStep
Adaptative steps -> adaptativeTemporalMatching steps phyloStep
-----------------------
-- | To Phylo Step | --
-----------------------
indexDates' :: Map (Date,Date) [Document] -> Map (Date,Date) (Text,Text)
indexDates' m = map (\docs ->
let ds = map (\d -> date' d) docs
f = if (null ds)
then ""
else toFstDate ds
l = if (null ds)
then ""
else toLstDate ds
in (f,l)) m
-- To build the first phylo step from docs and terms
toPhyloStep :: [Document] -> TermList -> Config -> Phylo
toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique (updatePeriods (indexDates' docs') phyloBase)
Adaptative _ -> toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique (updatePeriods (indexDates' docs') phyloBase)
where
--------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique]
phyloClique = toPhyloClique phyloBase docs'
--------------------------------------
docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase docs lst conf
--------------------------------------
---------------------------
-- | Frequent Item Set | --
---------------------------
-- To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique]) -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterClique keep thr f m = case keep of
False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m
-- To filter Fis with small Support
filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= thr) l
-- To filter Fis with small Clique size
filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. phyloClique_nodes) >= thr) l
-- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterCliqueByNested m =
let clq = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (f' ^. phyloClique_nodes) (f ^. phyloClique_nodes)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (f ^. phyloClique_nodes) (f' ^. phyloClique_nodes)) mem
in fMax ++ [f] ) [] l)
$ elems m
clq' = clq `using` parList rdeepseq
in fromList $ zip (keys m) clq'
-- | To transform a time map of docs into a time map of Fis with some filters
toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique]
toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested
{- \$ traceFis "Filtered by clique size" -}
$ filterClique True s' (filterCliqueBySize)
{- \$ traceFis "Filtered by support" -}
$ filterClique True s (filterCliqueBySupport)
{- \$ traceFis "Unfiltered Fis" -}
phyloClique
MaxClique s _ _ -> filterClique True s (filterCliqueBySize)
phyloClique
where
--------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique]
phyloClique = case (clique $ getConfig phylo) of
Fis _ _ ->
let fis = map (\(prd,docs) ->
case (corpusParser $ getConfig phylo) of
Csv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
in (prd, map (\f -> PhyloClique (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
_ -> let lst = toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
in (prd, map (\f -> PhyloClique (Set.toList $ fst f) (snd f) prd Nothing []) lst)
)
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
MaxClique _ thr filterType ->
let mcl = map (\(prd,docs) ->
let cooc = map round
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs
mcl' = mcl `using` parList rdeepseq
in fromList mcl'
--------------------------------------
-- dev viz graph maxClique getMaxClique
----------------------
-- | Cooccurrence | --
----------------------
-- To transform the docs into a time map of cooccurrence matrices
docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
docsToTimeScaleCooc docs fdt =
let mCooc = fromListWith sumCooc
$ map (\(_d,l) -> (_d, listToMatrix l))
$ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs
mCooc' = fromList
$ map (\t -> (t,empty))
$ toTimeScale (map date docs) 1
in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n")
$ unionWith sumCooc mCooc mCooc'
-----------------------
-- | to Phylo Base | --
-----------------------
-- TODO anoe
groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
groupDocsByPeriodRec f prds docs acc =
if ((null prds) || (null docs))
then acc
else
let prd = head' "groupBy" prds
docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs
in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc)
-- To group a list of Documents by fixed periods
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = map (inPeriode f docs') pds
periods' = periods `using` parList rdeepseq
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length pds) <> " periods" <> "\n")
$ fromList $ zip pds periods'
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [[t]] -> (b, b) -> [t]
inPeriode f' h (start,end) =
concat $ fst $ partition (\d -> f' (head' "inPeriode" d) >= start && f' (head' "inPeriode" d) <= end) h
-- To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
in trace ("\n" <> "-- | Group " <> show(length es) <> " docs by " <> show(length pds) <> " periods" <> "\n")
$ fromList $ zip pds periods'
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double
docsToTermFreq docs fdt =
let nbDocs = fromIntegral $ length docs
freqs = map (/(nbDocs))
$ fromList
$ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs
sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs
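-- Note (applies to docsToTermFreq above and docsToLastTermFreq below): the frequencies are
-- normalised twice (by the number of docs, then by their own sum), so each resulting
-- Map sums to 1 over all term indexes.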
docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
docsToLastTermFreq n docs fdt =
let last = take n $ reverse $ sort $ map date docs
nbDocs = fromIntegral $ length $ filter (\d -> elem (date d) last) docs
freqs = map (/(nbDocs))
$ fromList
$ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs
sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs
-- To count the number of docs by unit of time
docsToTimeScaleNb :: [Document] -> Map Date Double
docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n")
$ unionWith (+) time docs'
initPhyloLevels :: Int -> PhyloPeriodId -> Map PhyloLevelId PhyloLevel
initPhyloLevels lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId ("","") lvl empty)) [1..lvlMax]
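-- e.g. (hypothetical period) initPhyloLevels 2 (2000,2003) creates two empty PhyloLevels,
-- keyed by ((2000,2003),1) and ((2000,2003),2).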
-- To init the basic elements of a Phylo
toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
params = defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n")
$ Phylo foundations
docsSources
(docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs)
(docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots))
empty
empty
params
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloLevels 1 prd))) periods)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
{-|
Module : Gargantext.Core.Viz.Phylo.PhyloTools
Description : Module dedicated to all the tools needed for making a Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, nubBy, group)
import Data.Set (Set, disjoint)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.String (String)
import Data.Text (Text,unpack)
import Prelude (floor,read)
import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
import Text.Printf
import Debug.Trace (trace)
import Control.Lens hiding (Level)
import qualified Data.Vector as Vector
import qualified Data.List as List
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.Text as Text
------------
-- | Io | --
------------
-- | To print an important message as an IO()
printIOMsg :: String -> IO ()
printIOMsg msg =
putStrLn ( "\n"
<> "------------"
<> "\n"
<> "-- | " <> msg <> "\n" )
-- | To print a comment as an IO()
printIOComment :: String -> IO ()
printIOComment cmt =
putStrLn ( "\n" <> cmt <> "\n" )
--------------
-- | Misc | --
--------------
-- truncate' :: Double -> Int -> Double
-- truncate' x n = (fromIntegral (floor (x * t))) / t
-- where t = 10^n
truncate' :: Double -> Int -> Double
truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
where
--------------
t :: Double
t = 10 ^n
getInMap :: Int -> Map Int Double -> Double
getInMap k m =
if (member k m)
then m ! k
else 0
roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
roundToStr = printf "%0.*f"
countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l
dropByIdx :: Int -> [a] -> [a]
dropByIdx k l = take k l ++ drop (k+1) l
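-- Minimal usage sketches for the helpers above (hypothetical values):
-- >>> truncate' 3.14159 2
-- 3.14
-- >>> countSup 2 [1,2,3,4]
-- 2
-- >>> dropByIdx 1 [10,20,30]
-- [10,30]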
elemIndex' :: Eq a => a -> [a] -> Int
elemIndex' e l = case (List.elemIndex e l) of
Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
Just i -> i
commonPrefix :: Eq a => [a] -> [a] -> [a] -> [a]
commonPrefix lst lst' acc =
if (null lst || null lst')
then acc
else if (head' "commonPrefix" lst == head' "commonPrefix" lst')
then commonPrefix (tail lst) (tail lst') (acc ++ [head' "commonPrefix" lst])
else acc
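-- A minimal sketch (hypothetical lists, accumulator starts empty):
-- >>> commonPrefix [1,2,3] [1,2,4] []
-- [1,2]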
---------------------
-- | Foundations | --
---------------------
-- | Is this Ngrams a Foundations Root ?
isRoots :: Ngrams -> Vector Ngrams -> Bool
isRoots n ns = Vector.elem n ns
-- | To transform a list of ngrams into a list of foundation indexes
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
-- | To transform a list of sources into a list of source indexes
sourcesToIdx :: [Text] -> Vector Text -> [Int]
sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text
ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
idxToLabel :: [Int] -> String
idxToLabel l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
idxToLabel' :: [Double] -> String
idxToLabel' l = List.unwords $ tail' "idxToLabel" $ concat $ map (\n -> ["|",show n]) l
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText :: Vector Ngrams -> [Int] -> [Text]
ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
--------------
-- | Time | --
--------------
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat)
$ map (\(d,d') -> [d..d']) periods
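-- e.g. (hypothetical overlapping periods):
-- >>> periodsToYears [(2000,2002),(2002,2004)]
-- fromList [2000,2001,2002,2003,2004]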
findBounds :: [Date] -> (Date,Date)
findBounds dates =
let dates' = sort dates
in (head' "findBounds" dates', last' "findBounds" dates')
toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
toPeriods dates p s =
let (start,end) = findBounds dates
in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
$ chunkAlong p s [start .. end]
toFstDate :: [Text] -> Text
toFstDate ds = snd
$ head' "firstDate"
$ sortOn fst
$ map (\d ->
let d' = read (filter (\c -> c /= '-') $ unpack d)::Int
in (d',d)) ds
toLstDate :: [Text] -> Text
toLstDate ds = snd
$ head' "firstDate"
$ reverse
$ sortOn fst
$ map (\d ->
let d' = read (filter (\c -> c /= '-') $ unpack d)::Int
in (d',d)) ds
getTimeScale :: Phylo -> [Char]
getTimeScale p = case (timeUnit $ getConfig p) of
Year _ _ _ -> "year"
Month _ _ _ -> "month"
Week _ _ _ -> "week"
Day _ _ _ -> "day"
-- | Get a regular & ascending timeScale from a given list of dates
toTimeScale :: [Date] -> Int -> [Date]
toTimeScale dates step =
let (start,end) = findBounds dates
in [start, (start + step) .. end]
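-- A minimal sketch (hypothetical dates):
-- >>> toTimeScale [2003,2000,2005] 1
-- [2000,2001,2002,2003,2004,2005]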
getTimeStep :: TimeUnit -> Int
getTimeStep time = case time of
Year _ s _ -> s
Month _ s _ -> s
Week _ s _ -> s
Day _ s _ -> s
getTimePeriod :: TimeUnit -> Int
getTimePeriod time = case time of
Year p _ _ -> p
Month p _ _ -> p
Week p _ _ -> p
Day p _ _ -> p
getTimeFrame :: TimeUnit -> Int
getTimeFrame time = case time of
Year _ _ f -> f
Month _ _ f -> f
Week _ _ f -> f
Day _ _ f -> f
-------------
-- | Fis | --
-------------
-- | To find if l' is nested in l
isNested :: Eq a => [a] -> [a] -> Bool
isNested l l'
| null l' = True
| length l' > length l = False
| (union l l') == l = True
| otherwise = False
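-- Minimal sketches (hypothetical index lists):
-- >>> isNested [1,2,3] [2,3]
-- True
-- >>> isNested [1,2] [2,3]
-- False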
-- | To filter Fis with small Support while keeping non-empty Periods
keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
keepFilled f thr l = if (null $ f thr l) && (not $ null l)
then keepFilled f (thr - 1) l
else f thr l
traceClique :: Map (Date, Date) [PhyloClique] -> String
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where
--------------------------------------
cliques :: [Double]
cliques = sort $ map (fromIntegral . length . _phyloClique_nodes) $ concat $ elems mFis
--------------------------------------
traceSupport :: Map (Date, Date) [PhyloClique] -> String
traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
where
--------------------------------------
supports :: [Double]
supports = sort $ map (fromIntegral . _phyloClique_support) $ concat $ elems mFis
--------------------------------------
traceFis :: [Char] -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> (traceSupport mFis) <> "\n"
<> "Nb Ngrams : " <> (traceClique mFis) <> "\n" ) mFis
----------------
-- | Clique | --
----------------
getCliqueSupport :: Clique -> Int
getCliqueSupport unit = case unit of
Fis s _ -> s
MaxClique _ _ _ -> 0
getCliqueSize :: Clique -> Int
getCliqueSize unit = case unit of
Fis _ s -> s
MaxClique s _ _ -> s
--------------
-- | Cooc | --
--------------
listToCombi' :: [a] -> [(a,a)]
listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
listToEqual' :: Eq a => [a] -> [(a,a)]
listToEqual' l = [(x,y) | x <- l, y <- l, x == y]
listToKeys :: Eq a => [a] -> [(a,a)]
listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
listToMatrix :: [Int] -> Map (Int,Int) Double
listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
listToMatrix' lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
listToSeq :: Eq a => [a] -> [(a,a)]
listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ]
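-- Minimal sketches for the pairing helpers above (hypothetical values):
-- >>> listToCombi' [1,2,3]
-- [(1,2),(1,3),(2,3)]
-- >>> listToKeys [1,2]
-- [(1,2),(1,1),(2,2)]
-- >>> listToSeq [1,2,3]
-- [(1,2),(2,3)]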
sumCooc :: Cooc -> Cooc -> Cooc
sumCooc cooc cooc' = unionWith (+) cooc cooc'
getTrace :: Cooc -> Double
getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
coocToDiago :: Cooc -> Cooc
coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
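-- e.g. on a hypothetical cooc matrix: getTrace (fromList [((0,0),2),((0,1),5),((1,1),3)]) == 5
-- and coocToDiago on the same matrix keeps only the diagonal entries ((0,0),2) and ((1,1),3).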
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc ngrams coocs =
let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
pairs = listToKeys ngrams
in filterWithKey (\k _ -> elem k pairs) cooc
--------------------
-- | PhyloGroup | --
--------------------
getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupLevel), g ^. phylo_groupIndex)
idToPrd :: PhyloGroupId -> PhyloPeriodId
idToPrd id = (fst . fst) id
groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup]
groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
getPeriodPointers fil g =
case fil of
ToChilds -> g ^. phylo_groupPeriodChilds
ToParents -> g ^. phylo_groupPeriodParents
filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local =
case proximity of
WeightedLogJaccard _ -> local >= thr
WeightedLogSim _ -> local >= thr
Hamming -> undefined
getProximityName :: Proximity -> String
getProximityName proximity =
case proximity of
WeightedLogJaccard _ -> "WLJaccard"
WeightedLogSim _ -> "WeightedLogSim"
Hamming -> "Hamming"
---------------
-- | Phylo | --
---------------
addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
addPointers fil pty pointers g =
case pty of
TemporalPointer -> case fil of
ToChilds -> g & phylo_groupPeriodChilds .~ pointers
ToParents -> g & phylo_groupPeriodParents .~ pointers
LevelPointer -> case fil of
ToChilds -> g & phylo_groupLevelChilds .~ pointers
ToParents -> g & phylo_groupLevelParents .~ pointers
getPeriodIds :: Phylo -> [(Date,Date)]
getPeriodIds phylo = sortOn fst
$ keys
$ phylo ^. phylo_periods
getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupLevelParents
getLastLevel :: Phylo -> Level
getLastLevel phylo = last' "lastLevel" $ getLevels phylo
getLevels :: Phylo -> [Level]
getLevels phylo = nub
$ map snd
$ keys $ view ( phylo_periods
. traverse
. phylo_periodLevels ) phylo
getSeaElevation :: Phylo -> SeaElevation
getSeaElevation phylo = seaElevation (getConfig phylo)
getConfig :: Phylo -> Config
getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
setConfig :: Config -> Phylo -> Phylo
setConfig config phylo = phylo
& phylo_param .~ (PhyloParam
((phylo ^. phylo_param) ^. phyloParam_version)
((phylo ^. phylo_param) ^. phyloParam_software)
config)
-- & phylo_param & phyloParam_config & phyloParam_config .~ config
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources)
phyloToLastBranches :: Phylo -> [[PhyloGroup]]
phyloToLastBranches phylo = elems
$ fromListWith (++)
$ map (\g -> (g ^. phylo_groupBranchId, [g]))
$ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
getGroupsFromLevel lvl phylo =
elems $ view ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) phylo
getGroupsFromLevelPeriods :: Level -> [PhyloPeriodId] -> Phylo -> [PhyloGroup]
getGroupsFromLevelPeriods lvl periods phylo =
elems $ view ( phylo_periods
. traverse
. filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) phylo
getGroupsFromPeriods :: Level -> Map PhyloPeriodId PhyloPeriod -> [PhyloGroup]
getGroupsFromPeriods lvl periods =
elems $ view ( traverse
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) periods
updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
updatePhyloGroups lvl m phylo =
over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups
. traverse
) (\g ->
let id = getGroupId g
in
if member id m
then m ! id
else g ) phylo
updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
updatePeriods periods' phylo =
over (phylo_periods . traverse)
(\prd ->
let prd' = periods' ! (prd ^. phylo_periodPeriod)
lvls = map (\lvl -> lvl & phylo_levelPeriod' .~ prd') $ prd ^. phylo_periodLevels
in prd & phylo_periodPeriod' .~ prd'
& phylo_periodLevels .~ lvls
) phylo
traceToPhylo :: Level -> Phylo -> Phylo
traceToPhylo lvl phylo =
trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
<> show (length $ getGroupsFromLevel lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo
--------------------
-- | Clustering | --
--------------------
mergeBranchIds :: [[Int]] -> [Int]
mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
where
-- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids
mostFreq' :: [[Int]] -> [[Int]]
mostFreq' ids' =
let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
sup = (fst . maximum) groupIds
in map snd $ filter (\gIds -> fst gIds == sup) groupIds
mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
mergeMeta bId groups =
let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches groups =
{- run the related component algorithm -}
let egos = map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents)
++ (map fst $ g ^. phylo_groupPeriodChilds)
++ (map fst $ g ^. phylo_groupAncestors)) $ elems groups
graph = relatedComponents egos
{- update each group's branch id -}
in map (\ids ->
let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
in map (\g -> g & phylo_groupBranchId %~ (\(lvl,_) -> (lvl,bId))) groups') graph
relatedComponents :: Ord a => [[a]] -> [[a]]
relatedComponents graph = foldl' (\acc groups ->
if (null acc)
then acc ++ [groups]
else
let acc' = partition (\groups' -> disjoint (Set.fromList groups') (Set.fromList groups)) acc
in (fst acc') ++ [nub $ concat $ (snd acc') ++ [groups]]) [] graph
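-- A minimal sketch (hypothetical adjacency lists):
-- >>> relatedComponents [[1,2],[2,3],[4]]
-- [[1,2,3],[4]]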
toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
toRelatedComponents nodes edges =
let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo =
trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo
traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo =
trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo
-------------------
-- | Proximity | --
-------------------
getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of
WeightedLogJaccard s -> s
WeightedLogSim s -> s
Hamming -> undefined
----------------
-- | Branch | --
----------------
intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
intersectInit acc lst lst' =
if (null lst) || (null lst')
then acc
else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
else acc
branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
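-- e.g. for hypothetical branch ids (1,[0,1,2]) and (1,[0,1,3]) with thrInit 0.1 and thrStep 0.05,
-- the shared path prefix [0,1] has length 2, hence a proximity of 0.1 + 0.05 * 2 = 0.2.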
ngramsInBranches :: [[PhyloGroup]] -> [Int]
ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
traceMatchSuccess thr qua qua' nextBranches =
trace ( "\n" <> "-- local branches : " <> (init $ show ((init . init . snd)
$ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length nextBranches) <> ")]"
<> " | " <> show ((length . concat . concat) nextBranches) <> " groups" <> "\n"
<> " - splited with success in " <> show (map length nextBranches) <> " sub-branches" <> "\n"
<> " - for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " < " <> show(qua') <> ")\n" ) nextBranches
traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchFailure thr qua qua' branches =
trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n"
<> " - split with failure for the local threshold " <> show (thr) <> " ( quality : " <> show (qua) <> " > " <> show(qua') <> ")\n"
) branches
traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchNoSplit branches =
trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n"
<> " - unable to split in smaller branches" <> "\n"
) branches
traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchLimit branches =
trace ( "\n" <> "-- local branches : " <> (init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n"
<> " - unable to increase the threshold above 1" <> "\n"
) branches
traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
traceMatchEnd groups =
trace ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
<> " branches and " <> show (length groups) <> " groups" <> "\n") groups
traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
traceGroupsProxi m =
trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
{-|
Module : Gargantext.Core.Viz.Phylo.SynchronicClustering
Description : Module dedicated to the adaptative synchronic clustering of a Phylo.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.SynchronicClustering where
import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Control.Monad (sequence)
-- import Debug.Trace (trace)
import qualified Data.Map as Map
-------------------------
-- | New Level Maker | --
-------------------------
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
in PhyloGroup (fst $ fst id) (_phylo_groupPeriod' $ head' "mergeGroups" childs)
(snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs)
(fmap sum $ sequence
$ map _phylo_groupWeight childs)
(concat $ map _phylo_groupSources childs)
ngrams
(ngramsToCooc ngrams coocs)
((snd $ fst id),bId)
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
(mergeAncestors $ concat $ map _phylo_groupAncestors childs)
where
--------------------
bId :: [Int]
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) childs
--------------------
updatePointers :: [Pointer] -> [Pointer]
updatePointers pointers = map (\(pId,w) -> (mapIds ! pId,w)) pointers
--------------------
mergeAncestors :: [Pointer] -> [Pointer]
mergeAncestors pointers = Map.toList $ fromListWith max pointers
addPhyloLevel :: Level -> Phylo -> Phylo
addPhyloLevel lvl phylo =
over ( phylo_periods . traverse )
(\phyloPrd -> phyloPrd & phylo_periodLevels
%~ (insert (phyloPrd ^. phylo_periodPeriod, lvl)
(PhyloLevel (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriod') lvl empty))) phylo
toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
toNextLevel' phylo groups =
let curLvl = getLastLevel phylo
oldGroups = fromList $ map (\g -> (getGroupId g, getLevelParentId g)) groups
newGroups = concat $ groupsToBranches
$ fromList $ map (\g -> (getGroupId g, g))
$ foldlWithKey (\acc id groups' ->
-- 4) create the parent group
let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups'
in acc ++ [parent]) []
-- 3) group the current groups by parentId
$ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
in traceSynchronyEnd
$ over ( phylo_periods . traverse . phylo_periodLevels . traverse
-- 6) update each period at curLvl + 1
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
-- 7) by adding the parents
(\phyloLvl ->
if member (phyloLvl ^. phylo_levelPeriod) newPeriods
then phyloLvl & phylo_levelGroups
.~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod))
else phyloLvl)
-- 2) add the curLvl + 1 phyloLevel to the phylo
$ addPhyloLevel (curLvl + 1)
-- 1) update the current groups (with level parent pointers) in the phylo
$ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
--------------------
-- | Clustering | --
--------------------
toPairs :: SynchronyStrategy -> [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs strategy groups = case strategy of
MergeRegularGroups -> pairs
$ filter (\g -> all (== 3) $ (g ^. phylo_groupMeta) ! "dynamics") groups
MergeAllGroups -> pairs groups
where
pairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
pairs gs = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) (listToCombi' gs)
toDiamonds :: [PhyloGroup] -> [[PhyloGroup]]
toDiamonds groups = foldl' (\acc groups' ->
acc ++ ( elems
$ Map.filter (\v -> length v > 1)
$ fromListWith (++)
$ foldl' (\acc' g ->
acc' ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodChilds)) [] groups')) []
$ elems
$ Map.filter (\v -> length v > 1)
$ fromListWith (++)
$ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups
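-- In short: groups are first bucketed by the parent pointers they share, then each
-- bucket is re-bucketed by shared child pointers; only buckets of size > 1 are kept,
-- yielding the "diamond" motifs consumed by the ByProximityDistribution strategy.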
groupsToEdges :: Proximity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox sync nbDocs diago groups =
case sync of
ByProximityThreshold thr sens _ strat ->
filter (\(_,w) -> w >= thr)
$ toEdges sens
$ toPairs strat groups
ByProximityDistribution sens strat ->
let diamonds = sortOn snd
$ toEdges sens $ concat
$ map (\gs -> toPairs strat gs) $ toDiamonds groups
in take (div (length diamonds) 2) diamonds
where
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
toEdges sens edges =
case prox of
WeightedLogJaccard _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' (sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
WeightedLogSim _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' (1 / sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined
toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), child ^. phylo_groupIndex)
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs diagos branch =
-- 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems)
$ mapWithKey (\prd groups ->
-- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd]
edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
in map (\comp ->
-- 4) add to each group its future level parent group
let parentId = toParentId (head' "parentId" comp)
in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
-- 3) reduce the graph as a set of related components
$ toRelatedComponents groups edges) periods
adjustClustering :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
adjustClustering sync branches = case sync of
ByProximityThreshold _ _ scope _ -> case scope of
SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (last' "adjustClustering" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "adjustClustering" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches
AllBranches -> [concat branches]
ByProximityDistribution _ _ -> branches
levelUpAncestors :: [PhyloGroup] -> [PhyloGroup]
levelUpAncestors groups =
-- 1) create an associative map of (old,new) ids
let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupLevelParents))) groups
in map (\g ->
let id' = ids' ! (getGroupId g)
ancestors = g ^. phylo_groupAncestors
-- 2) level up the ancestors ids and filter the ones that will be merged
ancestors' = filter (\(id,_) -> id /= id') $ map (\(id,w) -> (ids' ! id,w)) ancestors
in g & phylo_groupAncestors .~ ancestors'
) groups
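-- | One synchronic clustering pass: take the branches of the last level, annotate
--   their dynamics, merge the groups of a same period that are close enough into
--   level-(n+1) parent groups, then add this new level to the phylo.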
synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo =
let prox = phyloProximity $ getConfig phylo
sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs
diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics
$ adjustClustering sync
$ phyloToLastBranches
$ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq
in toNextLevel' phylo $ levelUpAncestors $ concat newBranches'
-- synchronicDistance :: Phylo -> Level -> String
-- synchronicDistance phylo lvl =
-- foldl' (\acc branch ->
-- acc <> (foldl' (\acc' period ->
-- acc' <> let prox = phyloProximity $ getConfig phylo
-- sync = phyloSynchrony $ getConfig phylo
-- docs = _phylo_timeDocs phylo
-- prd = _phylo_groupPeriod $ head' "distance" period
-- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
-- ((sum . elems) $ restrictKeys docs $ periodsToYears [_phylo_groupPeriod $ head' "distance" period]) period
-- in foldl' (\mem (_,w) ->
-- mem <> show (prd)
-- <> "\t"
-- <> show (w)
-- <> "\n"
-- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
TemporalMatching.hs 0000664 0000000 0000000 00000105454 14124644201 0033523 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Phylo {-|
Module : Gargantext.Core.Viz.Phylo.TemporalMatching
Description : Module dedicated to the adaptative temporal matching of a Phylo.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.TemporalMatching where
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or, sort, (!!))
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Prelude (floor,tan,pi)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Debug.Trace (trace)
import Text.Printf
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as Vector
-------------------
-- | Proximity | --
-------------------
-- | To compute a Jaccard similarity from a precomputed intersection and union
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
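-- Illustration (assuming the intersection and union are already computed):
-- jaccard [1,2] [1,2,3,4] == 0.5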
-- | Process the inverse sumLog
sumInvLog' :: Double -> Double -> [Double] -> Double
sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2))))) 0 diago
-- | Process the sumLog
sumLog' :: Double -> Double -> [Double] -> Double
sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2)))) 0 diago
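-- Note: for a sensibility s in (0,1), the offset 1/tan(s*pi/2) shrinks as s grows,
-- so each term tends to log(occ)/log(nb); as s approaches 0 the offset blows up and
-- every term flattens towards 1.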
weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogJaccard' sens nbDocs diago ngrams ngrams'
| null ngramsInter = 0
| ngramsInter == ngramsUnion = 1
| sens == 0 = jaccard ngramsInter ngramsUnion
| sens > 0 = (sumInvLog' sens nbDocs diagoInter) / (sumInvLog' sens nbDocs diagoUnion)
| otherwise = (sumLog' sens nbDocs diagoInter) / (sumLog' sens nbDocs diagoUnion)
where
--------------------------------------
ngramsInter :: [Int]
ngramsInter = intersect ngrams ngrams'
--------------------------------------
ngramsUnion :: [Int]
ngramsUnion = union ngrams ngrams'
--------------------------------------
diagoInter :: [Double]
diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
--------------------------------------
diagoUnion :: [Double]
diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
--------------------------------------
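-- A quick sanity check of weightedLogJaccard' (with sens = 0 it falls back to a
-- plain Jaccard on the ngram sets):
-- weightedLogJaccard' 0 10 (Map.fromList [(1,1.0),(2,1.0),(3,1.0)]) [1,2] [2,3] == 1/3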
-- | Process the weighted similarity between clusters. Adapted from Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of research topics with NEViewer: a new method based on dynamic co-word networks. Scientometrics 101, 1253–1271. https://doi.org/10.1007/s11192-014-1347-y (log added in the formula + pair comparison)
-- tests not conclusive
weightedLogSim' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
| null ngramsInter = 0
| ngramsInter == ngramsUnion = 1
| sens == 0 = jaccard ngramsInter ngramsUnion
| sens > 0 = (sumInvLog' sens nbDocs diagoInter) / minimum [(sumInvLog' sens nbDocs diagoEgo),(sumInvLog' sens nbDocs diagoTarget)]
| otherwise = (sumLog' sens nbDocs diagoInter) / minimum [(sumLog' sens nbDocs diagoEgo),(sumLog' sens nbDocs diagoTarget)]
where
--------------------------------------
ngramsInter :: [Int]
ngramsInter = intersect ego_ngrams target_ngrams
--------------------------------------
ngramsUnion :: [Int]
ngramsUnion = union ego_ngrams target_ngrams
--------------------------------------
diagoInter :: [Double]
diagoInter = elems $ restrictKeys diago (Set.fromList ngramsInter)
--------------------------------------
diagoEgo :: [Double]
diagoEgo = elems $ restrictKeys diago (Set.fromList ego_ngrams)
--------------------------------------
diagoTarget :: [Double]
diagoTarget = elems $ restrictKeys diago (Set.fromList target_ngrams)
--------------------------------------
toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
-- | To process the proximity between a current group and a pair of target groups using the adapted Wang et al. similarity
toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
case proximity of
WeightedLogJaccard sens ->
let pairNgrams = if targetNgrams == targetNgrams'
then targetNgrams
else union targetNgrams targetNgrams'
in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
WeightedLogSim sens ->
let pairNgrams = if targetNgrams == targetNgrams'
then targetNgrams
else union targetNgrams targetNgrams'
in weightedLogSim' sens nbDocs diago egoNgrams pairNgrams
Hamming -> undefined
------------------------
-- | Local Matching | --
------------------------
findLastPeriod :: Filiation -> [PhyloPeriodId] -> PhyloPeriodId
findLastPeriod fil periods = case fil of
ToParents -> head' "findLastPeriod" (sortOn fst periods)
ToChilds -> last' "findLastPeriod" (sortOn fst periods)
-- | To filter pairs of candidates related to old pointers periods
removeOldPointers :: [Pointer] -> Filiation -> Double -> Proximity -> PhyloPeriodId
-> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
-> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
removeOldPointers oldPointers fil thr prox prd pairs
| null oldPointers = pairs
| null (filterPointers prox thr oldPointers) =
let lastMatchedPrd = findLastPeriod fil (map (fst . fst . fst) oldPointers)
in if lastMatchedPrd == prd
then []
else filter (\((id,_),(id',_)) ->
case fil of
ToParents -> (((fst . fst . fst) id ) < (fst lastMatchedPrd))
|| (((fst . fst . fst) id') < (fst lastMatchedPrd))
ToChilds -> (((fst . fst . fst) id ) > (fst lastMatchedPrd))
|| (((fst . fst . fst) id') > (fst lastMatchedPrd))) pairs
| otherwise = []
makePairs' :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [PhyloPeriodId] -> [Pointer] -> Filiation -> Double -> Proximity
-> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
if (null periods)
then []
else removeOldPointers oldPointers fil thr prox lastPrd
{- at least one of the pair candidates should be from the last added period -}
$ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
$ listToKeys
$ filter (\(id,ngrams) ->
let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
in (toProximity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
) candidates
where
lastPrd :: PhyloPeriodId
lastPrd = findLastPeriod fil periods
filterPointers :: Proximity -> Double -> [Pointer] -> [Pointer]
filterPointers proxi thr pts = filter (\(_,w) -> filterProximity proxi thr w) pts
filterPointers' :: Proximity -> Double -> [(Pointer,[Int])] -> [(Pointer,[Int])]
filterPointers' proxi thr pts = filter (\((_,w),_) -> filterProximity proxi thr w) pts
reduceDiagos :: Map Date Cooc -> Map Int Double
reduceDiagos diagos = mapKeys (\(k,_) -> k)
$ foldl (\acc diago -> unionWith (+) acc diago) empty (elems diagos)
filterPointersByPeriod :: Filiation -> [(Pointer,[Int])] -> [Pointer]
filterPointersByPeriod fil pts =
let pts' = sortOn (fst . fst . fst . fst) pts
inf = (fst . fst . fst . fst) $ head' "filterPointersByPeriod" pts'
sup = (fst . fst . fst . fst) $ last' "filterPointersByPeriod" pts'
in map fst
$ nubBy (\pt pt' -> snd pt == snd pt')
$ filter (\pt -> ((fst . fst . fst . fst) pt == inf) || ((fst . fst . fst . fst) pt == sup))
$ case fil of
ToParents -> reverse pts'
ToChilds -> pts'
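-- | Match a group (the ego) against its candidates, one growing time frame at a
--   time: keep the first frame in which at least one pair of candidates reaches the
--   proximity threshold, then keep only the best-scoring pointers of that frame.
--   If the previously found pointers still pass the threshold they are kept as is.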
phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
-> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
if (null $ filterPointers proxi thr oldPointers)
{- let's find new pointers -}
then if null nextPointers
then []
else filterPointersByPeriod fil
$ head' "phyloGroupMatching"
-- Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> (snd . fst) pt == (snd . fst) pt')
$ reverse $ sortOn (snd . fst) $ head' "pointers" nextPointers
-- Find the first time frame where at least one pointer satisfies the proximity threshold
else oldPointers
where
nextPointers :: [[(Pointer,[Int])]]
nextPointers = take 1
$ dropWhile (null)
{- for each time frame, process the proximity on relevant pairs of targeted groups -}
$ scanl (\acc groups ->
let periods = nub $ map (fst . fst . fst) $ concat groups
nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
diago = reduceDiagos
$ filterDiago diagos ([(fst . fst) id] ++ periods)
{- important: nbDocs and diago are resized inside makePairs' -}
pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
in acc ++ ( filterPointers' proxi thr
$ concat
$ map (\(c,c') ->
{- process the proximity between the current group and a pair of candidates -}
let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
in if ((c == c') || (snd c == snd c'))
then [((fst c,proximity),snd c)]
else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) pairs )) []
$ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...]
filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
filterDocs d pds = restrictKeys d $ periodsToYears pds
filterDiago :: Map Date Cooc -> [PhyloPeriodId] -> Map Date Cooc
filterDiago diago pds = restrictKeys diago $ periodsToYears pds
-----------------------------
-- | Matching Processing | --
-----------------------------
getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
getNextPeriods fil max' pId pIds =
case fil of
ToChilds -> take max' $ (tail . snd) $ splitAt (elemIndex' pId pIds) pIds
ToParents -> take max' $ (reverse . fst) $ splitAt (elemIndex' pId pIds) pIds
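-- For instance, with periods [p1,p2,p3,p4] and pId = p2:
--   getNextPeriods ToChilds  2 p2 [p1,p2,p3,p4] == [p3,p4]
--   getNextPeriods ToParents 2 p2 [p1,p2,p3,p4] == [p1]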
getCandidates :: PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
getCandidates ego targets =
map (\groups' ->
filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')
) groups') targets
matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
matchGroupsToGroups frame periods proximity thr docs coocs groups =
let groups' = groupByField _phylo_groupPeriod groups
in foldl' (\acc prd ->
let -- 1) find the parents/childs matching periods
periodsPar = getNextPeriods ToParents frame prd periods
periodsChi = getNextPeriods ToChilds frame prd periods
-- 2) find the parents/childs matching candidates
candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
-- 3) find the parents/child number of docs by years
docsPar = filterDocs docs ([prd] ++ periodsPar)
docsChi = filterDocs docs ([prd] ++ periodsChi)
-- 4) find the parents/child diago by years
diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsChi)
-- 5) match in parallel all the groups (egos) to their possible candidates
egos = map (\ego ->
let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
pointersChi = phyloGroupMatching (getCandidates ego candidatesChi) ToChilds proximity docsChi diagoChi
thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
in addPointers ToChilds TemporalPointer pointersChi
$ addPointers ToParents TemporalPointer pointersPar ego)
$ findWithDefault [] prd groups'
egos' = egos `using` parList rdeepseq
in acc ++ egos'
) [] periods
-----------------------
-- | Phylo Quality | --
-----------------------
relevantBranches :: Int -> [[PhyloGroup]] -> [[PhyloGroup]]
relevantBranches term branches =
filter (\groups -> (any (\group -> elem term $ group ^. phylo_groupNgrams) groups)) branches
accuracy :: Int -> [(Date,Date)] -> [PhyloGroup] -> Double
-- The accuracy of a branch relative to a term x is computed only over the periods in which some cluster mentioning x exists in the phylomemy
accuracy x periods bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk')
/ (fromIntegral $ length bk'))
where
bk' :: [PhyloGroup]
bk' = filter (\g -> elem (g ^. phylo_groupPeriod) periods) bk
recall :: Int -> [PhyloGroup] -> [[PhyloGroup]] -> Double
recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) bk)
/ (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
fScore :: Double -> Int -> [(Date,Date)] -> [PhyloGroup] -> [[PhyloGroup]] -> Double
fScore lambda x periods bk bx =
let rec = recall x bk bx
acc = accuracy x periods bk
in ((1 + lambda ** 2) * acc * rec)
/ (((lambda ** 2) * acc + rec))
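-- i.e. the usual weighted F-measure:
--   fScore lambda = ((1 + lambda^2) * accuracy * recall) / (lambda^2 * accuracy + recall)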
wk :: [PhyloGroup] -> Double
wk bk = fromIntegral $ length bk
toPhyloQuality' :: Double -> Map Int Double -> [[PhyloGroup]] -> Double
toPhyloQuality' lambda freq branches =
if (null branches)
then 0
else sum
$ map (\i ->
let bks = relevantBranches i branches
periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem i $ g ^. phylo_groupNgrams) $ concat bks
in (freq ! i) * (sum $ map (\bk -> ((wk bk) / (sum $ map wk bks)) * (fScore lambda i periods bk bks)) bks))
$ keys freq
toRecall :: Map Int Double -> [[PhyloGroup]] -> Double
toRecall freq branches =
if (null branches)
then 0
else sum
$ map (\x ->
let px = freq ! x
bx = relevantBranches x branches
wks = sum $ map wk bx
in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (recall x bk bx)) bx))
$ keys freq
where
pys :: Double
pys = sum (elems freq)
toAccuracy :: Map Int Double -> [[PhyloGroup]] -> Double
toAccuracy freq branches =
if (null branches)
then 0
else sum
$ map (\x ->
let px = freq ! x
bx = relevantBranches x branches
-- | periods containing x
periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx
wks = sum $ map wk bx
in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (accuracy x periods bk)) bx))
$ keys freq
where
pys :: Double
pys = sum (elems freq)
-- | here we do the average of all the local f_scores
toPhyloQuality :: Double -> Double -> Map Int Double -> [[PhyloGroup]] -> Double
toPhyloQuality fdt lambda freq branches =
if (null branches)
then 0
else sum
$ map (\x ->
-- let px = freq ! x
let bx = relevantBranches x branches
-- | periods containing x
periods = nub $ map _phylo_groupPeriod $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx
wks = sum $ map wk bx
-- in (px / pys) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x bk bx)) bx))
-- in (1 / fdt) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore beta x periods bk bx)) bx))
in (1 / fdt) * (sum $ map (\bk -> ((wk bk) / wks) * (fScore (tan (lambda * pi / 2)) x periods bk bx)) bx))
$ keys freq
-- where
-- pys :: Double
-- pys = sum (elems freq)
-- 1 / number of foundation terms
------------------------------------
-- | Constant Temporal Matching | --
------------------------------------
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups =
{- run the related component algorithm -}
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
$ sortOn (\gs -> fst $ fst $ head' "egos" gs)
$ map (\group -> [getGroupId group]
++ (map fst $ group ^. phylo_groupPeriodParents)
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
-- first find the related components inside each ego's period
-- to be removed
graph' = map relatedComponents egos
-- then run it for all the periods
graph = zip [1..]
$ relatedComponents $ concat (graph' `using` parList rdeepseq)
-- update each group's branch id
in map (\(bId,ids) ->
let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
$ elems $ restrictKeys groups (Set.fromList ids)
in groups' `using` parList rdeepseq ) graph
reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
reduceFrequency frequency branches =
restrictKeys frequency (Set.fromList $ (nub . concat) $ map _phylo_groupNgrams $ concat branches)
updateThr :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
updateThr thr branches = map (\b -> map (\g ->
g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
-- Sequentially break each branch of a phylo where
-- done = all the already broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
breakBranches :: Double -> Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
-> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
-- 1) keep or not the new division of ego
let done' = done ++ (if snd ego
then
(if ((null (fst ego')) || (quality > quality'))
then
-- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
-- <> " | " <> show(length $ fst ego) <> " groups : "
-- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
-- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
[(fst ego,False)]
else
-- trace (" ✓ level = " <> printf "%.1f" thr <> "")
-- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
-- <> " | " <> show(length $ fst ego) <> " groups : "
-- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
-- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
else [ego])
in
-- 2) if there is no more branches in rest then return else continue
if null rest
then done'
else breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where
--------------------------------------
quality :: Double
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
--------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' =
let branches = groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups frame periods proximity thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
$ thrToMeta thr
$ depthToMeta (elevation - depth) branches'
--------------------------------------
quality' :: Double
quality' = toPhyloQuality fdt lambda frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
seaLevelMatching :: Double -> Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seaLevelMatching fdt proximity lambda minBranch frequency thr step depth elevation frame periods docs coocs branches =
-- end when the sea-level threshold reaches 1 or when no branch remains to break
if (thr >= 1) || ((not . or) $ map snd branches)
then branches
else
-- break all the possible branches at the current seaLvl level
let quality = toPhyloQuality fdt lambda frequency (map fst branches)
acc = toAccuracy frequency (map fst branches)
rec = toRecall frequency (map fst branches)
branches' = trace ("↑ level = " <> printf "%.3f" thr <> " F(λ) = " <> printf "%.5f" quality
<> " ξ = " <> printf "%.5f" acc
<> " ρ = " <> printf "%.5f" rec
<> " branches = " <> show(length branches) <> " ↴")
$ breakBranches fdt proximity lambda frequency minBranch thr depth elevation frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
in seaLevelMatching fdt proximity lambda minBranch frequency' (thr + step) step (depth - 1) elevation frame periods docs coocs branches'
constanteTemporalMatching :: Double -> Double -> Phylo -> Phylo
constanteTemporalMatching start step phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
(toPhyloHorizon phylo)
where
-- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]]
branches = map fst
$ seaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo)
(phyloProximity $ getConfig phylo)
(_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq)
start step
((((1 - start) / step) - 1))
(((1 - start) / step))
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(reverse $ sortOn (length . fst) groups)
-- 1) for each group process an initial temporal Matching
-- here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],Bool)]
groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
$ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo)
start
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
-----------------
-- | Horizon | --
-----------------
toPhyloHorizon :: Phylo -> Phylo
toPhyloHorizon phylo =
let t0 = take 1 (getPeriodIds phylo)
groups = getGroupsFromLevelPeriods 1 t0 phylo
sens = getSensibility (phyloProximity $ getConfig phylo)
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) t0
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) t0
in phylo & phylo_horizon .~ (fromList $ map (\(g,g') ->
((getGroupId g,getGroupId g'),weightedLogJaccard' sens nbDocs diago (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) $ listToCombi' groups)
--------------------------------------
-- | Adaptative Temporal Matching | --
--------------------------------------
thrToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
thrToMeta thr branches =
map (\b ->
map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
depthToMeta :: Double -> [[PhyloGroup]] -> [[PhyloGroup]]
depthToMeta depth branches =
let break = length branches > 1
in map (\b ->
map (\g ->
if break then g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [depth]) "breaks"(g ^. phylo_groupMeta))
else g) b) branches
reduceTupleMapByKeys :: Eq a => [a] -> Map (a,a) Double -> Map (a,a) Double
reduceTupleMapByKeys ks m = filterWithKey (\(k,k') _ -> (elem k ks) && (elem k' ks)) m
getInTupleMap :: Ord a => Map (a,a) Double -> a -> a -> Double
getInTupleMap m k k'
| isJust (m !? ( k ,k')) = m ! ( k ,k')
| isJust (m !? ( k',k )) = m ! ( k',k )
| otherwise = 0
toThreshold :: Double -> Map (PhyloGroupId,PhyloGroupId) Double -> Double
toThreshold lvl proxiGroups =
let idx = ((Map.size proxiGroups) `div` (floor lvl)) - 1
in if idx >= 0
then (sort $ elems proxiGroups) !! idx
else 1
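-- For instance, with lvl = 2 and 10 stored proximities, idx = (10 `div` 2) - 1 = 4,
-- so the threshold is the 5th smallest proximity value.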
-- done = all the already broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
adaptativeBreakBranches :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId,PhyloGroupId) Double
-> Double -> Map Int Double -> Int -> Int -> Map Date Double -> Map Date Cooc
-> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
-> [([PhyloGroup],(Bool,[Double]))]
adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods done ego rest =
-- 1) keep or not the new division of ego
let done' = done ++ (if (fst . snd) ego
then (if ((null (fst ego')) || (quality > quality'))
then
[(concat $ thrToMeta thr $ [fst ego],(False, ((snd . snd) ego)))]
else
( (map (\e -> (e,(True, ((snd . snd) ego) ++ [thr]))) (fst ego'))
++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
in
-- uncomment let .. in for debugging
-- let part1 = partition (snd) done'
-- part2 = partition (snd) rest
-- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
-- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
-- ) $
-- 2) if there is no more branches in rest then return else continue
if null rest
then done'
else adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
done' (head' "breakBranches" rest) (tail' "breakBranches" rest)
where
--------------------------------------
thr :: Double
thr = toThreshold depth $ Map.filter (\v -> v > (last' "breakBranches" $ (snd . snd) ego)) $ reduceTupleMapByKeys (map getGroupId $ fst ego) groupsProxi
--------------------------------------
quality :: Double
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst ego] ++ (map fst rest))
--------------------------------------
ego' :: ([[PhyloGroup]],[[PhyloGroup]])
ego' =
let branches = groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups frame periods proxiConf thr docs coocs (fst ego)
branches' = branches `using` parList rdeepseq
in partition (\b -> (length $ nub $ map _phylo_groupPeriod b) > minBranch)
$ thrToMeta thr
$ depthToMeta (elevation - depth) branches'
--------------------------------------
quality' :: Double
quality' = toPhyloQuality fdt lambda frequency
((map fst done) ++ (fst ego') ++ (snd ego') ++ (map fst rest))
adaptativeSeaLevelMatching :: Double -> Proximity -> Double -> Double -> Map (PhyloGroupId, PhyloGroupId) Double
-> Double -> Int -> Map Int Double
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
-> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
adaptativeSeaLevelMatching fdt proxiConf depth elevation groupsProxi lambda minBranch frequency frame periods docs coocs branches =
-- end when no group proximity is left, when the depth is exhausted, or when no branch remains to break
if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
then branches
else
-- break all the possible branches at the current seaLvl level
let branches' = adaptativeBreakBranches fdt proxiConf depth elevation groupsProxi lambda frequency minBranch frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
groupsProxi' = reduceTupleMapByKeys (map (getGroupId) $ concat $ map (fst) $ filter (fst . snd) branches') groupsProxi
-- thr = toThreshold depth groupsProxi
in trace("\n " <> foldl (\acc _ -> acc <> "🌊 ") "" [0..(elevation - depth)]
<> " [✓ " <> show(length $ filter (fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (fst . snd) branches')
<> ")|✗ " <> show(length $ filter (not . fst . snd) branches') <> "(" <> show(length $ concat $ map (fst) $ filter (not . fst . snd) branches') <> ")]"
<> " thr = ")
$ adaptativeSeaLevelMatching fdt proxiConf (depth - 1) elevation groupsProxi' lambda minBranch frequency' frame periods docs coocs branches'
adaptativeTemporalMatching :: Double -> Phylo -> Phylo
adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
(toPhyloHorizon phylo)
where
-- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]]
branches = map fst
$ adaptativeSeaLevelMatching (fromIntegral $ Vector.length $ getRoots phylo)
(phyloProximity $ getConfig phylo)
(elevation - 1)
elevation
(phylo ^. phylo_groupsProxi)
(_qua_granularity $ phyloQuality $ getConfig phylo)
(_qua_minBranch $ phyloQuality $ getConfig phylo)
(phylo ^. phylo_termFreq)
(getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo)
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
groups
-- 1) for each group process an initial temporal Matching
-- here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],(Bool,[Double]))]
groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
$ groupsToBranches' $ fromList $ map (\g -> (getGroupId g, g))
$ matchGroupsToGroups (getTimeFrame $ timeUnit $ getConfig phylo)
(getPeriodIds phylo) (phyloProximity $ getConfig phylo)
thr
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(traceTemporalMatching $ getGroupsFromLevel 1 phylo)
--------------------------------------
thr :: Double
thr = toThreshold elevation (phylo ^. phylo_groupsProxi)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Core/Viz/Types.hs 0000664 0000000 0000000 00000002230 14124644201 0030341 0 ustar 00root root 0000000 0000000 {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Types where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Vector (Vector)
import qualified Data.Vector as V
import Protolude
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
---------------
-- | Chart | --
---------------
data Chart = ChartHisto | ChartScatter | ChartPie
deriving (Generic)
-- TODO use UTCTime
data Histo = Histo { histo_dates :: !(Vector Text)
, histo_count :: !(Vector Int)
}
deriving (Show, Generic)
instance ToSchema Histo where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "histo_")
instance Arbitrary Histo
where
arbitrary = elements [ Histo (V.singleton "2012") (V.singleton 1)
, Histo (V.singleton "2013") (V.singleton 1)
]
deriveJSON (unPrefix "histo_") ''Histo
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Data/ 0000775 0000000 0000000 00000000000 14124644201 0026115 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Data/HashMap/ 0000775 0000000 0000000 00000000000 14124644201 0027436 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Data/HashMap/Strict/ 0000775 0000000 0000000 00000000000 14124644201 0030706 5 ustar 00root root 0000000 0000000 Utils.hs 0000664 0000000 0000000 00000003332 14124644201 0032264 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Data/HashMap/Strict module Gargantext.Data.HashMap.Strict.Utils where
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
------------------------------------------------------------------------
unionsWith :: (Foldable f, Eq k, Hashable k) => (a->a->a) -> f (HashMap k a) -> HashMap k a
unionsWith f = foldl' (HashMap.unionWith f) HashMap.empty
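-- e.g. unionsWith (+) [HashMap.fromList [("a",1),("b",2)], HashMap.fromList [("b",3)]]
--      == HashMap.fromList [("a",1),("b",5)]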
------------------------------------------------------------------------
-- | Partition the map according to some predicate. The first map contains all
-- elements that satisfy the predicate, the second all elements that fail the
-- predicate.
partition :: (Ord k, Hashable k) => (a -> Bool) -> HashMap k a -> (HashMap k a, HashMap k a)
partition p m = (HashMap.filter p m, HashMap.filter (not . p) m)
-- | Partition the map according to some predicate. The first map contains all
-- elements that satisfy the predicate, the second all elements that fail the
-- predicate.
partitionWithKey :: (Ord a, Hashable k) => (k -> a -> Bool) -> HashMap k a -> (HashMap k a, HashMap k a)
partitionWithKey p m = (HashMap.filterWithKey p m, HashMap.filterWithKey (\k -> not . p k) m)
------------------------------------------------------------------------
-- getKeyWithMaxValue :: Hashable k => HashMap k a -> Maybe k
getKeysOrderedByValueMaxFirst :: (Ord k, Hashable k, Ord a) => HashMap k a -> [k]
getKeysOrderedByValueMaxFirst m = go [] Nothing (HashMap.toList m)
where
go ks _ [] = ks
go ks Nothing ((k,v):rest) = go (k:ks) (Just v) rest
go ks (Just u) ((k,v):rest)
| v < u = go ks (Just u) rest
| v > u = go [k] (Just v) rest
| otherwise = go (k:ks) (Just v) rest
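-- Note: as written, this returns only the keys whose value equals the maximum
-- (ties included), not a full ordering of the keys.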
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database.hs 0000664 0000000 0000000 00000002275 14124644201 0027312 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database
Description : Tools for Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
All Database related stuff here.
Target: just import this module and nothing else to work with
Gargantext's database.
-}
module Gargantext.Database ( module Gargantext.Database.Prelude
, module Gargantext.Database.Schema.NodeNode
, insertDB
-- , module Gargantext.Database.Bashql
)
where
import Gargantext.Prelude
import Gargantext.Database.Prelude -- (connectGargandb)
-- import Gargantext.Database.Schema.Node
-- import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Schema.NodeNode -- (NodeNode(..))
import Gargantext.Database.Query.Table.NodeNode
class InsertDB a where
insertDB :: a -> Cmd err Int
{-
class DeleteDB a where
deleteDB :: a -> Cmd err Int
-}
instance InsertDB [NodeNode] where
insertDB = insertNodeNode
{-
instance InsertDB [Node a] where
insertDB = insertNodes'
instance InsertDB [NodeNodeNgram] where
insertDB = ...
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/ 0000775 0000000 0000000 00000000000 14124644201 0026750 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/ 0000775 0000000 0000000 00000000000 14124644201 0030165 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Delete.hs0000664 0000000 0000000 00000004707 14124644201 0031733 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Action.Delete
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO: proper rights management of the children nodes of a Team node
-- TODO add proper Right Management Type
TODO: NodeError
-}
module Gargantext.Database.Action.Delete
where
import Control.Lens (view, (^.))
import Data.Text
import Servant
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Action.Share (delFolderTeam)
import Gargantext.Core
import Gargantext.Database.Admin.Types.Hyperdata.File
import Gargantext.Database.Admin.Types.Node -- (NodeType(..))
import Gargantext.Database.Prelude (Cmd', HasConfig, HasConnectionPool)
import qualified Gargantext.Database.Query.Table.Node as N (getNode, deleteNode)
import Gargantext.Database.Query.Table.Node (getNodeWith)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Schema.Node
import Gargantext.Prelude
import qualified Gargantext.Database.GargDB as GargDB
------------------------------------------------------------------------
-- TODO
-- Delete Corpus children according to their types
-- Delete NodeList (NodeStory + cbor file)
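-- deleteNode dispatches on the node type: NodeUser is protected (panic), a NodeTeam
-- is deleted by its owner and handed to delFolderTeam otherwise, a NodeFile also
-- removes the underlying file via GargDB, and any other node is simply deleted.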
deleteNode :: (HasConfig env, HasConnectionPool env, HasNodeError err)
=> User
-> NodeId
-> Cmd' env err Int
deleteNode u nodeId = do
node' <- N.getNode nodeId
case (view node_typename node') of
nt | nt == toDBid NodeUser -> panic "[G.D.A.D.deleteNode] Not allowed to delete NodeUser (yet)"
nt | nt == toDBid NodeTeam -> do
uId <- getUserId u
if _node_user_id node' == uId
then N.deleteNode nodeId
else delFolderTeam u nodeId
nt | nt == toDBid NodeFile -> do
node <- getNodeWith nodeId (Proxy :: Proxy HyperdataFile)
let (HyperdataFile { _hff_path = path }) = node ^. node_hyperdata
GargDB.rmFile $ unpack path
N.deleteNode nodeId
_ -> N.deleteNode nodeId
-- if hasNodeType node' NodeUser
-- then panic "Not allowed to delete NodeUser (yet)"
-- else if hasNodeType node' NodeTeam
-- then do
-- uId <- getUserId u
-- if _node_user_id node' == uId
-- then N.deleteNode nodeId
-- else delFolderTeam u nodeId
-- else N.deleteNode nodeId
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Flow.hs 0000664 0000000 0000000 00000043253 14124644201 0031437 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Flow
Description : Database Flow
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-- TODO-ACCESS:
-- check userId CanFillUserCorpus userCorpusId
-- check masterUserId CanFillMasterCorpus masterCorpusId
-- TODO-ACCESS: check uId CanInsertDoc pId && checkDocType nodeType
-- TODO-EVENTS: InsertedNodes
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
( getDataText
, flowDataText
, flow
, flowCorpusFile
, flowCorpus
, flowAnnuaire
, insertMasterDocs
, saveDocNgramsWith
, getOrMkRoot
, getOrMk_RootWithCorpus
, TermType(..)
, DataOrigin(..)
, allDataOrigins
, do_api
, indexAllDocumentsWithPosTag
)
where
import Control.Lens ((^.), view, _Just, makeLenses)
import Data.Aeson.TH (deriveJSON)
import Data.Either
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.List (concat)
import Data.Map (Map, lookup)
import Data.Maybe (catMaybes)
import Data.Monoid
import Data.Swagger
import qualified Data.Text as T
import Data.Traversable (traverse)
import Data.Tuple.Extra (first, second)
import GHC.Generics (Generic)
import System.FilePath (FilePath)
import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
import qualified Data.Map as Map
import Gargantext.Core (Lang(..), PosTagAlgo(..))
import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.Flow.Types
import Gargantext.Core.Text
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Social (FlowSocialListWith)
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (POS(NP))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Action.Flow.List
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, DocumentIdWithNgrams(..))
import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Ngrams
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
import Gargantext.Database.Query.Table.NodeNodeNgrams2
import Gargantext.Database.Query.Tree.Root (getOrMkRoot, getOrMk_RootWithCorpus)
import Gargantext.Database.Schema.Node (NodePoly(..), node_id)
import Gargantext.Database.Types
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)
import qualified Gargantext.Core.Text.Corpus.API as API
import qualified Gargantext.Database.Query.Table.Node.Document.Add as Doc (add)
------------------------------------------------------------------------
-- Imports for upgrade function
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Database.Query.Tree (findNodesId)
import qualified Data.List as List
------------------------------------------------------------------------
-- TODO use internal with API name (could be old data)
data DataOrigin = InternalOrigin { _do_api :: API.ExternalAPIs }
| ExternalOrigin { _do_api :: API.ExternalAPIs }
-- TODO Web
deriving (Generic, Eq)
makeLenses ''DataOrigin
deriveJSON (unPrefix "_do_") ''DataOrigin
instance ToSchema DataOrigin where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_do_")
allDataOrigins :: [DataOrigin]
allDataOrigins = map InternalOrigin API.externalAPIs
<> map ExternalOrigin API.externalAPIs
---------------
data DataText = DataOld ![NodeId]
| DataNew ![[HyperdataDocument]]
-- TODO use the split parameter in config file
getDataText :: FlowCmdM env err m
=> DataOrigin
-> TermType Lang
-> API.Query
-> Maybe API.Limit
-> m DataText
getDataText (ExternalOrigin api) la q li = liftBase $ DataNew
<$> splitEvery 500
<$> API.get api (_tt_lang la) q li
getDataText (InternalOrigin _) _la q _li = do
(_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus
(UserName userMaster)
(Left "")
(Nothing :: Maybe HyperdataCorpus)
ids <- map fst <$> searchDocInDatabase cId (stemIt q)
pure $ DataOld ids
-------------------------------------------------------------------------------
flowDataText :: ( FlowCmdM env err m
)
=> User
-> DataText
-> TermType Lang
-> CorpusId
-> Maybe FlowSocialListWith
-> m CorpusId
flowDataText u (DataOld ids) tt cid mfslw = flowCorpusUser (_tt_lang tt) u (Right [cid]) corpusType ids mfslw
where
corpusType = (Nothing :: Maybe HyperdataCorpus)
flowDataText u (DataNew txt) tt cid mfslw = flowCorpus u (Right [cid]) tt mfslw txt
------------------------------------------------------------------------
-- TODO use proxy
flowAnnuaire :: (FlowCmdM env err m)
=> User
-> Either CorpusName [CorpusId]
-> (TermType Lang)
-> FilePath
-> m AnnuaireId
flowAnnuaire u n l filePath = do
docs <- liftBase $ (( splitEvery 500 <$> readFile_Annuaire filePath) :: IO [[HyperdataContact]])
flow (Nothing :: Maybe HyperdataAnnuaire) u n l Nothing docs
------------------------------------------------------------------------
flowCorpusFile :: (FlowCmdM env err m)
=> User
-> Either CorpusName [CorpusId]
-> Limit -- Limit the number of docs (for dev purpose)
-> TermType Lang -> FileFormat -> FilePath
-> Maybe FlowSocialListWith
-> m CorpusId
flowCorpusFile u n l la ff fp mfslw = do
eParsed <- liftBase $ parseFile ff fp
case eParsed of
Right parsed -> do
let docs = splitEvery 500 $ take l parsed
flowCorpus u n la mfslw (map (map toHyperdataDocument) docs)
Left e -> panic $ "Error: " <> (T.pack e)
------------------------------------------------------------------------
-- | TODO improve the needed type to create/update a corpus
-- (For now, Either is enough)
flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
=> User
-> Either CorpusName [CorpusId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> m CorpusId
flowCorpus = flow (Nothing :: Maybe HyperdataCorpus)
flow :: ( FlowCmdM env err m
, FlowCorpus a
, MkCorpus c
)
=> Maybe c
-> User
-> Either CorpusName [CorpusId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> m CorpusId
flow c u cn la mfslw docs = do
-- TODO if public insertMasterDocs else insertUserDocs
ids <- traverse (insertMasterDocs c la) docs
flowCorpusUser (la ^. tt_lang) u cn c (concat ids) mfslw
------------------------------------------------------------------------
flowCorpusUser :: ( FlowCmdM env err m
, MkCorpus c
)
=> Lang
-> User
-> Either CorpusName [CorpusId]
-> Maybe c
-> [NodeId]
-> Maybe FlowSocialListWith
-> m CorpusId
flowCorpusUser l user corpusName ctype ids mfslw = do
-- User Flow
(userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus user corpusName ctype
-- NodeTexts is first
_tId <- insertDefaultNode NodeTexts userCorpusId userId
-- printDebug "NodeTexts: " tId
-- NodeList is second
listId <- getOrMkList userCorpusId userId
-- _cooc <- insertDefaultNode NodeListCooc listId userId
-- TODO: check if present already, ignore
_ <- Doc.add userCorpusId ids
-- printDebug "Node Text Ids:" tId
-- User List Flow
(masterUserId, _masterRootId, masterCorpusId)
<- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype
--let gp = (GroupParams l 2 3 (StopSize 3))
let gp = GroupWithPosTag l CoreNLP HashMap.empty
ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw gp
_userListId <- flowList_DbRepo listId ngs
_mastListId <- getOrMkList masterCorpusId masterUserId
-- _ <- insertOccsUpdates userCorpusId mastListId
-- printDebug "userListId" userListId
-- User Graph Flow
_ <- insertDefaultNode NodeDashboard userCorpusId userId
_ <- insertDefaultNode NodeGraph userCorpusId userId
--_ <- mkPhylo userCorpusId userId
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
pure userCorpusId
insertMasterDocs :: ( FlowCmdM env err m
, FlowCorpus a
, MkCorpus c
)
=> Maybe c
-> TermType Lang
-> [a]
-> m [DocId]
insertMasterDocs c lang hs = do
(masterUserId, _, masterCorpusId) <- getOrMk_RootWithCorpus (UserName userMaster) (Left corpusMasterName) c
(ids', documentsWithId) <- insertDocs masterUserId masterCorpusId (map (toNode masterUserId masterCorpusId) hs )
_ <- Doc.add masterCorpusId ids'
-- TODO
-- create a corpus with database name (CSV or PubMed)
-- add documents to the corpus (create node_node link)
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs' :: HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
<- mapNodeIdNgrams
<$> documentIdWithNgrams
(extractNgramsT $ withLang lang documentsWithId)
documentsWithId
lId <- getOrMkList masterCorpusId masterUserId
_ <- saveDocNgramsWith lId mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure ids'
saveDocNgramsWith :: ( FlowCmdM env err m)
=> ListId
-> HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
-> m ()
saveDocNgramsWith lId mapNgramsDocs' = do
terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
-- to be removed
let indexedNgrams = HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs
-- new
mapCgramsId <- listInsertDb lId toNodeNgramsW'
$ map (first _ngramsTerms . second Map.keys)
$ HashMap.toList mapNgramsDocs
-- insertDocNgrams
_return <- insertNodeNodeNgrams2
$ catMaybes [ NodeNodeNgrams2 <$> Just nId
<*> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'')
<*> Just (fromIntegral w :: Double)
| (terms'', mapNgramsTypes) <- HashMap.toList mapNgramsDocs
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, w) <- Map.toList mapNodeIdWeight
]
-- to be removed
_ <- insertDocNgrams lId indexedNgrams
pure ()
------------------------------------------------------------------------
-- TODO Type NodeDocumentUnicised
insertDocs :: ( FlowCmdM env err m
-- , FlowCorpus a
, FlowInsertDB a
)
=> UserId
-> CorpusId
-> [a]
-> m ([DocId], [Indexed NodeId a])
insertDocs uId cId hs = do
let docs = map addUniqId hs
newIds <- insertDb uId cId docs
-- printDebug "newIds" newIds
let
newIds' = map reId newIds
documentsWithId = mergeData (toInserted newIds) (Map.fromList $ map viewUniqId' docs)
_ <- Doc.add cId newIds'
pure (newIds', documentsWithId)
------------------------------------------------------------------------
viewUniqId' :: UniqId a
=> a
-> (Hash, a)
viewUniqId' d = maybe err (\h -> (h,d)) (view uniqId d)
where
err = panic "[ERROR] Database.Flow.toInsert"
toInserted :: [ReturnId]
-> Map Hash ReturnId
toInserted =
Map.fromList . map (\r -> (reUniqId r, r) )
. filter (\r -> reInserted r == True)
mergeData :: Map Hash ReturnId
-> Map Hash a
-> [Indexed NodeId a]
mergeData rs = catMaybes . map toDocumentWithId . Map.toList
where
toDocumentWithId (sha,hpd) =
Indexed <$> fmap reId (lookup sha rs)
<*> Just hpd
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
documentIdWithNgrams :: HasNodeError err
=> (a
-> Cmd err (HashMap b (Map NgramsType Int)))
-> [Indexed NodeId a]
-> Cmd err [DocumentIdWithNgrams a b]
documentIdWithNgrams f = traverse toDocumentIdWithNgrams
where
toDocumentIdWithNgrams d = do
e <- f $ _unIndex d
pure $ DocumentIdWithNgrams d e
-- | TODO check optimization
mapNodeIdNgrams :: (Ord b, Hashable b)
=> [DocumentIdWithNgrams a b]
-> HashMap b
(Map NgramsType
(Map NodeId Int)
)
mapNodeIdNgrams = HashMap.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap f
where
f :: DocumentIdWithNgrams a b
-> HashMap b (Map NgramsType (Map NodeId Int))
f d = fmap (fmap (Map.singleton nId)) $ documentNgrams d
where
nId = _index $ documentWithId d
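-- The resulting nesting is: ngrams -> ngrams type -> document (node) id -> count,
-- merged across documents with (+) on the innermost counts.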
------------------------------------------------------------------------
instance ExtractNgramsT HyperdataContact
where
extractNgramsT l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc
where
extract :: TermType Lang -> HyperdataContact
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extract _l hc' = do
let authors = map text2ngrams
$ maybe ["Nothing"] (\a -> [a])
$ view (hc_who . _Just . cw_lastName) hc'
pure $ HashMap.fromList $ [(SimpleNgrams a', Map.singleton Authors 1) | a' <- authors ]
instance ExtractNgramsT HyperdataDocument
where
extractNgramsT :: TermType Lang
-> HyperdataDocument
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extractNgramsT lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' lang hd
where
extractNgramsT' :: TermType Lang
-> HyperdataDocument
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int))
extractNgramsT' lang' doc = do
let source = text2ngrams
$ maybe "Nothing" identity
$ _hd_source doc
institutes = map text2ngrams
$ maybe ["Nothing"] (map toSchoolName . (T.splitOn ", "))
$ _hd_institutes doc
authors = map text2ngrams
$ maybe ["Nothing"] (T.splitOn ", ")
$ _hd_authors doc
terms' <- map (enrichedTerms (lang' ^. tt_lang) CoreNLP NP)
<$> concat
<$> liftBase (extractTerms lang' $ hasText doc)
pure $ HashMap.fromList
$ [(SimpleNgrams source, Map.singleton Sources 1) ]
<> [(SimpleNgrams i', Map.singleton Institutes 1) | i' <- institutes ]
<> [(SimpleNgrams a', Map.singleton Authors 1) | a' <- authors ]
<> [(EnrichedNgrams t', Map.singleton NgramsTerms 1) | t' <- terms' ]
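-- Illustrative sketch (editor's note, hypothetical document): the shape of the
-- map produced above. Source, institutes and authors contribute SimpleNgrams,
-- while terms extracted from the full text contribute EnrichedNgrams, each with
-- a count of 1 for its NgramsType.
{-
   For a document with source "Nature", institute "CNRS" and author "M. Curie",
   the result is roughly:

     HashMap.fromList
       [ (SimpleNgrams   (text2ngrams "Nature"),   Map.singleton Sources     1)
       , (SimpleNgrams   (text2ngrams "CNRS"),     Map.singleton Institutes  1)
       , (SimpleNgrams   (text2ngrams "M. Curie"), Map.singleton Authors     1)
       , (EnrichedNgrams someExtractedTerm,        Map.singleton NgramsTerms 1)
       ]
-}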
instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
where
extractNgramsT l (Node _ _ _ _ _ _ _ h) = extractNgramsT l h
instance HasText a => HasText (Node a)
where
hasText (Node _ _ _ _ _ _ _ h) = hasText h
-- | TODO put elsewhere
-- | Upgrade function
-- Assumes all documents are in English (which is currently the case)
indexAllDocumentsWithPosTag :: FlowCmdM env err m => m ()
indexAllDocumentsWithPosTag = do
rootId <- getRootId (UserName userMaster)
corpusIds <- findNodesId rootId [NodeCorpus]
docs <- List.concat <$> mapM getDocumentsWithParentId corpusIds
_ <- mapM extractInsert (splitEvery 1000 docs)
pure ()
extractInsert :: FlowCmdM env err m => [Node HyperdataDocument] -> m ()
extractInsert docs = do
let documentsWithId = map (\doc -> Indexed (doc ^. node_id) doc) docs
mapNgramsDocs' <- mapNodeIdNgrams
<$> documentIdWithNgrams
(extractNgramsT $ withLang (Multi EN) documentsWithId)
documentsWithId
_ <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
pure ()
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Flow/ 0000775 0000000 0000000 00000000000 14124644201 0031074 5 ustar 00root root 0000000 0000000 Annuaire.hs 0000664 0000000 0000000 00000001313 14124644201 0033111 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Flow {-|
Module : Gargantext.Database.Flow.Annuaire
Description : Database Flow Annuaire
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
module Gargantext.Database.Action.Flow.Annuaire
where
{-
import Gargantext.Prelude
import Gargantext.Database.Flow
-- | Annuaire
flowAnnuaire :: FlowCmdM env ServantErr m => FilePath -> m ()
flowAnnuaire filePath = do
contacts <- liftBase $ deserialiseImtUsersFromFile filePath
ps <- flowInsertAnnuaire "Annuaire"
$ map (\h-> ToDbContact h)
$ map addUniqIdsContact contacts
printDebug "length annuaire" ps
-}
List.hs 0000664 0000000 0000000 00000020057 14124644201 0032270 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Flow {-|
Module : Gargantext.Database.Flow.List
Description : List Flow
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Database.Action.Flow.List
where
import Control.Concurrent
import Control.Lens ((^.), (+~), (%~), at, (.~), _Just)
import Control.Monad.Reader
import Data.Map (Map, toList)
import Data.Maybe (catMaybes)
import Data.Text (Text)
import Gargantext.API.Ngrams (saveNodeStory)
import Gargantext.API.Ngrams.Tools (getNodeStoryVar)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (HasInvalidError(..), assertValid)
import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Core.NodeStory
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb, getCgramsId)
import Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict.Patch as PM
import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
-- FLOW LIST
-- 1. select terms specific to the corpus when compared with other languages
--    (for now, assume English)
-- 2. select terms specific to the corpus when compared with other corpora (same database)
-- 3. select clusters of terms (generic and specific)
{-
data FlowList = FlowListLang
| FlowListTficf
| FlowListSpeGen
flowList_Tficf :: UserCorpusId
-> MasterCorpusId
-> NgramsType
-> (Text -> Text)
-> Cmd err (Map Text (Double, Set Text))
flowList_Tficf u m nt f = do
u' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsUser u nt
m' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsMaster u m
pure $ sortTficf Down
$ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
flowList_Tficf' :: UserCorpusId
                -> MasterCorpusId
                -> NgramsType
                -> (Text -> Text)
                -> Cmd err (Map Text (Double, Set Text))
flowList_Tficf' u m nt f = do
u' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsUser u nt
m' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsMaster u m
pure $ sortTficf Down
$ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
-}
------------------------------------------------------------------------
flowList_DbRepo :: FlowCmdM env err m
=> ListId
-> Map NgramsType [NgramsElement]
-> m ListId
flowList_DbRepo lId ngs = do
-- printDebug "listId flowList" lId
mapCgramsId <- listInsertDb lId toNodeNgramsW (Map.toList ngs)
let toInsert = catMaybes [ (,) <$> (getCgramsId mapCgramsId ntype <$> (unNgramsTerm <$> parent))
<*> getCgramsId mapCgramsId ntype ngram
| (ntype, ngs') <- Map.toList ngs
, NgramsElement { _ne_ngrams = NgramsTerm ngram
, _ne_parent = parent } <- ngs'
]
-- Inserting groups of ngrams
_r <- insert_Node_NodeNgrams_NodeNgrams
$ map (\(a,b) -> Node_NodeNgrams_NodeNgrams lId a b Nothing) toInsert
listInsert lId ngs
--trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
pure lId
------------------------------------------------------------------------
------------------------------------------------------------------------
toNodeNgramsW :: ListId
-> [(NgramsType, [NgramsElement])]
-> [NodeNgramsW]
toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
where
toNodeNgramsW'' :: ListId
-> (NgramsType, [NgramsElement])
-> [NodeNgramsW]
toNodeNgramsW'' l' (ngrams_type, elms) =
[ NodeNgrams { _nng_id = Nothing
, _nng_node_id = l'
, _nng_node_subtype = list_type
, _nng_ngrams_id = ngrams_terms'
, _nng_ngrams_type = ngrams_type
, _nng_ngrams_field = Nothing
, _nng_ngrams_tag = Nothing
, _nng_ngrams_class = Nothing
, _nng_ngrams_weight = 0 } |
(NgramsElement { _ne_ngrams = NgramsTerm ngrams_terms'
, _ne_size = _size
, _ne_list = list_type
, _ne_occurrences = _occ
, _ne_root = _root
, _ne_parent = _parent
, _ne_children = _children }) <- elms
]
toNodeNgramsW' :: ListId
-> [(Text, [NgramsType])]
-> [NodeNgramsW]
toNodeNgramsW' l'' ngs = [ NodeNgrams { _nng_id = Nothing
, _nng_node_id = l''
, _nng_node_subtype = CandidateTerm
, _nng_ngrams_id = terms
, _nng_ngrams_type = ngrams_type
, _nng_ngrams_field = Nothing
, _nng_ngrams_tag = Nothing
, _nng_ngrams_class = Nothing
, _nng_ngrams_weight = 0 }
| (terms, ngrams_types) <- ngs
, ngrams_type <- ngrams_types
]
listInsert :: FlowCmdM env err m
=> ListId
-> Map NgramsType [NgramsElement]
-> m ()
listInsert lId ngs = mapM_ (\(typeList, ngElmts)
-> putListNgrams lId typeList ngElmts) (toList ngs)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- NOTE
-- This is no longer part of the API.
-- This function is maintained for its usage in Database.Action.Flow.List.
-- If the given list of ngrams elements contains ngrams already in
-- the repo, they will be ignored.
putListNgrams :: (HasInvalidError err, HasNodeStory env err m)
=> NodeId
-> TableNgrams.NgramsType
-> [NgramsElement]
-> m ()
putListNgrams _ _ [] = pure ()
putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
where
m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
putListNgrams' :: (HasInvalidError err, HasNodeStory env err m)
=> NodeId
-> TableNgrams.NgramsType
-> Map NgramsTerm NgramsRepoElement
-> m ()
putListNgrams' listId ngramsType' ns = do
-- printDebug "[putListNgrams'] nodeId" nodeId
-- printDebug "[putListNgrams'] ngramsType" ngramsType
-- printDebug "[putListNgrams'] ns" ns
let p1 = NgramsTablePatch . PM.fromMap $ NgramsReplace Nothing . Just <$> ns
(p, p_validity) = PM.singleton ngramsType' p1
assertValid p_validity
{-
-- TODO
v <- currentVersion
q <- commitStatePatch (Versioned v p)
assert empty q
-- What if another commit comes in between?
-- Shall we have a blindCommitStatePatch? It would not ask for a version but just a patch.
-- The modifyMVar_ would test the patch with applicable first.
-- If valid the rest would be atomic and no merge is required.
-}
var <- getNodeStoryVar [listId]
liftBase $ modifyMVar_ var $ \r -> do
pure $ r & unNodeStory . at listId . _Just . a_version +~ 1
& unNodeStory . at listId . _Just . a_history %~ (p :)
& unNodeStory . at listId . _Just . a_state . at ngramsType' .~ Just ns
saveNodeStory
Pairing.hs 0000664 0000000 0000000 00000016642 14124644201 0032753 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Flow {-|
Module : Gargantext.Database.Flow.Pairing
Description : Database Flow Pairing
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE Arrows #-}
module Gargantext.Database.Action.Flow.Pairing
-- (pairing)
where
import Control.Lens (_Just, (^.))
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import Data.Maybe (catMaybes, fromMaybe)
import Data.Set (Set)
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Core
import Gargantext.Core.Types (TableResult(..))
import Gargantext.Core.Types.Main
import Gargantext.Database
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
import Gargantext.Database.Query.Prelude (leftJoin2, returnA, queryNodeNodeTable)
import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.NodeNode (insertNodeNode)
import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
import Gargantext.Database.Schema.Node
import Gargantext.Prelude hiding (sum)
import Opaleye
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Set as Set
import qualified Data.Text as DT
-- | isPairedWith
-- All nodes of type NodeAnnuaire paired with the Corpus of NodeId nId:
-- isPairedWith corpusId NodeAnnuaire
isPairedWith :: NodeId -> NodeType -> Cmd err [NodeId]
isPairedWith nId nt = runOpaQuery (selectQuery nt nId)
where
selectQuery :: NodeType -> NodeId -> Query (Column PGInt4)
selectQuery nt' nId' = proc () -> do
(node, node_node) <- queryJoin -< ()
restrict -< (node^.node_typename) .== (pgInt4 $ toDBid nt')
restrict -< (node_node^.nn_node1_id) .== (toNullable $ pgNodeId nId')
returnA -< node^.node_id
queryJoin :: Query (NodeRead, NodeNodeReadNull)
queryJoin = leftJoin2 queryNodeTable queryNodeNodeTable cond
where
cond (node, node_node) = node^.node_id .== node_node^. nn_node2_id
-----------------------------------------------------------------------
pairing :: AnnuaireId -> CorpusId -> Maybe ListId -> GargNoServer Int
pairing a c l' = do
l <- case l' of
Nothing -> defaultList c
Just l'' -> pure l''
dataPaired <- dataPairing a (c,l,Authors) takeName takeName
r <- insertDB $ prepareInsert dataPaired
_ <- insertNodeNode [ NodeNode { _nn_node1_id = c
, _nn_node2_id = a
, _nn_score = Nothing
, _nn_category = Nothing }]
pure r
dataPairing :: AnnuaireId
-> (CorpusId, ListId, NgramsType)
-> (ContactName -> Projected)
-> (DocAuthor -> Projected)
-> GargNoServer (HashMap ContactId (Set DocId))
dataPairing aId (cId, lId, ngt) fc fa = do
mc <- getNgramsContactId aId
md <- getNgramsDocId cId lId ngt
printDebug "ngramsContactId" mc
printDebug "ngramsDocId" md
let
from = projectionFrom (Set.fromList $ HM.keys mc) fc
to = projectionTo (Set.fromList $ HM.keys md) fa
pure $ fusion mc $ align from to md
prepareInsert :: HashMap ContactId (Set DocId) -> [NodeNode]
prepareInsert m = map (\(n1,n2) -> NodeNode { _nn_node1_id = n1
, _nn_node2_id = n2
, _nn_score = Nothing
, _nn_category = Nothing })
$ List.concat
$ map (\(contactId, setDocIds)
-> map (\setDocId
-> (contactId, setDocId)
) $ Set.toList setDocIds
)
$ HM.toList m
------------------------------------------------------------------------
type ContactName = NgramsTerm
type DocAuthor = NgramsTerm
type Projected = NgramsTerm
projectionFrom :: Set ContactName -> (ContactName -> Projected) -> HashMap ContactName Projected
projectionFrom ss f = HM.fromList $ map (\s -> (s, f s)) (Set.toList ss) -- use HS.toMap
projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> HashMap Projected (Set DocAuthor)
projectionTo ss f = HM.fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss) -- use HS.toMap
------------------------------------------------------------------------
takeName :: NgramsTerm -> NgramsTerm
takeName (NgramsTerm texte) = NgramsTerm $ DT.toLower texte'
where
texte' = maybe texte (\x -> if DT.length x > 3 then x else texte)
(lastName' texte)
lastName' = lastMay . DT.splitOn " "
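-- Illustrative examples (editor's note, not compiled): takeName keeps the
-- lower-cased last name when it is longer than 3 characters, otherwise the
-- whole (lower-cased) string.
{-
   takeName (NgramsTerm "Marie Curie") == NgramsTerm "curie"
   takeName (NgramsTerm "John Doe")    == NgramsTerm "john doe"  -- "Doe" is only 3 chars
-}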
------------------------------------------------------------------------
align :: HashMap ContactName Projected
-> HashMap Projected (Set DocAuthor)
-> HashMap DocAuthor (Set DocId)
-> HashMap ContactName (Set DocId)
align mc ma md = HM.fromListWith (<>)
$ map (\c -> (c, getProjection md $ testProjection c mc ma))
$ HM.keys mc
where
getProjection :: HashMap DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
getProjection ma' sa' =
if Set.null sa'
then Set.empty
else Set.unions $ sets ma' sa'
where
sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
lookup s' ma''= fromMaybe Set.empty (HM.lookup s' ma'')
testProjection :: ContactName
-> HashMap ContactName Projected
-> HashMap Projected (Set DocAuthor)
-> Set DocAuthor
testProjection cn' mc' ma' = case HM.lookup cn' mc' of
Nothing -> Set.empty
Just c -> case HM.lookup c ma' of
Nothing -> Set.empty
Just a -> a
fusion :: HashMap ContactName (Set ContactId)
-> HashMap ContactName (Set DocId)
-> HashMap ContactId (Set DocId)
fusion mc md = HM.fromListWith (<>)
$ catMaybes
$ [ (,) <$> Just cId <*> HM.lookup cn md
| (cn, setContactId) <- HM.toList mc
, cId <- Set.toList setContactId
]
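-- Illustrative end-to-end sketch (editor's note, hypothetical ids): how the
-- projections, align and fusion are combined in dataPairing, with takeName used
-- for both projections.
{-
   mc = HM.fromList [(NgramsTerm "Marie Curie", Set.singleton 7)]   -- contact name -> contact ids
   md = HM.fromList [(NgramsTerm "M. Curie",    Set.singleton 42)]  -- doc author   -> doc ids

   from = projectionFrom (Set.fromList (HM.keys mc)) takeName   -- "Marie Curie" -> "curie"
   to   = projectionTo   (Set.fromList (HM.keys md)) takeName   -- "curie" -> {"M. Curie"}

   align from to md             == HM.fromList [(NgramsTerm "Marie Curie", Set.singleton 42)]
   fusion mc (align from to md) == HM.fromList [(7, Set.singleton 42)]   -- ContactId -> DocIds
-}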
------------------------------------------------------------------------
getNgramsContactId :: AnnuaireId
-> Cmd err (HashMap ContactName (Set NodeId))
getNgramsContactId aId = do
contacts <- getAllContacts aId
pure $ HM.fromListWith (<>)
$ catMaybes
$ map (\contact -> (,) <$> (NgramsTerm <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName))
<*> Just ( Set.singleton (contact^.node_id))
) (tr_docs contacts)
getNgramsDocId :: CorpusId
-> ListId
-> NgramsType
-> GargNoServer (HashMap DocAuthor (Set NodeId))
getNgramsDocId cId lId nt = do
lIds <- selectNodesWithUsername NodeList userMaster
repo <- getRepo' lIds
let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
Types.hs 0000664 0000000 0000000 00000002735 14124644201 0032464 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Flow {-|
Module : Gargantext.Database.Flow.Types
Description : Types for Flow
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Database.Action.Flow.Types
where
import Data.Aeson (ToJSON)
import Gargantext.Core.Types (HasInvalidError)
import Gargantext.Core.Flow.Types
import Gargantext.Core.Text
import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Terms
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Prelude (CmdM)
import Gargantext.Database.Query.Table.Node.Document.Insert
import Gargantext.Database.Query.Tree.Error (HasTreeError)
type FlowCmdM env err m =
( CmdM env err m
, HasNodeStory env err m
, HasNodeError err
, HasInvalidError err
, HasTreeError err
)
type FlowCorpus a = ( AddUniqId a
, UniqId a
, InsertDb a
, ExtractNgramsT a
, HasText a
, ToNode a
, ToJSON a
)
type FlowInsertDB a = ( AddUniqId a
, UniqId a
, InsertDb a
)
Utils.hs 0000664 0000000 0000000 00000004074 14124644201 0032456 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Flow {-|
Module : Gargantext.Database.Flow.Utils
Description : Database Flow
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Database.Action.Flow.Utils
where
import Data.Map (Map)
import Data.HashMap.Strict (HashMap)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.NodeNodeNgrams
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Types
import Gargantext.Prelude
import qualified Data.Map as DM
import qualified Data.HashMap.Strict as HashMap
data DocumentIdWithNgrams a b =
DocumentIdWithNgrams
{ documentWithId :: Indexed NodeId a
, documentNgrams :: HashMap b (Map NgramsType Int)
} deriving (Show)
docNgrams2nodeNodeNgrams :: CorpusId
-> DocNgrams
-> NodeNodeNgrams
docNgrams2nodeNodeNgrams cId (DocNgrams d n nt w) =
NodeNodeNgrams cId d n nt w
data DocNgrams = DocNgrams { dn_doc_id :: DocId
, dn_ngrams_id :: Int
, dn_ngrams_type :: NgramsTypeId
, dn_weight :: Double
}
insertDocNgramsOn :: CorpusId
-> [DocNgrams]
-> Cmd err Int
insertDocNgramsOn cId dn =
insertNodeNodeNgrams
$ (map (docNgrams2nodeNodeNgrams cId) dn)
insertDocNgrams :: CorpusId
-> HashMap (Indexed Int Ngrams) (Map NgramsType (Map NodeId Int))
-> Cmd err Int
insertDocNgrams cId m =
insertDocNgramsOn cId [ DocNgrams { dn_doc_id = n
, dn_ngrams_id = _index ng
, dn_ngrams_type = ngramsTypeId t
, dn_weight = fromIntegral i }
| (ng, t2n2i) <- HashMap.toList m
, (t, n2i) <- DM.toList t2n2i
, (n, i) <- DM.toList n2i
]
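-- Illustrative sketch (editor's note, hypothetical ids and counts):
-- insertDocNgrams flattens the nested map into one DocNgrams row per
-- (ngram, ngrams type, document) triple before inserting them.
{-
   m = HashMap.singleton (Indexed 3 ng)
                         (Map.singleton NgramsTerms (Map.fromList [(10, 2), (11, 1)]))

   rows inserted for corpus cId:
     DocNgrams { dn_doc_id = 10, dn_ngrams_id = 3, dn_ngrams_type = ngramsTypeId NgramsTerms, dn_weight = 2.0 }
     DocNgrams { dn_doc_id = 11, dn_ngrams_id = 3, dn_ngrams_type = ngramsTypeId NgramsTerms, dn_weight = 1.0 }
-}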
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Index.hs 0000664 0000000 0000000 00000002277 14124644201 0031600 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Action.Index
Description : Indexation tools
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main Database functions for Gargantext.API.Node.Update
UpdateNodeParamsTexts { methodTexts :: Granularity }
data Granularity = NewNgrams | NewTexts | Both
deriving (Generic, Eq, Ord, Enum, Bounded)
-- TODO add option for type of ngrams
-}
module Gargantext.Database.Action.Index
where
{-
import Data.List (nub)
import Gargantext.Core.Text.Terms.WithList (buildPatterns, filterTerms, termsInText)
index :: CorpusId -> Granularity -> Cmd err [Int]
index cId NewNgrams = do
ngrams <- get ngrams with zero count
texts <- get all text to index
indexSave text (buildPatterns ngrams)
index cId NewTexts = do
ngrams <- get all ngrams
texts <- get text with zero count
indexSave text (buildPatterns ngrams)
index cId Both = do
r1 <- index cId NewNgrams
r2 <- index cId NewTexts
pure $ r1 <> r2
indexSave :: [Document] -> Pattern -> Cmd err [Int]
indexSave corpus p = do
indexedDoc <- map (filterTerms patterns) corpus
saveIndexDoc ngramsTextId
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Learn.hs 0000664 0000000 0000000 00000006530 14124644201 0031566 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Learn
Description : Learn Small Data Analytics with big data connection (DB)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.Database.Action.Learn
where
import Data.Maybe
import Data.Text (Text)
import Gargantext.Core
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Database.Query.Facet
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Prelude
import Gargantext.Core.Text.Learn
import qualified Data.List as List
import qualified Data.Text as Text
data FavOrTrash = IsFav | IsTrash
deriving (Eq)
moreLike :: HasDBid NodeType
=> CorpusId -> Maybe Offset -> Maybe Limit -> Maybe OrderBy
-> FavOrTrash -> Cmd err [FacetDoc]
moreLike cId o _l order ft = do
priors <- getPriors ft cId
moreLikeWith cId o (Just 3) order ft priors
---------------------------------------------------------------------------
getPriors :: HasDBid NodeType => FavOrTrash -> CorpusId -> Cmd err (Events Bool)
getPriors ft cId = do
docs_fav <- filter (\(FacetDoc _ _ _ _ f _ _) -> f == Just 2)
<$> runViewDocuments cId False Nothing Nothing Nothing Nothing
docs_trash <- List.take (List.length docs_fav)
<$> runViewDocuments cId True Nothing Nothing Nothing Nothing
let priors = priorEventsWith text (fav2bool ft) ( List.zip (repeat False) docs_fav
<> List.zip (repeat True ) docs_trash
)
pure priors
moreLikeWith :: HasDBid NodeType
=> CorpusId -> Maybe Offset -> Maybe Limit -> Maybe OrderBy
-> FavOrTrash -> Events Bool -> Cmd err [FacetDoc]
moreLikeWith cId o l order ft priors = do
docs_test <- filter (\(FacetDoc _ _ _ _ f _ _) -> f == Just 1)
<$> runViewDocuments cId False o Nothing order Nothing
let results = map fst
$ filter ((==) (Just $ not $ fav2bool ft) . snd)
$ map (\f -> (f, detectDefaultWithPriors text priors f)) docs_test
pure $ List.take (maybe 10 identity l) results
---------------------------------------------------------------------------
fav2bool :: FavOrTrash -> Bool
fav2bool ft = ft == IsFav
text :: FacetDoc -> Text
text (FacetDoc _ _ _ h _ _ _) = title <> " " <> Text.take 100 abstr
where
title = maybe "" identity (_hd_title h)
abstr = maybe "" identity (_hd_abstract h)
---------------------------------------------------------------------------
{-
apply :: (FlowCmdM env e m) => FavOrTrash -> CorpusId -> [NodeId] -> m [Int]
apply favTrash cId ns = case favTrash of
IsFav -> nodeNodesCategory $ map (\n -> (cId, n, 2)) ns
IsTrash -> nodeNodesCategory $ map (\n -> (cId, n, 0)) ns
moreLikeAndApply :: FlowCmdM DevEnv GargError m => FavOrTrash -> CorpusId -> m [Int]
moreLikeAndApply ft cId = do
priors <- getPriors ft cId
moreLikeWithAndApply priors ft cId
moreLikeWithAndApply :: FlowCmdM DevEnv GargError m => Events Bool -> FavOrTrash -> CorpusId -> m [Int]
moreLikeWithAndApply priors ft cId = do
ids <- map facetDoc_id <$> moreLikeWith cId ft priors
apply ft cId ids
-}
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Mail.hs 0000664 0000000 0000000 00000001745 14124644201 0031412 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Action.Mail
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Database.Action.Mail
where
import Control.Lens (view)
import Gargantext.Prelude
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Core.Mail
import Gargantext.Prelude.Config
import Gargantext.Database.Schema.User
import Gargantext.Database.Action.User
import Gargantext.Core.Types.Individu (User(..))
------------------------------------------------------------------------
sendMail :: HasNodeError err => User -> Cmd err ()
sendMail u = do
server <- view $ hasConfig . gc_url
userLight <- getUserLightDB u
liftBase $ mail server (MailInfo { mailInfo_username = userLight_username userLight
, mailInfo_address = userLight_email userLight })
Metrics.hs 0000664 0000000 0000000 00000006055 14124644201 0032056 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action {-|
Module : Gargantext.Database.Metrics
Description : Get Metrics from Storage (Database like)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Node API
-}
module Gargantext.Database.Action.Metrics
where
import Data.HashMap.Strict (HashMap)
import Data.Vector (Vector)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo')
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm)
import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
import Gargantext.Core.Types (ListType(..), Limit, NodeType(..))
import Gargantext.Core.NodeStory
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser{-, getTficfWith-})
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (HashMap NgramsTerm (ListType, Maybe NgramsTerm), Vector (Scored NgramsTerm))
getMetrics cId maybeListId tabType maybeLimit = do
(ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
-- TODO HashMap
pure (ngs, scored myCooc)
getNgramsCooc :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
, HashMap NgramsTerm (Maybe RootTerm)
, HashMap (NgramsTerm, NgramsTerm) Int
)
getNgramsCooc cId maybeListId tabType maybeLimit = do
(ngs', ngs) <- getNgrams cId maybeListId tabType
let
take' Nothing xs = xs
take' (Just n) xs = take n xs
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster
myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) (ngramsTypeFromTabType tabType)
(take' maybeLimit $ HM.keys ngs)
pure $ (ngs', ngs, myCooc)
getNgrams :: (HasNodeStory env err m)
=> CorpusId -> Maybe ListId -> TabType
-> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
, HashMap NgramsTerm (Maybe RootTerm)
)
getNgrams cId maybeListId tabType = do
lId <- case maybeListId of
Nothing -> defaultList cId
Just lId' -> pure lId'
lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo' [lId]
let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
[MapTerm, StopTerm, CandidateTerm]
pure (lists, maybeSyn)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Metrics/ 0000775 0000000 0000000 00000000000 14124644201 0031573 5 ustar 00root root 0000000 0000000 Lists.hs 0000664 0000000 0000000 00000003656 14124644201 0033160 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Metrics {-|
Module : Gargantext.Database.Lists
Description : Main requests of Node to the database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Action.Metrics.Lists
where
import Gargantext.API.Ngrams.Types (TabType(..))
import Gargantext.Core.Text.Metrics (Scored(..))
import Gargantext.Core.Types -- (NodePoly(..), NodeCorpus, ListId)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Prelude hiding (sum, head)
import Prelude hiding (null, id, map, sum)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Map as Map
import qualified Data.Vector as Vec
import qualified Gargantext.Database.Action.Metrics as Metrics
{-
trainModel :: FlowCmdM env ServantErr m
=> Username -> m Score
trainModel u = do
rootId <- _node_id <$> getRoot u
(id:ids) <- getCorporaWithParentId rootId
(s,_model) <- case length ids >0 of
True -> grid 100 150 (getMetrics
False -> panic "Gargantext.Database.Lists.trainModel : not enough corpora"
--}
getMetrics' :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Int
-> m (Map.Map ListType [Vec.Vector Double])
getMetrics' cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics cId maybeListId tabType maybeLimit
let
metrics = map (\(Scored t s1 s2) -> (listType t ngs', [Vec.fromList [s1,s2]])) scores
listType t m = maybe (panic errorMsg) fst $ HashMap.lookup t m
errorMsg = "API.Node.metrics: key absent"
{-
_ <- Learn.grid 100 110 metrics' metrics'
--}
pure $ Map.fromListWith (<>) $ Vec.toList metrics
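-- Illustrative sketch (editor's note, hypothetical scores): each Scored term
-- becomes a 2D point [s1, s2], grouped by the ListType looked up in ngs'.
{-
   scores = [Scored "graph" 0.8 0.3, Scored "node" 0.1 0.9]
   ngs'   = HashMap.fromList [("graph", (MapTerm, Nothing)), ("node", (CandidateTerm, Nothing))]

   result == Map.fromList [ (MapTerm,       [Vec.fromList [0.8, 0.3]])
                          , (CandidateTerm, [Vec.fromList [0.1, 0.9]]) ]
-}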
NgramsByNode.hs 0000664 0000000 0000000 00000044344 14124644201 0034411 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Metrics {-|
Module : Gargantext.Database.Metrics.NgramsByNode
Description : Ngrams by Node user and master
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Ngrams by node enable contextual metrics.
-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Action.Metrics.NgramsByNode
where
--import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Set (Set)
import Data.Text (Text)
import Data.Tuple.Extra (first, second, swap)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Debug.Trace (trace)
import Gargantext.Core
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Data.HashMap.Strict.Utils as HM
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Database.PostgreSQL.Simple as DPS
-- | fst is the size of the Supra Corpus (number of distinct documents)
--   snd maps each grouped term to its number of occurrences (distinct documents)
--   together with the surface forms grouped under it
countNodesByNgramsWith :: (NgramsTerm -> NgramsTerm)
-> HashMap NgramsTerm (Set NodeId)
-> (Double, HashMap NgramsTerm (Double, Set NgramsTerm))
countNodesByNgramsWith f m = (total, m')
where
total = fromIntegral $ Set.size $ Set.unions $ HM.elems m
m' = HM.map ( swap . second (fromIntegral . Set.size))
$ groupNodesByNgramsWith f m
groupNodesByNgramsWith :: (NgramsTerm -> NgramsTerm)
-> HashMap NgramsTerm (Set NodeId)
-> HashMap NgramsTerm (Set NgramsTerm, Set NodeId)
groupNodesByNgramsWith f m =
HM.fromListWith (<>) $ map (\(t,ns) -> (f t, (Set.singleton t, ns)))
$ HM.toList m
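-- Illustrative sketch (editor's note, hypothetical ids): grouping two surface
-- forms under one root with f, then counting the distinct documents per group.
{-
   m = HM.fromList [ (NgramsTerm "graphs", Set.fromList [1, 2])
                   , (NgramsTerm "graph",  Set.fromList [2, 3]) ]
   f = const (NgramsTerm "graph")   -- toy grouping function

   groupNodesByNgramsWith f m
     == HM.fromList [(NgramsTerm "graph", ( Set.fromList [NgramsTerm "graph", NgramsTerm "graphs"]
                                          , Set.fromList [1, 2, 3] ))]

   countNodesByNgramsWith f m
     == (3.0, HM.fromList [(NgramsTerm "graph", (3.0, Set.fromList [NgramsTerm "graph", NgramsTerm "graphs"]))])
-}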
------------------------------------------------------------------------
getNodesByNgramsUser :: HasDBid NodeType
=> CorpusId
-> NgramsType
-> Cmd err (HashMap NgramsTerm (Set NodeId))
getNodesByNgramsUser cId nt =
HM.fromListWith (<>) <$> map (\(n,t) -> (NgramsTerm t, Set.singleton n))
<$> selectNgramsByNodeUser cId nt
where
selectNgramsByNodeUser :: HasDBid NodeType
=> CorpusId
-> NgramsType
-> Cmd err [(NodeId, Text)]
selectNgramsByNodeUser cId' nt' =
runPGSQuery queryNgramsByNodeUser
( cId'
, toDBid NodeDocument
, ngramsTypeId nt'
-- , 100 :: Int -- limit
-- , 0 :: Int -- offset
)
queryNgramsByNodeUser :: DPS.Query
queryNgramsByNodeUser = [sql|
SELECT nng.node2_id, ng.terms FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- toDBid
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY nng.node2_id, ng.terms
ORDER BY (nng.node2_id, ng.terms) DESC
-- LIMIT ?
-- OFFSET ?
|]
------------------------------------------------------------------------
-- TODO add groups
getOccByNgramsOnlyFast :: HasDBid NodeType
=> CorpusId
-> NgramsType
-> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast cId nt ngs =
HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
getOccByNgramsOnlyFast_withSample :: HasDBid NodeType
=> CorpusId
-> Int
-> NgramsType
-> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast_withSample cId int nt ngs =
HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser_withSample cId int nt ngs
getOccByNgramsOnlyFast' :: CorpusId
-> ListId
-> NgramsType
-> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
HM.fromListWith (+) <$> map (second round) <$> run cId lId nt tms
where
fields = [QualifiedIdentifier Nothing "text"]
run :: CorpusId
-> ListId
-> NgramsType
-> [NgramsTerm]
-> Cmd err [(NgramsTerm, Double)]
run cId' lId' nt' tms' = fmap (first NgramsTerm) <$> runPGSQuery query
( Values fields ((DPS.Only . unNgramsTerm) <$> tms')
, cId'
, lId'
, ngramsTypeId nt'
)
query :: DPS.Query
query = [sql|
WITH input_rows(terms) AS (?)
SELECT ng.terms, nng.weight FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
WHERE nng.node1_id = ? -- CorpusId
AND nng.node2_id = ? -- ListId
AND nng.ngrams_type = ? -- NgramsTypeId
-- AND nn.category > 0 -- TODO
GROUP BY ng.terms, nng.weight
|]
-- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow :: HasDBid NodeType
=> NodeType
-> CorpusId
-> [ListId]
-> NgramsType
-> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlySlow t cId ls nt ngs =
HM.map Set.size <$> getScore' t cId ls nt ngs
where
getScore' NodeCorpus = getNodesByNgramsOnlyUser
getScore' NodeDocument = getNgramsByDocOnlyUser
getScore' _ = getNodesByNgramsOnlyUser
getOccByNgramsOnlySafe :: HasDBid NodeType
=> CorpusId
-> [ListId]
-> NgramsType
-> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlySafe cId ls nt ngs = do
printDebug "getOccByNgramsOnlySafe" (cId, nt, length ngs)
fast <- getOccByNgramsOnlyFast cId nt ngs
slow <- getOccByNgramsOnlySlow NodeCorpus cId ls nt ngs
when (fast /= slow) $
printDebug "getOccByNgramsOnlySafe: difference"
(HM.difference slow fast, HM.difference fast slow)
-- diff slow fast :: PatchMap Text (Replace (Maybe Int))
pure slow
selectNgramsOccurrencesOnlyByNodeUser :: HasDBid NodeType
=> CorpusId
-> NgramsType
-> [NgramsTerm]
-> Cmd err [(NgramsTerm, Int)]
selectNgramsOccurrencesOnlyByNodeUser cId nt tms =
fmap (first NgramsTerm) <$>
runPGSQuery queryNgramsOccurrencesOnlyByNodeUser
( Values fields ((DPS.Only . unNgramsTerm) <$> tms)
, cId
, toDBid NodeDocument
, ngramsTypeId nt
)
where
fields = [QualifiedIdentifier Nothing "text"]
-- Same as queryNgramsOnlyByNodeUser but uses COUNT on the node ids.
-- Question: with the grouping, is the result exactly the same (since the Set NodeId
-- intersections for equivalent ngrams are non-empty)?
queryNgramsOccurrencesOnlyByNodeUser :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser = [sql|
WITH input_rows(terms) AS (?)
SELECT ng.terms, COUNT(nng.node2_id) FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- toDBid
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY nng.node2_id, ng.terms
|]
selectNgramsOccurrencesOnlyByNodeUser_withSample :: HasDBid NodeType
=> CorpusId
-> Int
-> NgramsType
-> [NgramsTerm]
-> Cmd err [(NgramsTerm, Int)]
selectNgramsOccurrencesOnlyByNodeUser_withSample cId int nt tms =
fmap (first NgramsTerm) <$>
runPGSQuery queryNgramsOccurrencesOnlyByNodeUser_withSample
( int
, toDBid NodeDocument
, cId
, Values fields ((DPS.Only . unNgramsTerm) <$> tms)
, cId
, ngramsTypeId nt
)
where
fields = [QualifiedIdentifier Nothing "text"]
queryNgramsOccurrencesOnlyByNodeUser_withSample :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser_withSample = [sql|
WITH nodes_sample AS (SELECT id FROM nodes n TABLESAMPLE SYSTEM_ROWS (?)
JOIN nodes_nodes nn ON n.id = nn.node2_id
WHERE n.typename = ?
AND nn.node1_id = ?),
input_rows(terms) AS (?)
SELECT ng.terms, COUNT(nng.node2_id) FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes_sample n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY nng.node2_id, ng.terms
|]
queryNgramsOccurrencesOnlyByNodeUser' :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser' = [sql|
WITH input_rows(terms) AS (?)
SELECT ng.terms, COUNT(nng.node2_id) FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- toDBid
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY nng.node2_id, ng.terms
|]
------------------------------------------------------------------------
getNodesByNgramsOnlyUser :: HasDBid NodeType
=> CorpusId
-> [ListId]
-> NgramsType
-> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm (Set NodeId))
getNodesByNgramsOnlyUser cId ls nt ngs =
HM.unionsWith (<>)
. map (HM.fromListWith (<>)
. map (second Set.singleton))
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt)
(splitEvery 1000 ngs)
getNgramsByNodeOnlyUser :: HasDBid NodeType
=> NodeId
-> [ListId]
-> NgramsType
-> [NgramsTerm]
-> Cmd err (Map NodeId (Set NgramsTerm))
getNgramsByNodeOnlyUser cId ls nt ngs =
Map.unionsWith (<>)
. map ( Map.fromListWith (<>)
. map (second Set.singleton)
)
. map (map swap)
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt)
(splitEvery 1000 ngs)
------------------------------------------------------------------------
selectNgramsOnlyByNodeUser :: HasDBid NodeType
=> CorpusId
-> [ListId]
-> NgramsType
-> [NgramsTerm]
-> Cmd err [(NgramsTerm, NodeId)]
selectNgramsOnlyByNodeUser cId ls nt tms =
fmap (first NgramsTerm) <$>
runPGSQuery queryNgramsOnlyByNodeUser
( Values fields ((DPS.Only . unNgramsTerm) <$> tms)
, Values [QualifiedIdentifier Nothing "int4"]
(DPS.Only <$> (map (\(NodeId n) -> n) ls))
, cId
, toDBid NodeDocument
, ngramsTypeId nt
)
where
fields = [QualifiedIdentifier Nothing "text"]
queryNgramsOnlyByNodeUser :: DPS.Query
queryNgramsOnlyByNodeUser = [sql|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.node2_id FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = nng.node1_id
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- toDBid
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY ng.terms, nng.node2_id
|]
selectNgramsOnlyByNodeUser' :: HasDBid NodeType
=> CorpusId
-> [ListId]
-> NgramsType
-> [Text]
-> Cmd err [(Text, Int)]
selectNgramsOnlyByNodeUser' cId ls nt tms =
runPGSQuery queryNgramsOnlyByNodeUser
( Values fields (DPS.Only <$> tms)
, Values [QualifiedIdentifier Nothing "int4"]
(DPS.Only <$> (map (\(NodeId n) -> n) ls))
, cId
, toDBid NodeDocument
, ngramsTypeId nt
)
where
fields = [QualifiedIdentifier Nothing "text"]
queryNgramsOnlyByNodeUser' :: DPS.Query
queryNgramsOnlyByNodeUser' = [sql|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.weight FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = nng.node2_id
WHERE nng.node1_id = ? -- CorpusId
AND nng.ngrams_type = ? -- NgramsTypeId
-- AND nn.category > 0
GROUP BY ng.terms, nng.weight
|]
getNgramsByDocOnlyUser :: DocId
-> [ListId]
-> NgramsType
-> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm (Set NodeId))
getNgramsByDocOnlyUser cId ls nt ngs =
HM.unionsWith (<>)
. map (HM.fromListWith (<>) . map (second Set.singleton))
<$> mapM (selectNgramsOnlyByDocUser cId ls nt) (splitEvery 1000 ngs)
selectNgramsOnlyByDocUser :: DocId
-> [ListId]
-> NgramsType
-> [NgramsTerm]
-> Cmd err [(NgramsTerm, NodeId)]
selectNgramsOnlyByDocUser dId ls nt tms =
fmap (first NgramsTerm) <$>
runPGSQuery queryNgramsOnlyByDocUser
( Values fields ((DPS.Only . unNgramsTerm) <$> tms)
, Values [QualifiedIdentifier Nothing "int4"]
(DPS.Only <$> (map (\(NodeId n) -> n) ls))
, dId
, ngramsTypeId nt
)
where
fields = [QualifiedIdentifier Nothing "text"]
queryNgramsOnlyByDocUser :: DPS.Query
queryNgramsOnlyByDocUser = [sql|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.node2_id FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = nng.node1_id
WHERE nng.node2_id = ? -- DocId
AND nng.ngrams_type = ? -- NgramsTypeId
GROUP BY ng.terms, nng.node2_id
|]
------------------------------------------------------------------------
-- | TODO filter by language, database, any social field
getNodesByNgramsMaster :: HasDBid NodeType
=> UserCorpusId -> MasterCorpusId -> Cmd err (HashMap Text (Set NodeId))
getNodesByNgramsMaster ucId mcId = unionsWith (<>)
. map (HM.fromListWith (<>) . map (\(n,t) -> (t, Set.singleton n)))
-- . takeWhile (not . List.null)
-- . takeWhile (\l -> List.length l > 3)
<$> mapM (selectNgramsByNodeMaster 1000 ucId mcId) [0,500..10000]
selectNgramsByNodeMaster :: HasDBid NodeType
=> Int
-> UserCorpusId
-> MasterCorpusId
-> Int
-> Cmd err [(NodeId, Text)]
selectNgramsByNodeMaster n ucId mcId p = runPGSQuery
queryNgramsByNodeMaster'
( ucId
, ngramsTypeId NgramsTerms
, toDBid NodeDocument
, p
, toDBid NodeDocument
, p
, n
, mcId
, toDBid NodeDocument
, ngramsTypeId NgramsTerms
)
-- | TODO fix node_node_ngrams relation
queryNgramsByNodeMaster' :: DPS.Query
queryNgramsByNodeMaster' = [sql|
WITH nodesByNgramsUser AS (
SELECT n.id, ng.terms FROM nodes n
JOIN nodes_nodes nn ON n.id = nn.node2_id
JOIN node_node_ngrams nng ON nng.node2_id = n.id
JOIN ngrams ng ON nng.ngrams_id = ng.id
WHERE nn.node1_id = ? -- UserCorpusId
-- AND n.typename = ? -- toDBid
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
AND node_pos(n.id,?) >= ?
AND node_pos(n.id,?) < ?
GROUP BY n.id, ng.terms
),
nodesByNgramsMaster AS (
SELECT n.id, ng.terms FROM nodes n TABLESAMPLE SYSTEM_ROWS(?)
JOIN node_node_ngrams nng ON n.id = nng.node2_id
JOIN ngrams ng ON ng.id = nng.ngrams_id
WHERE n.parent_id = ? -- Master Corpus toDBid
AND n.typename = ? -- toDBid
AND nng.ngrams_type = ? -- NgramsTypeId
GROUP BY n.id, ng.terms
)
SELECT m.id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|]
TFICF.hs 0000664 0000000 0000000 00000005374 14124644201 0032714 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Metrics {-|
Module : Gargantext.Database.Metrics.TFICF
Description : TFICF metrics (user corpus vs master corpus)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Action.Metrics.TFICF
where
-- import Debug.Trace (trace)
-- import Gargantext.Core (Lang(..))
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import Data.Maybe (fromMaybe)
import Gargantext.Core
import Gargantext.Core.Text.Metrics.TFICF
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, getOccByNgramsOnlyFast, getOccByNgramsOnlyFast_withSample)
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.NodeNode (selectCountDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.API.Ngrams.Types
import Gargantext.Prelude
import qualified Data.Set as Set
getTficf :: HasDBid NodeType
=> UserCorpusId
-> MasterCorpusId
-> NgramsType
-> Cmd err (HashMap NgramsTerm Double)
getTficf cId mId nt = do
mapTextDoubleLocal <- HM.filter (> 1)
<$> HM.map (fromIntegral . Set.size)
<$> getNodesByNgramsUser cId nt
mapTextDoubleGlobal <- HM.map fromIntegral
<$> getOccByNgramsOnlyFast mId nt (HM.keys mapTextDoubleLocal)
countLocal <- selectCountDocs cId
countGlobal <- selectCountDocs mId
pure $ HM.mapWithKey (\t n ->
tficf (TficfInfra (Count n )
(Total $ fromIntegral countLocal))
(TficfSupra (Count $ fromMaybe 0 $ HM.lookup t mapTextDoubleGlobal)
(Total $ fromIntegral countGlobal))
) mapTextDoubleLocal
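-- Illustrative sketch (editor's note, hypothetical counts): terms kept locally
-- (more than one supporting document) are scored by combining their local
-- (user corpus) and global (master corpus) document counts.
{-
   For a term found in 4 of the 100 user-corpus documents and in 40 of the
   10000 master-corpus documents, the score computed above is:

     tficf (TficfInfra (Count 4)  (Total 100))
           (TficfSupra (Count 40) (Total 10000))
-}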
getTficf_withSample :: HasDBid NodeType
=> UserCorpusId
-> MasterCorpusId
-> NgramsType
-> Cmd err (HashMap NgramsTerm Double)
getTficf_withSample cId mId nt = do
mapTextDoubleLocal <- HM.filter (> 1)
<$> HM.map (fromIntegral . Set.size)
<$> getNodesByNgramsUser cId nt
countLocal <- selectCountDocs cId
let countGlobal = countLocal * 10
mapTextDoubleGlobal <- HM.map fromIntegral
<$> getOccByNgramsOnlyFast_withSample mId countGlobal nt
(HM.keys mapTextDoubleLocal)
pure $ HM.mapWithKey (\t n ->
tficf (TficfInfra (Count n )
(Total $ fromIntegral countLocal))
(TficfSupra (Count $ fromMaybe 0 $ HM.lookup t mapTextDoubleGlobal)
(Total $ fromIntegral countGlobal))
) mapTextDoubleLocal
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Node.hs 0000664 0000000 0000000 00000011365 14124644201 0031414 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Action.Node
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Database.Action.Node
where
import Gargantext.Core
import Gargantext.Core.Types (Name)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Hyperdata.Default
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (hash)
import Gargantext.Database.Prelude
import Control.Lens (view)
import Gargantext.Prelude.Config (GargConfig(..))
------------------------------------------------------------------------
-- | TODO mk all other nodes
mkNodeWithParent :: (HasNodeError err, HasDBid NodeType)
=> NodeType
-> Maybe ParentId
-> UserId
-> Name
-> Cmd err [NodeId]
mkNodeWithParent NodeUser (Just _) _ _ = nodeError UserNoParent
------------------------------------------------------------------------
-- | MkNode, insert and eventually configure Hyperdata
mkNodeWithParent NodeUser Nothing uId name =
insertNodesWithParentR Nothing [node NodeUser name defaultHyperdataUser Nothing uId]
mkNodeWithParent _ Nothing _ _ = nodeError HasParent
------------------------------------------------------------------------
mkNodeWithParent NodeFrameWrite i u n =
mkNodeWithParent_ConfigureHyperdata NodeFrameWrite i u n
mkNodeWithParent NodeFrameCalc i u n =
mkNodeWithParent_ConfigureHyperdata NodeFrameCalc i u n
mkNodeWithParent NodeFrameVisio i u n =
mkNodeWithParent_ConfigureHyperdata NodeFrameVisio i u n
mkNodeWithParent NodeFrameNotebook i u n =
mkNodeWithParent_ConfigureHyperdata NodeFrameNotebook i u n
mkNodeWithParent nt (Just pId) uId name = insertNode nt (Just name) Nothing pId uId
-- mkNodeWithParent _ _ _ _ = errorWith "[G.D.A.Node.mkNodeWithParent] needs parent"
-- | Sugar to create a node, get its NodeId and update its Hyperdata after
mkNodeWithParent_ConfigureHyperdata :: (HasNodeError err, HasDBid NodeType)
=> NodeType
-> Maybe ParentId
-> UserId
-> Name
-> Cmd err [NodeId]
mkNodeWithParent_ConfigureHyperdata NodeFrameWrite (Just i) uId name =
mkNodeWithParent_ConfigureHyperdata' NodeFrameWrite (Just i) uId name
mkNodeWithParent_ConfigureHyperdata NodeFrameCalc (Just i) uId name =
mkNodeWithParent_ConfigureHyperdata' NodeFrameCalc (Just i) uId name
mkNodeWithParent_ConfigureHyperdata NodeFrameVisio (Just i) uId name =
mkNodeWithParent_ConfigureHyperdata' NodeFrameVisio (Just i) uId name
mkNodeWithParent_ConfigureHyperdata NodeFrameNotebook (Just i) uId name =
insertNode NodeFrameNotebook (Just "Notebook") (Just $ DefaultFrameCode $ HyperdataFrame "Notebook" name) i uId
mkNodeWithParent_ConfigureHyperdata _ _ _ _ = nodeError NotImplYet
-- | Function not exposed
mkNodeWithParent_ConfigureHyperdata' :: (HasNodeError err, HasDBid NodeType)
=> NodeType
-> Maybe ParentId
-> UserId
-> Name
-> Cmd err [NodeId]
mkNodeWithParent_ConfigureHyperdata' nt (Just i) uId name = do
maybeNodeId <- case nt of
NodeFrameWrite -> insertNode NodeFrameWrite (Just name) Nothing i uId
NodeFrameCalc -> insertNode NodeFrameCalc (Just name) Nothing i uId
NodeFrameVisio -> insertNode NodeFrameVisio (Just name) Nothing i uId
_ -> nodeError NeedsConfiguration
case maybeNodeId of
[] -> nodeError (DoesNotExist i)
[n] -> do
cfg <- view hasConfig
u <- case nt of
NodeFrameWrite -> pure $ _gc_frame_write_url cfg
NodeFrameCalc -> pure $ _gc_frame_calc_url cfg
NodeFrameVisio -> pure $ _gc_frame_visio_url cfg
_ -> nodeError NeedsConfiguration
let
s = _gc_secretkey cfg
hd = HyperdataFrame u (hash $ s <> (cs $ show n))
_ <- updateHyperdata n hd
pure [n]
(_:_:_) -> nodeError MkNode
mkNodeWithParent_ConfigureHyperdata' _ _ _ _ = nodeError HasParent
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Search.hs0000664 0000000 0000000 00000023716 14124644201 0031737 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.TextSearch
Description : Postgres text search experimentation
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Arrows #-}
module Gargantext.Database.Action.Search where
import Control.Arrow (returnA)
import Control.Lens ((^.))
import Data.Aeson
import Data.List (intersperse)
import Data.Maybe
import Data.String (IsString(..))
import Data.Text (Text, words, unpack, intercalate)
import Data.Time (UTCTime)
import Database.PostgreSQL.Simple (Query)
import Database.PostgreSQL.Simple.ToField
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
import Gargantext.Database.Prelude (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
import Gargantext.Database.Query.Facet
import Gargantext.Database.Query.Filter
import Gargantext.Database.Query.Join (leftJoin5)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.NodeNode
import Gargantext.Database.Schema.Node
import Gargantext.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Opaleye hiding (Query, Order)
import Data.Profunctor.Product (p4)
import qualified Opaleye as O hiding (Order)
------------------------------------------------------------------------
searchDocInDatabase :: HasDBid NodeType
=> ParentId
-> Text
-> Cmd err [(NodeId, HyperdataDocument)]
searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
where
-- | Global search query where ParentId is Master Node Corpus Id
queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
queryDocInDatabase _ q = proc () -> do
row <- queryNodeSearchTable -< ()
restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
restrict -< (_ns_typename row) .== (pgInt4 $ toDBid NodeDocument)
returnA -< (_ns_id row, _ns_hyperdata row)
------------------------------------------------------------------------
-- | todo add limit and offset and order
searchInCorpus :: HasDBid NodeType
=> CorpusId
-> IsTrash
-> [Text]
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Cmd err [FacetDoc]
searchInCorpus cId t q o l order = runOpaQuery
$ filterWith o l order
$ queryInCorpus cId t
$ intercalate " | "
$ map stemIt q
searchCountInCorpus :: HasDBid NodeType
=> CorpusId
-> IsTrash
-> [Text]
-> Cmd err Int
searchCountInCorpus cId t q = runCountOpaQuery
$ queryInCorpus cId t
$ intercalate " | "
$ map stemIt q
queryInCorpus :: HasDBid NodeType
=> CorpusId
-> IsTrash
-> Text
-> O.Query FacetDocRead
queryInCorpus cId t q = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
restrict -< if t
then (nn^.nn_category) .== (toNullable $ pgInt4 0)
else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
restrict -< (n ^. ns_typename ) .== (pgInt4 $ toDBid NodeDocument)
returnA -< FacetDoc { facetDoc_id = n^.ns_id
, facetDoc_created = n^.ns_date
, facetDoc_title = n^.ns_name
, facetDoc_hyperdata = n^.ns_hyperdata
, facetDoc_category = nn^.nn_category
, facetDoc_ngramCount = nn^.nn_score
, facetDoc_score = nn^.nn_score }
joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
where
cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
cond (n, nn) = nn^.nn_node2_id .== _ns_id n
------------------------------------------------------------------------
searchInCorpusWithContacts
:: HasDBid NodeType
=> CorpusId
-> AnnuaireId
-> [Text]
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Cmd err [FacetPaired Int UTCTime HyperdataContact Int]
searchInCorpusWithContacts cId aId q o l _order =
runOpaQuery $ limit' l
$ offset' o
$ orderBy ( desc _fp_score)
$ selectGroup cId aId
$ intercalate " | "
$ map stemIt q
selectContactViaDoc
:: HasDBid NodeType
=> CorpusId
-> AnnuaireId
-> Text
-> QueryArr ()
( Column (Nullable PGInt4)
, Column (Nullable PGTimestamptz)
, Column (Nullable PGJsonb)
, Column (Nullable PGInt4)
)
selectContactViaDoc cId aId q = proc () -> do
(doc, (corpus_doc, (_contact_doc, (annuaire_contact, contact)))) <- queryContactViaDoc -< ()
restrict -< (doc^.ns_search) @@ (pgTSQuery $ unpack q )
restrict -< (doc^.ns_typename) .== (pgInt4 $ toDBid NodeDocument)
restrict -< (corpus_doc^.nn_node1_id) .== (toNullable $ pgNodeId cId)
restrict -< (annuaire_contact^.nn_node1_id) .== (toNullable $ pgNodeId aId)
restrict -< (contact^.node_typename) .== (toNullable $ pgInt4 $ toDBid NodeContact)
returnA -< ( contact^.node_id
, contact^.node_date
, contact^.node_hyperdata
, toNullable $ pgInt4 1
)
selectGroup :: HasDBid NodeType
=> NodeId
-> NodeId
-> Text
-> Select FacetPairedReadNull
selectGroup cId aId q = proc () -> do
(a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
(selectContactViaDoc cId aId q) -< ()
returnA -< FacetPaired { _fp_id = a
, _fp_date = b
, _fp_hyperdata = c
, _fp_score = d }
queryContactViaDoc :: O.Query ( NodeSearchRead
, ( NodeNodeReadNull
, ( NodeNodeReadNull
, ( NodeNodeReadNull
, NodeReadNull
)
)
)
)
queryContactViaDoc =
leftJoin5
queryNodeTable
queryNodeNodeTable
queryNodeNodeTable
queryNodeNodeTable
queryNodeSearchTable
cond12
cond23
cond34
cond45
where
cond12 :: (NodeNodeRead, NodeRead) -> Column PGBool
cond12 (annuaire_contact, contact) = contact^.node_id .== annuaire_contact^.nn_node2_id
cond23 :: ( NodeNodeRead
, ( NodeNodeRead
, NodeReadNull
)
) -> Column PGBool
cond23 (contact_doc, (annuaire_contact, _)) = contact_doc^.nn_node1_id .== annuaire_contact^.nn_node2_id
cond34 :: ( NodeNodeRead
, ( NodeNodeRead
, ( NodeNodeReadNull
, NodeReadNull
)
)
) -> Column PGBool
cond34 (corpus_doc, (contact_doc, (_,_))) = corpus_doc^.nn_node2_id .== contact_doc^.nn_node2_id
cond45 :: ( NodeSearchRead
, ( NodeNodeRead
, ( NodeNodeReadNull
, ( NodeNodeReadNull
, NodeReadNull
)
)
)
) -> Column PGBool
cond45 (doc, (corpus_doc, (_,(_,_)))) = doc^.ns_id .== corpus_doc^.nn_node2_id
------------------------------------------------------------------------
newtype TSQuery = UnsafeTSQuery [Text]
-- | TODO [""] -> panic "error"
toTSQuery :: [Text] -> TSQuery
toTSQuery txt = UnsafeTSQuery $ map stemIt txt
instance IsString TSQuery
where
fromString = UnsafeTSQuery . words . cs
instance ToField TSQuery
where
toField (UnsafeTSQuery xs)
= Many $ intersperse (Plain " && ")
$ map (\q -> Many [ Plain "plainto_tsquery("
, Escape (cs q)
, Plain ")"
]
) xs
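-- For example (illustration only), the parameter built from
-- UnsafeTSQuery ["cancer", "drug"] is rendered by postgresql-simple as:
--
--   plainto_tsquery('cancer') && plainto_tsquery('drug')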
data Order = Asc | Desc
instance ToField Order
where
toField Asc = Plain "ASC"
toField Desc = Plain "DESC"
-- TODO
-- FIX fav
-- ADD ngrams count
-- TESTS
textSearchQuery :: Query
textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
\ , n.hyperdata->'title' \
\ , n.hyperdata->'source' \
\ , n.hyperdata->'authors' \
\ , COALESCE(nn.score,null) \
\ FROM nodes n \
\ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
\ WHERE \
\ n.search @@ (?::tsquery) \
\ AND (n.parent_id = ? OR nn.node1_id = ?) \
\ AND n.typename = ? \
\ ORDER BY n.hyperdata -> 'publication_date' ? \
\ offset ? limit ?;"
-- | Text Search Function for Master Corpus
-- TODO : text search for user corpus
-- Example:
-- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
-- textSearchTest pId q = textSearch q pId 5 0 Asc
textSearch :: HasDBid NodeType
=> TSQuery -> ParentId
-> Limit -> Offset -> Order
-> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
where
typeId = toDBid NodeDocument
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/Share.hs 0000664 0000000 0000000 00000007545 14124644201 0031576 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Action.Share
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Database.Action.Share
where
import Control.Lens (view)
import Gargantext.Database
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Config (hasNodeType, isInNodeTypes)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataAny(..))
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getNode, getNodesWith)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError, errorWith)
import Gargantext.Database.Query.Table.NodeNode (deleteNodeNode)
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Database.Schema.Node
import Gargantext.Prelude
-- | TODO move in Config of Gargantext
publicNodeTypes :: [NodeType]
publicNodeTypes = [NodeDashboard, NodeGraph, NodePhylo, NodeFile]
------------------------------------------------------------------------
data ShareNodeWith = ShareNodeWith_User { snwu_nodetype :: NodeType
, snwu_user :: User
}
| ShareNodeWith_Node { snwn_nodetype :: NodeType
, snwn_node_id :: NodeId
}
------------------------------------------------------------------------
shareNodeWith :: HasNodeError err
=> ShareNodeWith
-> NodeId
-> Cmd err Int
shareNodeWith (ShareNodeWith_User NodeFolderShared u) n = do
nodeToCheck <- getNode n
userIdCheck <- getUserId u
if not (hasNodeType nodeToCheck NodeTeam)
then errorWith "[G.D.A.S.shareNodeWith] Can share node Team only"
else
if (view node_user_id nodeToCheck == userIdCheck)
then errorWith "[G.D.A.S.shareNodeWith] Can share to others only"
else do
folderSharedId <- getFolderId u NodeFolderShared
insertDB ([NodeNode { _nn_node1_id = folderSharedId
, _nn_node2_id = n
, _nn_score = Nothing
, _nn_category = Nothing }]:: [NodeNode])
shareNodeWith (ShareNodeWith_Node NodeFolderPublic nId) n = do
nodeToCheck <- getNode n
if not (isInNodeTypes nodeToCheck publicNodeTypes)
then errorWith $ "[G.D.A.S.shareNodeWith] Can share this nodesTypes only: "
<> (cs $ show publicNodeTypes)
else do
folderToCheck <- getNode nId
if hasNodeType folderToCheck NodeFolderPublic
then insertDB ([NodeNode { _nn_node1_id = nId
, _nn_node2_id = n
, _nn_score = Nothing
, _nn_category = Nothing }] :: [NodeNode])
else errorWith "[G.D.A.S.shareNodeWith] Can share NodeWith NodeFolderPublic only"
shareNodeWith _ _ = errorWith "[G.D.A.S.shareNodeWith] Not implemented for this NodeType"
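-- Usage sketch (illustration only; 'teamNodeId', 'graphNodeId' and
-- 'publicFolderId' are hypothetical NodeIds): share a Team node with another
-- user, or publish one of the 'publicNodeTypes' into a public folder:
--
--   shareNodeWith (ShareNodeWith_User NodeFolderShared (UserName "alice")) teamNodeId
--   shareNodeWith (ShareNodeWith_Node NodeFolderPublic publicFolderId) graphNodeId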
------------------------------------------------------------------------
getFolderId :: HasNodeError err => User -> NodeType -> Cmd err NodeId
getFolderId u nt = do
rootId <- getRootId u
s <- getNodesWith rootId HyperdataAny (Just nt) Nothing Nothing
case head s of
Nothing -> errorWith "[G.D.A.S.getFolderId] No folder shared found"
Just f -> pure (_node_id f)
------------------------------------------------------------------------
type TeamId = NodeId
delFolderTeam :: HasNodeError err => User -> TeamId -> Cmd err Int
delFolderTeam u nId = do
folderSharedId <- getFolderId u NodeFolderShared
deleteNodeNode folderSharedId nId
unPublish :: HasNodeError err
=> ParentId -> NodeId
-> Cmd err Int
unPublish p n = deleteNodeNode p n
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/User.hs 0000664 0000000 0000000 00000004751 14124644201 0031446 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Action.User
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Database.Action.User
where
import Data.Text (Text)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.User
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Schema.Node
import Gargantext.Prelude
------------------------------------------------------------------------
getUserLightWithId :: HasNodeError err => Int -> Cmd err UserLight
getUserLightWithId i = do
candidates <- head <$> getUsersWithId i
case candidates of
Nothing -> nodeError NoUserFound
Just u -> pure u
getUserLightDB :: HasNodeError err => User -> Cmd err UserLight
getUserLightDB u = do
userId <- getUserId u
userLight <- getUserLightWithId userId
pure userLight
------------------------------------------------------------------------
getUserId :: HasNodeError err
=> User
-> Cmd err UserId
getUserId u = do
maybeUser <- getUserId' u
case maybeUser of
Nothing -> nodeError NoUserFound
Just u' -> pure u'
getUserId' :: HasNodeError err
=> User
-> Cmd err (Maybe UserId)
getUserId' (UserDBId uid) = pure (Just uid)
getUserId' (RootId rid) = do
n <- getNode rid
pure $ Just $ _node_user_id n
getUserId' (UserName u ) = do
muser <- getUser u
case muser of
Just user -> pure $ Just $ userLight_id user
Nothing -> pure Nothing
getUserId' UserPublic = pure Nothing
------------------------------------------------------------------------
-- | Username = Text, whereas UserName is a constructor of User:
-- this is confusing, we should change it
type Username = Text
getUsername :: HasNodeError err
=> User
-> Cmd err Username
getUsername (UserName u) = pure u
getUsername (UserDBId i) = do
users <- getUsersWithId i
case head users of
Just u -> pure $ userLight_username u
Nothing -> nodeError $ NodeError "G.D.A.U.getUserName: User not found with that id"
getUsername (RootId rid) = do
n <- getNode rid
getUsername (UserDBId $ _node_user_id n)
getUsername UserPublic = pure "UserPublic"
--------------------------------------------------------------------------
-- getRootId is in Gargantext.Database.Query.Tree.Root
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/User/ 0000775 0000000 0000000 00000000000 14124644201 0031103 5 ustar 00root root 0000000 0000000 New.hs 0000664 0000000 0000000 00000006247 14124644201 0032122 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Action/User {-|
Module : Gargantext.Database.Action.User.New
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Database.Action.User.New
where
import Control.Lens (view)
import Control.Monad.Random
import Data.Text (Text, splitOn)
import Gargantext.Core.Mail
import Gargantext.Core.Types.Individu
import Gargantext.Database.Action.Flow (getOrMkRoot)
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..), nodeError, NodeError(..))
import Gargantext.Database.Query.Table.User
import Gargantext.Prelude
import Gargantext.Prelude.Config
import Gargantext.Prelude.Crypto.Pass.User (gargPass)
import qualified Data.Text as Text
------------------------------------------------------------------------
------------------------------------------------------------------------
newUsers :: (CmdM env err m, MonadRandom m, HasNodeError err)
=> [EmailAddress] -> m Int64
newUsers us = do
us' <- mapM newUserQuick us
url <- view $ hasConfig . gc_url
newUsers' url us'
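-- Usage sketch (illustration only, hypothetical addresses): create two accounts,
-- each with a freshly generated password, and send each an invitation mail
-- pointing to the configured instance URL:
--
--   newUsers ["alice@example.com", "bob@example.com"]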
------------------------------------------------------------------------
newUserQuick :: (MonadRandom m)
=> Text -> m (NewUser GargPassword)
newUserQuick n = do
pass <- gargPass
let u = case guessUserName n of
Just (u', _m) -> u'
Nothing -> panic "[G.D.A.U.N.newUserQuick]: Email invalid"
pure (NewUser u n (GargPassword pass))
------------------------------------------------------------------------
-- | guessUserName
-- guess username and normalize it (Text.toLower)
guessUserName :: Text -> Maybe (Text,Text)
guessUserName n = case splitOn "@" n of
[u',m'] -> if m' /= "" then Just (Text.toLower u',m')
else Nothing
_ -> Nothing
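-- For example (illustration only):
--   guessUserName "Alice.Martin@example.com" == Just ("alice.martin", "example.com")
--   guessUserName "not-an-email"             == Nothing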
------------------------------------------------------------------------
newUser' :: HasNodeError err
=> ServerAddress -> NewUser GargPassword -> Cmd err Int64
newUser' address u = newUsers' address [u]
newUsers' :: HasNodeError err
=> ServerAddress -> [NewUser GargPassword] -> Cmd err Int64
newUsers' address us = do
us' <- liftBase $ mapM toUserHash us
r <- insertUsers $ map toUserWrite us'
_ <- mapM getOrMkRoot $ map (\u -> UserName (_nu_username u)) us
_ <- liftBase $ mapM (\u -> mail address (Invitation u)) us
printDebug "newUsers'" us
pure r
------------------------------------------------------------------------
updateUser :: HasNodeError err
=> SendEmail -> Text -> NewUser GargPassword -> Cmd err Int64
updateUser (SendEmail send) server u = do
u' <- liftBase $ toUserHash u
n <- updateUserDB $ toUserWrite u'
_ <- case send of
True -> liftBase $ mail server (PassUpdate u)
False -> pure ()
pure n
------------------------------------------------------------------------
rmUser :: HasNodeError err => User -> Cmd err Int64
rmUser (UserName un) = deleteUsers [un]
rmUser _ = nodeError NotImplYet
-- TODO
rmUsers :: HasNodeError err => [User] -> Cmd err Int64
rmUsers [] = pure 0
rmUsers _ = undefined
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/ 0000775 0000000 0000000 00000000000 14124644201 0030000 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Access.hs 0000664 0000000 0000000 00000000614 14124644201 0031536 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Admin.Access
Description : Access to Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO-SECURITY review purpose of this module
-}
module Gargantext.Database.Admin.Access where
data Action = Read | Write | Exec
data Roles = RoleUser | RoleMaster
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Bashql.hs 0000664 0000000 0000000 00000010726 14124644201 0031554 0 ustar 00root root 0000000 0000000 {-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
{-|
Module : Gargantext.Database.Admin.Bashql
Description : BASHQL to deal with Gargantext Database.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
* BASHQL is a Domain Specific Language to deal with the Database
* BASHQL = functional (Bash * SQL)
* Which language should we choose when working with a database? To keep it
simple, instead of the common Object Relational Mapping (ORM) [1]
strategies used nowadays, which are inspired more by object logic than by
functional logic, the semantics of BASHQL put the function first.
* BASHQL focuses on the function, i.e. it uses Bash function names
and implements them with SQL behind the scenes. BASHQL is thus inspired more
by the Bash language [2] than by SQL, and it follows the main Bash commands as
specification and documentation.
* Main arguments:
1. Theoretical: databases and file systems can each be thought of as a single
category, an assumption based on theoretical work on databases by David Spivak [0].
2. Practical: basic Bash commands are a daily practice in the
developer community.
* How to help?
1. Choose a command you like in Bash
2. Implement it in Haskell-SQL according to the Gargantext schema (tree-like
filesystem)
3. Translate it into BASHQL (follow the previous implementations)
4. Make a pull request (enjoy the community)
* Implementation strategy: functional adaptations are made to the
Gargantext language options, and SQL optimizations are done continuously
during the project. For the Haskellish part, you may take inspiration from the
Turtle implementation written by Gabriel Gonzalez [3], which shows how to
write Haskell translations of Bash.
* Semantics
- FileSystem is now a NodeSystem where each File is a Node in a Directed Graph (DG).
* References
[0] David Spivak, "Category Theory for the Sciences", MIT Press (reviewed by
the MAA, the AMS, and SIAM).
[1] https://en.wikipedia.org/wiki/Object-relational_mapping
[2] https://en.wikipedia.org/wiki/Bash_(Unix_shell)
[3] https://github.com/Gabriel439/Haskell-Turtle-Library
TODO-ACCESS: should the checks be done here or before.
-}
module Gargantext.Database.Admin.Bashql () {-( get
, ls
, home
, post
, del
, mv
, put
, rename
, tree
-- , mkCorpus, mkAnnuaire
)-}
where
import Control.Monad.Reader -- (Reader, ask)
import Data.Text (Text)
import Data.List (concat, last)
import Gargantext.Core.Types
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataAny)
import Gargantext.Database.Prelude (runOpaQuery, Cmd)
import Gargantext.Database.Query.Table.Node
import qualified Gargantext.Database.Query.Table.Node.Update as U (Update(..), update)
import Gargantext.Database.Schema.Node
import Gargantext.Prelude
-- List of NodeId
-- type PWD a = PWD UserId [a]
type PWD = [NodeId]
--data PWD' a = a | PWD' [a]
rename :: NodeId -> Text -> Cmd err [Int]
rename n t = U.update $ U.Rename n t
mv :: NodeId -> ParentId -> Cmd err [Int]
mv n p = U.update $ U.Move n p
-- | TODO get Children or Node
get :: PWD -> Cmd err [Node HyperdataAny]
get [] = pure []
get pwd = runOpaQuery $ selectNodesWithParentID (last pwd)
-- | Home, need to filter with UserId
{-
home :: Cmd err PWD
home = map _node_id <$> getNodesWithParentId 0 Nothing
-}
-- | ls == get Children
ls :: PWD -> Cmd err [Node HyperdataAny]
ls = get
tree :: PWD -> Cmd err [Node HyperdataAny]
tree p = do
ns <- get p
children <- mapM (\n -> get [_node_id n]) ns
pure $ ns <> concat children
-- | TODO
post :: PWD -> [NodeWrite] -> Cmd err Int64
post [] _ = pure 0
post _ [] = pure 0
post pth ns = insertNodesWithParent (Just $ last pth) ns
--postR :: PWD -> [NodeWrite'] -> Cmd err [Int]
--postR [] _ _ = pure [0]
--postR _ [] _ = pure [0]
--postR pth ns c = mkNodeR (last pth) ns c
-- | WIP
-- rm : mv to trash
-- del : empty trash
--rm :: PWD -> [NodeId] -> IO Int
--rm = del
del :: [NodeId] -> Cmd err Int
del [] = pure 0
del ns = deleteNodes ns
-- | TODO
put :: U.Update -> Cmd err [Int]
put = U.update
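-- Usage sketch (illustration only; 'rootId', 'nodeId' and 'newParentId' are
-- hypothetical NodeIds), mirroring the Bash commands documented above:
--   ls   [rootId]            -- list the children of rootId
--   tree [rootId]            -- rootId together with its children
--   rename nodeId "Reports"  -- rename a node
--   mv   nodeId newParentId  -- re-parent a node
--   del  [nodeId]            -- delete nodes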
-- | TODO
-- cd (Home UserId) | (Node NodeId)
-- cd Path
-- jump NodeId
-- touch Dir
-- type Name = Text
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Config.hs 0000664 0000000 0000000 00000005313 14124644201 0031543 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Admin.Config
Description : Tools for Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Target: just import this module and nothing else to work with
Gargantext's database.
TODO: configure nodes table in Haskell (Config typenames etc.)
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Database.Admin.Config
where
import Control.Lens (view)
import Data.List (lookup)
import Data.Maybe (fromMaybe)
import Data.Text (Text,pack)
import Data.Tuple.Extra (swap)
import Gargantext.Core (HasDBid(..))
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Schema.Node
import Gargantext.Prelude
-- TODO put this in config.ini file
corpusMasterName :: Text
corpusMasterName = "Main"
userMaster :: Text
userMaster = "gargantua"
userArbitrary :: Text
userArbitrary = "user1"
instance HasDBid NodeType where
toDBid = nodeTypeId
fromDBid = fromNodeTypeId
nodeTypeId :: NodeType -> NodeTypeId
nodeTypeId n =
case n of
NodeUser -> 1
NodeFolder -> 2
NodeFolderPrivate -> 20
NodeFolderShared -> 21
NodeTeam -> 210
NodeFolderPublic -> 22
NodeCorpusV3 -> 3
NodeCorpus -> 30
NodeAnnuaire -> 31
NodeTexts -> 40
NodeDocument -> 4
NodeContact -> 41
--NodeSwap -> 19
---- Lists
NodeList -> 5
NodeListCooc -> 50
NodeModel -> 52
---- Scores
-- NodeOccurrences -> 10
NodeGraph -> 9
NodePhylo -> 90
-- NodeChart -> 7
NodeDashboard -> 71
-- NodeNoteBook -> 88
NodeFile -> 101
NodeFrameWrite -> 991
NodeFrameCalc -> 992
NodeFrameNotebook -> 993
NodeFrameVisio -> 994
-- Cooccurrences -> 9
--
-- Specclusion -> 11
-- Genclusion -> 18
-- Cvalue -> 12
--
-- TfidfCorpus -> 13
-- TfidfGlobal -> 14
--
-- TirankLocal -> 16
-- TirankGlobal -> 17
-- Node management
-- NodeFavorites -> 15
hasNodeType :: forall a. Node a -> NodeType -> Bool
hasNodeType n nt = (view node_typename n) == (toDBid nt)
isInNodeTypes :: forall a. Node a -> [NodeType] -> Bool
isInNodeTypes n ts = elem (view node_typename n) (map toDBid ts)
-- | Nodes are typed in the database according to a specific ID
--
nodeTypeInv :: [(NodeTypeId, NodeType)]
nodeTypeInv = map swap nodeTypes
nodeTypes :: [(NodeType, NodeTypeId)]
nodeTypes = [ (n, toDBid n) | n <- allNodeTypes ]
fromNodeTypeId :: NodeTypeId -> NodeType
fromNodeTypeId tId = fromMaybe (panic $ pack $ "Type Id " <> show tId <> " does not exist")
(lookup tId nodeTypeInv)
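-- For example (illustration only), the mapping round-trips over the
-- constructors listed above:
--   toDBid   NodeCorpus == 30
--   fromDBid 30         == NodeCorpus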
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Trigger/ 0000775 0000000 0000000 00000000000 14124644201 0031403 5 ustar 00root root 0000000 0000000 Init.hs 0000664 0000000 0000000 00000002623 14124644201 0032566 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Trigger {-|
Module : Gargantext.Database.Admin.Trigger.Init
Description : Triggers configuration
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Ngrams by node enable contextual metrics.
-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Admin.Trigger.Init
where
import Data.Text (Text)
import Gargantext.Database.Admin.Trigger.NodeNodeNgrams (triggerCountInsert, triggerCountInsert2)
import Gargantext.Database.Admin.Trigger.Nodes (triggerSearchUpdate, triggerUpdateHash)
import Gargantext.Database.Admin.Trigger.NodesNodes (triggerDeleteCount, triggerInsertCount, triggerUpdateAdd, triggerUpdateDel, MasterListId) -- , triggerCoocInsert)
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Prelude
------------------------------------------------------------------------
initFirstTriggers :: Text -> Cmd err [Int64]
initFirstTriggers secret = do
t0 <- triggerUpdateHash secret
pure [t0]
initLastTriggers :: MasterListId -> Cmd err [Int64]
initLastTriggers lId = do
t0 <- triggerSearchUpdate
t1 <- triggerCountInsert
t1' <- triggerCountInsert2
-- t1'' <- triggerCoocInsert lId
t2 <- triggerDeleteCount lId
t3 <- triggerInsertCount lId
t4 <- triggerUpdateAdd lId
t5 <- triggerUpdateDel lId
pure [t0
,t1
,t1'
-- ,t1''
,t2
,t3
,t4
,t5]
NodeNodeNgrams.hs 0000664 0000000 0000000 00000015270 14124644201 0034530 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Trigger {-|
Module : Gargantext.Database.Admin.Trigger.NodeNodeNgrams
Description : Triggers configuration
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Triggers on NodeNodeNgrams table.
-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Admin.Trigger.NodeNodeNgrams
where
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.Core
import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd, execPGSQuery)
import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as DPS
triggerCountInsert :: HasDBid NodeType => Cmd err Int64
triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_ngrams_global_count() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
select n.parent_id, n.id, new1.ngrams_id, new1.ngrams_type, count(*) from NEW as new1
INNER JOIN nodes n ON n.id = new1.node1_id
INNER JOIN nodes n2 ON n2.id = new1.node2_id
WHERE n2.typename = ? -- not mandatory
AND n.typename = ? -- not mandatory
AND n.parent_id <> n2.id -- not mandatory
GROUP BY n.parent_id, n.id, new1.ngrams_id, new1.ngrams_type
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE set weight = node_node_ngrams.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_count_insert on node_node_ngrams;
CREATE TRIGGER trigger_count_insert AFTER INSERT on node_node_ngrams
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE set_ngrams_global_count();
|]
triggerCountInsert2 :: HasDBid NodeType => Cmd err Int64
triggerCountInsert2 = execPGSQuery query ( toDBid NodeCorpus
, toDBid NodeDocument
, toDBid NodeList
)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_ngrams_global_count2() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_node_ngrams2 (node_id, nodengrams_id, weight)
SELECT corpus.id, nng.id, count(*) from NEW as new1
INNER JOIN node_ngrams nng ON nng.id = new1.nodengrams_id
INNER JOIN nodes list ON list.id = nng.node_id
INNER JOIN nodes_nodes nn ON nn.node2_id = new1.node_id
INNER JOIN nodes corpus ON corpus.id = nn.node1_id
INNER JOIN nodes doc ON doc.id = nn.node2_id
WHERE corpus.typename = ? -- 30 -- corpus
AND doc.typename = ? -- 4 -- maybe not mandatory
AND list.typename = ? -- 5 -- list
GROUP BY corpus.id, nng.id
ON CONFLICT (node_id, nodengrams_id)
DO UPDATE set weight = node_node_ngrams2.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_count_insert2 on node_node_ngrams2;
CREATE TRIGGER trigger_count_insert2 AFTER INSERT on node_node_ngrams2
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE set_ngrams_global_count2();
|]
-- TODO add the groups
triggerCoocInsert :: HasDBid NodeType => Cmd err Int64
triggerCoocInsert = execPGSQuery query ( toDBid NodeCorpus
, toDBid NodeDocument
, toDBid NodeList
, toDBid CandidateTerm
, toDBid CandidateTerm
)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_cooc() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_nodengrams_nodengrams (node_id, node_ngrams1_id, node_ngrams2_id, weight)
WITH input(corpus_id, nn1, nn2, weight) AS (
SELECT corpus.id, nng1.id, nng2.id, count(*) from NEW as new1
INNER JOIN node_ngrams nng1 ON nng1.id = new1.nodengrams_id
INNER JOIN nodes list ON list.id = nng1.node_id
INNER JOIN nodes_nodes nn ON nn.node2_id = new1.node_id
INNER JOIN nodes corpus ON corpus.id = nn.node1_id
INNER JOIN nodes doc ON doc.id = nn.node2_id
INNER JOIN node_node_ngrams2 nnng2 ON nnng2.node_id = doc.id
INNER JOIN node_ngrams nng2 ON nng2.id = nnng2.nodengrams_id
WHERE corpus.typename = ? -- 30 -- corpus
AND doc.typename = ? -- 4 -- maybe not mandatory
AND list.typename = ? -- 5 -- list
AND nng2.node_id = list.id
AND nng1.id < nng2.id
AND nng1.node_subtype >= ?
AND nng2.node_subtype >= ?
GROUP BY corpus.id, nng1.id, nng2.id
)
SELECT * from input where weight > 1
ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id)
DO UPDATE set weight = node_nodengrams_nodengrams.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_cooc on node_node_ngrams2;
CREATE TRIGGER trigger_cooc_insert AFTER INSERT on node_node_ngrams2
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE set_cooc();
|]
Nodes.hs 0000664 0000000 0000000 00000010316 14124644201 0032731 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Trigger {-|
Module : Gargantext.Database.Admin.Trigger.Nodes
Description : Triggers configuration
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Triggers on Nodes table.
-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Admin.Trigger.Nodes
where
import Data.Text (Text)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.Core (HasDBid(..))
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd, execPGSQuery)
import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as DPS
triggerSearchUpdate :: HasDBid NodeType => Cmd err Int64
triggerSearchUpdate = execPGSQuery query ( toDBid NodeDocument
, toDBid NodeDocument
, toDBid NodeContact
)
where
query :: DPS.Query
query = [sql|
-- DROP TRIGGER search_update_trigger on nodes;
CREATE OR REPLACE FUNCTION public.search_update()
RETURNS trigger AS $$
begin
IF new.typename = ? AND new.hyperdata @> '{"language_iso2":"EN"}' THEN
new.search := to_tsvector( 'english' , (new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract'));
ELSIF new.typename = ? AND new.hyperdata @> '{"language_iso2":"FR"}' THEN
new.search := to_tsvector( 'french' , (new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract'));
ELSIF new.typename = ? THEN
new.search := to_tsvector( 'french' , (new.hyperdata ->> 'prenom')
|| ' ' || (new.hyperdata ->> 'nom')
|| ' ' || (new.hyperdata ->> 'fonction')
);
ELSE
new.search := to_tsvector( 'english' , (new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract'));
END IF;
return new;
end
$$ LANGUAGE plpgsql;
ALTER FUNCTION public.search_update() OWNER TO gargantua;
CREATE TRIGGER search_update_trigger
BEFORE INSERT OR UPDATE
ON nodes FOR EACH ROW
EXECUTE PROCEDURE search_update();
-- Initialize index with already existing data
UPDATE nodes SET hyperdata = hyperdata;
|]
type Secret = Text
triggerUpdateHash :: HasDBid NodeType => Secret -> Cmd err Int64
triggerUpdateHash secret = execPGSQuery query ( toDBid NodeDocument
, toDBid NodeContact
, secret
, secret
, toDBid NodeDocument
, toDBid NodeContact
, secret
, secret
)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION hash_insert_nodes()
RETURNS trigger AS $$
BEGIN
IF NEW.hash_id = ''
THEN
IF NEW.typename = ? OR NEW.typename = ?
THEN NEW.hash_id = digest(CONCAT(?, NEW.typename, NEW.name, NEW.parent_id, NEW.hyperdata), 'sha256');
ELSE NEW.hash_id = digest(CONCAT(?, NEW.typename, NEW.name, NEW.id, NEW.hyperdata), 'sha256');
END IF;
END IF;
RETURN NEW;
END
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION hash_update_nodes()
RETURNS trigger AS $$
BEGIN
IF NEW.typename = ? OR NEW.typename = ?
THEN NEW.hash_id = digest(CONCAT(?, NEW.typename, NEW.name, NEW.parent_id, NEW.hyperdata), 'sha256');
ELSE NEW.hash_id = digest(CONCAT(?, NEW.typename, NEW.name, NEW.id, NEW.hyperdata), 'sha256');
END IF;
RETURN NEW;
END
$$ LANGUAGE plpgsql;
CREATE TRIGGER nodes_hash_insert BEFORE INSERT ON nodes FOR EACH ROW EXECUTE PROCEDURE hash_insert_nodes();
CREATE TRIGGER nodes_hash_update BEFORE UPDATE ON nodes FOR EACH ROW EXECUTE PROCEDURE hash_update_nodes();
|]
NodesNodes.hs 0000664 0000000 0000000 00000022210 14124644201 0033716 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Trigger {-|
Module : Gargantext.Database.Admin.Trigger.NodesNodes
Description : Triggers configuration
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Triggers on NodesNodes table.
-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Admin.Trigger.NodesNodes
where
import Database.PostgreSQL.Simple.SqlQQ (sql)
-- import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Database.Prelude (Cmd, execPGSQuery)
import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as DPS
type MasterListId = ListId
triggerDeleteCount :: MasterListId -> Cmd err Int64
triggerDeleteCount lId = execPGSQuery query (lId, toDBid NodeList)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_delete_count() RETURNS trigger AS $$
BEGIN
UPDATE node_node_ngrams SET weight = weight - d.delete_count
FROM (SELECT old1.node1_id as node1_id, lists.id as node2_id, nnn.ngrams_id as ngrams_id, nnn.ngrams_type as ngrams_type, count(*) as delete_count FROM OLD as old1
INNER JOIN nodes doc ON doc.id = old1.node2_id
INNER JOIN nodes lists ON lists.parent_id = old1.node1_id
INNER JOIN node_node_ngrams nnn ON nnn.node2_id = doc.id
WHERE nnn.node1_id in (?, lists.id)
AND lists.typename = ?
GROUP BY old1.node1_id, lists.id, nnn.ngrams_id, nnn.ngrams_type
) AS d
WHERE node_node_ngrams.node1_id = d.node1_id
AND node_node_ngrams.node2_id = d.node2_id
AND node_node_ngrams.ngrams_id = d.ngrams_id
AND node_node_ngrams.ngrams_type = d.ngrams_type
;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_delete_count on nodes_nodes;
CREATE TRIGGER trigger_delete_count AFTER DELETE on nodes_nodes
REFERENCING OLD TABLE AS OLD
FOR EACH STATEMENT
EXECUTE PROCEDURE set_delete_count();
|]
triggerInsertCount :: MasterListId -> Cmd err Int64
triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_insert_count() RETURNS trigger AS $$
BEGIN
INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
SELECT new1.node1_id , lists.id, nnn.ngrams_id, nnn.ngrams_type, count(*) as weight from NEW as new1
INNER JOIN nodes doc ON doc.id = new1.node2_id
INNER JOIN nodes lists ON lists.parent_id = new1.node1_id
INNER JOIN node_node_ngrams nnn ON nnn.node2_id = doc.id
WHERE nnn.node1_id in (?, lists.id)
AND lists.typename = ?
GROUP BY new1.node1_id, lists.id, nnn.ngrams_id, nnn.ngrams_type
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE set weight = node_node_ngrams.weight + excluded.weight
;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_insert_count on nodes_nodes;
CREATE TRIGGER trigger_insert_count AFTER INSERT on nodes_nodes
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE set_insert_count();
|]
triggerUpdateAdd :: MasterListId -> Cmd err Int64
triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_update_ngrams_add() RETURNS trigger AS $$
BEGIN
UPDATE node_node_ngrams nnn0 SET weight = weight + d.fix_count
FROM (SELECT new1.node1_id as node1_id, lists.id as node2_id, nnn.ngrams_id as ngrams_id, nnn.ngrams_type as ngrams_type, count(*) as fix_count
FROM NEW as new1
INNER JOIN nodes lists ON new1.node1_id = lists.parent_id
INNER JOIN node_node_ngrams nnn ON new1.node2_id = nnn.node2_id
WHERE nnn.node1_id in (?, lists.id) -- (masterList_id, userLists)
AND lists.typename = ?
GROUP BY new1.node1_id, lists.id, nnn.ngrams_id, nnn.ngrams_type
) as d
WHERE nnn0.node1_id = d.node1_id
AND nnn0.node2_id = d.node2_id
AND nnn0.ngrams_id = d.ngrams_id
AND nnn0.ngrams_type = d.ngrams_type
;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_count_update_add on nodes_nodes;
CREATE TRIGGER trigger_count_update_add AFTER UPDATE on nodes_nodes
REFERENCING OLD TABLE AS OLD NEW TABLE AS NEW
FOR EACH ROW
WHEN (OLD.category <= 0 AND NEW.category >= 1)
EXECUTE PROCEDURE set_update_ngrams_add();
|]
triggerUpdateDel :: MasterListId -> Cmd err Int64
triggerUpdateDel lId = execPGSQuery query (lId, nodeTypeId NodeList)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_update_ngrams_count_del() RETURNS trigger AS $$
BEGIN
UPDATE node_node_ngrams nnn0 SET weight = weight - d.fix_count
FROM (SELECT new1.node1_id as node1_id, lists.id as node2_id, nnn.ngrams_id as ngrams_id, nnn.ngrams_type as ngrams_type, count(*) as fix_count
FROM NEW as new1
INNER JOIN nodes lists ON new1.node1_id = lists.parent_id
INNER JOIN node_node_ngrams nnn ON new1.node2_id = nnn.node2_id
WHERE nnn.node1_id in (?, lists.id) -- (masterList_id, userLists)
AND lists.typename = ?
GROUP BY new1.node1_id, lists.id, nnn.ngrams_id, nnn.ngrams_type
) as d
WHERE nnn0.node1_id = d.node1_id
AND nnn0.node2_id = d.node2_id
AND nnn0.ngrams_id = d.ngrams_id
AND nnn0.ngrams_type = d.ngrams_type
;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_count_delete2 on nodes_nodes;
CREATE TRIGGER trigger_count_delete2 AFTER UPDATE on nodes_nodes
REFERENCING OLD TABLE AS OLD NEW TABLE AS NEW
FOR EACH ROW
WHEN (OLD.category >= 1 AND NEW.category <= 0)
EXECUTE PROCEDURE set_update_ngrams_count_del();
|]
-- TODO add groups
triggerCoocInsert :: MasterListId -> Cmd err Int64
triggerCoocInsert lid = execPGSQuery query ( lid
-- , nodeTypeId NodeCorpus
-- , nodeTypeId NodeDocument
-- , nodeTypeId NodeList
, toDBid CandidateTerm
, toDBid CandidateTerm
)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION nodes_nodes_set_cooc() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_nodengrams_nodengrams (node_id, node_ngrams1_id, node_ngrams2_id, weight)
WITH input(corpus_id, nn1, nn2, weight) AS (
SELECT new1.node1_id, nn1.id, nn2.id, count(*) from NEW as new1
INNER JOIN node_ngrams nn1
ON nn1.node_id = ? -- COALESCE(?,?) --(masterList, userList)
INNER JOIN node_ngrams nn2
ON nn2.node_id = nn1.node_id
INNER JOIN node_node_ngrams2 nnn1
ON nnn1.node_id = new1.node2_id
INNER JOIN node_node_ngrams2 nnn2
ON nnn2.node_id = new1.node2_id
WHERE nnn1.nodengrams_id = nn1.id
AND nnn2.nodengrams_id = nn2.id
AND nn1.id < nn2.id
AND nn1.node_subtype >= ?
AND nn2.node_subtype >= ?
GROUP BY new1.node1_id, nn1.id, nn2.id
)
SELECT * from input where weight >= 1
ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id)
DO UPDATE set weight = node_nodengrams_nodengrams.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_cooc on node_node_ngrams2;
CREATE TRIGGER trigger_cooc_insert AFTER INSERT on nodes_nodes
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE nodes_nodes_set_cooc();
|]
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/ 0000775 0000000 0000000 00000000000 14124644201 0031104 5 ustar 00root root 0000000 0000000 Hyperdata.hs 0000664 0000000 0000000 00000003760 14124644201 0033310 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types {-|
Module : Gargantext.Database.Admin.Types.Hyperdata
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Database.Admin.Types.Hyperdata
( module Gargantext.Database.Admin.Types.Hyperdata.Any
, module Gargantext.Database.Admin.Types.Hyperdata.Contact
, module Gargantext.Database.Admin.Types.Hyperdata.Corpus
, module Gargantext.Database.Admin.Types.Hyperdata.Dashboard
, module Gargantext.Database.Admin.Types.Hyperdata.Document
, module Gargantext.Database.Admin.Types.Hyperdata.File
, module Gargantext.Database.Admin.Types.Hyperdata.Folder
, module Gargantext.Database.Admin.Types.Hyperdata.Frame
, module Gargantext.Database.Admin.Types.Hyperdata.List
, module Gargantext.Database.Admin.Types.Hyperdata.Model
, module Gargantext.Database.Admin.Types.Hyperdata.Prelude
, module Gargantext.Database.Admin.Types.Hyperdata.Texts
, module Gargantext.Database.Admin.Types.Hyperdata.Phylo
, module Gargantext.Database.Admin.Types.Hyperdata.User
, module Gargantext.Core.Viz.Graph
)
where
import Gargantext.Database.Admin.Types.Hyperdata.Any
import Gargantext.Database.Admin.Types.Hyperdata.Contact
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Admin.Types.Hyperdata.Dashboard
import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Hyperdata.File
import Gargantext.Database.Admin.Types.Hyperdata.Folder
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Hyperdata.List
import Gargantext.Database.Admin.Types.Hyperdata.Model
import Gargantext.Database.Admin.Types.Hyperdata.Prelude (Hyperdata)
import Gargantext.Database.Admin.Types.Hyperdata.Texts
import Gargantext.Database.Admin.Types.Hyperdata.Phylo
import Gargantext.Database.Admin.Types.Hyperdata.User
import Gargantext.Core.Viz.Graph (HyperdataGraph(..), defaultHyperdataGraph)
Hyperdata/ 0000775 0000000 0000000 00000000000 14124644201 0032746 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types Any.hs 0000664 0000000 0000000 00000003142 14124644201 0034031 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata {-|
Module : Gargantext.Database.Admin.Types.Hyperdata.Any
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.Any
where
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
------------------------------------------------------------------------
newtype HyperdataAny = HyperdataAny Object
deriving (Show, Generic, ToJSON, FromJSON)
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
instance Hyperdata HyperdataAny
instance Arbitrary HyperdataAny where
arbitrary = pure $ HyperdataAny mempty -- TODO produce arbitrary objects
instance ToSchema HyperdataAny where
declareNamedSchema proxy =
pure $ genericNameSchema defaultSchemaOptions proxy mempty
& schema.description ?~ "Hyperdata of any node (Json Value)"
& schema.example ?~ emptyObject -- TODO
instance FromField HyperdataAny where
fromField = fromField'
instance QueryRunnerColumnDefault PGJsonb HyperdataAny
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
Contact.hs 0000664 0000000 0000000 00000015446 14124644201 0034707 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata {-|
Module : Gargantext.Database.Admin.Types.Hyperdata.Contact
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.Contact
where
import Data.Time.Segment (jour)
import Data.Time (UTCTime)
import Gargantext.Core.Text (HasText(..))
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Prelude
--------------------------------------------------------------------------------
data HyperdataContact =
HyperdataContact { _hc_bdd :: Maybe Text -- ID of Database source
, _hc_who :: Maybe ContactWho
, _hc_where :: [ContactWhere]
, _hc_title :: Maybe Text -- TODO remove (only demo)
, _hc_source :: Maybe Text -- TODO remove (only demo)
, _hc_lastValidation :: Maybe Text -- TODO UTCTime
, _hc_uniqIdBdd :: Maybe Text
, _hc_uniqId :: Maybe Text
} deriving (Eq, Show, Generic)
instance HasText HyperdataContact
where
hasText = undefined
defaultHyperdataContact :: HyperdataContact
defaultHyperdataContact = HyperdataContact (Just "bdd")
(Just defaultContactWho)
[defaultContactWhere]
(Just "Title")
(Just "Source")
(Just "TODO lastValidation date")
(Just "DO NOT expose this")
(Just "DO NOT expose this")
hyperdataContact :: FirstName -> LastName -> HyperdataContact
hyperdataContact fn ln = HyperdataContact Nothing
(Just (contactWho fn ln))
[]
Nothing
Nothing
Nothing
Nothing
Nothing
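-- For example (illustration only), a minimal contact with only the "who" part set:
--   hyperdataContact "Ada" "Lovelace"
--     == HyperdataContact Nothing (Just (contactWho "Ada" "Lovelace")) [] Nothing Nothing Nothing Nothing Nothing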
-- TODO contact metadata (the type is too flat)
data ContactMetaData =
ContactMetaData { _cm_bdd :: Maybe Text
, _cm_lastValidation :: Maybe Text -- TODO UTCTIME
} deriving (Eq, Show, Generic)
defaultContactMetaData :: ContactMetaData
defaultContactMetaData = ContactMetaData (Just "bdd") (Just "TODO UTCTime")
arbitraryHyperdataContact :: HyperdataContact
arbitraryHyperdataContact = HyperdataContact Nothing Nothing []
Nothing Nothing Nothing
Nothing Nothing
data ContactWho =
ContactWho { _cw_id :: Maybe Text
, _cw_firstName :: Maybe Text
, _cw_lastName :: Maybe Text
, _cw_keywords :: [Text]
, _cw_freetags :: [Text]
} deriving (Eq, Show, Generic)
type FirstName = Text
type LastName = Text
defaultContactWho :: ContactWho
defaultContactWho = contactWho "Pierre" "Dupont"
contactWho :: FirstName -> LastName -> ContactWho
contactWho fn ln = ContactWho Nothing
(Just fn)
(Just ln)
[]
[]
data ContactWhere =
ContactWhere { _cw_organization :: [Text]
, _cw_labTeamDepts :: [Text]
, _cw_role :: Maybe Text
, _cw_office :: Maybe Text
, _cw_country :: Maybe Text
, _cw_city :: Maybe Text
, _cw_touch :: Maybe ContactTouch
, _cw_entry :: Maybe UTCTime
, _cw_exit :: Maybe UTCTime
} deriving (Eq, Show, Generic)
defaultContactWhere :: ContactWhere
defaultContactWhere = ContactWhere ["Organization X"]
["Lab Z"]
(Just "Role")
(Just "Office")
(Just "Country")
(Just "City")
(Just defaultContactTouch)
(Just $ jour 01 01 2020)
(Just $ jour 01 01 2029)
data ContactTouch =
ContactTouch { _ct_mail :: Maybe Text
, _ct_phone :: Maybe Text
, _ct_url :: Maybe Text
} deriving (Eq, Show, Generic)
defaultContactTouch :: ContactTouch
defaultContactTouch = ContactTouch (Just "email@data.com")
(Just "+336 328 283 288")
(Just "https://url.com")
-- | ToSchema instances
instance ToSchema HyperdataContact where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hc_")
instance ToSchema ContactWho where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_cw_")
instance ToSchema ContactWhere where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_cw_")
instance ToSchema ContactTouch where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ct_")
instance ToSchema ContactMetaData where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_cm_")
-- | Arbitrary instances
instance Arbitrary HyperdataContact where
arbitrary = elements [HyperdataContact Nothing Nothing [] Nothing Nothing Nothing Nothing Nothing]
-- | Specific Gargantext instance
instance Hyperdata HyperdataContact
-- | Database (Posgresql-simple instance)
instance FromField HyperdataContact where
fromField = fromField'
-- | Database (Opaleye instance)
instance QueryRunnerColumnDefault PGJsonb HyperdataContact where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault (Nullable PGJsonb) HyperdataContact where
queryRunnerColumnDefault = fieldQueryRunnerColumn
-- | All lenses
makeLenses ''ContactWho
makeLenses ''ContactWhere
makeLenses ''ContactTouch
makeLenses ''ContactMetaData
makeLenses ''HyperdataContact
-- | All Json instances
$(deriveJSON (unPrefix "_cw_") ''ContactWho)
$(deriveJSON (unPrefix "_cw_") ''ContactWhere)
$(deriveJSON (unPrefix "_ct_") ''ContactTouch)
$(deriveJSON (unPrefix "_cm_") ''ContactMetaData)
$(deriveJSON (unPrefix "_hc_") ''HyperdataContact)
Corpus.hs 0000664 0000000 0000000 00000007220 14124644201 0034556 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata {-|
Module : Gargantext.Database.Admin.Types.Hyperdata.Corpus
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.Corpus
where
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.CorpusField
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
------------------------------------------------------------------------
data HyperdataCorpus =
HyperdataCorpus { _hc_fields :: ![HyperdataField CorpusField] }
deriving (Generic)
defaultHyperdataCorpus :: HyperdataCorpus
defaultHyperdataCorpus =
HyperdataCorpus [ HyperdataField Markdown
"Corpus analysis"
(MarkdownField "# title\n## subtitle")
, HyperdataField JSON
"Metadata (Experts only)"
(JsonField "Title" "Descr" "Bool query" "Authors")
]
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Annuaire and Corpus should be the same
data HyperdataAnnuaire = HyperdataAnnuaire { _ha_title :: !(Maybe Text)
, _ha_desc :: !(Maybe Text)
} deriving (Show, Generic)
defaultHyperdataAnnuaire :: HyperdataAnnuaire
defaultHyperdataAnnuaire = HyperdataAnnuaire (Just "Title") (Just "Description")
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
instance Hyperdata HyperdataCorpus
instance Hyperdata HyperdataAnnuaire
$(makeLenses ''HyperdataCorpus)
$(makeLenses ''HyperdataAnnuaire)
$(deriveJSON (unPrefix "_hc_") ''HyperdataCorpus)
$(deriveJSON (unPrefix "_ha_") ''HyperdataAnnuaire)
------------------------------------------------------------------------
instance ToSchema HyperdataCorpus where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_hc_") proxy
& mapped.schema.description ?~ "Corpus Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataCorpus
instance ToSchema HyperdataAnnuaire where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_ha_") proxy
& mapped.schema.description ?~ "Annuaire Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataAnnuaire
------------------------------------------------------------------------
instance Arbitrary HyperdataCorpus where
arbitrary = pure defaultHyperdataCorpus
instance Arbitrary HyperdataAnnuaire where
arbitrary = pure defaultHyperdataAnnuaire
------------------------------------------------------------------------
instance FromField HyperdataCorpus
where
fromField = fromField'
instance FromField HyperdataAnnuaire
where
fromField = fromField'
------------------------------------------------------------------------
instance QueryRunnerColumnDefault PGJsonb HyperdataCorpus
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGJsonb HyperdataAnnuaire
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
CorpusField.hs 0000664 0000000 0000000 00000005543 14124644201 0035530 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata {-|
Module : Gargantext.Database.Admin.Types.Hyperdata.CorpusField
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.CorpusField
where
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
data CodeType = JSON | Markdown | Haskell | Python
deriving (Generic, Show, Eq)
instance ToJSON CodeType
instance FromJSON CodeType
instance ToSchema CodeType
------------------------------------------------------------------------
data CorpusField = MarkdownField { _cf_text :: !Text }
| HaskellField { _cf_haskell :: !Text }
| PythonField { _cf_python :: !Text }
| JsonField { _cf_title :: !Text
, _cf_desc :: !Text
, _cf_query :: !Text
, _cf_authors :: !Text
-- , _cf_resources :: ![Resource]
}
deriving (Show, Generic)
defaultCorpusField :: CorpusField
defaultCorpusField = MarkdownField "# Title"
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
$(makeLenses ''CorpusField)
$(deriveJSON (unPrefix "_cf_") ''CorpusField)
instance ToSchema CorpusField where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_cf_") proxy
& mapped.schema.description ?~ "CorpusField"
& mapped.schema.example ?~ toJSON defaultCorpusField
------------------------------------------------------------------------
data HyperdataField a =
HyperdataField { _hf_type :: !CodeType
, _hf_name :: !Text
, _hf_data :: !a
} deriving (Generic, Show)
defaultHyperdataField :: HyperdataField CorpusField
defaultHyperdataField = HyperdataField Markdown "name" defaultCorpusField
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
$(makeLenses ''HyperdataField)
$(deriveJSON (unPrefix "_hf_") ''HyperdataField)
instance (Typeable a, ToSchema a) => ToSchema (HyperdataField a) where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_hf_") proxy
& mapped.schema.description ?~ "Hyperdata Field"
& mapped.schema.example ?~ toJSON defaultCorpusField
{-
declareNamedSchema =
wellNamedSchema "_hf_"
-- & mapped.schema.description ?~ "HyperdataField"
-- & mapped.schema.example ?~ toJSON defaultHyperdataField
-}
Dashboard.hs 0000664 0000000 0000000 00000005034 14124644201 0035173 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata {-|
Module : Gargantext.Database.Admin.Types.Hyperdata.Dashboard
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.Dashboard
where
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.CorpusField
------------------------------------------------------------------------
data HyperdataDashboard =
HyperdataDashboard { _hd_preferences :: !(Maybe Text)
, _hd_charts :: ![Chart]
, _hd_fields :: ![HyperdataField CorpusField]
}
deriving (Show, Generic)
defaultHyperdataDashboard :: HyperdataDashboard
defaultHyperdataDashboard = HyperdataDashboard Nothing [] defaultHyperdataDashboardFields
defaultHyperdataDashboardFields :: [HyperdataField CorpusField]
defaultHyperdataDashboardFields =
[ HyperdataField Markdown
"DashBoard Title"
(MarkdownField "# DashBoard Title\n## Dashboard subtitle\nText and Chart cells.")
{- , HyperdataField JSON
"Metadata (Experts only)"
(JsonField "Title" "Descr" "Bool query" "Authors")
-}
]
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
instance Hyperdata HyperdataDashboard
$(makeLenses ''HyperdataDashboard)
$(deriveJSON (unPrefix "_hd_") ''HyperdataDashboard)
instance Arbitrary HyperdataDashboard where
arbitrary = pure defaultHyperdataDashboard
instance ToSchema HyperdataDashboard where
declareNamedSchema proxy =
pure $ genericNameSchema defaultSchemaOptions proxy mempty
-- genericDeclareNamedSchema (unPrefixSwagger "hp_") proxy
& schema.description ?~ "Dashboard Hyperdata"
& schema.example ?~ toJSON defaultHyperdataDashboard
instance FromField HyperdataDashboard where
fromField = fromField'
instance QueryRunnerColumnDefault PGJsonb HyperdataDashboard
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
Default.hs 0000664 0000000 0000000 00000011105 14124644201 0034664 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata {-|
Module : Gargantext.Database.Admin.Types.Hyperdata.Default
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.Default
where
import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
data DefaultHyperdata =
DefaultUser HyperdataUser
| DefaultContact HyperdataContact
| DefaultCorpus HyperdataCorpus
| DefaultCorpusV3 HyperdataCorpus
| DefaultAnnuaire HyperdataAnnuaire
| DefaultDocument HyperdataDocument
| DefaultTexts HyperdataTexts
| DefaultList HyperdataList
| DefaultListCooc HyperdataListCooc
| DefaultModel HyperdataModel
| DefaultFolder HyperdataFolder
| DefaultFolderPrivate HyperdataFolderPrivate
| DefaultFolderShared HyperdataFolderShared
| DefaultTeam HyperdataFolder
| DefaultFolderPublic HyperdataFolderPublic
| DefaultGraph HyperdataGraph
| DefaultPhylo HyperdataPhylo
| DefaultDashboard HyperdataDashboard
| DefaultFrameWrite HyperdataFrame
| DefaultFrameCalc HyperdataFrame
| DefaultFrameVisio HyperdataFrame
| DefaultFrameCode HyperdataFrame
| DefaultFile HyperdataFile
instance Hyperdata DefaultHyperdata
instance ToJSON DefaultHyperdata where
toJSON (DefaultUser x) = toJSON x
toJSON (DefaultContact x) = toJSON x
toJSON (DefaultCorpus x) = toJSON x
toJSON (DefaultCorpusV3 x) = toJSON x
toJSON (DefaultAnnuaire x) = toJSON x
toJSON (DefaultDocument x) = toJSON x
toJSON (DefaultTexts x) = toJSON x
toJSON (DefaultList x) = toJSON x
toJSON (DefaultListCooc x) = toJSON x
toJSON (DefaultModel x) = toJSON x
toJSON (DefaultFolder x) = toJSON x
toJSON (DefaultFolderPrivate x) = toJSON x
toJSON (DefaultFolderShared x) = toJSON x
toJSON (DefaultTeam x) = toJSON x
toJSON (DefaultFolderPublic x) = toJSON x
toJSON (DefaultGraph x) = toJSON x
toJSON (DefaultPhylo x) = toJSON x
toJSON (DefaultDashboard x) = toJSON x
toJSON (DefaultFrameWrite x) = toJSON x
toJSON (DefaultFrameCalc x) = toJSON x
toJSON (DefaultFrameVisio x) = toJSON x
toJSON (DefaultFrameCode x) = toJSON x
toJSON (DefaultFile x) = toJSON x
defaultHyperdata :: NodeType -> DefaultHyperdata
defaultHyperdata NodeUser = DefaultUser defaultHyperdataUser
defaultHyperdata NodeContact = DefaultContact defaultHyperdataContact
defaultHyperdata NodeCorpus = DefaultCorpus defaultHyperdataCorpus
defaultHyperdata NodeCorpusV3 = DefaultCorpusV3 defaultHyperdataCorpus
defaultHyperdata NodeAnnuaire = DefaultAnnuaire defaultHyperdataAnnuaire
defaultHyperdata NodeDocument = DefaultDocument defaultHyperdataDocument
defaultHyperdata NodeTexts = DefaultTexts defaultHyperdataTexts
defaultHyperdata NodeList = DefaultList defaultHyperdataList
defaultHyperdata NodeListCooc = DefaultListCooc defaultHyperdataListCooc
defaultHyperdata NodeModel = DefaultModel defaultHyperdataModel
defaultHyperdata NodeFolder = DefaultFolder defaultHyperdataFolder
defaultHyperdata NodeFolderPrivate = DefaultFolderPrivate defaultHyperdataFolderPrivate
defaultHyperdata NodeFolderShared = DefaultFolderShared defaultHyperdataFolderShared
defaultHyperdata NodeTeam = DefaultTeam defaultHyperdataFolder
defaultHyperdata NodeFolderPublic = DefaultFolderPublic defaultHyperdataFolderPublic
defaultHyperdata NodeGraph = DefaultGraph defaultHyperdataGraph
defaultHyperdata NodePhylo = DefaultPhylo defaultHyperdataPhylo
defaultHyperdata NodeDashboard = DefaultDashboard defaultHyperdataDashboard
defaultHyperdata NodeFrameWrite = DefaultFrameWrite defaultHyperdataFrame
defaultHyperdata NodeFrameCalc = DefaultFrameCalc defaultHyperdataFrame
defaultHyperdata NodeFrameVisio = DefaultFrameVisio defaultHyperdataFrame
defaultHyperdata NodeFrameNotebook = DefaultFrameCalc defaultHyperdataFrame
defaultHyperdata NodeFile = DefaultFile defaultHyperdataFile
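-- A small usage sketch (illustrative, not part of the original module): since
-- ToJSON for DefaultHyperdata simply delegates to the wrapped hyperdata, the
-- default payload for a freshly created node can be rendered directly, e.g. in
-- GHCi (assuming Data.Aeson's encode is in scope):
--
-- >>> encode (defaultHyperdata NodeFrameWrite)
-- roughly "{\"base\":\"\",\"frame_id\":\"\"}", i.e. the JSON of defaultHyperdataFrame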
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata/Document.hs
{-|
Module : Gargantext.Database.Admin.Types.Hyperdata.Document
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.Document where
import Data.Maybe (catMaybes)
import Gargantext.Prelude
import Gargantext.Core.Text (HasText(..))
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
------------------------------------------------------------------------
data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe Text)
, _hd_doi :: !(Maybe Text)
, _hd_url :: !(Maybe Text)
, _hd_uniqId :: !(Maybe Text)
, _hd_uniqIdBdd :: !(Maybe Text)
, _hd_page :: !(Maybe Int)
, _hd_title :: !(Maybe Text)
, _hd_authors :: !(Maybe Text)
, _hd_institutes :: !(Maybe Text)
, _hd_source :: !(Maybe Text)
, _hd_abstract :: !(Maybe Text)
, _hd_publication_date :: !(Maybe Text)
, _hd_publication_year :: !(Maybe Int)
, _hd_publication_month :: !(Maybe Int)
, _hd_publication_day :: !(Maybe Int)
, _hd_publication_hour :: !(Maybe Int)
, _hd_publication_minute :: !(Maybe Int)
, _hd_publication_second :: !(Maybe Int)
, _hd_language_iso2 :: !(Maybe Text)
}
deriving (Show, Generic)
instance HasText HyperdataDocument
where
hasText h = catMaybes [ _hd_title h
, _hd_abstract h
]
defaultHyperdataDocument :: HyperdataDocument
defaultHyperdataDocument = case decode docExample of
Just hp -> hp
Nothing -> HyperdataDocument Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
Nothing Nothing Nothing
where
docExample :: ByteString
docExample = "{\"doi\":\"sdfds\",\"publication_day\":6,\"language_iso2\":\"en\",\"publication_minute\":0,\"publication_month\":7,\"language_iso3\":\"eng\",\"publication_second\":0,\"authors\":\"Nils Hovdenak, Kjell Haram\",\"publication_year\":2012,\"publication_date\":\"2012-07-06 00:00:00+00:00\",\"language_name\":\"English\",\"realdate_full_\":\"2012 01 12\",\"source\":\"European journal of obstetrics, gynecology, and reproductive biology\",\"abstract\":\"The literature was searched for publications on minerals and vitamins during pregnancy and the possible influence of supplements on pregnancy outcome.\",\"title\":\"Influence of mineral and vitamin supplements on pregnancy outcome.\",\"publication_hour\":0}"
------------------------------------------------------------------------
-- | Legacy Garg V3 compatibility (to be removed one year after release)
data StatusV3 = StatusV3 { statusV3_error :: !(Maybe Text)
, statusV3_action :: !(Maybe Text)
} deriving (Show, Generic)
$(deriveJSON (unPrefix "statusV3_") ''StatusV3)
------------------------------------------------------------------------
data HyperdataDocumentV3 = HyperdataDocumentV3 { _hdv3_publication_day :: !(Maybe Int)
, _hdv3_language_iso2 :: !(Maybe Text)
, _hdv3_publication_second :: !(Maybe Int)
, _hdv3_publication_minute :: !(Maybe Int)
, _hdv3_publication_month :: !(Maybe Int)
, _hdv3_publication_hour :: !(Maybe Int)
, _hdv3_error :: !(Maybe Text)
, _hdv3_language_iso3 :: !(Maybe Text)
, _hdv3_authors :: !(Maybe Text)
, _hdv3_publication_year :: !(Maybe Int)
, _hdv3_publication_date :: !(Maybe Text)
, _hdv3_language_name :: !(Maybe Text)
, _hdv3_statuses :: !(Maybe [StatusV3])
, _hdv3_realdate_full_ :: !(Maybe Text)
, _hdv3_source :: !(Maybe Text)
, _hdv3_abstract :: !(Maybe Text)
, _hdv3_title :: !(Maybe Text)
} deriving (Show, Generic)
------------------------------------------------------------------------
-- | Instances for Analysis
------------------------------------------------------------------------
class ToHyperdataDocument a where
toHyperdataDocument :: a -> HyperdataDocument
instance ToHyperdataDocument HyperdataDocument
where
toHyperdataDocument = identity
------------------------------------------------------------------------
instance Eq HyperdataDocument where
(==) h1 h2 = (==) (_hd_uniqId h1) (_hd_uniqId h2)
------------------------------------------------------------------------
instance Ord HyperdataDocument where
compare h1 h2 = compare (_hd_publication_date h1) (_hd_publication_date h2)
------------------------------------------------------------------------
instance Arbitrary HyperdataDocument where
arbitrary = elements arbitraryHyperdataDocuments
arbitraryHyperdataDocuments :: [HyperdataDocument]
arbitraryHyperdataDocuments =
map toHyperdataDocument' ([ ("AI is big but less than crypto", "Troll System journal")
, ("Crypto is big but less than AI", "System Troll review" )
, ("Science is magic" , "Closed Source review")
, ("Open science for all" , "No Time" )
, ("Closed science for me" , "No Space" )
] :: [(Text, Text)])
where
toHyperdataDocument' (t1,t2) =
HyperdataDocument Nothing Nothing Nothing Nothing Nothing Nothing (Just t1)
Nothing Nothing (Just t2) Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
------------------------------------------------------------------------
-- | Common Instances of Hyperdata
------------------------------------------------------------------------
instance Hyperdata HyperdataDocument
instance Hyperdata HyperdataDocumentV3
------------------------------------------------------------------------
$(makeLenses ''HyperdataDocument)
makePrisms ''HyperdataDocument
$(makeLenses ''HyperdataDocumentV3)
-- $(deriveJSON (unPrefix "_hd_") ''HyperdataDocument)
instance FromJSON HyperdataDocument
where
parseJSON = genericParseJSON
( defaultOptions { sumEncoding = ObjectWithSingleField
, fieldLabelModifier = unCapitalize . dropPrefix "_hd_"
, omitNothingFields = True
}
)
instance ToJSON HyperdataDocument
where
toJSON = genericToJSON
( defaultOptions { sumEncoding = ObjectWithSingleField
, fieldLabelModifier = unCapitalize . dropPrefix "_hd_"
, omitNothingFields = True
}
)
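-- A small sketch of the field-label rewriting used above (illustrative GHCi
-- session, assuming the helpers from Gargantext.Core.Utils.Prefix are in scope):
--
-- >>> (unCapitalize . dropPrefix "_hd_") "_hd_publication_year"
-- "publication_year"
--
-- Combined with omitNothingFields = True, only fields holding Just values
-- appear in the serialised document.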
$(deriveJSON (unPrefix "_hdv3_") ''HyperdataDocumentV3)
instance ToSchema HyperdataDocument where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_hd_") proxy
& mapped.schema.description ?~ "Document Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataDocument
{-
-- | For now HyperdataDocumentV3 is not exposed with the API
instance ToSchema HyperdataDocumentV3 where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "hyperdataDocumentV3_") proxy
& mapped.schema.description ?~ "Document Hyperdata for Garg V3"
& mapped.schema.example ?~ toJSON defaultHyperdataDocumentV3
-}
------------------------------------------------------------------------
instance FromField HyperdataDocument
where
fromField = fromField'
instance FromField HyperdataDocumentV3
where
fromField = fromField'
-------
instance ToField HyperdataDocument where
toField = toJSONField
instance ToField HyperdataDocumentV3 where
toField = toJSONField
------------------------------------------------------------------------
instance QueryRunnerColumnDefault PGJsonb HyperdataDocument
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGJsonb HyperdataDocumentV3
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata/File.hs
{-|
Module : Gargantext.Database.Admin.Types.Hyperdata.File
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.File
where
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
------------------------------------------------------------------------
data HyperdataFile =
HyperdataFile { _hff_name :: !Text
, _hff_path :: !Text
, _hff_mime :: !Text
}
deriving (Generic)
defaultHyperdataFile :: HyperdataFile
defaultHyperdataFile = HyperdataFile "" "" ""
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
-- | Specific Gargantext instance
instance Hyperdata HyperdataFile
makeLenses ''HyperdataFile
-- | All Json instances
$(deriveJSON (unPrefix "_hff_") ''HyperdataFile)
-- | Arbitrary instances for tests
instance Arbitrary HyperdataFile where
arbitrary = pure defaultHyperdataFile
instance FromField HyperdataFile
where
fromField = fromField'
instance QueryRunnerColumnDefault PGJsonb HyperdataFile
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance ToSchema HyperdataFile where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_hff_") proxy
& mapped.schema.description ?~ "File Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataFile
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata/Folder.hs
{-|
Module : Gargantext.Database.Admin.Types.Hyperdata.Folder
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.Folder
where
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
------------------------------------------------------------------------
type HyperdataFolder = HyperdataCorpus
defaultHyperdataFolder :: HyperdataFolder
defaultHyperdataFolder = defaultHyperdataCorpus
------------------------------------------------------------------------
type HyperdataFolderPrivate = HyperdataFolder
defaultHyperdataFolderPrivate :: HyperdataFolderPrivate
defaultHyperdataFolderPrivate = defaultHyperdataFolder
type HyperdataFolderShared = HyperdataFolder
defaultHyperdataFolderShared :: HyperdataFolderShared
defaultHyperdataFolderShared = defaultHyperdataFolder
type HyperdataFolderPublic = HyperdataFolder
defaultHyperdataFolderPublic :: HyperdataFolderPublic
defaultHyperdataFolderPublic = defaultHyperdataFolder
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs
{-|
Module : Gargantext.Database.Admin.Types.Hyperdata.Frame
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.Frame
where
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
------------------------------------------------------------------------
data HyperdataFrame =
HyperdataFrame { _hf_base :: !Text
, _hf_frame_id :: !Text
}
deriving (Generic)
defaultHyperdataFrame :: HyperdataFrame
defaultHyperdataFrame = HyperdataFrame "" ""
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
-- | Specific Gargantext instance
instance Hyperdata HyperdataFrame
makeLenses ''HyperdataFrame
-- | All Json instances
$(deriveJSON (unPrefix "_hf_") ''HyperdataFrame)
-- | Arbitrary instances for tests
instance Arbitrary HyperdataFrame where
arbitrary = pure defaultHyperdataFrame
instance FromField HyperdataFrame
where
fromField = fromField'
instance QueryRunnerColumnDefault PGJsonb HyperdataFrame
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance ToSchema HyperdataFrame where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_hf_") proxy
& mapped.schema.description ?~ "Frame Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataFrame
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata/List.hs
{-|
Module : Gargantext.Database.Admin.Types.Hyperdata.List
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.List
where
import Data.Vector (Vector)
--import qualified Data.Vector as V
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import Control.Applicative
import Gargantext.Prelude
import Gargantext.Core.Viz.Types (Histo(..))
import Gargantext.API.Ngrams.NgramsTree (NgramsTree)
import Gargantext.API.Ngrams.Types (TabType)
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Database.Admin.Types.Metrics (ChartMetrics(..), Metrics)
------------------------------------------------------------------------
data HyperdataList =
HyperdataList { _hl_chart :: !(HashMap TabType (ChartMetrics Histo))
, _hl_list :: !(Maybe Text)
, _hl_pie :: !(HashMap TabType (ChartMetrics Histo))
, _hl_scatter :: !(HashMap TabType Metrics)
, _hl_tree :: !(HashMap TabType (ChartMetrics (Vector NgramsTree)))
} deriving (Show, Generic)
-- HyperdataList { _hl_chart :: !(Maybe (ChartMetrics Histo))
-- , _hl_list :: !(Maybe Text)
-- , _hl_pie :: !(Maybe (ChartMetrics Histo))
-- , _hl_scatter :: !(Maybe Metrics)
-- , _hl_tree :: !(Maybe (ChartMetrics [NgramsTree]))
-- } deriving (Show, Generic)
defaultHyperdataList :: HyperdataList
defaultHyperdataList =
HyperdataList { _hl_chart = HM.empty
, _hl_list = Nothing
, _hl_pie = HM.empty
, _hl_scatter = HM.empty
, _hl_tree = HM.empty
}
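-- Encoding sketch (illustrative): with the "_hl_" prefix stripped by the
-- derived JSON instance below, the empty default above serialises roughly as
--   {"chart":{},"pie":{},"scatter":{},"tree":{}}
-- (the Nothing list field is dropped when unPrefix enables omitNothingFields).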
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
instance Hyperdata HyperdataList
$(makeLenses ''HyperdataList)
$(deriveJSON (unPrefix "_hl_") ''HyperdataList)
------------------------------------------------------------------------
data HyperdataListCooc =
HyperdataListCooc { _hlc_preferences :: !Text }
deriving (Generic)
defaultHyperdataListCooc :: HyperdataListCooc
defaultHyperdataListCooc = HyperdataListCooc ""
instance Hyperdata HyperdataListCooc
$(makeLenses ''HyperdataListCooc)
$(deriveJSON (unPrefix "_hlc_") ''HyperdataListCooc)
instance Arbitrary HyperdataList where
arbitrary = pure defaultHyperdataList
instance Arbitrary HyperdataListCooc where
arbitrary = pure defaultHyperdataListCooc
instance FromField HyperdataList
where
fromField = fromField'
instance FromField HyperdataListCooc
where
fromField = fromField'
instance QueryRunnerColumnDefault PGJsonb HyperdataList
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGJsonb HyperdataListCooc
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance ToSchema HyperdataList where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_hl_") proxy
& mapped.schema.description ?~ "List Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataList
instance ToSchema HyperdataListCooc where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_hlc_") proxy
& mapped.schema.description ?~ "List Cooc Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataListCooc
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata/Model.hs
{-|
Module : Gargantext.Database.Admin.Types.Hyperdata.Model
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.Model
where
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
------------------------------------------------------------------------
data HyperdataModel =
HyperdataModel { _hm_params :: !(Int, Int)
, _hm_path :: !Text
, _hm_score :: !(Maybe Double)
} deriving (Show, Generic)
defaultHyperdataModel :: HyperdataModel
defaultHyperdataModel = HyperdataModel (400,500) "data/models/test.model" (Just 0.83)
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
instance Hyperdata HyperdataModel
$(makeLenses ''HyperdataModel)
$(deriveJSON (unPrefix "_hm_") ''HyperdataModel)
instance Arbitrary HyperdataModel where
arbitrary = pure defaultHyperdataModel
instance FromField HyperdataModel
where
fromField = fromField'
instance QueryRunnerColumnDefault PGJsonb HyperdataModel
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance ToSchema HyperdataModel where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_hm_") proxy
& mapped.schema.description ?~ "Model Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataModel
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata/Phylo.hs
{-|
Module : Gargantext.Database.Admin.Types.Hyperdata.Phylo
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.Phylo
where
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Core.Viz.LegacyPhylo (Phylo(..))
------------------------------------------------------------------------
data HyperdataPhylo =
HyperdataPhylo { _hp_preferences :: !(Maybe Text)
, _hp_data :: !(Maybe Phylo)
}
deriving (Show, Generic)
defaultHyperdataPhylo :: HyperdataPhylo
defaultHyperdataPhylo = HyperdataPhylo Nothing Nothing
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
instance Hyperdata HyperdataPhylo
$(makeLenses ''HyperdataPhylo)
$(deriveJSON (unPrefix "_hp_") ''HyperdataPhylo)
instance Arbitrary HyperdataPhylo where
arbitrary = pure defaultHyperdataPhylo
instance ToSchema HyperdataPhylo where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_hp_") proxy
& mapped.schema.description ?~ "Phylo Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataPhylo
instance FromField HyperdataPhylo where
fromField = fromField'
instance QueryRunnerColumnDefault PGJsonb HyperdataPhylo
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata/Prelude.hs
{-|
Module : Gargantext.Database.Admin.Types.Hyperdata.Prelude
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Database.Admin.Types.Hyperdata.Prelude
( module Control.Lens
, module Data.Aeson
, module Data.Aeson.TH
, module Data.Aeson.Types
, module Data.ByteString.Lazy.Internal
, module Data.Maybe
, module Data.Monoid
, module Data.Swagger
, module Data.Text
, module Database.PostgreSQL.Simple.FromField
, module Database.PostgreSQL.Simple.ToField
, module GHC.Generics
, module Gargantext.Core.Utils.Prefix
, module Gargantext.Database.Prelude
, module Opaleye
, module Test.QuickCheck
, module Test.QuickCheck.Arbitrary
, Hyperdata
, Chart(..)
)
where
import Control.Lens hiding (elements, (&), (.=), Indexed)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Aeson.Types (emptyObject)
import Data.ByteString.Lazy.Internal (ByteString)
import Data.Maybe (Maybe(..))
import Data.Monoid (mempty)
import Data.Swagger hiding (unwrapUnaryRecords, constructorTagModifier, allNullaryToStringTag, allOf, fieldLabelModifier)
import Data.Text (Text)
import Database.PostgreSQL.Simple.FromField (FromField, fromField)
import Database.PostgreSQL.Simple.ToField (ToField, toField, toJSONField)
import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Prelude (fromField')
import Gargantext.Prelude
import Opaleye (QueryRunnerColumnDefault, queryRunnerColumnDefault, PGJsonb, fieldQueryRunnerColumn, Nullable)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary hiding (vector)
------------------------------------------------------------------------
-- Only Hyperdata types should be members of this type class.
class Hyperdata a
data Chart =
CDocsHistogram
| CAuthorsPie
| CInstitutesTree
| CTermsMetrics
deriving (Generic, Show, Eq)
instance ToJSON Chart
instance FromJSON Chart
instance ToSchema Chart
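-- Encoding sketch (illustrative): Chart relies on the generic aeson instances,
-- and since every constructor is nullary the default options encode values as
-- plain strings, e.g. in GHCi:
--
-- >>> Data.Aeson.encode CDocsHistogram
-- "\"CDocsHistogram\""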
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata/Texts.hs
{-|
Module : Gargantext.Database.Admin.Types.Hyperdata.Texts
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.Texts
where
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
------------------------------------------------------------------------
data HyperdataTexts =
HyperdataTexts { _ht_preferences :: !(Maybe Text)
}
deriving (Show, Generic)
defaultHyperdataTexts :: HyperdataTexts
defaultHyperdataTexts = HyperdataTexts Nothing
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
instance Hyperdata HyperdataTexts
$(makeLenses ''HyperdataTexts)
$(deriveJSON (unPrefix "_ht_") ''HyperdataTexts)
instance Arbitrary HyperdataTexts where
arbitrary = pure defaultHyperdataTexts
instance ToSchema HyperdataTexts where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_ht_") proxy
& mapped.schema.description ?~ "Texts Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataTexts
instance FromField HyperdataTexts where
fromField = fromField'
instance QueryRunnerColumnDefault PGJsonb HyperdataTexts
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Hyperdata/User.hs
{-|
Module : Gargantext.Database.Admin.Types.Hyperdata.User
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Hyperdata.User
where
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.Contact
import Gargantext.Database.Admin.Types.Node (DocumentId)
-- import Gargantext.Database.Schema.Node -- (Node(..))
data HyperdataUser =
HyperdataUser { _hu_private :: !(Maybe HyperdataPrivate)
, _hu_shared :: !(Maybe HyperdataContact)
, _hu_public :: !(Maybe HyperdataPublic)
} deriving (Eq, Show, Generic)
data HyperdataPrivate =
HyperdataPrivate { _hpr_password :: !Text
, _hpr_lang :: !Lang
}
deriving (Eq, Show, Generic)
data HyperdataPublic =
HyperdataPublic { _hpu_pseudo :: !Text
, _hpu_publications :: ![DocumentId]
}
deriving (Eq, Show, Generic)
-- | Default
defaultHyperdataUser :: HyperdataUser
defaultHyperdataUser = HyperdataUser (Just defaultHyperdataPrivate)
(Just defaultHyperdataContact)
(Just defaultHyperdataPublic)
defaultHyperdataPublic :: HyperdataPublic
defaultHyperdataPublic = HyperdataPublic "pseudo" [1..10]
defaultHyperdataPrivate :: HyperdataPrivate
defaultHyperdataPrivate = HyperdataPrivate "password" EN
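-- Encoding sketch (illustrative): with the "_hpu_" prefix stripped by the
-- derived JSON instance below, the default public data serialises roughly as
--   {"pseudo":"pseudo","publications":[1,2,3,4,5,6,7,8,9,10]}
-- (NodeId is a newtype over Int, so publication ids appear as plain numbers).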
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
-- | Specific Gargantext instance
instance Hyperdata HyperdataUser
instance Hyperdata HyperdataPrivate
instance Hyperdata HyperdataPublic
-- | All lenses
makeLenses ''HyperdataUser
makeLenses ''HyperdataPrivate
makeLenses ''HyperdataPublic
-- | All Json instances
$(deriveJSON (unPrefix "_hu_") ''HyperdataUser)
$(deriveJSON (unPrefix "_hpr_") ''HyperdataPrivate)
$(deriveJSON (unPrefix "_hpu_") ''HyperdataPublic)
-- | Arbitrary instances
instance Arbitrary HyperdataUser where
arbitrary = HyperdataUser <$> arbitrary <*> arbitrary <*> arbitrary
instance Arbitrary HyperdataPrivate where
arbitrary = pure defaultHyperdataPrivate
instance Arbitrary HyperdataPublic where
arbitrary = pure defaultHyperdataPublic
-- | ToSchema instances
instance ToSchema HyperdataUser where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_hu_") proxy
& mapped.schema.description ?~ "User Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataUser
instance ToSchema HyperdataPrivate where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_hpr_") proxy
& mapped.schema.description ?~ "User Private Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataPrivate
instance ToSchema HyperdataPublic where
declareNamedSchema proxy =
genericDeclareNamedSchema (unPrefixSwagger "_hpu_") proxy
& mapped.schema.description ?~ "User Public Hyperdata"
& mapped.schema.example ?~ toJSON defaultHyperdataPublic
-- | Database (Posgresql-simple instance)
instance FromField HyperdataUser where
fromField = fromField'
instance FromField HyperdataPrivate where
fromField = fromField'
instance FromField HyperdataPublic where
fromField = fromField'
-- | Database (Opaleye instance)
instance QueryRunnerColumnDefault PGJsonb HyperdataUser where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGJsonb HyperdataPrivate where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGJsonb HyperdataPublic where
queryRunnerColumnDefault = fieldQueryRunnerColumn
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Metrics.hs
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Admin.Types.Metrics where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Vector (Vector)
import qualified Data.Vector as V
import Protolude
import Test.QuickCheck.Arbitrary
import Gargantext.Core.Types (ListType(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
----------------------------------------------------------------------------
newtype Metrics = Metrics
{ metrics_data :: Vector Metric}
deriving (Generic, Show)
instance ToSchema Metrics where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "metrics_")
instance Arbitrary Metrics
where
arbitrary = (Metrics . V.fromList) <$> arbitrary
data Metric = Metric
{ m_label :: !Text
, m_x :: !Double
, m_y :: !Double
, m_cat :: !ListType
} deriving (Generic, Show)
instance ToSchema Metric where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "m_")
instance Arbitrary Metric
where
arbitrary = Metric <$> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
deriveJSON (unPrefix "metrics_") ''Metrics
deriveJSON (unPrefix "m_") ''Metric
newtype ChartMetrics a = ChartMetrics { chartMetrics_data :: a }
deriving (Generic, Show)
instance (Typeable a, ToSchema a) => ToSchema (ChartMetrics a) where
declareNamedSchema = wellNamedSchema "chartMetrics_"
instance (Arbitrary a) => Arbitrary (ChartMetrics a)
where
arbitrary = ChartMetrics <$> arbitrary
deriveJSON (unPrefix "chartMetrics_") ''ChartMetrics
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Admin/Types/Node.hs
{-|
Module : Gargantext.Database.Admin.Types.Node
Description : Main Types of Nodes in Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-}
-- {-# LANGUAGE DuplicateRecordFields #-}
module Gargantext.Database.Admin.Types.Node
where
import Codec.Serialise (Serialise())
import Control.Monad (mzero)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Either
import Data.Hashable (Hashable)
import Data.Swagger
import Data.Text (Text, unpack)
import Data.Time (UTCTime)
import Database.PostgreSQL.Simple.FromField (FromField, fromField)
import Database.PostgreSQL.Simple.ToField (ToField, toField)
import GHC.Generics (Generic)
import Servant
import qualified Opaleye as O
import Opaleye (QueryRunnerColumnDefault, queryRunnerColumnDefault, PGInt4, PGText, PGTSVector, Nullable, fieldQueryRunnerColumn)
import Test.QuickCheck (elements)
import Gargantext.Prelude.Crypto.Hash (Hash)
import Test.QuickCheck.Arbitrary
import Test.QuickCheck.Instances.Text ()
import Test.QuickCheck.Instances.Time ()
import Text.Read (read)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
-- import Gargantext.Database.Prelude (fromField')
import Gargantext.Database.Schema.Node
import Gargantext.Prelude
type UserId = Int
type MasterUserId = UserId
------------------------------------------------------------------------
-- | NodePoly indicates that Node has a Polymorphism Type
type Node json = NodePoly NodeId (Maybe Hash) NodeTypeId UserId (Maybe ParentId) NodeName UTCTime json
-- | NodeSearch (queries)
-- type NodeSearch json = NodePolySearch NodeId NodeTypeId UserId (Maybe ParentId) NodeName UTCTime json (Maybe TSVector)
------------------------------------------------------------------------
instance (Typeable hyperdata, ToSchema hyperdata) =>
ToSchema (NodePoly NodeId Hash NodeTypeId
(Maybe UserId)
ParentId NodeName
UTCTime hyperdata
) where
declareNamedSchema = wellNamedSchema "_node_"
instance (Typeable hyperdata, ToSchema hyperdata) =>
ToSchema (NodePoly NodeId Hash NodeTypeId
UserId
(Maybe ParentId) NodeName
UTCTime hyperdata
) where
declareNamedSchema = wellNamedSchema "_node_"
instance (Typeable hyperdata, ToSchema hyperdata) =>
ToSchema (NodePoly NodeId (Maybe Hash) NodeTypeId
UserId
(Maybe ParentId) NodeName
UTCTime hyperdata
) where
declareNamedSchema = wellNamedSchema "_node_"
instance (Typeable hyperdata, ToSchema hyperdata) =>
ToSchema (NodePolySearch NodeId NodeTypeId
(Maybe UserId)
ParentId NodeName
UTCTime hyperdata (Maybe TSVector)
) where
declareNamedSchema = wellNamedSchema "_ns_"
instance (Typeable hyperdata, ToSchema hyperdata) =>
ToSchema (NodePolySearch NodeId NodeTypeId
UserId
(Maybe ParentId) NodeName
UTCTime hyperdata (Maybe TSVector)
) where
declareNamedSchema = wellNamedSchema "_ns_"
instance (Arbitrary nodeId
,Arbitrary hashId
,Arbitrary toDBid
,Arbitrary userId
,Arbitrary nodeParentId
, Arbitrary hyperdata
) => Arbitrary (NodePoly nodeId hashId toDBid userId nodeParentId
NodeName UTCTime hyperdata) where
--arbitrary = Node 1 1 (Just 1) 1 "name" (jour 2018 01 01) (arbitrary) (Just "")
arbitrary = Node <$> arbitrary <*> arbitrary <*> arbitrary
<*> arbitrary <*> arbitrary <*> arbitrary
<*> arbitrary <*> arbitrary
instance (Arbitrary hyperdata
,Arbitrary nodeId
,Arbitrary toDBid
,Arbitrary userId
,Arbitrary nodeParentId
) => Arbitrary (NodePolySearch nodeId toDBid userId nodeParentId
NodeName UTCTime hyperdata (Maybe TSVector)) where
--arbitrary = Node 1 1 (Just 1) 1 "name" (jour 2018 01 01) (arbitrary) (Just "")
arbitrary = NodeSearch <$> arbitrary <*> arbitrary <*> arbitrary
<*> arbitrary <*> arbitrary <*> arbitrary
<*> arbitrary <*> arbitrary
------------------------------------------------------------------------
pgNodeId :: NodeId -> O.Column O.PGInt4
pgNodeId = O.pgInt4 . id2int
where
id2int :: NodeId -> Int
id2int (NodeId n) = n
------------------------------------------------------------------------
newtype NodeId = NodeId Int
deriving (Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable)
instance Show NodeId where
show (NodeId n) = "nodeId-" <> show n
instance Serialise NodeId
instance ToField NodeId where
toField (NodeId n) = toField n
instance FromField NodeId where
fromField field mdata = do
n <- fromField field mdata
if (n :: Int) > 0
then return $ NodeId n
else mzero
instance ToSchema NodeId
unNodeId :: NodeId -> Int
unNodeId (NodeId n) = n
type NodeTypeId = Int
type NodeName = Text
type TSVector = Text
------------------------------------------------------------------------
------------------------------------------------------------------------
instance FromHttpApiData NodeId where
parseUrlPiece n = pure $ NodeId $ (read . cs) n
instance ToParamSchema NodeId
instance Arbitrary NodeId where
arbitrary = NodeId <$> arbitrary
type ParentId = NodeId
type CorpusId = NodeId
type CommunityId = NodeId
type ListId = NodeId
type DocumentId = NodeId
type DocId = NodeId
type RootId = NodeId
type MasterCorpusId = CorpusId
type UserCorpusId = CorpusId
type GraphId = NodeId
type PhyloId = NodeId
type AnnuaireId = NodeId
type ContactId = NodeId
------------------------------------------------------------------------
data Status = Status { status_failed :: !Int
, status_succeeded :: !Int
, status_remaining :: !Int
} deriving (Show, Generic)
$(deriveJSON (unPrefix "status_") ''Status)
instance Arbitrary Status where
arbitrary = Status <$> arbitrary <*> arbitrary <*> arbitrary
------------------------------------------------------------------------
data LanguageNodes = LanguageNodes { languageNodes___unknown__ :: [Int]}
deriving (Show, Generic)
$(deriveJSON (unPrefix "languageNodes_") ''LanguageNodes)
------------------------------------------------------------------------
-- level: debug | dev (fatal = critical)
data EventLevel = CRITICAL | FATAL | ERROR | WARNING | INFO | DEBUG
deriving (Show, Generic, Enum, Bounded)
instance FromJSON EventLevel
instance ToJSON EventLevel
instance Arbitrary EventLevel where
arbitrary = elements [minBound..maxBound]
instance ToSchema EventLevel where
declareNamedSchema proxy = genericDeclareNamedSchema defaultSchemaOptions proxy
------------------------------------------------------------------------
data Event = Event { event_level :: !EventLevel
, event_message :: !Text
, event_date :: !UTCTime
} deriving (Show, Generic)
$(deriveJSON (unPrefix "event_") ''Event)
instance Arbitrary Event where
arbitrary = Event <$> arbitrary <*> arbitrary <*> arbitrary
instance ToSchema Event where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "event_")
------------------------------------------------------------------------
data Resource = Resource { resource_path :: !(Maybe Text)
, resource_scraper :: !(Maybe Text)
, resource_query :: !(Maybe Text)
, resource_events :: !([Event])
, resource_status :: !Status
, resource_date :: !UTCTime
} deriving (Show, Generic)
$(deriveJSON (unPrefix "resource_") ''Resource)
instance Arbitrary Resource where
arbitrary = Resource <$> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
instance ToSchema Resource where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "resource_")
------------------------------------------------------------------------
-- | A Node can be, for example, a Folder, a Corpus or a Document
data NodeType = NodeUser
| NodeFolderPrivate
| NodeFolderShared | NodeTeam
| NodeFolderPublic
| NodeFolder
-- | NodeAnalysis | NodeCommunity
| NodeCorpus | NodeCorpusV3 | NodeTexts | NodeDocument
| NodeAnnuaire | NodeContact
| NodeGraph | NodePhylo
| NodeDashboard -- | NodeChart | NodeNoteBook
| NodeList | NodeModel
| NodeListCooc
{-
-- | Metrics
-- | NodeOccurrences
-- | Classification
-}
-- Optional Nodes
| NodeFrameWrite | NodeFrameCalc | NodeFrameVisio | NodeFrameNotebook
| NodeFile
deriving (Show, Read, Eq, Generic, Bounded, Enum)
allNodeTypes :: [NodeType]
allNodeTypes = [minBound ..]
defaultName :: NodeType -> Text
defaultName NodeUser = "User"
defaultName NodeContact = "Contact"
defaultName NodeCorpus = "Corpus"
defaultName NodeCorpusV3 = "Corpus"
defaultName NodeAnnuaire = "Annuaire"
defaultName NodeDocument = "Doc"
defaultName NodeTexts = "Docs"
defaultName NodeList = "List"
defaultName NodeListCooc = "List"
defaultName NodeModel = "Model"
defaultName NodeFolder = "Folder"
defaultName NodeFolderPrivate = "Private Folder"
defaultName NodeFolderShared = "Shared Folder"
defaultName NodeTeam = "Folder"
defaultName NodeFolderPublic = "Public Folder"
defaultName NodeDashboard = "Board"
defaultName NodeGraph = "Graph"
defaultName NodePhylo = "Phylo"
defaultName NodeFrameWrite = "Frame Write"
defaultName NodeFrameCalc = "Frame Calc"
defaultName NodeFrameVisio = "Frame Visio"
defaultName NodeFrameNotebook = "Frame Code"
defaultName NodeFile = "File"
instance FromJSON NodeType
instance ToJSON NodeType
instance FromHttpApiData NodeType
where
parseUrlPiece = Right . read . unpack
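-- Parsing sketch (illustrative): parseUrlPiece relies on the derived Read
-- instance, so a URL piece has to match a constructor name exactly, e.g.
--
-- >>> parseUrlPiece "NodeCorpus" :: Either Text NodeType
-- Right NodeCorpus
--
-- Note that `read` is partial: an unknown piece raises a runtime error rather
-- than returning a Left value.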
instance ToParamSchema NodeType
instance ToSchema NodeType
instance Arbitrary NodeType where
arbitrary = elements allNodeTypes
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
instance ToSchema Status where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "status_")
------------------------------------------------------------------------
{-
instance FromField (NodeId, Text)
where
fromField = fromField'
-}
------------------------------------------------------------------------
instance QueryRunnerColumnDefault PGTSVector (Maybe TSVector)
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGInt4 (Maybe NodeId)
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGInt4 NodeId
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault (Nullable PGInt4) NodeId
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance (QueryRunnerColumnDefault (Nullable O.PGTimestamptz) UTCTime)
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGText (Maybe Hash)
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/GargDB.hs
{-|
Module : Gargantext.Database.GargDB
Description : Useful tools for storing and handling project data files on disk
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO_1: qualitative tests (human)
TODO_2: quantitative tests (coded)
-}
module Gargantext.Database.GargDB
where
import Control.Exception
import Control.Lens (view)
import Control.Monad.Reader (MonadReader)
import Data.Aeson (ToJSON, toJSON)
import Data.Text (Text)
import Data.Tuple.Extra (both)
import GHC.IO (FilePath)
import Gargantext.Database.Prelude (HasConfig(..))
import Gargantext.Prelude
import Gargantext.Prelude.Config
import Gargantext.Prelude.Crypto.Hash
import System.Directory (createDirectoryIfMissing)
import System.IO.Error
import System.Random (newStdGen)
import qualified Data.Text as Text
import qualified System.Directory as SD
-------------------------------------------------------------------
-- | Main Class to use (just declare needed functions)
class GargDB a where
write :: a -> IO ()
read :: FilePath -> IO a
rm :: (a, FilePath) -> IO ()
mv :: (a, FilePath) -> FilePath -> IO ()
-- | Why not this class too?
class ToJSON parameters => GargDB' parameters gargdata where
write' :: parameters -> gargdata -> IO ()
read' :: parameters -> IO gargdata
rm' :: gargdata -> parameters -> IO ()
mv' :: gargdata -> parameters -> parameters -> IO ()
-------------------------------------------------------------------
-- | Deprecated Class, use GargDB instead
class SaveFile a where
saveFile' :: FilePath -> a -> IO ()
class ReadFile a where
readFile' :: FilePath -> IO a
-------------------------------------------------------------------
-------------------------------------------------------------------
type GargFilePath = (FolderPath, FileName)
-- where
type FolderPath = FilePath
type FileName = FilePath
--------------------------------
dataFilePath :: (ToJSON a) => a -> GargFilePath
dataFilePath = toPath . hash . show . toJSON
randomFilePath :: ( MonadReader env m
, MonadBase IO m
)
=> m GargFilePath
randomFilePath = do
(foldPath, fileName) <- liftBase
$ toPath
. hash
. show
<$> newStdGen
pure (foldPath, fileName)
-- | toPath' : how to split a hashed text into a folder path and a file name
{- example of use:
>>> toPath' (1,2) ("","helloword")
("/he","lloword")
>>> toPath' (2,2) ("","helloword")
("/he/ll","oword")
>>> toPath' (2,3) ("","helloword")
("/hel/low","ord")
-}
toPath :: Text -> (FolderPath, FileName)
toPath tx = both Text.unpack $ toPath' (2,3) ("", tx)
toPath' :: (Int,Int) -> (Text,Text) -> (Text,Text)
toPath' (n,m) (t,x) = foldl' (\tx _ -> toPath'' m tx) (t,x) [1..n]
toPath'' :: Int -> (Text, Text) -> (Text, Text)
toPath'' n (fp,fn) = (fp'',fn')
where
(fp',fn') = Text.splitAt n fn
fp'' = Text.intercalate "/" [fp,fp']
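{- Sketch for toPath itself (illustrative input, not a real hash): with the
   (2,3) split used above,
>>> toPath "abcdef123"
("/abc/def","123")
-}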
-------------------------------------------------------------------
type DataPath = FilePath
toFilePath :: FilePath -> FilePath -> FilePath
toFilePath fp1 fp2 = fp1 <> "/" <> fp2
-------------------------------------------------------------------
-- | Disk operations
-- | For example, this writes a file under a random filepath;
-- it is better to use a hash of the JSON of the type given as input
-- to these functions.
writeFile :: ( MonadReader env m
, HasConfig env
, MonadBase IO m
, SaveFile a
)
=> a -> m FilePath
writeFile a = do
dataPath <- view $ hasConfig . gc_datafilepath
(foldPath, fileName) <- randomFilePath
let filePath = toFilePath foldPath fileName
dataFoldPath = toFilePath dataPath foldPath
dataFileName = toFilePath dataPath filePath
_ <- liftBase $ createDirectoryIfMissing True dataFoldPath
_ <- liftBase $ saveFile' dataFileName a
pure filePath
---
-- | Example to read a file with Type
readFile :: ( MonadReader env m
, HasConfig env
, MonadBase IO m
, ReadFile a
)
=> FilePath -> m a
readFile fp = do
dataPath <- view $ hasConfig . gc_datafilepath
liftBase $ readFile' $ toFilePath dataPath fp
---
rmFile :: ( MonadReader env m
, MonadBase IO m
, HasConfig env
)
=> FilePath -> m ()
rmFile = onDisk_1 SD.removeFile
cpFile :: (MonadReader env m, MonadBase IO m, HasConfig env)
=> FilePath -> FilePath -> m ()
cpFile = onDisk_2 SD.copyFile
---
mvFile :: (MonadReader env m, MonadBase IO m, HasConfig env)
=> FilePath -> FilePath -> m ()
mvFile fp1 fp2 = do
cpFile fp1 fp2
rmFile fp1
pure ()
------------------------------------------------------------------------
onDisk_1 :: ( MonadReader env m
, MonadBase IO m
, HasConfig env
)
=> (FilePath -> IO ()) -> FilePath -> m ()
onDisk_1 action fp = do
dataPath <- view $ hasConfig . gc_datafilepath
liftBase $ action (toFilePath dataPath fp) `catch` handleExists
where
handleExists e
| isDoesNotExistError e = return ()
| otherwise = throwIO e
onDisk_2 :: ( MonadReader env m
, MonadBase IO m
, HasConfig env
)
=> (FilePath -> FilePath -> IO ())
-> FilePath
-> FilePath
-> m ()
onDisk_2 action fp1 fp2 = do
dataPath <- view $ hasConfig . gc_datafilepath
let fp1' = toFilePath dataPath fp1
fp2' = toFilePath dataPath fp2
liftBase $ action fp1' fp2' `catch` handleExists
where
handleExists e
| isDoesNotExistError e = return ()
| otherwise = throwIO e
------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Prelude.hs
{-|
Module : Gargantext.Database.Prelude
Description : Specific Prelude for Database management
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
module Gargantext.Database.Prelude where
import Control.Exception
import Control.Lens (Getter, view)
import Control.Monad.Except
import Control.Monad.Random
import Control.Monad.Reader
import Control.Monad.Trans.Control (MonadBaseControl)
import Data.Aeson (Result(Error,Success), fromJSON, FromJSON)
import Data.ByteString.Char8 (hPutStrLn)
import Data.Either.Extra (Either(Left, Right))
import Data.Ini (readIniFile, lookupValue)
import Data.Pool (Pool, withResource)
import Data.Profunctor.Product.Default (Default)
import Data.Text (unpack, pack, Text)
import Data.Word (Word16)
import Database.PostgreSQL.Simple (Connection, connect)
import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError)
import Database.PostgreSQL.Simple.Internal (Field)
import Gargantext.Prelude
import Opaleye (Query, Unpackspec, showSqlForPostgres, FromFields, Select, runQuery, PGJsonb, QueryRunnerColumnDefault)
import Opaleye.Aggregate (countRows)
import System.IO (FilePath)
import System.IO (stderr)
import Text.Read (read)
import qualified Data.ByteString as DB
import qualified Data.List as DL
import qualified Database.PostgreSQL.Simple as PGS
import Gargantext.Prelude.Config (GargConfig())
-------------------------------------------------------
class HasConnectionPool env where
connPool :: Getter env (Pool Connection)
instance HasConnectionPool (Pool Connection) where
connPool = identity
class HasConfig env where
hasConfig :: Getter env GargConfig
instance HasConfig GargConfig where
hasConfig = identity
-------------------------------------------------------
type JSONB = QueryRunnerColumnDefault PGJsonb
-------------------------------------------------------
type CmdM'' env err m =
( MonadReader env m
, MonadError err m
, MonadBaseControl IO m
, MonadRandom m
)
type CmdM' env err m =
( MonadReader env m
, MonadError err m
, MonadBaseControl IO m
-- , MonadRandom m
)
type CmdM env err m =
( CmdM' env err m
, HasConnectionPool env
, HasConfig env
)
type CmdRandom env err m =
( CmdM' env err m
, HasConnectionPool env
, HasConfig env
, MonadRandom m
)
type Cmd'' env err a = forall m. CmdM'' env err m => m a
type Cmd' env err a = forall m. CmdM' env err m => m a
type Cmd err a = forall m env. CmdM env err m => m a
type CmdR err a = forall m env. CmdRandom env err m => m a
fromInt64ToInt :: Int64 -> Int
fromInt64ToInt = fromIntegral
-- TODO: ideally there should be very few calls to this function.
mkCmd :: (Connection -> IO a) -> Cmd err a
mkCmd k = do
pool <- view connPool
withResource pool (liftBase . k)
runCmd :: (HasConnectionPool env)
=> env
-> Cmd'' env err a
-> IO (Either err a)
runCmd env m = runExceptT $ runReaderT m env
runOpaQuery :: Default FromFields fields haskells
=> Select fields
-> Cmd err [haskells]
runOpaQuery q = mkCmd $ \c -> runQuery c q
runCountOpaQuery :: Select a -> Cmd err Int
runCountOpaQuery q = do
counts <- mkCmd $ \c -> runQuery c $ countRows q
-- countRows is guaranteed to return a list with exactly one row so DL.head is safe here
pure $ fromInt64ToInt $ DL.head counts
formatPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err DB.ByteString
formatPGSQuery q a = mkCmd $ \conn -> PGS.formatQuery conn q a
-- TODO use runPGSQueryDebug everywhere
runPGSQuery' :: (PGS.ToRow a, PGS.FromRow b) => PGS.Query -> a -> Cmd err [b]
runPGSQuery' q a = mkCmd $ \conn -> PGS.query conn q a
runPGSQuery :: ( CmdM env err m
, PGS.FromRow r, PGS.ToRow q
)
=> PGS.Query -> q -> m [r]
runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn)
where
printError c (SomeException e) = do
q' <- PGS.formatQuery c q a
hPutStrLn stderr q'
throw (SomeException e)
-- | TODO catch error
runPGSQuery_ :: ( CmdM env err m
, PGS.FromRow r
)
=> PGS.Query -> m [r]
runPGSQuery_ q = mkCmd $ \conn -> catch (PGS.query_ conn q) printError
where
printError (SomeException e) = do
printDebug "[G.D.P.runPGSQuery_]" ("TODO: format query error query" :: Text)
throw (SomeException e)
execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err Int64
execPGSQuery q a = mkCmd $ \conn -> PGS.execute conn q a
------------------------------------------------------------------------
databaseParameters :: FilePath -> IO PGS.ConnectInfo
databaseParameters fp = do
ini <- readIniFile fp
let ini'' = case ini of
Left e -> panic (pack $ "No ini file error" <> show e)
Right ini' -> ini'
let val x = case (lookupValue (pack "database") (pack x) ini'') of
Left _ -> panic (pack $ "no" <> x)
Right p' -> unpack p'
pure $ PGS.ConnectInfo { PGS.connectHost = val "DB_HOST"
, PGS.connectPort = read (val "DB_PORT") :: Word16
, PGS.connectUser = val "DB_USER"
, PGS.connectPassword = val "DB_PASS"
, PGS.connectDatabase = val "DB_NAME"
}
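-- Expected shape of the [database] section in gargantext.ini (the values below
-- are illustrative only):
--
--   [database]
--   DB_HOST = localhost
--   DB_PORT = 5432
--   DB_USER = gargantua
--   DB_PASS = password
--   DB_NAME = gargandbV5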
connectGargandb :: FilePath -> IO Connection
connectGargandb fp = databaseParameters fp >>= \params -> connect params
fromField' :: (Typeable b, FromJSON b) => Field -> Maybe DB.ByteString -> Conversion b
fromField' field mb = do
v <- fromField field mb
valueToHyperdata v
where
valueToHyperdata v = case fromJSON v of
Success a -> pure a
Error _err -> returnError ConversionFailed field
$ DL.intercalate " " [ "cannot parse hyperdata for JSON: "
, show v
]
printSqlOpa :: Default Unpackspec a a => Query a -> IO ()
printSqlOpa = putStrLn . maybe "Empty query" identity . showSqlForPostgres
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query.hs
{-|
Module : Gargantext.Database.Query
Description : Main Tools of Node to the database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Database.Query
where
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Facet.hs
{-|
Module : Gargantext.Database.Query.Facet
Description : Main requests of Node to the database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
------------------------------------------------------------------------
module Gargantext.Database.Query.Facet
( runViewAuthorsDoc
, runViewDocuments
, runCountDocuments
, filterWith
, Category
, Score
, Title
, Pair(..)
, Facet(..)
, FacetDoc
, FacetDocRead
, FacetPaired(..)
, FacetPairedRead
, FacetPairedReadNull
, FacetPairedReadNullAgg
, OrderBy(..)
)
where
import Control.Arrow (returnA)
import Control.Lens ((^.))
import Data.Aeson (FromJSON, ToJSON)
import Data.Aeson.TH (deriveJSON)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Swagger
import qualified Data.Text as T
import Data.Time (UTCTime)
import Data.Time.Segment (jour)
import Opaleye
import Protolude hiding (null, map, sum, not)
import Servant.API
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import qualified Opaleye.Internal.Unpackspec()
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Query.Filter
import Gargantext.Database.Query.Join (leftJoin5)
import Gargantext.Database.Query.Table.Ngrams
import Gargantext.Database.Query.Table.NodeNode
import Gargantext.Database.Query.Table.NodeNodeNgrams
import Gargantext.Database.Prelude
import Gargantext.Database.Schema.Node
------------------------------------------------------------------------
-- | DocFacet
-- data Facet = FacetDoc | FacetSources | FacetAuthors | FacetTerms
-- deriving (Show, Generic)
--instance FromJSON Facet
--instance ToJSON Facet
type Category = Int
type Score = Double
type Title = Text
-- TODO remove Title
type FacetDoc = Facet NodeId UTCTime Title HyperdataDocument (Maybe Category) (Maybe Double) (Maybe Score)
-- type FacetSources = FacetDoc
-- type FacetAuthors = FacetDoc
-- type FacetTerms = FacetDoc
data Facet id created title hyperdata category ngramCount score =
FacetDoc { facetDoc_id :: id
, facetDoc_created :: created
, facetDoc_title :: title
, facetDoc_hyperdata :: hyperdata
, facetDoc_category :: category
, facetDoc_ngramCount :: ngramCount
, facetDoc_score :: score
} deriving (Show, Generic)
{- | TODO after demo
data Facet id date hyperdata score =
FacetDoc { facetDoc_id :: id
, facetDoc_date :: date
, facetDoc_hyperdata :: hyperdata
, facetDoc_score :: score
} deriving (Show, Generic)
-}
data Pair i l = Pair {
_p_id :: i
, _p_label :: l
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_p_") ''Pair)
$(makeAdaptorAndInstance "pPair" ''Pair)
instance (Typeable i, Typeable l, ToSchema i, ToSchema l) => ToSchema (Pair i l) where
declareNamedSchema = wellNamedSchema "_p_"
instance (Arbitrary i, Arbitrary l) => Arbitrary (Pair i l) where
arbitrary = Pair <$> arbitrary <*> arbitrary
data FacetPaired id date hyperdata score =
FacetPaired { _fp_id :: id
, _fp_date :: date
, _fp_hyperdata :: hyperdata
, _fp_score :: score }
deriving (Show, Generic)
$(deriveJSON (unPrefix "_fp_") ''FacetPaired)
$(makeAdaptorAndInstance "pFacetPaired" ''FacetPaired)
instance ( ToSchema id
, ToSchema date
, ToSchema hyperdata
, ToSchema score
, Typeable id
, Typeable date
, Typeable hyperdata
, Typeable score
) => ToSchema (FacetPaired id date hyperdata score) where
declareNamedSchema = wellNamedSchema "_fp_"
instance ( Arbitrary id
, Arbitrary date
, Arbitrary hyperdata
, Arbitrary score
) => Arbitrary (FacetPaired id date hyperdata score) where
arbitrary = FacetPaired <$> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary
type FacetPairedRead = FacetPaired (Column PGInt4 )
(Column PGTimestamptz)
(Column PGJsonb )
(Column PGInt4 )
type FacetPairedReadNull = FacetPaired (Column (Nullable PGInt4) )
(Column (Nullable PGTimestamptz))
(Column (Nullable PGJsonb) )
(Column (Nullable PGInt4) )
type FacetPairedReadNullAgg = FacetPaired (Aggregator (Column (Nullable PGInt4) )
(Column (Nullable PGInt4) )
)
(Aggregator (Column (Nullable PGTimestamptz))
(Column (Nullable PGTimestamptz))
)
(Aggregator (Column (Nullable PGJsonb) )
(Column (Nullable PGJsonb) )
)
(Aggregator (Column (Nullable PGInt4) )
(Column (Nullable PGInt4) )
)
-- | JSON instance
$(deriveJSON (unPrefix "facetDoc_") ''Facet)
-- | Documentation instance
instance ToSchema FacetDoc where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "facetDoc_")
-- | Mock and Quickcheck instances
instance Arbitrary FacetDoc where
arbitrary = elements [ FacetDoc id' (jour year 01 01) t hp (Just cat) (Just ngramCount) (Just score)
| id' <- [1..10]
, year <- [1990..2000]
, t <- ["title", "another title"]
, hp <- arbitraryHyperdataDocuments
, cat <- [0..2]
, ngramCount <- [3..100]
, score <- [3..100]
]
-- Facets / Views for the Front End
-- | Database instances
$(makeAdaptorAndInstance "pFacetDoc" ''Facet)
-- $(makeLensesWith abbreviatedFields ''Facet)
type FacetDocRead = Facet (Column PGInt4 )
(Column PGTimestamptz)
(Column PGText )
(Column PGJsonb )
(Column (Nullable PGInt4)) -- Category
(Column (Nullable PGFloat8)) -- Ngrams Count
(Column (Nullable PGFloat8)) -- Score
-----------------------------------------------------------------------
-----------------------------------------------------------------------
data OrderBy = DateAsc | DateDesc
| TitleAsc | TitleDesc
| ScoreDesc | ScoreAsc
| SourceAsc | SourceDesc
deriving (Generic, Enum, Bounded, Read, Show)
instance FromHttpApiData OrderBy
where
parseUrlPiece "DateAsc" = pure DateAsc
parseUrlPiece "DateDesc" = pure DateDesc
parseUrlPiece "TitleAsc" = pure TitleAsc
parseUrlPiece "TitleDesc" = pure TitleDesc
parseUrlPiece "ScoreAsc" = pure ScoreAsc
parseUrlPiece "ScoreDesc" = pure ScoreDesc
parseUrlPiece "SourceAsc" = pure SourceAsc
parseUrlPiece "SourceDesc" = pure SourceDesc
parseUrlPiece _ = Left "Unexpected value of OrderBy"
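-- Since OrderBy derives 'Read', a behaviour-equivalent sketch of the parser
-- could reuse it (readMaybe comes from Text.Read; this is an alternative, not
-- the definition used above):
-- parseUrlPiece t = maybe (Left "Unexpected value of OrderBy") Right
--                         (readMaybe $ T.unpack t)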
instance ToParamSchema OrderBy
instance FromJSON OrderBy
instance ToJSON OrderBy
instance ToSchema OrderBy
instance Arbitrary OrderBy
where
arbitrary = elements [minBound..maxBound]
-- TODO-SECURITY check
--{-
runViewAuthorsDoc :: HasDBid NodeType
=> ContactId
-> IsTrash
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Cmd err [FacetDoc]
runViewAuthorsDoc cId t o l order = runOpaQuery $ filterWith o l order $ viewAuthorsDoc cId t ntId
where
ntId = NodeDocument
-- TODO add delete ?
viewAuthorsDoc :: HasDBid NodeType
=> ContactId
-> IsTrash
-> NodeType
-> Query FacetDocRead
viewAuthorsDoc cId _ nt = proc () -> do
(doc,(_,(_,(_,contact')))) <- queryAuthorsDoc -< ()
{-nn <- queryNodeNodeTable -< ()
restrict -< nn_node1_id nn .== _node_id doc
-- restrict -< nn_delete nn .== (pgBool t)
-}
restrict -< _node_id contact' .== (toNullable $ pgNodeId cId)
restrict -< _node_typename doc .== (pgInt4 $ toDBid nt)
returnA -< FacetDoc (_node_id doc)
(_node_date doc)
(_node_name doc)
(_node_hyperdata doc)
(toNullable $ pgInt4 1)
(toNullable $ pgDouble 1)
(toNullable $ pgDouble 1)
queryAuthorsDoc :: Query (NodeRead, (NodeNodeNgramsReadNull, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull))))
queryAuthorsDoc = leftJoin5 queryNodeTable queryNodeNodeNgramsTable queryNgramsTable queryNodeNodeNgramsTable queryNodeTable cond12 cond23 cond34 cond45
where
cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
cond12 (nodeNgram, doc) = _node_id doc
.== _nnng_node1_id nodeNgram
cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
cond23 (ngrams', (nodeNgram, _)) = ngrams'^.ngrams_id
.== _nnng_ngrams_id nodeNgram
cond34 :: (NodeNodeNgramsRead, (NgramsRead, (NodeNodeNgramsReadNull, NodeReadNull))) -> Column PGBool
cond34 (nodeNgram2, (ngrams', (_,_)))= ngrams'^.ngrams_id .== _nnng_ngrams_id nodeNgram2
cond45 :: (NodeRead, (NodeNodeNgramsRead, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull)))) -> Column PGBool
cond45 (contact', (nodeNgram2', (_, (_,_)))) = _node_id contact' .== _nnng_node1_id nodeNgram2'
--}
------------------------------------------------------------------------
-- TODO-SECURITY check
runViewDocuments :: HasDBid NodeType
=> CorpusId
-> IsTrash
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Maybe Text
-> Cmd err [FacetDoc]
runViewDocuments cId t o l order query = do
runOpaQuery $ filterWith o l order sqlQuery
where
ntId = toDBid NodeDocument
sqlQuery = viewDocuments cId t ntId query
runCountDocuments :: HasDBid NodeType => CorpusId -> IsTrash -> Maybe Text -> Cmd err Int
runCountDocuments cId t mQuery = do
runCountOpaQuery sqlQuery
where
sqlQuery = viewDocuments cId t (toDBid NodeDocument) mQuery
viewDocuments :: CorpusId
-> IsTrash
-> NodeTypeId
-> Maybe Text
-> Query FacetDocRead
viewDocuments cId t ntId mQuery = proc () -> do
n <- queryNodeTable -< ()
nn <- queryNodeNodeTable -< ()
restrict -< n^.node_id .== nn^.nn_node2_id
restrict -< nn^.nn_node1_id .== (pgNodeId cId)
restrict -< n^.node_typename .== (pgInt4 ntId)
restrict -< if t then nn^.nn_category .== (pgInt4 0)
else nn^.nn_category .>= (pgInt4 1)
let query = (fromMaybe "" mQuery)
iLikeQuery = T.intercalate "" ["%", query, "%"]
restrict -< (n^.node_name) `ilike` (pgStrictText iLikeQuery)
returnA -< FacetDoc (_node_id n)
(_node_date n)
(_node_name n)
(_node_hyperdata n)
(toNullable $ nn^.nn_category)
(toNullable $ nn^.nn_score)
(toNullable $ nn^.nn_score)
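-- Note on the text filter above: when mQuery is Nothing or empty, iLikeQuery
-- becomes "%%", so the ILIKE restriction matches every (non-null) node name
-- and no document is filtered out.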
------------------------------------------------------------------------
filterWith :: (PGOrd date, PGOrd title, PGOrd category, PGOrd score, hyperdata ~ Column SqlJsonb) =>
Maybe Gargantext.Core.Types.Offset
-> Maybe Gargantext.Core.Types.Limit
-> Maybe OrderBy
-> Select (Facet id (Column date) (Column title) hyperdata (Column category) ngramCount (Column score))
-> Select (Facet id (Column date) (Column title) hyperdata (Column category) ngramCount (Column score))
filterWith o l order q = limit' l $ offset' o $ orderBy (orderWith order) q
orderWith :: (PGOrd b1, PGOrd b2, PGOrd b3, PGOrd b4)
=> Maybe OrderBy
-> Order (Facet id (Column b1) (Column b2) (Column SqlJsonb) (Column b3) ngramCount (Column b4))
orderWith (Just DateAsc) = asc facetDoc_created
orderWith (Just DateDesc) = desc facetDoc_created
orderWith (Just TitleAsc) = asc facetDoc_title
orderWith (Just TitleDesc) = desc facetDoc_title
orderWith (Just ScoreAsc) = asc facetDoc_score
orderWith (Just ScoreDesc) = descNullsLast facetDoc_score
orderWith (Just SourceAsc) = asc facetDoc_source
orderWith (Just SourceDesc) = desc facetDoc_source
orderWith _ = asc facetDoc_created
facetDoc_source :: PGIsJson a
=> Facet id created title (Column a) favorite ngramCount score
-> Column (Nullable PGText)
facetDoc_source x = toNullable (facetDoc_hyperdata x) .->> pgString "source"
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Filter.hs 0000664 0000000 0000000 00000001472 14124644201 0031642 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Query.Filter
Description : Main requests of Node to the database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Database.Query.Filter
where
import Gargantext.Core.Types (Limit, Offset)
import Data.Maybe (Maybe, maybe)
import Opaleye (Query, limit, offset)
limit' :: Maybe Limit -> Query a -> Query a
limit' maybeLimit query = maybe query (\l -> limit l query) maybeLimit
offset' :: Maybe Offset -> Query a -> Query a
offset' maybeOffset query = maybe query (\o -> offset o query) maybeOffset
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Join.hs 0000664 0000000 0000000 00000040675 14124644201 0031324 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Query.Join
Description : Main Join queries (using Opaleye)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Multiple Join functions with Opaleye.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE TemplateHaskell #-}
------------------------------------------------------------------------
module Gargantext.Database.Query.Join ( leftJoin2
, leftJoin3
, leftJoin4
, leftJoin5
, leftJoin6
, leftJoin7
, leftJoin8
, leftJoin9
)
where
import Control.Arrow ((>>>))
import Data.Profunctor.Product.Default
import Gargantext.Prelude
import Opaleye
import Opaleye.Internal.Join (NullMaker(..))
import qualified Opaleye.Internal.Unpackspec()
------------------------------------------------------------------------
leftJoin2 :: (Default Unpackspec fieldsL fieldsL,
Default Unpackspec fieldsR fieldsR,
Default NullMaker fieldsR nullableFieldsR) =>
Select fieldsL
-> Select fieldsR
-> ((fieldsL, fieldsR) -> Column PGBool)
-> Select (fieldsL, nullableFieldsR)
leftJoin2 = leftJoin
------------------------------------------------------------------------
-- | Two ways to write leftJoin3
_leftJoin3 :: Query columnsA -> Query columnsB -> Query columnsC
-> ((columnsA, columnsB, columnsC) -> Column PGBool)
-> Query (columnsA, columnsB, columnsC)
_leftJoin3 q1 q2 q3 cond = ((,,) <$> q1 <*> q2 <*> q3) >>> keepWhen cond
leftJoin3 :: ( Default Unpackspec b2 b2
, Default Unpackspec b3 b3
, Default Unpackspec fieldsL fieldsL
, Default Unpackspec fieldsR fieldsR
, Default NullMaker b3 b4
, Default NullMaker b2 b5
, Default NullMaker fieldsR b2) =>
Select fieldsR
-> Select b3
-> Select fieldsL
-> ((b3, fieldsR) -> Column PGBool)
-> ((fieldsL, (b3, b2)) -> Column PGBool)
-> Select (fieldsL, (b4, b5))
leftJoin3 q1 q2 q3
cond12 cond23 =
leftJoin q3 ( leftJoin q2 q1 cond12) cond23
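-- Note on argument order: the joins are nested from the last query outwards,
-- so in @leftJoin3 q1 q2 q3 cond12 cond23@ the result pairs @q3@ (kept
-- non-nullable on the left) with the nested, nullable join of @q2@ and @q1@.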
leftJoin4 :: (Default Unpackspec b2 b2,
Default Unpackspec fieldsL fieldsL, Default Unpackspec b3 b3,
Default Unpackspec b4 b4, Default Unpackspec b5 b5,
Default Unpackspec b6 b6, Default Unpackspec fieldsR fieldsR,
Default NullMaker b2 b7, Default NullMaker b5 b8,
Default NullMaker b6 b9, Default NullMaker b3 b5,
Default NullMaker b4 b6, Default NullMaker fieldsR b4) =>
Select fieldsR
-> Select b3
-> Select b2
-> Select fieldsL
-> ((b3, fieldsR) -> Column PGBool)
-> ((b2, (b3, b4)) -> Column PGBool)
-> ((fieldsL, (b2, (b5, b6))) -> Column PGBool)
-> Select (fieldsL, (b7, (b8, b9)))
leftJoin4 q1 q2 q3 q4
cond12 cond23 cond34 =
leftJoin q4 ( leftJoin q3
( leftJoin q2 q1
cond12
) cond23
) cond34
leftJoin5 :: (Default Unpackspec b2 b2, Default Unpackspec b3 b3,
Default Unpackspec b4 b4, Default Unpackspec b5 b5,
Default Unpackspec b6 b6, Default Unpackspec b7 b7,
Default Unpackspec fieldsL fieldsL, Default Unpackspec b8 b8,
Default Unpackspec b9 b9, Default Unpackspec b10 b10,
Default Unpackspec fieldsR fieldsR, Default NullMaker b7 b6,
Default NullMaker b6 b11, Default NullMaker b8 b12,
Default NullMaker b3 b13, Default NullMaker b2 b14,
Default NullMaker b9 b3, Default NullMaker b10 b2,
Default NullMaker b5 b9, Default NullMaker b4 b10,
Default NullMaker fieldsR b4) =>
Select fieldsR
-> Select b5
-> Select b7
-> Select b8
-> Select fieldsL
-> ((b5, fieldsR) -> Column PGBool)
-> ((b7, (b5, b4)) -> Column PGBool)
-> ((b8, (b7, (b9, b10))) -> Column PGBool)
-> ((fieldsL, (b8, (b6, (b3, b2)))) -> Column PGBool)
-> Select (fieldsL, (b12, (b11, (b13, b14))))
leftJoin5 q1 q2 q3 q4 q5
cond12 cond23 cond34 cond45 =
leftJoin q5 ( leftJoin q4
( leftJoin q3
( leftJoin q2 q1
cond12
) cond23
) cond34
) cond45
leftJoin6 :: (Default Unpackspec b2 b2, Default Unpackspec b3 b3,
Default Unpackspec b4 b4, Default Unpackspec b5 b5,
Default Unpackspec fieldsL fieldsL, Default Unpackspec b6 b6,
Default Unpackspec b7 b7, Default Unpackspec b8 b8,
Default Unpackspec b9 b9, Default Unpackspec b10 b10,
Default Unpackspec b11 b11, Default Unpackspec b12 b12,
Default Unpackspec b13 b13, Default Unpackspec b14 b14,
Default Unpackspec b15 b15, Default Unpackspec fieldsR fieldsR,
Default NullMaker b5 b4, Default NullMaker b4 b16,
Default NullMaker b6 b17, Default NullMaker b2 b18,
Default NullMaker b7 b2, Default NullMaker b3 b7,
Default NullMaker b12 b19, Default NullMaker b13 b20,
Default NullMaker b10 b12, Default NullMaker b11 b13,
Default NullMaker b14 b10, Default NullMaker b15 b11,
Default NullMaker b8 b14, Default NullMaker b9 b15,
Default NullMaker fieldsR b9) =>
Select fieldsR
-> Select b8
-> Select b3
-> Select b5
-> Select b6
-> Select fieldsL
-> ((b8, fieldsR) -> Column PGBool)
-> ((b3, (b8, b9)) -> Column PGBool)
-> ((b5, (b3, (b14, b15))) -> Column PGBool)
-> ((b6, (b5, (b7, (b10, b11)))) -> Column PGBool)
-> ((fieldsL, (b6, (b4, (b2, (b12, b13))))) -> Column PGBool)
-> Select (fieldsL, (b17, (b16, (b18, (b19, b20)))))
leftJoin6 q1 q2 q3 q4 q5 q6
cond12 cond23 cond34 cond45 cond56 =
leftJoin q6 ( leftJoin q5
( leftJoin q4
( leftJoin q3
( leftJoin q2 q1
cond12
) cond23
) cond34
) cond45
) cond56
leftJoin7 :: (Default Unpackspec b2 b2, Default Unpackspec b3 b3,
Default Unpackspec b4 b4, Default Unpackspec b5 b5,
Default Unpackspec b6 b6, Default Unpackspec b7 b7,
Default Unpackspec b8 b8, Default Unpackspec b9 b9,
Default Unpackspec b10 b10, Default Unpackspec b11 b11,
Default Unpackspec b12 b12, Default Unpackspec b13 b13,
Default Unpackspec fieldsL fieldsL, Default Unpackspec b14 b14,
Default Unpackspec b15 b15, Default Unpackspec b16 b16,
Default Unpackspec b17 b17, Default Unpackspec b18 b18,
Default Unpackspec b19 b19, Default Unpackspec b20 b20,
Default Unpackspec b21 b21, Default Unpackspec fieldsR fieldsR,
Default NullMaker b11 b8, Default NullMaker b8 b10,
Default NullMaker b10 b9, Default NullMaker b9 b22,
Default NullMaker b16 b12, Default NullMaker b12 b17,
Default NullMaker b17 b23, Default NullMaker b13 b24,
Default NullMaker b15 b25, Default NullMaker b14 b15,
Default NullMaker b3 b26, Default NullMaker b2 b27,
Default NullMaker b18 b3, Default NullMaker b19 b2,
Default NullMaker b5 b18, Default NullMaker b4 b19,
Default NullMaker b20 b5, Default NullMaker b21 b4,
Default NullMaker b7 b20, Default NullMaker b6 b21,
Default NullMaker fieldsR b6) =>
Select fieldsR
-> Select b7
-> Select b11
-> Select b16
-> Select b14
-> Select b13
-> Select fieldsL
-> ((b7, fieldsR) -> Column PGBool)
-> ((b11, (b7, b6)) -> Column PGBool)
-> ((b16, (b11, (b20, b21))) -> Column PGBool)
-> ((b14, (b16, (b8, (b5, b4)))) -> Column PGBool)
-> ((b13, (b14, (b12, (b10, (b18, b19))))) -> Column PGBool)
-> ((fieldsL, (b13, (b15, (b17, (b9, (b3, b2))))))
-> Column PGBool)
-> Select (fieldsL, (b24, (b25, (b23, (b22, (b26, b27))))))
leftJoin7 q1 q2 q3 q4 q5 q6 q7
cond12 cond23 cond34 cond45 cond56 cond67 =
leftJoin q7 ( leftJoin q6
( leftJoin q5
( leftJoin q4
( leftJoin q3
( leftJoin q2 q1
cond12
) cond23
) cond34
) cond45
) cond56
) cond67
leftJoin8 :: (Default Unpackspec b2 b2, Default Unpackspec b3 b3,
Default Unpackspec b4 b4, Default Unpackspec b5 b5,
Default Unpackspec b6 b6, Default Unpackspec b7 b7,
Default Unpackspec b8 b8, Default Unpackspec b9 b9,
Default Unpackspec b10 b10, Default Unpackspec fieldsL fieldsL,
Default Unpackspec b11 b11, Default Unpackspec b12 b12,
Default Unpackspec b13 b13, Default Unpackspec b14 b14,
Default Unpackspec b15 b15, Default Unpackspec b16 b16,
Default Unpackspec b17 b17, Default Unpackspec b18 b18,
Default Unpackspec b19 b19, Default Unpackspec b20 b20,
Default Unpackspec b21 b21, Default Unpackspec b22 b22,
Default Unpackspec b23 b23, Default Unpackspec b24 b24,
Default Unpackspec b25 b25, Default Unpackspec b26 b26,
Default Unpackspec b27 b27, Default Unpackspec b28 b28,
Default Unpackspec fieldsR fieldsR, Default NullMaker b8 b5,
Default NullMaker b5 b7, Default NullMaker b7 b6,
Default NullMaker b6 b29, Default NullMaker b13 b9,
Default NullMaker b9 b14, Default NullMaker b14 b30,
Default NullMaker b10 b31, Default NullMaker b12 b32,
Default NullMaker b11 b12, Default NullMaker b2 b33,
Default NullMaker b15 b2, Default NullMaker b3 b15,
Default NullMaker b16 b3, Default NullMaker b4 b16,
Default NullMaker b23 b34, Default NullMaker b24 b35,
Default NullMaker b21 b23, Default NullMaker b22 b24,
Default NullMaker b25 b21, Default NullMaker b26 b22,
Default NullMaker b19 b25, Default NullMaker b20 b26,
Default NullMaker b27 b19, Default NullMaker b28 b20,
Default NullMaker b17 b27, Default NullMaker b18 b28,
Default NullMaker fieldsR b18) =>
Select fieldsR
-> Select b17
-> Select b4
-> Select b8
-> Select b13
-> Select b11
-> Select b10
-> Select fieldsL
-> ((b17, fieldsR) -> Column PGBool)
-> ((b4, (b17, b18)) -> Column PGBool)
-> ((b8, (b4, (b27, b28))) -> Column PGBool)
-> ((b13, (b8, (b16, (b19, b20)))) -> Column PGBool)
-> ((b11, (b13, (b5, (b3, (b25, b26))))) -> Column PGBool)
-> ((b10, (b11, (b9, (b7, (b15, (b21, b22)))))) -> Column PGBool)
-> ((fieldsL, (b10, (b12, (b14, (b6, (b2, (b23, b24)))))))
-> Column PGBool)
-> Select (fieldsL, (b31, (b32, (b30, (b29, (b33, (b34, b35)))))))
leftJoin8 q1 q2 q3 q4 q5 q6 q7 q8
cond12 cond23 cond34 cond45 cond56 cond67 cond78 =
leftJoin q8 ( leftJoin q7
( leftJoin q6
( leftJoin q5
( leftJoin q4
( leftJoin q3
( leftJoin q2 q1
cond12
) cond23
) cond34
) cond45
) cond56
) cond67
) cond78
leftJoin9 :: (Default Unpackspec b2 b2, Default Unpackspec b3 b3,
Default Unpackspec b4 b4, Default Unpackspec b5 b5,
Default Unpackspec b6 b6, Default Unpackspec b7 b7,
Default Unpackspec b8 b8, Default Unpackspec b9 b9,
Default Unpackspec b10 b10, Default Unpackspec b11 b11,
Default Unpackspec b12 b12, Default Unpackspec b13 b13,
Default Unpackspec b14 b14, Default Unpackspec b15 b15,
Default Unpackspec b16 b16, Default Unpackspec b17 b17,
Default Unpackspec b18 b18, Default Unpackspec b19 b19,
Default Unpackspec b20 b20, Default Unpackspec b21 b21,
Default Unpackspec fieldsL fieldsL, Default Unpackspec b22 b22,
Default Unpackspec b23 b23, Default Unpackspec b24 b24,
Default Unpackspec b25 b25, Default Unpackspec b26 b26,
Default Unpackspec b27 b27, Default Unpackspec b28 b28,
Default Unpackspec b29 b29, Default Unpackspec b30 b30,
Default Unpackspec b31 b31, Default Unpackspec b32 b32,
Default Unpackspec b33 b33, Default Unpackspec b34 b34,
Default Unpackspec b35 b35, Default Unpackspec b36 b36,
Default Unpackspec fieldsR fieldsR, Default NullMaker b15 b10,
Default NullMaker b10 b14, Default NullMaker b14 b11,
Default NullMaker b11 b13, Default NullMaker b13 b12,
Default NullMaker b12 b37, Default NullMaker b28 b16,
Default NullMaker b16 b29, Default NullMaker b29 b17,
Default NullMaker b17 b30, Default NullMaker b30 b38,
Default NullMaker b21 b20, Default NullMaker b20 b39,
Default NullMaker b22 b40, Default NullMaker b18 b41,
Default NullMaker b23 b18, Default NullMaker b19 b23,
Default NullMaker b26 b42, Default NullMaker b25 b26,
Default NullMaker b27 b25, Default NullMaker b24 b27,
Default NullMaker b3 b43, Default NullMaker b2 b44,
Default NullMaker b31 b3, Default NullMaker b32 b2,
Default NullMaker b5 b31, Default NullMaker b4 b32,
Default NullMaker b33 b5, Default NullMaker b34 b4,
Default NullMaker b7 b33, Default NullMaker b6 b34,
Default NullMaker b35 b7, Default NullMaker b36 b6,
Default NullMaker b9 b35, Default NullMaker b8 b36,
Default NullMaker fieldsR b8) =>
Select fieldsR
-> Select b9
-> Select b15
-> Select b28
-> Select b24
-> Select b19
-> Select b21
-> Select b22
-> Select fieldsL
-> ((b9, fieldsR) -> Column PGBool)
-> ((b15, (b9, b8)) -> Column PGBool)
-> ((b28, (b15, (b35, b36))) -> Column PGBool)
-> ((b24, (b28, (b10, (b7, b6)))) -> Column PGBool)
-> ((b19, (b24, (b16, (b14, (b33, b34))))) -> Column PGBool)
-> ((b21, (b19, (b27, (b29, (b11, (b5, b4)))))) -> Column PGBool)
-> ((b22, (b21, (b23, (b25, (b17, (b13, (b31, b32)))))))
-> Column PGBool)
-> ((fieldsL, (b22, (b20, (b18, (b26, (b30, (b12, (b3, b2))))))))
-> Column PGBool)
-> Select
(fieldsL, (b40, (b39, (b41, (b42, (b38, (b37, (b43, b44))))))))
leftJoin9 q1 q2 q3 q4 q5 q6 q7 q8 q9
cond12 cond23 cond34 cond45 cond56 cond67 cond78 cond89 =
leftJoin q9 ( leftJoin q8
( leftJoin q7
( leftJoin q6
( leftJoin q5
( leftJoin q4
( leftJoin q3
( leftJoin q2 q1
cond12
) cond23
) cond34
) cond45
) cond56
) cond67
) cond78
) cond89
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Prelude.hs0000664 0000000 0000000 00000001237 14124644201 0032014 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Query.Prelude
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
------------------------------------------------------------------------
module Gargantext.Database.Query.Prelude
( module Gargantext.Database.Query.Join
, module Gargantext.Database.Query.Table.Node
, module Gargantext.Database.Query.Table.NodeNode
, module Control.Arrow
)
where
import Control.Arrow (returnA)
import Gargantext.Database.Query.Join
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.NodeNode
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table/ 0000775 0000000 0000000 00000000000 14124644201 0031104 5 ustar 00root root 0000000 0000000 Ngrams.hs 0000664 0000000 0000000 00000006544 14124644201 0032621 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table {-|
Module : Gargantext.Database.Query.Table.Ngrams
Description : Deal with Ngrams in the Gargantext database.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Table.Ngrams
( module Gargantext.Database.Schema.Ngrams
, queryNgramsTable
, selectNgramsByDoc
, insertNgrams
)
where
import Control.Lens ((^.))
import Data.HashMap.Strict (HashMap)
import Data.ByteString.Internal (ByteString)
import Data.Text (Text)
import qualified Database.PostgreSQL.Simple as PGS
import qualified Data.List as List
import qualified Data.HashMap.Strict as HashMap
import Gargantext.Core.Types
import Gargantext.Database.Prelude (runOpaQuery, Cmd)
import Gargantext.Database.Prelude (runPGSQuery, formatPGSQuery)
import Gargantext.Database.Query.Table.NodeNodeNgrams
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Types
import Gargantext.Prelude
queryNgramsTable :: Query NgramsRead
queryNgramsTable = queryTable ngramsTable
selectNgramsByDoc :: [ListId] -> DocId -> NgramsType -> Cmd err [Text]
selectNgramsByDoc lIds dId nt = runOpaQuery (query lIds dId nt)
where
join :: Query (NgramsRead, NodeNodeNgramsReadNull)
join = leftJoin queryNgramsTable queryNodeNodeNgramsTable on1
where
on1 (ng,nnng) = ng^.ngrams_id .== nnng^.nnng_ngrams_id
query cIds' dId' nt' = proc () -> do
(ng,nnng) <- join -< ()
restrict -< foldl (\b cId -> ((toNullable $ pgNodeId cId) .== nnng^.nnng_node1_id) .|| b) (pgBool True) cIds'
restrict -< (toNullable $ pgNodeId dId') .== nnng^.nnng_node2_id
restrict -< (toNullable $ pgNgramsType nt') .== nnng^.nnng_ngramsType
returnA -< ng^.ngrams_terms
_postNgrams :: CorpusId -> DocId -> [Text] -> Cmd err Int
_postNgrams = undefined
_dbGetNgramsDb :: Cmd err [NgramsDB]
_dbGetNgramsDb = runOpaQuery queryNgramsTable
-- TODO-ACCESS: access must not be checked here but when insertNgrams is called.
insertNgrams :: [Ngrams] -> Cmd err (HashMap Text NgramsId)
insertNgrams ns =
if List.null ns
then pure HashMap.empty
else HashMap.fromList <$> map (\(Indexed i t) -> (t, i)) <$> (insertNgrams' ns)
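-- insertNgrams returns a map from each input term to its NgramsId, whether the
-- term was just inserted or already present in the ngrams table (see
-- queryInsertNgrams below).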
-- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
insertNgrams' :: [Ngrams] -> Cmd err [Indexed Int Text]
insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
_insertNgrams_Debug :: [(Text, Size)] -> Cmd err ByteString
_insertNgrams_Debug ns = formatPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
----------------------
queryInsertNgrams :: PGS.Query
queryInsertNgrams = [sql|
WITH input_rows(terms,n) AS (?)
, ins AS (
INSERT INTO ngrams (terms,n)
SELECT * FROM input_rows
ON CONFLICT (terms) DO NOTHING -- unique index created here
RETURNING id,terms
)
SELECT id, terms
FROM ins
UNION ALL
SELECT c.id, terms
FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index
|]
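-- The query above is an "insert or select" upsert: new terms are inserted
-- (ON CONFLICT DO NOTHING) and their ids returned from the CTE, while terms
-- that already exist are picked up by the final JOIN on ngrams, so every input
-- term comes back paired with an id.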
NgramsPostag.hs 0000664 0000000 0000000 00000015141 14124644201 0033770 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table {-|
Module : Gargantext.Database.Query.Table.NgramsPostag
Description : Deal with NgramsPostag in the Gargantext database.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO use Opaleye for the queries.
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Table.NgramsPostag
where
import Control.Lens (view, (^.))
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Text (Text)
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Prelude (Cmd, runPGSQuery, runPGSQuery_)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Query.Table.Ngrams
import Gargantext.Database.Types
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Database.PostgreSQL.Simple as PGS
data NgramsPostag = NgramsPostag { _np_lang :: !Lang
, _np_algo :: !PosTagAlgo
, _np_postag :: !POS
, _np_form :: !Ngrams
, _np_lem :: !Ngrams
}
deriving (Eq, Ord, Generic, Show)
makeLenses ''NgramsPostag
instance Hashable NgramsPostag
type NgramsPostagInsert = ( Int
, Int
, Text
, Text
, Int
, Text
, Int
)
toInsert :: NgramsPostag -> NgramsPostagInsert
toInsert (NgramsPostag l a p form lem) =
( toDBid l
, toDBid a
, cs $ show p
, view ngramsTerms form
, view ngramsSize form
, view ngramsTerms lem
, view ngramsSize lem
)
insertNgramsPostag :: [NgramsPostag] -> Cmd err (HashMap Text NgramsId)
insertNgramsPostag xs =
if List.null xs
then pure HashMap.empty
else do
-- We do not store the lemma when it is identical to its form
let
(ns, nps) =
List.partition (\np -> np ^. np_form . ngramsTerms
== np ^. np_lem . ngramsTerms
) xs
ns' <- insertNgrams (map (view np_form) ns)
nps' <- HashMap.fromList
<$> map (\(Indexed t i) -> (t,i))
<$> insertNgramsPostag' (map toInsert nps)
pure $ HashMap.union ns' nps'
insertNgramsPostag' :: [NgramsPostagInsert] -> Cmd err [Indexed Text Int]
insertNgramsPostag' ns = runPGSQuery queryInsertNgramsPostag (PGS.Only $ Values fields ns)
where
fields = map (\t -> QualifiedIdentifier Nothing t) $ snd fields_name
fields_name :: ( [Text], [Text])
fields_name = ( ["lang_id", "algo_id", "postag", "form", "form_n", "lem" , "lem_n"]
, ["int4" , "int4" , "text" , "text", "int4" , "text", "int4" ]
)
----------------------
queryInsertNgramsPostag :: PGS.Query
queryInsertNgramsPostag = [sql|
WITH input_rows(lang_id,algo_id,postag,form,form_n, lem, lem_n)
AS (?)
-- ((VALUES (1::"int4",2::"int4",'VB'::"text",'dansaient'::"text",1::"int4",'danser'::"text",1::"int4")))
------------------------------------------------
, ins_form AS (INSERT INTO ngrams (terms,n)
SELECT ir1.form, ir1.form_n
FROM input_rows as ir1
UNION ALL
SELECT ir2.lem, ir2.lem_n
FROM input_rows as ir2
ON CONFLICT (terms)
DO NOTHING
RETURNING id,terms
)
------------------------------------------------
, ins_form_ret AS (
SELECT id, terms
FROM ins_form
UNION ALL
SELECT n.id, ir.form
FROM input_rows ir
JOIN ngrams n ON n.terms = ir.form
)
, ins_lem_ret AS (
SELECT id, terms
FROM ins_form
UNION ALL
SELECT n.id, ir.lem
FROM input_rows ir
JOIN ngrams n ON n.terms = ir.lem
)
------------------------------------------------
------------------------------------------------
, ins_postag AS (
INSERT INTO ngrams_postag (lang_id, algo_id, postag, ngrams_id, lemm_id,score)
SELECT ir.lang_id, ir.algo_id, ir.postag, form.id, lem.id,1 -- count(*) as s
FROM input_rows ir
JOIN ins_form_ret form ON form.terms = ir.form
JOIN ins_lem_ret lem ON lem.terms = ir.lem
-- GROUP BY ir.lang_id, ir.algo_id, ir.postag, form.id, lem.id
-- ORDER BY s DESC
-- LIMIT 1
ON CONFLICT (lang_id,algo_id,postag,ngrams_id,lemm_id)
DO NOTHING -- acceptable for now since we are using NP mainly
-- DO UPDATE SET score = ngrams_postag.score + 1
)
SELECT terms,id FROM ins_form_ret
INNER JOIN input_rows ir ON ins_form_ret.terms = ir.form
|]
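-- Reading guide for the query above: input_rows feeds both the form and the
-- lemma into the ngrams table (ins_form); ins_form_ret and ins_lem_ret resolve
-- their ids whether freshly inserted or pre-existing; ins_postag records the
-- (lang, algo, postag, form, lemma) link; and the final SELECT returns the
-- (terms, id) pairs for the forms only.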
-- TODO add lang and postag algo
-- TODO remove when form == lem in insert
selectLems :: Lang -> PosTagAlgo -> [Ngrams] -> Cmd err [(Form, Lem)]
selectLems l a ns = runPGSQuery querySelectLems (PGS.Only $ Values fields datas)
where
fields = map (\t -> QualifiedIdentifier Nothing t) ["int4","int4","text", "int4"]
datas = map (\d -> [toField $ toDBid l, toField $ toDBid a] <> toRow d) ns
----------------------
querySelectLems :: PGS.Query
querySelectLems = [sql|
WITH input_rows(lang_id, algo_id, terms,n)
AS (?) -- ((VALUES ('automata' :: "text")))
, lems AS ( select n1.terms as t1 ,n2.terms as t2 ,sum(np.score) as score from input_rows ir
JOIN ngrams n1 ON ir.terms = n1.terms
JOIN ngrams_postag np ON np.ngrams_id = n1.id
JOIN ngrams n2 ON n2.id = np.lemm_id
WHERE np.lang_id = ir.lang_id
AND np.algo_id = ir.algo_id
GROUP BY n1.terms, n2.terms
ORDER BY score DESC
)
SELECT t1,t2 from lems
|]
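-- A minimal usage sketch (assuming the EN / CoreNLP constructors of Lang and
-- PosTagAlgo, and some list @ns@ of Ngrams values):
-- selectLems EN CoreNLP ns :: Cmd err [(Form, Lem)]
-- Each returned pair maps a surface form to its best-scored lemma.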
-- | Insert Table
createTable_NgramsPostag :: Cmd err [Int]
createTable_NgramsPostag = map (\(PGS.Only a) -> a)
<$> runPGSQuery_ queryCreateTable
where
queryCreateTable :: PGS.Query
queryCreateTable = [sql|
CREATE TABLE public.ngrams_postag (
id SERIAL,
lang_id INTEGER,
algo_id INTEGER,
postag CHARACTER varying(5),
ngrams_id INTEGER NOT NULL,
lemm_id INTEGER NOT NULL,
score INTEGER DEFAULT 1 ::integer NOT NULL,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE,
FOREIGN KEY (lemm_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
) ;
-- ALTER TABLE public.ngrams_postag OWNER TO gargantua;
CREATE UNIQUE INDEX ON public.ngrams_postag (lang_id,algo_id,postag,ngrams_id,lemm_id);
|]
Node.hs 0000664 0000000 0000000 00000032106 14124644201 0032250 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table {-|
Module : Gargantext.Database.Query.Table.Node
Description : Main Tools of Node to the database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Database.Query.Table.Node
where
import Control.Arrow (returnA)
import Control.Lens (set, view)
import Data.Aeson
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Database.PostgreSQL.Simple as DPS
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Opaleye hiding (FromField)
import Prelude hiding (null, id, map, sum)
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Hyperdata.Default
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Filter (limit', offset')
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Schema.Node
import Gargantext.Prelude hiding (sum, head)
queryNodeSearchTable :: Query NodeSearchRead
queryNodeSearchTable = queryTable nodeTableSearch
selectNode :: Column PGInt4 -> Query NodeRead
selectNode id' = proc () -> do
row <- queryNodeTable -< ()
restrict -< _node_id row .== id'
returnA -< row
runGetNodes :: Query NodeRead -> Cmd err [Node HyperdataAny]
runGetNodes = runOpaQuery
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | order by publication date
-- Favorites (Bool), node_ngrams
selectNodesWith :: HasDBid NodeType
=> ParentId -> Maybe NodeType
-> Maybe Offset -> Maybe Limit -> Query NodeRead
selectNodesWith parentId maybeNodeType maybeOffset maybeLimit =
--offset' maybeOffset $ limit' maybeLimit $ orderBy (asc (hyperdataDocument_Publication_date . node_hyperdata)) $ selectNodesWith' parentId typeId
limit' maybeLimit $ offset' maybeOffset
$ orderBy (asc _node_id)
$ selectNodesWith' parentId maybeNodeType
selectNodesWith' :: HasDBid NodeType
=> ParentId -> Maybe NodeType -> Query NodeRead
selectNodesWith' parentId maybeNodeType = proc () -> do
node' <- (proc () -> do
row@(Node _ _ typeId _ parentId' _ _ _) <- queryNodeTable -< ()
restrict -< parentId' .== (pgNodeId parentId)
let typeId' = maybe 0 toDBid maybeNodeType
restrict -< if typeId' > 0
then typeId .== (pgInt4 (typeId' :: Int))
else (pgBool True)
returnA -< row ) -< ()
returnA -< node'
deleteNode :: NodeId -> Cmd err Int
deleteNode n = mkCmd $ \conn ->
fromIntegral <$> runDelete conn nodeTable
(\(Node n_id _ _ _ _ _ _ _) -> n_id .== pgNodeId n)
deleteNodes :: [NodeId] -> Cmd err Int
deleteNodes ns = mkCmd $ \conn ->
fromIntegral <$> runDelete conn nodeTable
(\(Node n_id _ _ _ _ _ _ _) -> in_ ((map pgNodeId ns)) n_id)
-- TODO: NodeType should match with `a'
getNodesWith :: (JSONB a, HasDBid NodeType) => NodeId -> proxy a -> Maybe NodeType
-> Maybe Offset -> Maybe Limit -> Cmd err [Node a]
getNodesWith parentId _ nodeType maybeOffset maybeLimit =
runOpaQuery $ selectNodesWith parentId nodeType maybeOffset maybeLimit
-- TODO: Why is the second parameter ignored?
-- TODO: Why not use getNodesWith?
getNodesWithParentId :: (Hyperdata a, JSONB a)
=> Maybe NodeId
-> Cmd err [Node a]
getNodesWithParentId n = runOpaQuery $ selectNodesWithParentID n'
where
n' = case n of
Just n'' -> n''
Nothing -> 0
-- | Given a node id, find its closest parent of the given type.
-- NOTE: This isn't optimal: it can issue successive queries depending on how
-- deeply nested the child is.
getClosestParentIdByType :: HasDBid NodeType
=> NodeId
-> NodeType
-> Cmd err (Maybe NodeId)
getClosestParentIdByType nId nType = do
result <- runPGSQuery query (nId, 0 :: Int)
case result of
[(NodeId parentId, pTypename)] -> do
if toDBid nType == pTypename then
pure $ Just $ NodeId parentId
else
getClosestParentIdByType (NodeId parentId) nType
_ -> pure Nothing
where
query :: DPS.Query
query = [sql|
SELECT n2.id, n2.typename
FROM nodes n1
JOIN nodes n2 ON n1.parent_id = n2.id
WHERE n1.id = ? AND 0 = ?;
|]
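-- The constant 0 passed to runPGSQuery above only exists to consume the second
-- '?' placeholder: "0 = ?" is always true for the (nId, 0 :: Int) tuple, so
-- the query effectively filters on n1.id alone.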
------------------------------------------------------------------------
getDocumentsV3WithParentId :: HasDBid NodeType => NodeId -> Cmd err [Node HyperdataDocumentV3]
getDocumentsV3WithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeDocument)
-- TODO: merge with getDocumentsWithParentId by having a class IsHyperdataDocument
getDocumentsWithParentId :: HasDBid NodeType => NodeId -> Cmd err [Node HyperdataDocument]
getDocumentsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeDocument)
getListsModelWithParentId :: HasDBid NodeType => NodeId -> Cmd err [Node HyperdataModel]
getListsModelWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeModel)
getCorporaWithParentId :: HasDBid NodeType => NodeId -> Cmd err [Node HyperdataCorpus]
getCorporaWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeCorpus)
------------------------------------------------------------------------
selectNodesWithParentID :: NodeId -> Query NodeRead
selectNodesWithParentID n = proc () -> do
row@(Node _ _ _ _ parent_id _ _ _) <- queryNodeTable -< ()
restrict -< parent_id .== (pgNodeId n)
returnA -< row
------------------------------------------------------------------------
-- | Example of use:
-- runCmdReplEasy (getNodesWithType NodeList (Proxy :: Proxy HyperdataList))
getNodesWithType :: (HasNodeError err, JSONB a, HasDBid NodeType) => NodeType -> proxy a -> Cmd err [Node a]
getNodesWithType nt _ = runOpaQuery $ selectNodesWithType nt
where
selectNodesWithType :: HasDBid NodeType
=> NodeType -> Query NodeRead
selectNodesWithType nt' = proc () -> do
row@(Node _ _ tn _ _ _ _ _) <- queryNodeTable -< ()
restrict -< tn .== (pgInt4 $ toDBid nt')
returnA -< row
getNodesIdWithType :: (HasNodeError err, HasDBid NodeType) => NodeType -> Cmd err [NodeId]
getNodesIdWithType nt = do
ns <- runOpaQuery $ selectNodesIdWithType nt
pure (map NodeId ns)
selectNodesIdWithType :: HasDBid NodeType
=> NodeType -> Query (Column PGInt4)
selectNodesIdWithType nt = proc () -> do
row@(Node _ _ tn _ _ _ _ _) <- queryNodeTable -< ()
restrict -< tn .== (pgInt4 $ toDBid nt)
returnA -< _node_id row
------------------------------------------------------------------------
getNode :: HasNodeError err => NodeId -> Cmd err (Node Value)
getNode nId = do
maybeNode <- headMay <$> runOpaQuery (selectNode (pgNodeId nId))
case maybeNode of
Nothing -> nodeError (DoesNotExist nId)
Just r -> pure r
getNodeWith :: (HasNodeError err, JSONB a)
=> NodeId -> proxy a -> Cmd err (Node a)
getNodeWith nId _ = do
maybeNode <- headMay <$> runOpaQuery (selectNode (pgNodeId nId))
case maybeNode of
Nothing -> nodeError (DoesNotExist nId)
Just r -> pure r
------------------------------------------------------------------------
-- | Sugar to insert Node with NodeType in Database
insertDefaultNode :: HasDBid NodeType
=> NodeType -> ParentId -> UserId -> Cmd err [NodeId]
insertDefaultNode nt p u = insertNode nt Nothing Nothing p u
insertNode :: HasDBid NodeType
=> NodeType -> Maybe Name -> Maybe DefaultHyperdata -> ParentId -> UserId -> Cmd err [NodeId]
insertNode nt n h p u = insertNodesR [nodeW nt n h p u]
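-- A minimal usage sketch (parentId and userId are placeholders):
-- insertDefaultNode NodeCorpus parentId userId :: Cmd err [NodeId]
-- which is the same as insertNode NodeCorpus Nothing Nothing parentId userId,
-- i.e. a corpus node with its default name and default hyperdata.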
nodeW :: HasDBid NodeType
=> NodeType -> Maybe Name -> Maybe DefaultHyperdata -> ParentId -> UserId -> NodeWrite
nodeW nt n h p u = node nt n' h' (Just p) u
where
n' = fromMaybe (defaultName nt) n
h' = maybe (defaultHyperdata nt) identity h
------------------------------------------------------------------------
node :: (ToJSON a, Hyperdata a, HasDBid NodeType)
=> NodeType
-> Name
-> a
-> Maybe ParentId
-> UserId
-> NodeWrite
node nodeType name hyperData parentId userId =
Node Nothing Nothing
(pgInt4 typeId)
(pgInt4 userId)
(pgNodeId <$> parentId)
(pgStrictText name)
Nothing
(pgJSONB $ cs $ encode hyperData)
where
typeId = toDBid nodeType
-------------------------------
insertNodes :: [NodeWrite] -> Cmd err Int64
insertNodes ns = mkCmd $ \conn -> runInsert_ conn $ Insert nodeTable ns rCount Nothing
{-
insertNodes' :: [Node a] -> Cmd err Int64
insertNodes' ns = mkCmd $ \conn -> runInsert_ conn
$ Insert nodeTable ns' rCount Nothing
where
ns' :: [NodeWrite]
ns' = map (\(Node i t u p n d h)
-> Node (pgNodeId <$> i)
(pgInt4 $ toDBid t)
(pgInt4 u)
(pgNodeId <$> p)
(pgStrictText n)
(pgUTCTime <$> d)
(pgJSONB $ cs $ encode h)
) ns
-}
insertNodesR :: [NodeWrite] -> Cmd err [NodeId]
insertNodesR ns = mkCmd $ \conn ->
runInsert_ conn (Insert nodeTable ns (rReturning (\(Node i _ _ _ _ _ _ _) -> i)) Nothing)
insertNodesWithParent :: Maybe ParentId -> [NodeWrite] -> Cmd err Int64
insertNodesWithParent pid ns = insertNodes (set node_parent_id (pgNodeId <$> pid) <$> ns)
insertNodesWithParentR :: Maybe ParentId -> [NodeWrite] -> Cmd err [NodeId]
insertNodesWithParentR pid ns = insertNodesR (set node_parent_id (pgNodeId <$> pid) <$> ns)
------------------------------------------------------------------------
-- TODO
-- currently this function removes the child relation
-- needs a Temporary type between Node' and NodeWriteT
node2table :: HasDBid NodeType
=> UserId -> Maybe ParentId -> Node' -> NodeWrite
node2table uid pid (Node' nt txt v []) = Node Nothing Nothing (pgInt4 $ toDBid nt) (pgInt4 uid) (fmap pgNodeId pid) (pgStrictText txt) Nothing (pgStrictJSONB $ cs $ encode v)
node2table _ _ (Node' _ _ _ _) = panic "node2table: should not happen, Tree insert not implemented yet"
data Node' = Node' { _n_type :: NodeType
, _n_name :: Text
, _n_data :: Value
, _n_children :: [Node']
} deriving (Show)
mkNodes :: [NodeWrite] -> Cmd err Int64
mkNodes ns = mkCmd $ \conn -> runInsert_ conn
$ Insert nodeTable ns rCount Nothing
mkNodeR :: [NodeWrite] -> Cmd err [NodeId]
mkNodeR ns = mkCmd $ \conn -> runInsert_ conn $ Insert nodeTable ns (rReturning _node_id) Nothing
------------------------------------------------------------------------
childWith :: HasDBid NodeType
=> UserId -> ParentId -> Node' -> NodeWrite
childWith uId pId (Node' NodeDocument txt v []) = node2table uId (Just pId) (Node' NodeDocument txt v [])
childWith uId pId (Node' NodeContact txt v []) = node2table uId (Just pId) (Node' NodeContact txt v [])
childWith _ _ (Node' _ _ _ _) = panic "This NodeType can not be a child"
-- =================================================================== --
-- |
-- CorpusDocument is a corpus made from a set of documents
-- CorpusContact is a corpus made from a set of contacts (syn of Annuaire)
data CorpusType = CorpusDocument | CorpusContact
class MkCorpus a
where
mk :: HasDBid NodeType => Maybe Name -> Maybe a -> ParentId -> UserId -> Cmd err [NodeId]
instance MkCorpus HyperdataCorpus
where
mk n Nothing p u = insertNode NodeCorpus n Nothing p u
mk n (Just h) p u = insertNode NodeCorpus n (Just $ DefaultCorpus h) p u
instance MkCorpus HyperdataAnnuaire
where
mk n Nothing p u = insertNode NodeCorpus n Nothing p u
mk n (Just h) p u = insertNode NodeAnnuaire n (Just $ DefaultAnnuaire h) p u
getOrMkList :: (HasNodeError err, HasDBid NodeType)
=> ParentId
-> UserId
-> Cmd err ListId
getOrMkList pId uId =
maybe (mkList' pId uId) (pure . view node_id) . headMay =<< getListsWithParentId pId
where
mkList' pId' uId' = maybe (nodeError MkNode) pure . headMay =<< insertDefaultNode NodeList pId' uId'
-- | TODO remove defaultList
defaultList :: (HasNodeError err, HasDBid NodeType) => CorpusId -> Cmd err ListId
defaultList cId =
maybe (nodeError NoListFound) (pure . view node_id) . headMay =<< getListsWithParentId cId
defaultListMaybe :: (HasNodeError err, HasDBid NodeType) => CorpusId -> Cmd err (Maybe NodeId)
defaultListMaybe cId = headMay <$> map (view node_id ) <$> getListsWithParentId cId
getListsWithParentId :: HasDBid NodeType => NodeId -> Cmd err [Node HyperdataList]
getListsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeList)
Node/ 0000775 0000000 0000000 00000000000 14124644201 0031712 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table Children.hs 0000664 0000000 0000000 00000005270 14124644201 0034002 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table/Node {-|
Module : Gargantext.Database.Query.Table.Node.Children
Description : Main requests of Node to the database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
module Gargantext.Database.Query.Table.Node.Children
where
import Control.Arrow (returnA)
import Data.Proxy
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument, HyperdataContact)
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Filter
import Gargantext.Database.Query.Table.NodeNode
import Gargantext.Database.Schema.Node
import Opaleye
import Protolude
-- TODO getAllTableDocuments
getAllDocuments :: HasDBid NodeType => ParentId -> Cmd err (TableResult (Node HyperdataDocument))
getAllDocuments pId = getAllChildren pId (Proxy :: Proxy HyperdataDocument)
(Just NodeDocument)
-- TODO getAllTableContacts
getAllContacts :: HasDBid NodeType => ParentId -> Cmd err (TableResult (Node HyperdataContact))
getAllContacts pId = getAllChildren pId (Proxy :: Proxy HyperdataContact)
(Just NodeContact)
getAllChildren :: (JSONB a, HasDBid NodeType)
=> ParentId
-> proxy a
-> Maybe NodeType
-> Cmd err (NodeTableResult a)
getAllChildren pId p maybeNodeType = getChildren pId p maybeNodeType Nothing Nothing
getChildren :: (JSONB a, HasDBid NodeType)
=> ParentId
-> proxy a
-> Maybe NodeType
-> Maybe Offset
-> Maybe Limit
-> Cmd err (NodeTableResult a)
getChildren pId _ maybeNodeType maybeOffset maybeLimit = do
docs <- runOpaQuery
$ limit' maybeLimit $ offset' maybeOffset
$ orderBy (asc _node_id)
$ query
docCount <- runCountOpaQuery query
pure $ TableResult { tr_docs = docs, tr_count = docCount }
where
query = selectChildren pId maybeNodeType
selectChildren :: HasDBid NodeType
=> ParentId
-> Maybe NodeType
-> Query NodeRead
selectChildren parentId maybeNodeType = proc () -> do
row@(Node nId _ typeName _ parent_id _ _ _) <- queryNodeTable -< ()
(NodeNode n1id n2id _ _) <- queryNodeNodeTable -< ()
let nodeType = maybe 0 toDBid maybeNodeType
restrict -< typeName .== pgInt4 nodeType
restrict -< (.||) (parent_id .== (pgNodeId parentId))
( (.&&) (n1id .== pgNodeId parentId)
(n2id .== nId))
returnA -< row
Contact.hs 0000664 0000000 0000000 00000001053 14124644201 0033640 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table/Node {-|
Module : Gargantext.Database.Query.Table.Node.Contact
Description : Update Node in Database (Postgres)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Database.Query.Table.Node.Contact
where
import Gargantext.Database.Admin.Types.Node ( Node)
import Gargantext.Database.Admin.Types.Hyperdata.Contact
------------------------------------------------------------------------
type NodeContact = Node HyperdataContact
Document/ 0000775 0000000 0000000 00000000000 14124644201 0033470 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table/Node Add.hs 0000664 0000000 0000000 00000005257 14124644201 0034525 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table/Node/Document {-|
Module : Gargantext.Database.Node.Document.Add
Description : Importing context of texts (documents)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Add Documents/Contact to a Corpus/Annuaire.
-}
------------------------------------------------------------------------
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TypeSynonymInstances #-}
------------------------------------------------------------------------
module Gargantext.Database.Query.Table.Node.Document.Add
where
import Data.ByteString.Internal (ByteString)
import Data.Text (Text)
import Database.PostgreSQL.Simple (Query, Only(..))
import Database.PostgreSQL.Simple.SqlQQ
import Database.PostgreSQL.Simple.ToField (toField)
import Database.PostgreSQL.Simple.ToRow (ToRow(..))
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import GHC.Generics (Generic)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd, runPGSQuery, formatPGSQuery)
import Gargantext.Prelude
---------------------------------------------------------------------------
add :: ParentId -> [NodeId] -> Cmd err [Only Int]
add pId ns = runPGSQuery queryAdd (Only $ Values fields inputData)
where
fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
inputData = prepare pId ns
add_debug :: ParentId -> [NodeId] -> Cmd err ByteString
add_debug pId ns = formatPGSQuery queryAdd (Only $ Values fields inputData)
where
fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
inputData = prepare pId ns
-- | Input Tables: types of the tables
inputSqlTypes :: [Text]
inputSqlTypes = ["int4","int4","int4"]
-- | SQL query to add documents
-- TODO return id of added documents only
queryAdd :: Query
queryAdd = [sql|
WITH input_rows(node1_id,node2_id,category) AS (?)
INSERT INTO nodes_nodes (node1_id, node2_id,category)
SELECT * FROM input_rows
ON CONFLICT (node1_id, node2_id) DO NOTHING -- on unique index
RETURNING 1
;
|]
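-- Because of ON CONFLICT DO NOTHING, adding a (node1_id, node2_id) pair that
-- already exists is a no-op; only newly created links contribute a row to the
-- RETURNING result.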
prepare :: ParentId -> [NodeId] -> [InputData]
prepare pId ns = map (\nId -> InputData pId nId) ns
------------------------------------------------------------------------
-- * Main Types used
data InputData = InputData { inNode1_id :: NodeId
, inNode2_id :: NodeId
} deriving (Show, Generic, Typeable)
instance ToRow InputData where
toRow inputData = [ toField (inNode1_id inputData)
, toField (inNode2_id inputData)
, toField (1 :: Int)
]
Insert.hs 0000664 0000000 0000000 00000026630 14124644201 0035277 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table/Node/Document {-|
Module : Gargantext.Database.Node.Document.Insert
Description : Importing context of texts (documents)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
* Purpose of this module
Enabling "common goods" of text data and respecting privacy.
Gargantext shares as "common good" the links between context of texts
and terms / words / ngrams.
Basically a context of text can be defined as a document (see 'Gargantext.Core.Text').
Issue to tackle in this module: each global document of Gargantext has
to be unique, then shared; but how do we respect privacy when needed?
* Methodology to get uniqueness and privacy by design
As a consequence, when importing/inserting a new document in Gargantext,
a policy for the uniqueness of the inserted documents has to be defined.
That is the purpose of this module which defines its main concepts.
The unique identifier in the database is a 3-tuple of policies that
together define uniqueness:
- Design policy: type of node is needed as TypenameId, that is a
Document or Individual or something else;
- Privacy policy: with ParentId, parent becomes unique, then it enables
users to get their own copy without sharing it with all the users of the
database (in other words, parent_id is necessary to preserve privacy, for
instance).
- Hash policy: this UniqId is a SHA-256 hash of the concatenation of the
parameters defined by @shaParameters@.
> -- * Example
> insertTest :: FromRow r => CorpusId -> [Node HyperdataDocument] -> IO [r]
> insertTest :: IO [ReturnId]
> insertTest = runCmdDev $ insertDocuments 1 452162 hyperdataDocuments
-}
------------------------------------------------------------------------
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TypeSynonymInstances #-}
------------------------------------------------------------------------
module Gargantext.Database.Query.Table.Node.Document.Insert
where
import Control.Lens (set, view)
import Control.Lens.Cons
import Control.Lens.Prism
import Data.Aeson (toJSON, encode, ToJSON)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
-- import Data.ByteString (ByteString)
import Data.Time.Segment (jour)
import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
import Database.PostgreSQL.Simple.FromRow (fromRow, field)
-- import Database.PostgreSQL.Simple.ToRow (toRow, ToRow)
import Database.PostgreSQL.Simple.SqlQQ
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import GHC.Generics (Generic)
import Gargantext.Core (HasDBid(toDBid))
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd, runPGSQuery{-, formatPGSQuery-})
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (hash)
import qualified Data.Text as DT (pack, concat, take)
{-| To Print result query
import Data.ByteString.Internal (ByteString)
import Database.PostgreSQL.Simple (formatQuery)
-}
---------------------------------------------------------------------------
-- * Main Insert functions
-- | Insert Document main function
-- UserId : user who is inserting the documents
-- ParentId : folder ID which is parent of the inserted documents
-- Administrator of the database has to create a uniq index as following SQL command:
-- `create unique index on nodes (typename, parent_id, (hyperdata ->> 'uniqId'));`
insertDb :: (InsertDb a, HasDBid NodeType) => UserId -> ParentId -> [a] -> Cmd err [ReturnId]
insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
where
fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
class InsertDb a
where
insertDb' :: HasDBid NodeType => UserId -> ParentId -> a -> [Action]
instance InsertDb HyperdataDocument
where
insertDb' u p h = [ toField ("" :: Text)
, toField $ toDBid NodeDocument
, toField u
, toField p
, toField $ maybe "No Title" (DT.take 255) (_hd_title h)
, toField $ _hd_publication_date h -- TODO USE UTCTime
, (toField . toJSON) h
]
instance InsertDb HyperdataContact
where
insertDb' u p h = [ toField ("" :: Text)
, toField $ toDBid NodeContact
, toField u
, toField p
, toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
, toField $ jour 0 1 1 -- TODO put default date
, (toField . toJSON) h
]
instance ToJSON a => InsertDb (Node a)
where
insertDb' _u _p (Node _nid hashId t u p n d h) = [ toField hashId
, toField t
, toField u
, toField p
, toField n
, toField d
, (toField . toJSON) h
]
-- | Debug SQL function
--
-- to print the rendered query (for debugging), use the @formatQuery@ function.
{-
insertDocuments_Debug :: (Hyperdata a, ToJSON a, ToRow a, InsertDb [a])
=> UserId -> ParentId -> [a] -> Cmd err ByteString
insertDocuments_Debug uId pId hs = formatPGSQuery queryInsert (Only $ Values fields inputData)
where
fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
inputData = insertDb' uId pId hs
-}
-- | Input table: SQL types of the input columns
inputSqlTypes :: [Text]
inputSqlTypes = map DT.pack ["text", "int4","int4","int4","text","date","jsonb"]
-- | SQL query to insert documents inside the database
queryInsert :: Query
queryInsert = [sql|
WITH input_rows(hash_id,typename,user_id,parent_id,name,date,hyperdata) AS (?)
, ins AS (
INSERT INTO nodes (hash_id, typename,user_id,parent_id,name,date,hyperdata)
SELECT * FROM input_rows
ON CONFLICT (hash_id) DO NOTHING -- on unique index -- this does not return the ids
RETURNING id,hash_id
)
SELECT true AS source -- true for 'newly inserted'
, id
, hash_id
FROM ins
UNION ALL
SELECT false AS source -- false for 'not inserted'
, n.id
, hash_id
FROM input_rows
JOIN nodes n USING (hash_id); -- columns of unique index
|]
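-- Note on the query above (added): the first branch of the UNION returns the
-- rows that were actually inserted (source = true); the second branch returns
-- the input rows that already existed and were skipped by ON CONFLICT
-- (source = false), looked up again through their hash_id. Together they
-- yield one row, hence one 'ReturnId', per input hash_id.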
------------------------------------------------------------------------
-- * Main Types used
-- ** Return Types
-- | Return type used when documents are inserted
data ReturnId = ReturnId { reInserted :: Bool -- if the document is inserted (True: is new, False: is not new)
, reId :: NodeId -- always return the id of the document (even new or not new)
-- this is the unique id in the database
, reUniqId :: Text -- Hash Id with concatenation of sha parameters
} deriving (Show, Generic)
instance FromRow ReturnId where
fromRow = ReturnId <$> field <*> field <*> field
---------------------------------------------------------------------------
-- * Uniqueness of document definition
class AddUniqId a
where
addUniqId :: a -> a
instance AddUniqId HyperdataDocument
where
addUniqId = addUniqIdsDoc
where
addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
addUniqIdsDoc doc = set hd_uniqIdBdd (Just shaBdd)
$ set hd_uniqId (Just shaUni) doc
where
shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
shaParametersDoc :: [(HyperdataDocument -> Text)]
shaParametersDoc = [ \d -> maybeText (_hd_title d)
, \d -> maybeText (_hd_abstract d)
, \d -> maybeText (_hd_source d)
, \d -> maybeText (_hd_publication_date d)
]
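-- A hedged illustration (added): for a document whose title, abstract, source
-- and publication date are "T", "A", "S" and "2021", 'addUniqIdsDoc' sets
-- @hd_uniqId@ to @hash "TAS2021"@ and @hd_uniqIdBdd@ to
-- @hash (bdd <> "TAS2021")@, where @bdd@ is the document's database-of-origin
-- field ('maybeText' turns a missing field into the empty string).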
-- TODO put this elsewhere (fix bin/gargantext-init/Main.hs too)
secret :: Text
secret = "Database secret to change"
instance (AddUniqId a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a)
where
addUniqId (Node nid _ t u p n d h) = Node nid hashId t u p n d h
where
hashId = Just $ "\\x" <> (hash $ DT.concat params)
params = [ secret
, cs $ show $ toDBid NodeDocument
, n
, cs $ show p
, cs $ encode h
]
{-
addUniqId n@(Node nid _ t u p n d h) =
case n of
Node HyperdataDocument -> Node nid hashId t u p n d h
where
hashId = "\\x" <> (hash $ DT.concat params)
params = [ secret
, cs $ show $ toDBid NodeDocument
, n
, cs $ show p
, cs $ encode h
]
_ -> undefined
-}
---------------------------------------------------------------------------
-- * Uniqueness of document definition
-- TODO factorize with above (use the function below for tests)
instance AddUniqId HyperdataContact
where
addUniqId = addUniqIdsContact
addUniqIdsContact :: HyperdataContact -> HyperdataContact
addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
$ set (hc_uniqId ) (Just shaUni) hc
where
shaUni = hash $ DT.concat $ map ($ hc) shaParametersContact
shaBdd = hash $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
-- | TODO add more shaparameters
shaParametersContact :: [(HyperdataContact -> Text)]
shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName ) d
, \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
, \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
]
maybeText :: Maybe Text -> Text
maybeText = maybe (DT.pack "") identity
---------------------------------------------------------------------------
class ToNode a
where
-- TODO Maybe NodeId
toNode :: HasDBid NodeType => UserId -> ParentId -> a -> Node a
instance ToNode HyperdataDocument where
toNode u p h = Node 0 Nothing (toDBid NodeDocument) u (Just p) n date h
where
n = maybe "No Title" (DT.take 255) (_hd_title h)
date = jour y m d
y = maybe 0 fromIntegral $ _hd_publication_year h
m = fromMaybe 1 $ _hd_publication_month h
d = fromMaybe 1 $ _hd_publication_day h
-- TODO better Node
instance ToNode HyperdataContact where
toNode u p h = Node 0 Nothing (toDBid NodeContact) u (Just p) "Contact" date h
where
date = jour 2020 01 01
Error.hs 0000664 0000000 0000000 00000004256 14124644201 0033346 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table/Node {-|
Module : Gargantext.Database.Query.Table.Node.Error
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Database.Query.Table.Node.Error where
import Control.Lens (Prism', (#), (^?))
import Control.Monad.Except (MonadError(..))
import Data.Text (Text)
import Prelude hiding (null, id, map, sum)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Prelude hiding (sum, head)
------------------------------------------------------------------------
data NodeError = NoListFound
| NoRootFound
| NoCorpusFound
| NoUserFound
| MkNode
| UserNoParent
| HasParent
| ManyParents
| NegativeId
| NotImplYet
| ManyNodeUsers
| DoesNotExist NodeId
| NeedsConfiguration
| NodeError Text
instance Show NodeError
where
show NoListFound = "No list found"
show NoRootFound = "No Root found"
show NoCorpusFound = "No Corpus found"
show NoUserFound = "No user found"
show MkNode = "Cannot make node"
show NegativeId = "Node with negative Id"
show UserNoParent = "Should not have parent"
show HasParent = "NodeType has parent"
show NotImplYet = "Not implemented yet"
show ManyParents = "Too many parents"
show ManyNodeUsers = "Many userNode/user"
show (DoesNotExist n) = "Node does not exist (" <> show n <> ")"
show NeedsConfiguration = "Needs configuration"
show (NodeError e) = "NodeError: " <> cs e
class HasNodeError e where
_NodeError :: Prism' e NodeError
errorWith :: ( MonadError e m
, HasNodeError e)
=> Text -> m a
errorWith x = nodeError (NodeError x)
nodeError :: ( MonadError e m
, HasNodeError e)
=> NodeError -> m a
nodeError ne = throwError $ _NodeError # ne
catchNodeError :: (MonadError e m, HasNodeError e) => m a -> (NodeError -> m a) -> m a
catchNodeError f g = catchError f (\e -> maybe (throwError e) g (e ^? _NodeError))
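{- A hedged usage sketch (added, not part of the original module): raise a
   'DoesNotExist' error from a lookup, and recover from any 'NodeError' in a
   caller with 'catchNodeError'. The helper names are illustrative only.

getOrFail :: (MonadError e m, HasNodeError e) => NodeId -> Maybe a -> m a
getOrFail nId Nothing  = nodeError (DoesNotExist nId)
getOrFail _   (Just x) = pure x

withFallback :: (MonadError e m, HasNodeError e) => m Int -> m Int
withFallback action = action `catchNodeError` (\_ -> pure 0)
-}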
Select.hs 0000664 0000000 0000000 00000002261 14124644201 0033466 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table/Node {-|
Module : Gargantext.Database.Query.Table.Node.Select
Description : Main requests of Node to the database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Arrows #-}
module Gargantext.Database.Query.Table.Node.Select
where
import Control.Arrow (returnA)
import Opaleye
import Protolude
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Core.Types.Individu (Username)
import Gargantext.Database.Prelude
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.User
import Gargantext.Database.Query.Table.User
selectNodesWithUsername :: HasDBid NodeType => NodeType -> Username -> Cmd err [NodeId]
selectNodesWithUsername nt u = runOpaQuery (q u)
where
q u' = proc () -> do
(n,usrs) <- join' -< ()
restrict -< user_username usrs .== (toNullable $ pgStrictText u')
restrict -< _node_typename n .== (pgInt4 $ toDBid nt)
returnA -< _node_id n
join' :: Query (NodeRead, UserReadNull)
join' = leftJoin queryNodeTable queryUserTable on1
where
on1 (n,us) = _node_user_id n .== user_id us
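-- A hedged usage sketch (added): list the corpus nodes owned by a given user.
-- The username is a placeholder.
--
-- > userCorpora :: HasDBid NodeType => Cmd err [NodeId]
-- > userCorpora = selectNodesWithUsername NodeCorpus "user1"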
Update.hs 0000664 0000000 0000000 00000003004 14124644201 0033465 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table/Node {-|
Module : Gargantext.Database.Query.Table.Node.Update
Description : Update Node in Database (Postgres)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Query.Table.Node.Update (Update(..), update)
where
import qualified Data.Text as DT
import Database.PostgreSQL.Simple
import Gargantext.Prelude
import Gargantext.Core.Types (Name)
import Gargantext.Database.Prelude
import Gargantext.Database.Admin.Types.Node (NodeId, ParentId)
-- import Data.ByteString
--rename :: NodeId -> Text -> IO ByteString
--rename nodeId name = formatPGSQuery "UPDATE nodes SET name=? where id=?" (name,nodeId)
------------------------------------------------------------------------
data Update = Rename NodeId Name
| Move NodeId ParentId
-- | Update a Node
-- TODO : Field as parameter
-- TODO jsonb values, consider this:
-- https://stackoverflow.com/questions/26703476/how-to-perform-update-operations-on-columns-of-type-jsonb-in-postgres-9-4
unOnly :: Only a -> a
unOnly (Only a) = a
-- TODO-ACCESS
update :: Update -> Cmd err [Int]
update (Rename nId name) = map unOnly <$> runPGSQuery "UPDATE nodes SET name=? where id=? returning id"
(DT.take 255 name,nId)
update (Move nId pId) = map unOnly <$> runPGSQuery "UPDATE nodes SET parent_id= ? where id=? returning id"
(pId, nId)
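-- A hedged usage sketch (added): rename a node and then move it under another
-- parent, collecting the ids of the rows that were actually updated. The
-- numeric ids are placeholders (literals assume the usual Num instance of
-- NodeId).
--
-- > renameAndMove :: Cmd err [Int]
-- > renameAndMove = do
-- >   renamed <- update (Rename 42 "New name")
-- >   moved   <- update (Move   42 7)
-- >   pure (renamed <> moved)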
UpdateOpaleye.hs 0000664 0000000 0000000 00000004051 14124644201 0035007 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table/Node {-|
Module : Gargantext.Database.Query.Table.Node.UpdateOpaleye
Description : Update Node in Database (Postgres)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Query.Table.Node.UpdateOpaleye
where
import Opaleye
import Data.Aeson (encode, ToJSON)
import Gargantext.Core
import Gargantext.Prelude
import Gargantext.Database.Schema.Node
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd, mkCmd, JSONB)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error
updateHyperdata :: ToJSON a => NodeId -> a -> Cmd err Int64
updateHyperdata i h = mkCmd $ \c -> runUpdate_ c (updateHyperdataQuery i h)
updateHyperdataQuery :: ToJSON a => NodeId -> a -> Update Int64
updateHyperdataQuery i h = Update
{ uTable = nodeTable
, uUpdateWith = updateEasy (\ (Node _ni _nh _nt _nu _np _nn _nd _h)
-> Node _ni _nh _nt _nu _np _nn _nd h'
)
, uWhere = (\row -> _node_id row .== pgNodeId i )
, uReturning = rCount
}
where h' = (pgJSONB $ cs $ encode $ h)
----------------------------------------------------------------------------------
updateNodesWithType :: ( HasNodeError err
, JSONB a
, ToJSON a
, HasDBid NodeType
) => NodeType -> proxy a -> (a -> a) -> Cmd err [Int64]
updateNodesWithType nt p f = do
ns <- getNodesWithType nt p
mapM (\n -> updateHyperdata (_node_id n) (f $ _node_hyperdata n)) ns
-- | In case the Hyperdata Types are not compatible
updateNodesWithType_ :: ( HasNodeError err
, JSONB a
, ToJSON a
, HasDBid NodeType
) => NodeType -> a -> Cmd err [Int64]
updateNodesWithType_ nt h = do
ns <- getNodesIdWithType nt
mapM (\n -> updateHyperdata n h) ns
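-- A hedged usage sketch (added): rewrite the hyperdata of every corpus node
-- with a pure function. 'HyperdataCorpus' is assumed to satisfy the JSONB and
-- ToJSON constraints above, and 'Proxy' is assumed to be imported from
-- Data.Proxy.
--
-- > touchAllCorpora :: (HasNodeError err, HasDBid NodeType)
-- >                 => (HyperdataCorpus -> HyperdataCorpus) -> Cmd err [Int64]
-- > touchAllCorpora f =
-- >   updateNodesWithType NodeCorpus (Proxy :: Proxy HyperdataCorpus) f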
User.hs 0000664 0000000 0000000 00000002403 14124644201 0033163 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table/Node {-|
Module : Gargantext.Database.Query.Table.Node.User
Description : User Node in Gargantext
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Database.Query.Table.Node.User
where
import Data.Maybe (fromMaybe)
import Gargantext.Core
import Gargantext.Core.Types (Name)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataUser(..), defaultHyperdataUser)
import Gargantext.Database.Admin.Types.Node (Node, NodeId(..), UserId, NodeType(..), pgNodeId)
import Gargantext.Database.Prelude -- (fromField', Cmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Schema.Node -- (Node(..))
import Gargantext.Prelude
import Opaleye (limit)
getNodeUser :: NodeId -> Cmd err (Node HyperdataUser)
getNodeUser nId = do
fromMaybe (panic $ "Node does not exist: " <> (cs $ show nId)) . headMay
<$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))
nodeUserW :: HasDBid NodeType => Maybe Name -> Maybe HyperdataUser -> UserId -> NodeWrite
nodeUserW maybeName maybeHyperdata = node NodeUser name user Nothing
where
name = maybe "User" identity maybeName
user = maybe defaultHyperdataUser identity maybeHyperdata
NodeNgrams.hs 0000664 0000000 0000000 00000011270 14124644201 0033417 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table {-|
Module : Gargantext.Database.Query.Table.NodeNgrams
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
NodeNgrams registers the Context of Ngrams (which are then named Cgrams)
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Table.NodeNgrams
( getCgramsId
, listInsertDb
, module Gargantext.Database.Schema.NodeNgrams
)
where
import Data.List.Extra (nubOrd)
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Database.PostgreSQL.Simple (FromRow)
import Database.PostgreSQL.Simple.FromRow (fromRow, field)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.ToField (toField)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Prelude
import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypeId, fromNgramsTypeId)
import Gargantext.Database.Schema.NodeNgrams
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Database.PostgreSQL.Simple as PGS (Query, Only(..))
-- | Return type of the insert query
data Returning = Returning { re_type :: !(Maybe NgramsType)
, re_terms :: !Text
, re_ngrams_id :: !Int
}
deriving (Show)
instance FromRow Returning where
fromRow = Returning <$> (fromNgramsTypeId <$> field) <*> field <*> field
getCgramsId :: Map NgramsType (Map Text Int) -> NgramsType -> Text -> Maybe Int
getCgramsId mapId nt t = case Map.lookup nt mapId of
Nothing -> Nothing
Just mapId' -> Map.lookup t mapId'
-- insertDb :: ListId -> Map NgramsType [NgramsElement] -> Cmd err [Result]
listInsertDb :: Show a => ListId
-> (ListId -> a -> [NodeNgramsW])
-> a
-- -> Cmd err [Returning]
-> Cmd err (Map NgramsType (Map Text Int))
listInsertDb l f ngs = Map.map Map.fromList
<$> Map.fromListWith (<>)
<$> List.map (\(Returning t tx id) -> (fromJust t, [(tx, id)]))
<$> List.filter (\(Returning t _ _) -> isJust t)
<$> insertNodeNgrams (f l ngs)
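-- Note (added): the resulting map associates each 'NgramsType' with a map
-- from ngrams terms to their node_ngrams ids, which is exactly the shape
-- 'getCgramsId' expects; rows whose type could not be decoded are dropped by
-- the 'isJust' filter.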
-- TODO optimize with size of ngrams
insertNodeNgrams :: [NodeNgramsW] -> Cmd err [Returning]
insertNodeNgrams nns = runPGSQuery query (PGS.Only $ Values fields nns')
where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4"
,"int4","int4","int4","int4"
,"float8"]
-- nns' :: [(Int, ListTypeId, NgramsText, NgramsTypeId ,NgramsField, NgramsTag, NgramsClass, Double)]
nns' = map (\(NodeNgrams _id (NodeId node_id'') node_subtype ngrams_terms ngrams_type ngrams_field ngrams_tag ngrams_class weight)
-> [ toField node_id''
, toField $ toDBid node_subtype
, toField $ ngrams_terms
, toField $ ngramsTypeId ngrams_type
, toField $ fromMaybe 0 ngrams_field
, toField $ fromMaybe 0 ngrams_tag
, toField $ fromMaybe 0 ngrams_class
, toField weight
]
) $ nubOrd nns
query :: PGS.Query
query = [sql|
WITH input(node_id, node_subtype, ngrams_terms, ngrams_type, ngrams_field, ngrams_tag, ngrams_class, weight) AS (?),
return(id, ngrams_type, ngrams_id) AS (
INSERT INTO node_ngrams (node_id, node_subtype, ngrams_id, ngrams_type, ngrams_field, ngrams_tag, ngrams_class, weight)
SELECT i.node_id, i.node_subtype, ng.id, i.ngrams_type, i.ngrams_field, i.ngrams_tag, i.ngrams_class, i.weight FROM input as i
INNER JOIN ngrams as ng ON ng.terms = i.ngrams_terms
ON CONFLICT(node_id, node_subtype, ngrams_id) DO NOTHING
-- DO UPDATE SET node_subtype = excluded.node_subtype, ngrams_type = excluded.ngrams_type, ngrams_field = excluded.ngrams_field, ngrams_tag = excluded.ngrams_tag, ngrams_class = excluded.ngrams_class, weight = excluded.weight
RETURNING id, ngrams_type, ngrams_id
)
SELECT return.ngrams_type, ng.terms, return.id FROM return
INNER JOIN ngrams ng ON return.ngrams_id = ng.id;
|]
NodeNode.hs 0000664 0000000 0000000 00000021224 14124644201 0033055 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table {-|
Module : Gargantext.Database.Query.Table.NodeNode
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Table.NodeNode
( module Gargantext.Database.Schema.NodeNode
, queryNodeNodeTable
, selectDocsDates
, selectDocNodes
, selectDocs
, nodeNodesCategory
, nodeNodesScore
, getNodeNode
, insertNodeNode
, deleteNodeNode
, selectPublicNodes
, selectCountDocs
)
where
import Control.Arrow (returnA)
import Control.Lens (view, (^.))
import Data.Maybe (catMaybes)
import Data.Text (Text, splitOn)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import qualified Database.PostgreSQL.Simple as PGS (Query, Only(..))
import qualified Opaleye as O
import Opaleye
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Schema.NodeNode
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Prelude
import Gargantext.Database.Schema.Node
import Gargantext.Prelude
queryNodeNodeTable :: Query NodeNodeRead
queryNodeNodeTable = queryTable nodeNodeTable
-- | Not optimized (gets all node-node pairs without filters)
_nodesNodes :: Cmd err [NodeNode]
_nodesNodes = runOpaQuery queryNodeNodeTable
------------------------------------------------------------------------
-- | Basic NodeNode tools
getNodeNode :: NodeId -> Cmd err [NodeNode]
getNodeNode n = runOpaQuery (selectNodeNode $ pgNodeId n)
where
selectNodeNode :: Column PGInt4 -> Query NodeNodeRead
selectNodeNode n' = proc () -> do
ns <- queryNodeNodeTable -< ()
restrict -< _nn_node1_id ns .== n'
returnA -< ns
------------------------------------------------------------------------
-- TODO (refactor with Children)
{-
getNodeNodeWith :: NodeId -> proxy a -> Maybe NodeType -> Cmd err [a]
getNodeNodeWith pId _ maybeNodeType = runOpaQuery query
where
query = selectChildren pId maybeNodeType
selectChildren :: ParentId
-> Maybe NodeType
-> Query NodeRead
selectChildren parentId maybeNodeType = proc () -> do
row@(Node nId typeName _ parent_id _ _ _) <- queryNodeTable -< ()
(NodeNode _ n1id n2id _ _) <- queryNodeNodeTable -< ()
let nodeType = maybe 0 toDBid maybeNodeType
restrict -< typeName .== pgInt4 nodeType
restrict -< (.||) (parent_id .== (pgNodeId parentId))
( (.&&) (n1id .== pgNodeId parentId)
(n2id .== nId))
returnA -< row
-}
------------------------------------------------------------------------
insertNodeNode :: [NodeNode] -> Cmd err Int
insertNodeNode ns = mkCmd $ \conn -> fromIntegral <$> (runInsert_ conn
$ Insert nodeNodeTable ns' rCount (Just DoNothing))
where
ns' :: [NodeNodeWrite]
ns' = map (\(NodeNode n1 n2 x y)
-> NodeNode (pgNodeId n1)
(pgNodeId n2)
(pgDouble <$> x)
(pgInt4 <$> y)
) ns
------------------------------------------------------------------------
type Node1_Id = NodeId
type Node2_Id = NodeId
deleteNodeNode :: Node1_Id -> Node2_Id -> Cmd err Int
deleteNodeNode n1 n2 = mkCmd $ \conn ->
fromIntegral <$> runDelete conn nodeNodeTable
(\(NodeNode n1_id n2_id _ _) -> n1_id .== pgNodeId n1
.&& n2_id .== pgNodeId n2 )
------------------------------------------------------------------------
-- | Favorite management
_nodeNodeCategory :: CorpusId -> DocId -> Int -> Cmd err [Int]
_nodeNodeCategory cId dId c = map (\(PGS.Only a) -> a) <$> runPGSQuery favQuery (c,cId,dId)
where
favQuery :: PGS.Query
favQuery = [sql|UPDATE nodes_nodes SET category = ?
WHERE node1_id = ? AND node2_id = ?
RETURNING node2_id;
|]
nodeNodesCategory :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
nodeNodesCategory inputData = map (\(PGS.Only a) -> a)
<$> runPGSQuery catQuery (PGS.Only $ Values fields inputData)
where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","int4"]
catQuery :: PGS.Query
catQuery = [sql| UPDATE nodes_nodes as nn0
SET category = nn1.category
FROM (?) as nn1(node1_id,node2_id,category)
WHERE nn0.node1_id = nn1.node1_id
AND nn0.node2_id = nn1.node2_id
RETURNING nn1.node2_id
|]
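-- A hedged usage sketch (added): flag two documents of a corpus as favourites
-- (category 2) in a single query. The helper name is illustrative only.
--
-- > markFavourites :: CorpusId -> DocId -> DocId -> Cmd err [Int]
-- > markFavourites corpusId doc1 doc2 =
-- >   nodeNodesCategory [ (corpusId, doc1, 2)
-- >                     , (corpusId, doc2, 2) ]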
------------------------------------------------------------------------
-- | Score management
_nodeNodeScore :: CorpusId -> DocId -> Int -> Cmd err [Int]
_nodeNodeScore cId dId c = map (\(PGS.Only a) -> a) <$> runPGSQuery scoreQuery (c,cId,dId)
where
scoreQuery :: PGS.Query
scoreQuery = [sql|UPDATE nodes_nodes SET score = ?
WHERE node1_id = ? AND node2_id = ?
RETURNING node2_id;
|]
nodeNodesScore :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
nodeNodesScore inputData = map (\(PGS.Only a) -> a)
<$> runPGSQuery catScore (PGS.Only $ Values fields inputData)
where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","int4"]
catScore :: PGS.Query
catScore = [sql| UPDATE nodes_nodes as nn0
SET score = nn1.score
FROM (?) as nn1(node1_id, node2_id, score)
WHERE nn0.node1_id = nn1.node1_id
AND nn0.node2_id = nn1.node2_id
RETURNING nn1.node2_id
|]
------------------------------------------------------------------------
selectCountDocs :: HasDBid NodeType => CorpusId -> Cmd err Int
selectCountDocs cId = runCountOpaQuery (queryCountDocs cId)
where
queryCountDocs cId' = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< nn^.nn_node1_id .== (toNullable $ pgNodeId cId')
restrict -< nn^.nn_category .>= (toNullable $ pgInt4 1)
restrict -< n^.node_typename .== (pgInt4 $ toDBid NodeDocument)
returnA -< n
-- | TODO use UTCTime fast
selectDocsDates :: HasDBid NodeType => CorpusId -> Cmd err [Text]
selectDocsDates cId = map (head' "selectDocsDates" . splitOn "-")
<$> catMaybes
<$> map (view hd_publication_date)
<$> selectDocs cId
selectDocs :: HasDBid NodeType => CorpusId -> Cmd err [HyperdataDocument]
selectDocs cId = runOpaQuery (queryDocs cId)
queryDocs :: HasDBid NodeType => CorpusId -> O.Query (Column PGJsonb)
queryDocs cId = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< nn^.nn_node1_id .== (toNullable $ pgNodeId cId)
restrict -< nn^.nn_category .>= (toNullable $ pgInt4 1)
restrict -< n^.node_typename .== (pgInt4 $ toDBid NodeDocument)
returnA -< view (node_hyperdata) n
selectDocNodes :: HasDBid NodeType =>CorpusId -> Cmd err [Node HyperdataDocument]
selectDocNodes cId = runOpaQuery (queryDocNodes cId)
queryDocNodes :: HasDBid NodeType =>CorpusId -> O.Query NodeRead
queryDocNodes cId = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< nn^.nn_node1_id .== (toNullable $ pgNodeId cId)
restrict -< nn^.nn_category .>= (toNullable $ pgInt4 1)
restrict -< n^.node_typename .== (pgInt4 $ toDBid NodeDocument)
returnA -< n
joinInCorpus :: O.Query (NodeRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeTable queryNodeNodeTable cond
where
cond :: (NodeRead, NodeNodeRead) -> Column PGBool
cond (n, nn) = nn^.nn_node2_id .== (view node_id n)
joinOn1 :: O.Query (NodeRead, NodeNodeReadNull)
joinOn1 = leftJoin queryNodeTable queryNodeNodeTable cond
where
cond :: (NodeRead, NodeNodeRead) -> Column PGBool
cond (n, nn) = nn^.nn_node1_id .== n^.node_id
------------------------------------------------------------------------
selectPublicNodes :: HasDBid NodeType => (Hyperdata a, QueryRunnerColumnDefault PGJsonb a)
=> Cmd err [(Node a, Maybe Int)]
selectPublicNodes = runOpaQuery (queryWithType NodeFolderPublic)
queryWithType :: HasDBid NodeType =>NodeType -> O.Query (NodeRead, Column (Nullable PGInt4))
queryWithType nt = proc () -> do
(n, nn) <- joinOn1 -< ()
restrict -< n^.node_typename .== (pgInt4 $ toDBid nt)
returnA -< (n, nn^.nn_node2_id)
NodeNodeNgrams.hs 0000664 0000000 0000000 00000003600 14124644201 0034223 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table {-|
Module : Gargantext.Database.Query.Table.NodeNodeNgrams
Description : TODO: remove this module and table in database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Table.NodeNodeNgrams
( module Gargantext.Database.Schema.NodeNodeNgrams
, queryNodeNodeNgramsTable
, insertNodeNodeNgrams
)
where
import Gargantext.Database.Admin.Types.Node (pgNodeId)
import Gargantext.Database.Prelude (Cmd, mkCmd)
import Gargantext.Database.Schema.Ngrams (pgNgramsTypeId)
import Gargantext.Database.Schema.NodeNodeNgrams
import Gargantext.Database.Schema.Prelude
import Prelude
queryNodeNodeNgramsTable :: Query NodeNodeNgramsRead
queryNodeNodeNgramsTable = queryTable nodeNodeNgramsTable
-- | Insert utils
insertNodeNodeNgrams :: [NodeNodeNgrams] -> Cmd err Int
insertNodeNodeNgrams = insertNodeNodeNgramsW
. map (\(NodeNodeNgrams n1 n2 ng nt w) ->
NodeNodeNgrams (pgNodeId n1)
(pgNodeId n2)
(pgInt4 ng)
(pgNgramsTypeId nt)
(pgDouble w)
)
insertNodeNodeNgramsW :: [NodeNodeNgramsWrite] -> Cmd err Int
insertNodeNodeNgramsW nnnw =
mkCmd $ \c -> fromIntegral <$> runInsert_ c insertNothing
where
insertNothing = (Insert { iTable = nodeNodeNgramsTable
, iRows = nnnw
, iReturning = rCount
, iOnConflict = (Just DoNothing)
})
NodeNodeNgrams2.hs 0000664 0000000 0000000 00000003271 14124644201 0034311 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table {-|
Module : Gargantext.Database.Query.Table.NodeNodeNgrams2
Description : TODO: remove this module and table in database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Table.NodeNodeNgrams2
( module Gargantext.Database.Schema.NodeNodeNgrams2
, insertNodeNodeNgrams2
)
where
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Schema.NodeNodeNgrams2
import Gargantext.Database.Admin.Types.Node (pgNodeId)
import Gargantext.Database.Prelude (Cmd, mkCmd)
import Prelude
_queryNodeNodeNgrams2Table :: Query NodeNodeNgrams2Read
_queryNodeNodeNgrams2Table = queryTable nodeNodeNgrams2Table
-- | Insert utils
insertNodeNodeNgrams2 :: [NodeNodeNgrams2] -> Cmd err Int
insertNodeNodeNgrams2 = insertNodeNodeNgrams2W
. map (\(NodeNodeNgrams2 n1 n2 w) ->
NodeNodeNgrams2 (pgNodeId n1)
(pgInt4 n2)
(pgDouble w)
)
insertNodeNodeNgrams2W :: [NodeNodeNgrams2Write] -> Cmd err Int
insertNodeNodeNgrams2W nnnw =
mkCmd $ \c -> fromIntegral <$> runInsert_ c insertNothing
where
insertNothing = Insert { iTable = nodeNodeNgrams2Table
, iRows = nnnw
, iReturning = rCount
, iOnConflict = (Just DoNothing)
}
Node_NodeNgramsNodeNgrams.hs 0000664 0000000 0000000 00000005167 14124644201 0036352 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table {-|
Module : Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
lgrams: listed ngrams
The Node_NodeNgrams_NodeNgrams table is used to group ngrams:
- first NodeId :: referential / space node (corpus)
- NodeNgrams where the Node is a List
- lgrams1_id, lgrams2_id: all lgrams2_id will be added to lgrams1_id
- weight: scores the relation
Next step, benchmark:
- recursive queries of postgres
- grouping with: https://en.wikipedia.org/wiki/Nested_set_model
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
( module Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
, insert_Node_NodeNgrams_NodeNgrams
)
where
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Prelude (Cmd, runOpaQuery, mkCmd)
import Gargantext.Database.Admin.Types.Node (pgNodeId)
import Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
import Gargantext.Database.Schema.Node()
import Gargantext.Prelude
queryNode_NodeNgrams_NodeNgrams_Table :: Query Node_NodeNgrams_NodeNgrams_Read
queryNode_NodeNgrams_NodeNgrams_Table = queryTable node_NodeNgrams_NodeNgrams_Table
-- | Select NodeNgramsNgrams
-- TODO not optimized (get all ngrams without filters)
_node_Node_NodeNgrams_NodeNgrams :: Cmd err [Node_NodeNgrams_NodeNgrams]
_node_Node_NodeNgrams_NodeNgrams = runOpaQuery queryNode_NodeNgrams_NodeNgrams_Table
-- TODO: Add option on conflict
insert_Node_NodeNgrams_NodeNgrams :: [Node_NodeNgrams_NodeNgrams] -> Cmd err Int64
insert_Node_NodeNgrams_NodeNgrams = insert_Node_NodeNgrams_NodeNgrams_W
. map (\(Node_NodeNgrams_NodeNgrams n ng1 ng2 maybeWeight) ->
Node_NodeNgrams_NodeNgrams (pgNodeId n )
(pgInt4 <$> ng1)
(pgInt4 ng2)
(pgDouble <$> maybeWeight)
)
insert_Node_NodeNgrams_NodeNgrams_W :: [Node_NodeNgrams_NodeNgrams_Write] -> Cmd err Int64
insert_Node_NodeNgrams_NodeNgrams_W ns =
mkCmd $ \c -> runInsert_ c Insert { iTable = node_NodeNgrams_NodeNgrams_Table
, iRows = ns
, iReturning = rCount
, iOnConflict = (Just DoNothing)
}
NodesNgramsRepo.hs 0000664 0000000 0000000 00000002443 14124644201 0034432 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table {-|
Module : Gargantext.Database.Query.Table.NodesNgramsRepo
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Table.NodesNgramsRepo
( module Gargantext.Database.Schema.NodesNgramsRepo
)
where
import Gargantext.Database.Schema.Prelude
import Gargantext.API.Ngrams (NgramsStatePatch)
import Gargantext.Database.Schema.NodesNgramsRepo
import Gargantext.Database.Prelude (mkCmd, Cmd, runOpaQuery)
import Gargantext.Prelude
selectPatches :: Query RepoDbRead
selectPatches = proc () -> do
repos <- queryTable repoTable -< ()
returnA -< repos
_selectRepo :: Cmd err [RepoDbNgrams]
_selectRepo = runOpaQuery selectPatches
_insertRepos :: [NgramsStatePatch] -> Cmd err Int64
_insertRepos ns = mkCmd $ \conn -> runInsert_ conn $ Insert repoTable (toWrite ns) rCount Nothing
where
toWrite :: [NgramsStatePatch] -> [RepoDbWrite]
toWrite = undefined
--ns' = map (\(RepoDbNgrams v ps) -> RepoDbWrite (pgInt4 v) (pgJSONB ps)) ns
User.hs 0000664 0000000 0000000 00000011466 14124644201 0032307 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Table {-|
Module : Gargantext.Database.Query.Table.User
Description : User Database management tools
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Functions to deal with users, database side.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
module Gargantext.Database.Query.Table.User
( insertUsers
, toUserWrite
, deleteUsers
, updateUserDB
, queryUserTable
, getUser
, insertNewUsers
, selectUsersLightWith
, userWithUsername
, userWithId
, userLightWithId
, getUsersWith
, getUsersWithId
, module Gargantext.Database.Schema.User
)
where
import Control.Arrow (returnA)
import Data.List (find)
import Data.Text (Text)
import Data.Time (UTCTime)
import Gargantext.Core.Types.Individu
import qualified Gargantext.Prelude.Crypto.Auth as Auth
import Gargantext.Database.Schema.User
import Gargantext.Database.Prelude
import Gargantext.Prelude
import Opaleye
------------------------------------------------------------------------
-- TODO: on conflict, nice message
insertUsers :: [UserWrite] -> Cmd err Int64
insertUsers us = mkCmd $ \c -> runInsert_ c insert
where
insert = Insert userTable us rCount Nothing
deleteUsers :: [Username] -> Cmd err Int64
deleteUsers us = mkCmd $ \c -> runDelete c userTable
(\user -> in_ (map pgStrictText us) (user_username user))
-- Updates email or password only (for now)
updateUserDB :: UserWrite -> Cmd err Int64
updateUserDB us = mkCmd $ \c -> runUpdate_ c (updateUserQuery us)
where
updateUserQuery :: UserWrite -> Update Int64
updateUserQuery us' = Update
{ uTable = userTable
, uUpdateWith = updateEasy (\ (UserDB _id _p ll su un fn ln _em is ia dj)
-> UserDB _id p' ll su un fn ln em' is ia dj
)
, uWhere = (\row -> user_username row .== un')
, uReturning = rCount
}
where
UserDB _ p' _ _ un' _ _ em' _ _ _ = us'
-----------------------------------------------------------------------
toUserWrite :: NewUser HashPassword -> UserWrite
toUserWrite (NewUser u m (Auth.PasswordHash p)) =
UserDB (Nothing) (pgStrictText p)
(Nothing) (pgBool True) (pgStrictText u)
(pgStrictText "first_name")
(pgStrictText "last_name")
(pgStrictText m)
(pgBool True)
(pgBool True) Nothing
------------------------------------------------------------------
getUsersWith :: Username -> Cmd err [UserLight]
getUsersWith u = map toUserLight <$> runOpaQuery (selectUsersLightWith u)
selectUsersLightWith :: Username -> Query UserRead
selectUsersLightWith u = proc () -> do
row <- queryUserTable -< ()
restrict -< user_username row .== pgStrictText u
returnA -< row
----------------------------------------------------------
getUsersWithId :: Int -> Cmd err [UserLight]
getUsersWithId i = map toUserLight <$> runOpaQuery (selectUsersLightWithId i)
where
selectUsersLightWithId :: Int -> Query UserRead
selectUsersLightWithId i' = proc () -> do
row <- queryUserTable -< ()
restrict -< user_id row .== pgInt4 i'
returnA -< row
queryUserTable :: Query UserRead
queryUserTable = queryTable userTable
------------------------------------------------------------------
-- | Select User with some parameters
-- Not optimized version
userWith :: (Eq a1, Foldable t) => (a -> a1) -> a1 -> t a -> Maybe a
userWith f t xs = find (\x -> f x == t) xs
-- | Select User with Username
userWithUsername :: Text -> [UserDB] -> Maybe UserDB
userWithUsername t xs = userWith user_username t xs
userWithId :: Int -> [UserDB] -> Maybe UserDB
userWithId t xs = userWith user_id t xs
userLightWithUsername :: Text -> [UserLight] -> Maybe UserLight
userLightWithUsername t xs = userWith userLight_username t xs
userLightWithId :: Int -> [UserLight] -> Maybe UserLight
userLightWithId t xs = userWith userLight_id t xs
----------------------------------------------------------------------
users :: Cmd err [UserDB]
users = runOpaQuery queryUserTable
usersLight :: Cmd err [UserLight]
usersLight = map toUserLight <$> users
getUser :: Username -> Cmd err (Maybe UserLight)
getUser u = userLightWithUsername u <$> usersLight
----------------------------------------------------------------------
insertNewUsers :: [NewUser GargPassword] -> Cmd err Int64
insertNewUsers newUsers = do
users' <- liftBase $ mapM toUserHash newUsers
insertUsers $ map toUserWrite users'
----------------------------------------------------------------------
instance QueryRunnerColumnDefault PGTimestamptz (Maybe UTCTime) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Tree.hs 0000664 0000000 0000000 00000032546 14124644201 0031322 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Query.Tree
Description : Tree of Resource Nodes built from Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Given a root Node, return the Tree of that Node as a directed acyclic graph
(Tree).
-- TODO delete node, if not owned, then suppress the link only
-- see Action/Delete.hs
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Tree
( module Gargantext.Database.Query.Tree.Error
, isDescendantOf
, isIn
, tree
, TreeMode(..)
, findNodesId
, DbTreeNode(..)
, dt_name
, dt_nodeId
, dt_typeId
, findShared
, findNodes
, findNodesWithType
, NodeMode(..)
, sharedTreeUpdate
, dbTree
, updateTree
)
where
import Control.Lens (view, toListOf, at, each, _Just, to, set, makeLenses)
import Control.Monad.Error.Class (MonadError())
import Data.List (tail, concat, nub)
import qualified Data.List as List
import Data.Map (Map, fromListWith, lookup)
-- import Data.Monoid (mconcat)
import Data.Proxy
-- import qualified Data.Set as Set
import Data.Text (Text)
import Database.PostgreSQL.Simple
import Database.PostgreSQL.Simple.SqlQQ
import Gargantext.Prelude
import Gargantext.Core
import Gargantext.Core.Types.Main (NodeTree(..), Tree(..))
import Gargantext.Database.Admin.Config (fromNodeTypeId, nodeTypeId, fromNodeTypeId)
import Gargantext.Database.Admin.Types.Hyperdata.Any (HyperdataAny)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Query.Table.Node (getNodeWith)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.NodeNode (getNodeNode)
import Gargantext.Database.Query.Tree.Error
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Database.Schema.NodeNode (NodeNodePoly(..))
------------------------------------------------------------------------
data DbTreeNode = DbTreeNode { _dt_nodeId :: NodeId
, _dt_typeId :: Int
, _dt_parentId :: Maybe NodeId
, _dt_name :: Text
} deriving (Show)
makeLenses ''DbTreeNode
instance Eq DbTreeNode where
(==) d1 d2 = (==) (_dt_nodeId d1) (_dt_nodeId d2)
------------------------------------------------------------------------
data TreeMode = TreeBasic | TreeAdvanced | TreeFirstLevel
-- | Returns the Tree of Nodes in Database
tree :: (HasTreeError err, HasNodeError err)
=> TreeMode
-> RootId
-> [NodeType]
-> Cmd err (Tree NodeTree)
tree TreeBasic = tree_basic
tree TreeAdvanced = tree_advanced
tree TreeFirstLevel = tree_first_level
-- | Tree basic returns the Tree of Nodes in Database
-- (without shared folders)
-- kept here for teaching purposes only
tree_basic :: (HasTreeError err, HasNodeError err)
=> RootId
-> [NodeType]
-> Cmd err (Tree NodeTree)
tree_basic r nodeTypes =
(dbTree r nodeTypes <&> toTreeParent) >>= toTree
-- Same as (but easier to read) :
-- toTree =<< (toTreeParent <$> dbTree r nodeTypes)
-- | Advanced mode of the Tree enables shared nodes
tree_advanced :: (HasTreeError err, HasNodeError err)
=> RootId
-> [NodeType]
-> Cmd err (Tree NodeTree)
tree_advanced r nodeTypes = do
-- let rPrefix s = "[tree_advanced] root = " <> show r <> " " <> s
mainRoot <- findNodes r Private nodeTypes
-- printDebug (rPrefix "mainRoot") mainRoot
publicRoots <- findNodes r Public nodeTypes
-- printDebug (rPrefix "publicRoots") publicRoots
sharedRoots <- findNodes r Shared nodeTypes
-- printDebug (rPrefix "sharedRoots") sharedRoots
toTree $ toTreeParent (mainRoot <> sharedRoots <> publicRoots)
-- | Fetch only first level of tree
tree_first_level :: (HasTreeError err, HasNodeError err)
=> RootId
-> [NodeType]
-> Cmd err (Tree NodeTree)
tree_first_level r nodeTypes = do
-- let rPrefix s = mconcat [ "[tree_first_level] root = "
-- , show r
-- , ", nodeTypes = "
-- , show nodeTypes
-- , " "
-- , s ]
mainRoot <- findNodes r Private nodeTypes
-- printDebug (rPrefix "mainRoot") mainRoot
publicRoots <- findNodes r PublicDirect nodeTypes
-- printDebug (rPrefix "publicRoots") publicRoots
sharedRoots <- findNodes r SharedDirect nodeTypes
-- printDebug (rPrefix "sharedRoots") sharedRoots
ret <- toTree $ toSubtreeParent r (mainRoot <> sharedRoots <> publicRoots)
-- printDebug (rPrefix "tree") ret
pure ret
------------------------------------------------------------------------
data NodeMode = Private | Shared | Public | SharedDirect | PublicDirect
findNodes :: (HasTreeError err, HasNodeError err)
=> RootId
-> NodeMode
-> [NodeType]
-> Cmd err [DbTreeNode]
findNodes r Private nt = dbTree r nt
findNodes r Shared nt = findShared r NodeFolderShared nt sharedTreeUpdate
findNodes r SharedDirect nt = findSharedDirect r NodeFolderShared nt sharedTreeUpdate
findNodes r Public nt = findShared r NodeFolderPublic nt publicTreeUpdate
findNodes r PublicDirect nt = findSharedDirect r NodeFolderPublic nt publicTreeUpdate
------------------------------------------------------------------------
-- | Collaborative Nodes in the Tree
-- Queries the `nodes_nodes` table.
findShared :: HasTreeError err
=> RootId -> NodeType -> [NodeType] -> UpdateTree err
-> Cmd err [DbTreeNode]
findShared r nt nts fun = do
foldersSharedId <- findNodesId r [nt]
trees <- mapM (updateTree nts fun) foldersSharedId
pure $ concat trees
-- | Find shared folders with "direct" access, i.e. when fetching only
-- first-level subcomponents. This works in a simplified manner: fetch the node
-- and get the tree for its parent.
findSharedDirect :: (HasTreeError err, HasNodeError err)
=> RootId -> NodeType -> [NodeType] -> UpdateTree err
-> Cmd err [DbTreeNode]
findSharedDirect r nt nts fun = do
-- let rPrefix s = mconcat [ "[findSharedDirect] r = "
-- , show r
-- , ", nt = "
-- , show nt
-- , ", nts = "
-- , show nts
-- , " "
-- , s ]
parent <- getNodeWith r (Proxy :: Proxy HyperdataAny)
let mParent = _node_parent_id parent
case mParent of
Nothing -> pure []
Just parentId -> do
foldersSharedId <- findNodesId parentId [nt]
-- printDebug (rPrefix "foldersSharedId") foldersSharedId
trees <- mapM (updateTree nts fun) foldersSharedId
-- printDebug (rPrefix "trees") trees
pure $ concat trees
type UpdateTree err = ParentId -> [NodeType] -> NodeId -> Cmd err [DbTreeNode]
updateTree :: HasTreeError err
=> [NodeType] -> UpdateTree err -> RootId
-> Cmd err [DbTreeNode]
updateTree nts fun r = do
folders <- getNodeNode r
nodesSharedId <- mapM (fun r nts)
$ map _nn_node2_id folders
pure $ concat nodesSharedId
sharedTreeUpdate :: HasTreeError err => UpdateTree err
sharedTreeUpdate p nt n = dbTree n nt
<&> map (\n' -> if (view dt_nodeId n') == n
-- && elem (fromDBid $ _dt_typeId n') [NodeGraph]
-- && not (elem (fromDBid $ _dt_typeId n') [NodeFile])
then set dt_parentId (Just p) n'
else n')
publicTreeUpdate :: HasTreeError err => UpdateTree err
publicTreeUpdate p nt n = dbTree n nt
<&> map (\n' -> if _dt_nodeId n' == n
-- && (fromDBid $ _dt_typeId n') /= NodeGraph
-- && not (elem (fromDBid $ _dt_typeId n') [NodeFile])
then set dt_parentId (Just p) n'
else n')
-- | findNodesId returns all nodes matching nodeType, except the root (NodeUser)
findNodesId :: RootId -> [NodeType] -> Cmd err [NodeId]
findNodesId r nt = tail
<$> map _dt_nodeId
<$> dbTree r nt
findNodesWithType :: RootId -> [NodeType] -> [NodeType] -> Cmd err [DbTreeNode]
findNodesWithType root target through =
filter isInTarget <$> dbTree root through
where
isInTarget n = List.elem (fromDBid $ view dt_typeId n)
$ List.nub $ target <> through
------------------------------------------------------------------------
------------------------------------------------------------------------
toTree :: ( MonadError e m
, HasTreeError e
, MonadBase IO m )
=> Map (Maybe ParentId) [DbTreeNode]
-> m (Tree NodeTree)
toTree m =
case lookup Nothing m of
Just [root] -> pure $ toTree' m root
Nothing -> treeError NoRoot
Just [] -> treeError EmptyRoot
Just _r -> treeError TooManyRoots
where
toTree' :: Map (Maybe ParentId) [DbTreeNode]
-> DbTreeNode
-> Tree NodeTree
toTree' m' root =
TreeN (toNodeTree root) $
-- Lines below are equivalent computationally but not semantically
-- m' ^.. at (Just $ _dt_nodeId root) . _Just . each . to (toTree' m')
toListOf (at (Just $ _dt_nodeId root) . _Just . each . to (toTree' m')) m'
toNodeTree :: DbTreeNode
-> NodeTree
toNodeTree (DbTreeNode nId tId _ n) = NodeTree n (fromNodeTypeId tId) nId
------------------------------------------------------------------------
toTreeParent :: [DbTreeNode]
-> Map (Maybe ParentId) [DbTreeNode]
toTreeParent = fromListWith (\a b -> nub $ a <> b) . map (\n -> (_dt_parentId n, [n]))
------------------------------------------------------------------------
-- toSubtreeParent' :: [DbTreeNode]
-- -> Map (Maybe ParentId) [DbTreeNode]
-- toSubtreeParent' ns = fromListWith (\a b -> nub $ a <> b) . map (\n -> (_dt_parentId n, [n])) $ nullifiedParents
-- where
-- nodeIds = Set.fromList $ map (\n -> unNodeId $ _dt_nodeId n) ns
-- nullifiedParents = map nullifyParent ns
-- nullifyParent dt@(DbTreeNode { _dt_parentId = Nothing }) = dt
-- nullifyParent dt@(DbTreeNode { _dt_nodeId = nId
-- , _dt_parentId = Just pId
-- , _dt_typeId = tId
-- , _dt_name = name }) =
-- if Set.member (unNodeId pId) nodeIds then
-- dt
-- else
-- DbTreeNode { _dt_nodeId = nId
-- , _dt_typeId = tId
-- , _dt_parentId = Nothing
-- , _dt_name = name }
------------------------------------------------------------------------
toSubtreeParent :: RootId
-> [DbTreeNode]
-> Map (Maybe ParentId) [DbTreeNode]
toSubtreeParent r ns = fromListWith (\a b -> nub $ a <> b) . map (\n -> (_dt_parentId n, [n])) $ nullifiedParents
where
nullifiedParents = map nullifyParent ns
nullifyParent dt@(DbTreeNode { _dt_parentId = Nothing }) = dt
nullifyParent dt@(DbTreeNode { _dt_nodeId = nId
, _dt_parentId = _pId
, _dt_typeId = tId
, _dt_name = name }) =
if r == nId then
DbTreeNode { _dt_nodeId = nId
, _dt_typeId = tId
, _dt_parentId = Nothing
, _dt_name = name }
else
dt
------------------------------------------------------------------------
-- | Main DB Tree function
dbTree :: RootId
-> [NodeType]
-> Cmd err [DbTreeNode]
dbTree rootId nodeTypes = map (\(nId, tId, pId, n) -> DbTreeNode nId tId pId n)
<$> runPGSQuery [sql|
WITH RECURSIVE
tree (id, typename, parent_id, name) AS
(
SELECT p.id, p.typename, p.parent_id, p.name
FROM nodes AS p
WHERE p.id = ?
UNION
SELECT c.id, c.typename, c.parent_id, c.name
FROM nodes AS c
INNER JOIN tree AS s ON c.parent_id = s.id
WHERE c.typename IN ?
)
SELECT * from tree;
|] (rootId, In typename)
where
typename = map nodeTypeId ns
ns = case nodeTypes of
[] -> allNodeTypes
_ -> nodeTypes
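-- Note (added): the recursive CTE above starts from the root node (p.id = ?)
-- and repeatedly joins children on parent_id, keeping only the requested
-- typenames in the recursive step; passing an empty 'nodeTypes' list
-- therefore means "all node types" thanks to the fallback to 'allNodeTypes'.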
isDescendantOf :: NodeId -> RootId -> Cmd err Bool
isDescendantOf childId rootId = (== [Only True])
<$> runPGSQuery [sql|
BEGIN ;
SET TRANSACTION READ ONLY;
COMMIT;
WITH RECURSIVE
tree (id, parent_id) AS
(
SELECT c.id, c.parent_id
FROM nodes AS c
WHERE c.id = ?
UNION
SELECT p.id, p.parent_id
FROM nodes AS p
INNER JOIN tree AS t ON t.parent_id = p.id
)
SELECT COUNT(*) = 1 from tree AS t
WHERE t.id = ?;
|] (childId, rootId)
-- TODO should we check the category?
isIn :: NodeId -> DocId -> Cmd err Bool
isIn cId docId = ( == [Only True])
<$> runPGSQuery [sql| SELECT COUNT(*) = 1
FROM nodes_nodes nn
WHERE nn.node1_id = ?
AND nn.node2_id = ?;
|] (cId, docId)
-----------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Tree/ 0000775 0000000 0000000 00000000000 14124644201 0030754 5 ustar 00root root 0000000 0000000 Error.hs 0000664 0000000 0000000 00000001715 14124644201 0032326 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Tree {-|
Module : Gargantext.Database.Query.Tree.Error
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Query.Tree.Error
where
import Control.Lens (Prism', (#))
import Control.Monad.Except (MonadError(throwError))
import Gargantext.Prelude
------------------------------------------------------------------------
data TreeError = NoRoot
| EmptyRoot
| TooManyRoots
instance Show TreeError
where
show NoRoot = "Root node not found"
show EmptyRoot = "Root node should not be empty"
show TooManyRoots = "Too many root nodes"
class HasTreeError e where
_TreeError :: Prism' e TreeError
treeError :: ( MonadError e m
, HasTreeError e )
=> TreeError
-> m a
treeError te = throwError $ _TreeError # te
Root.hs 0000664 0000000 0000000 00000011100 14124644201 0032145 0 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Query/Tree {-|
Module : Gargantext.Database.Query.Tree.Root
Description : Main requests to get root of users
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Arrows #-}
module Gargantext.Database.Query.Tree.Root
where
import Control.Arrow (returnA)
import Data.Either (Either, fromLeft, fromRight)
import Gargantext.Core
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main (CorpusName)
import Gargantext.Database.Action.Node
import Gargantext.Database.Action.User (getUserId, getUsername)
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataUser)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd, runOpaQuery)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Table.User (queryUserTable, UserPoly(..))
import Gargantext.Database.Schema.Node (NodePoly(..), NodeRead)
import Gargantext.Database.Schema.Node (queryNodeTable)
import Gargantext.Prelude
import Opaleye (restrict, (.==), Query)
import Opaleye.PGTypes (pgStrictText, pgInt4)
getRootId :: (HasNodeError err) => User -> Cmd err NodeId
getRootId u = do
maybeRoot <- head <$> getRoot u
case maybeRoot of
Nothing -> nodeError $ NodeError "[G.D.Q.T.R.getRootId] No root id"
Just r -> pure (_node_id r)
getRoot :: User -> Cmd err [Node HyperdataUser]
getRoot = runOpaQuery . selectRoot
getOrMkRoot :: (HasNodeError err)
=> User
-> Cmd err (UserId, RootId)
getOrMkRoot user = do
userId <- getUserId user
rootId' <- map _node_id <$> getRoot user
rootId'' <- case rootId' of
[] -> mkRoot user
n -> case length n >= 2 of
True -> nodeError ManyNodeUsers
False -> pure rootId'
rootId <- maybe (nodeError NoRootFound) pure (head rootId'')
pure (userId, rootId)
getOrMk_RootWithCorpus :: (HasNodeError err, MkCorpus a)
=> User
-> Either CorpusName [CorpusId]
-> Maybe a
-> Cmd err (UserId, RootId, CorpusId)
getOrMk_RootWithCorpus user cName c = do
(userId, rootId) <- getOrMkRoot user
corpusId'' <- if user == UserName userMaster
then do
ns <- getCorporaWithParentId rootId
pure $ map _node_id ns
else
pure $ fromRight [] cName
corpusId' <- if corpusId'' /= []
then pure corpusId''
else do
c' <- mk (Just $ fromLeft "Default" cName) c rootId userId
_tId <- case head c' of
Nothing -> nodeError $ NodeError "[G.D.Q.T.Root.getOrMk...] mk Corpus failed"
Just c'' -> insertDefaultNode NodeTexts c'' userId
pure c'
corpusId <- maybe (nodeError NoCorpusFound) pure (head corpusId')
pure (userId, rootId, corpusId)
mkRoot :: HasNodeError err
=> User
-> Cmd err [RootId]
mkRoot user = do
-- TODO
-- udb <- getUserDb user
-- let uid = user_id udb
uid <- getUserId user
-- TODO ? Which name for user Node ?
una <- getUsername user
case uid > 0 of
False -> nodeError NegativeId
True -> do
rs <- mkNodeWithParent NodeUser Nothing uid una
_ <- case rs of
[r] -> do
_ <- insertNode NodeFolderPrivate Nothing Nothing r uid
_ <- insertNode NodeFolderShared Nothing Nothing r uid
_ <- insertNode NodeFolderPublic Nothing Nothing r uid
pure rs
_ -> pure rs
pure rs
selectRoot :: User -> Query NodeRead
selectRoot (UserName username) = proc () -> do
row <- queryNodeTable -< ()
users <- queryUserTable -< ()
restrict -< _node_typename row .== (pgInt4 $ toDBid NodeUser)
restrict -< user_username users .== (pgStrictText username)
restrict -< _node_user_id row .== (user_id users)
returnA -< row
selectRoot (UserDBId uid) = proc () -> do
row <- queryNodeTable -< ()
restrict -< _node_typename row .== (pgInt4 $ toDBid NodeUser)
restrict -< _node_user_id row .== (pgInt4 uid)
returnA -< row
selectRoot (RootId nid) =
proc () -> do
row <- queryNodeTable -< ()
restrict -< _node_typename row .== (pgInt4 $ toDBid NodeUser)
restrict -< _node_id row .== (pgNodeId nid)
returnA -< row
selectRoot UserPublic = panic {-nodeError $ NodeError-} "[G.D.Q.T.Root.selectRoot] No root for Public"
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Schema/ 0000775 0000000 0000000 00000000000 14124644201 0030150 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Schema/Ngrams.hs0000664 0000000 0000000 00000014647 14124644201 0031747 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Schema.Ngrams
Description : Ngram connection to the Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Ngrams connection to the Database.
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.Ngrams
where
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Codec.Serialise (Serialise())
import Control.Lens (over)
import Control.Monad (mzero)
import Data.Aeson
import Data.Aeson.Types (toJSONKeyText)
import Data.Map (fromList, lookup)
import Data.Text (Text, splitOn, pack, strip)
import Gargantext.Core.Types (TODO(..), Typed(..))
import Gargantext.Prelude
import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
import Text.Read (read)
import Gargantext.Database.Types
import Gargantext.Database.Schema.Prelude
import qualified Database.PostgreSQL.Simple as PGS
import qualified Data.HashMap.Strict as HashMap
type NgramsId = Int
type Size = Int
data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
, _ngrams_terms :: !terms
, _ngrams_n :: !n
} deriving (Show)
type NgramsWrite = NgramsPoly (Maybe (Column PGInt4))
(Column PGText)
(Column PGInt4)
type NgramsRead = NgramsPoly (Column PGInt4)
(Column PGText)
(Column PGInt4)
type NgramsReadNull = NgramsPoly (Column (Nullable PGInt4))
(Column (Nullable PGText))
(Column (Nullable PGInt4))
type NgramsDB = NgramsPoly Int Text Int
$(makeAdaptorAndInstance "pNgramsDb" ''NgramsPoly)
makeLenses ''NgramsPoly
ngramsTable :: Table NgramsWrite NgramsRead
ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optional "id"
, _ngrams_terms = required "terms"
, _ngrams_n = required "n"
}
)
-- | Main Ngrams Types
-- | Typed Ngrams
-- Typed Ngrams localize the context of the ngrams:
-- ngrams in the source field of a document have the Sources type
-- ngrams in the authors field of a document have the Authors type
-- ngrams in the text fields of a document have the Terms type (i.e. either title or abstract)
data NgramsType = Authors | Institutes | Sources | NgramsTerms
deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
instance Serialise NgramsType
ngramsTypes :: [NgramsType]
ngramsTypes = [minBound..]
instance ToSchema NgramsType
{- where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
--}
newtype NgramsTypeId = NgramsTypeId Int
deriving (Eq, Show, Ord, Num)
instance ToField NgramsTypeId where
toField (NgramsTypeId n) = toField n
instance FromField NgramsTypeId where
fromField fld mdata = do
n <- fromField fld mdata
if (n :: Int) > 0 then return $ NgramsTypeId n
else mzero
instance FromJSON NgramsType
instance FromJSONKey NgramsType where
fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
instance ToJSON NgramsType
instance ToJSONKey NgramsType where
toJSONKey = toJSONKeyText (pack . show)
instance FromHttpApiData NgramsType where
parseUrlPiece n = pure $ (read . cs) n
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
pgNgramsType :: NgramsType -> Column PGInt4
pgNgramsType = pgNgramsTypeId . ngramsTypeId
pgNgramsTypeId :: NgramsTypeId -> Column PGInt4
pgNgramsTypeId (NgramsTypeId n) = pgInt4 n
ngramsTypeId :: NgramsType -> NgramsTypeId
ngramsTypeId Authors = 1
ngramsTypeId Institutes = 2
ngramsTypeId Sources = 3
ngramsTypeId NgramsTerms = 4
fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
fromNgramsTypeId id = lookup id
$ fromList [ (ngramsTypeId nt,nt)
| nt <- [minBound .. maxBound] :: [NgramsType]
]
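-- Illustrative sketch (not part of the original module): thanks to the Num and
-- Eq instances of 'NgramsTypeId', the numeric ids round-trip through
-- 'fromNgramsTypeId', while unknown ids yield Nothing.
--
-- >>> ngramsTypeId Sources == 3
-- True
-- >>> fromNgramsTypeId 3 == Just Sources
-- True
-- >>> fromNgramsTypeId 0 == Nothing
-- True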
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams
data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
, _ngramsSize :: Int
}
deriving (Generic, Show, Eq, Ord)
instance Hashable Ngrams
makeLenses ''Ngrams
instance PGS.ToRow Ngrams where
toRow (UnsafeNgrams t s) = [toField t, toField s]
instance FromField Ngrams where
fromField fld mdata = do
x <- fromField fld mdata
pure $ text2ngrams x
text2ngrams :: Text -> Ngrams
text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
where
txt' = strip txt
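-- A small illustrative example (input value hypothetical, OverloadedStrings
-- assumed): the input is stripped and the size is the number of
-- space-separated words.
--
-- >>> text2ngrams "  machine learning "
-- UnsafeNgrams {_ngramsTerms = "machine learning", _ngramsSize = 2}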
------------------------------------------------------------------------
-------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams
-- Named entities are typed ngrams of the NgramsTerms type.
data NgramsT a =
NgramsT { _ngramsType :: NgramsType
, _ngramsT :: a
} deriving (Generic, Show, Eq, Ord)
makeLenses ''NgramsT
instance Functor NgramsT where
fmap = over ngramsT
-----------------------------------------------------------------------
withMap :: HashMap Text NgramsId -> Text -> NgramsId
withMap m n = maybe (panic $ "[G.D.S.Ngrams.withMap] Should not happen" <> (cs $ show n))
identity (HashMap.lookup n m)
indexNgramsT :: HashMap Text NgramsId -> NgramsT Ngrams -> NgramsT (Indexed Int Ngrams)
indexNgramsT = fmap . indexNgramsWith . withMap
-- | TODO replace NgramsT with Typed NgramsType Ngrams
indexTypedNgrams :: HashMap Text NgramsId
-> Typed NgramsType Ngrams
-> Typed NgramsType (Indexed Int Ngrams)
indexTypedNgrams = fmap . indexNgramsWith . withMap
indexNgrams :: HashMap Text NgramsId -> Ngrams -> Indexed Int Ngrams
indexNgrams = indexNgramsWith . withMap
indexNgramsWith :: (Text -> NgramsId) -> Ngrams -> Indexed Int Ngrams
indexNgramsWith f n = Indexed (f $ _ngramsTerms n) n
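-- A minimal sketch of the indexing helpers above (the map and the id 42 are
-- hypothetical, OverloadedStrings assumed): given a Text -> NgramsId map coming
-- from the database, each ngram gets paired with its id.
--
-- >>> let m = HashMap.fromList [("machine learning", 42)]
-- >>> indexNgrams m (text2ngrams "machine learning")
-- Indexed {_index = 42, _unIndex = UnsafeNgrams {_ngramsTerms = "machine learning", _ngramsSize = 2}}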
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Schema/NgramsPostag.hs 0000664 0000000 0000000 00000007136 14124644201 0033041 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Schema.NgramsPostag
Description : Ngram connection to the Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Each Ngrams has a POS-tagged (part-of-speech) version, used to ease the
default grouping of ngrams in NgramsTerm lists.
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NgramsPostag
where
import Control.Lens
import Data.Text (Text)
import Gargantext.Database.Schema.Prelude
import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as PGS
data NgramsPostagPoly id
lang_id
algo_id
postag
ngrams_id
lemm_id
score
= NgramsPostagPoly { _ngramsPostag_id :: !id
, _ngramsPostag_lang_id :: !lang_id
, _ngramsPostag_algo_id :: !algo_id
, _ngramsPostag_postag :: !postag
, _ngramsPostag_ngrams_id :: !ngrams_id
, _ngramsPostag_lemm_id :: !lemm_id
, _ngramsPostag_score :: !score
} deriving (Show)
------------------------------------------------------------------------
data PosTag = PosTag { unPosTag :: !Text }
| NER { unNER :: !Text } -- TODO
------------------------------------------------------------------------
-- type NgramsPostag = NgramsPostagPoly (Maybe Int) Lang PostTagAlgo (Maybe PosTag) NgramsTerm NgramsTerm (Maybe Int)
type NgramsPostagDB = NgramsPostagPoly (Maybe Int) Int Int (Maybe Text) Int Int Int
------------------------------------------------------------------------
type NgramsPosTagWrite = NgramsPostagPoly (Maybe (Column PGInt4))
(Column PGInt4)
(Column PGInt4)
(Maybe (Column PGText))
(Column PGInt4)
(Column PGInt4)
(Maybe (Column PGInt4))
type NgramsPosTagRead = NgramsPostagPoly (Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGText)
(Column PGInt4)
(Column PGInt4)
(Column PGInt4)
type NgramsPosTagReadNull = NgramsPostagPoly (Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGText))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
makeLenses ''NgramsPostagPoly
instance PGS.ToRow NgramsPostagDB where
toRow (NgramsPostagPoly f0 f1 f2 f3 f4 f5 f6) = [ toField f0
, toField f1
, toField f2
, toField f3
, toField f4
, toField f5
, toField f6
]
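{- A hedged sketch (all ids below are hypothetical placeholders, OverloadedStrings
   assumed) of how a row of this table can be built and marshalled; the seven
   fields line up with the ToRow instance above:

     row :: NgramsPostagDB
     row = NgramsPostagPoly Nothing 1 1 (Just "NN") 10 11 1

     length (toRow row) == 7
-}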
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Schema/Node.hs 0000664 0000000 0000000 00000015131 14124644201 0031372 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Schema.Node
Description : Main requests of Node to the database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Database.Schema.Node where
import Control.Lens hiding (elements, (&))
import Gargantext.Database.Schema.Prelude
import Prelude hiding (null, id, map, sum)
------------------------------------------------------------------------
-- Main polymorphic Node definition
data NodePoly id
hash_id
typename
user_id
parent_id
name
date
hyperdata =
Node { _node_id :: !id
, _node_hash_id :: !hash_id
, _node_typename :: !typename
, _node_user_id :: !user_id
, _node_parent_id :: !parent_id
, _node_name :: !name
, _node_date :: !date
, _node_hyperdata :: !hyperdata
} deriving (Show, Generic)
------------------------------------------------------------------------
-- Automatic instances derivation
$(deriveJSON (unPrefix "_node_") ''NodePoly)
$(makeLenses ''NodePoly)
$(makeAdaptorAndInstance "pNode" ''NodePoly)
$(makeLensesWith abbreviatedFields ''NodePoly)
nodeTable :: Table NodeWrite NodeRead
nodeTable = Table "nodes" (pNode Node { _node_id = optional "id"
, _node_hash_id = optional "hash_id"
, _node_typename = required "typename"
, _node_user_id = required "user_id"
, _node_parent_id = optional "parent_id"
, _node_name = required "name"
, _node_date = optional "date"
, _node_hyperdata = required "hyperdata"
-- ignoring ts_vector field here
}
)
queryNodeTable :: Query NodeRead
queryNodeTable = queryTable nodeTable
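-- A hedged sketch (not part of the original module) of how 'queryNodeTable' is
-- typically combined with 'restrict' in an Opaleye arrow, mirroring selectors
-- such as 'selectRoot' in the Query modules; 'someTypeId' is a hypothetical
-- parameter:
--
-- @
-- selectNodesByType :: Int -> Query NodeRead
-- selectNodesByType someTypeId = proc () -> do
--   row <- queryNodeTable -< ()
--   restrict -< _node_typename row .== pgInt4 someTypeId
--   returnA -< row
-- @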
------------------------------------------------------------------------
type NodeWrite = NodePoly (Maybe (Column PGInt4) )
(Maybe (Column PGText) )
(Column PGInt4)
(Column PGInt4)
(Maybe (Column PGInt4) )
(Column PGText)
(Maybe (Column PGTimestamptz))
(Column PGJsonb)
type NodeRead = NodePoly (Column PGInt4 )
(Column PGText )
(Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGText )
(Column PGTimestamptz )
(Column PGJsonb )
type NodeReadNull = NodePoly (Column (Nullable PGInt4))
(Column (Nullable PGText))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGText))
(Column (Nullable PGTimestamptz))
(Column (Nullable PGJsonb))
------------------------------------------------------------------------
-- | Node(Read|Write)Search is slower than Node(Read|Write); use it
-- for full-text search only.
type NodeSearchWrite =
NodePolySearch
(Maybe (Column PGInt4) )
(Column PGInt4 )
(Column PGInt4 )
(Column (Nullable PGInt4) )
(Column PGText )
(Maybe (Column PGTimestamptz))
(Column PGJsonb )
(Maybe (Column PGTSVector) )
type NodeSearchRead =
NodePolySearch
(Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column (Nullable PGInt4 ))
(Column PGText )
(Column PGTimestamptz )
(Column PGJsonb )
(Column PGTSVector )
type NodeSearchReadNull =
NodePolySearch
(Column (Nullable PGInt4) )
(Column (Nullable PGInt4) )
(Column (Nullable PGInt4) )
(Column (Nullable PGInt4) )
(Column (Nullable PGText) )
(Column (Nullable PGTimestamptz))
(Column (Nullable PGJsonb) )
(Column (Nullable PGTSVector) )
data NodePolySearch id
typename
user_id
parent_id
name
date
hyperdata
search =
NodeSearch { _ns_id :: id
, _ns_typename :: typename
, _ns_user_id :: user_id
-- , nodeUniqId :: shaId
, _ns_parent_id :: parent_id
, _ns_name :: name
, _ns_date :: date
, _ns_hyperdata :: hyperdata
, _ns_search :: search
} deriving (Show, Generic)
$(makeAdaptorAndInstance "pNodeSearch" ''NodePolySearch)
$(makeLensesWith abbreviatedFields ''NodePolySearch)
$(deriveJSON (unPrefix "_ns_") ''NodePolySearch)
$(makeLenses ''NodePolySearch)
nodeTableSearch :: Table NodeSearchWrite NodeSearchRead
nodeTableSearch = Table "nodes" ( pNodeSearch
NodeSearch { _ns_id = optional "id"
, _ns_typename = required "typename"
, _ns_user_id = required "user_id"
, _ns_parent_id = required "parent_id"
, _ns_name = required "name"
, _ns_date = optional "date"
, _ns_hyperdata = required "hyperdata"
, _ns_search = optional "search"
}
)
------------------------------------------------------------------------
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Schema/NodeNgrams.hs 0000664 0000000 0000000 00000007204 14124644201 0032465 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Schema.NodeNgrams
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
NodeNgrams registers the context of Ngrams (then named Cgrams).
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NodeNgrams where
import Data.Text (Text)
import Gargantext.Core.Types
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
data NodeNgramsPoly id
node_id'
node_subtype
ngrams_id
ngrams_type
ngrams_field
ngrams_tag
ngrams_class
weight
= NodeNgrams { _nng_id :: !id
, _nng_node_id :: !node_id'
, _nng_node_subtype :: !node_subtype
, _nng_ngrams_id :: !ngrams_id
, _nng_ngrams_type :: !ngrams_type
, _nng_ngrams_field :: !ngrams_field
, _nng_ngrams_tag :: !ngrams_tag
, _nng_ngrams_class :: !ngrams_class
, _nng_ngrams_weight :: !weight
} deriving (Show, Eq, Ord)
{-
type NodeNgramsWrite = NodeNgramsPoly (Maybe (Column (PGInt4)))
(Column (PGInt4))
(Maybe (Column (PGInt4)))
(Column (PGInt4))
(Maybe (Column (PGInt4)))
(Maybe (Column (PGInt4)))
(Maybe (Column (PGInt4)))
(Maybe (Column (PGInt4)))
(Maybe (Column (PGFloat8)))
type NodeNodeRead = NodeNgramsPoly (Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGFloat8)
type NodeNgramsReadNull = NodeNgramsPoly (Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGFloat8))
-}
type NodeNgramsId = Int
type NgramsId = Int
type NgramsField = Int
type NgramsTag = Int
type NgramsClass = Int
type NgramsText = Text
-- Example of list Ngrams
-- type ListNgrams = NodeNgramsPoly (Maybe Int) ListType Text
type NodeNgramsW =
NodeNgramsPoly (Maybe NodeNgramsId) NodeId ListType NgramsText
NgramsType (Maybe NgramsField) (Maybe NgramsTag) (Maybe NgramsClass)
Double
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Schema/NodeNode.hs 0000664 0000000 0000000 00000005527 14124644201 0032131 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Schema.NodeNode
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Schema for the nodes_nodes table, which links two nodes together with an
optional score and category.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NodeNode where
import Gargantext.Core.Types
import Gargantext.Database.Schema.Prelude
import Gargantext.Prelude
data NodeNodePoly node1_id node2_id score cat
= NodeNode { _nn_node1_id :: !node1_id
, _nn_node2_id :: !node2_id
, _nn_score :: !score
, _nn_category :: !cat
} deriving (Show)
type NodeNodeWrite = NodeNodePoly (Column (PGInt4))
(Column (PGInt4))
(Maybe (Column (PGFloat8)))
(Maybe (Column (PGInt4)))
type NodeNodeRead = NodeNodePoly (Column (PGInt4))
(Column (PGInt4))
(Column (PGFloat8))
(Column (PGInt4))
type NodeNodeReadNull = NodeNodePoly (Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGFloat8))
(Column (Nullable PGInt4))
type NodeNode = NodeNodePoly NodeId NodeId (Maybe Double) (Maybe Int)
$(makeAdaptorAndInstance "pNodeNode" ''NodeNodePoly)
makeLenses ''NodeNodePoly
nodeNodeTable :: Table NodeNodeWrite NodeNodeRead
nodeNodeTable =
Table "nodes_nodes"
( pNodeNode
NodeNode { _nn_node1_id = required "node1_id"
, _nn_node2_id = required "node2_id"
, _nn_score = optional "score"
, _nn_category = optional "category"
}
)
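-- A hedged sketch of a write-side row for this table (node ids hypothetical):
-- the score is left to its default, the category is set explicitly.
--
-- @
-- exampleLink :: NodeNodeWrite
-- exampleLink = NodeNode { _nn_node1_id = pgInt4 1
--                        , _nn_node2_id = pgInt4 2
--                        , _nn_score    = Nothing
--                        , _nn_category = Just (pgInt4 1)
--                        }
-- @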
instance QueryRunnerColumnDefault (Nullable PGInt4) Int where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault (Nullable PGFloat8) Int where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault (Nullable PGFloat8) Double where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Schema/NodeNodeNgrams.hs 0000664 0000000 0000000 00000004757 14124644201 0033305 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Schema.NodeNodeNgrams
Description : TODO: remove this module and table in database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NodeNodeNgrams
where
import Prelude
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Schema.Ngrams (NgramsTypeId, NgramsId)
import Gargantext.Database.Admin.Types.Node
data NodeNodeNgramsPoly n1 n2 ngrams_id ngt w
= NodeNodeNgrams { _nnng_node1_id :: !n1
, _nnng_node2_id :: !n2
, _nnng_ngrams_id :: !ngrams_id
, _nnng_ngramsType :: !ngt
, _nnng_weight :: !w
} deriving (Show)
type NodeNodeNgramsWrite =
NodeNodeNgramsPoly (Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
type NodeNodeNgramsRead =
NodeNodeNgramsPoly (Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
type NodeNodeNgramsReadNull =
NodeNodeNgramsPoly (Column (Nullable PGInt4 ))
(Column (Nullable PGInt4 ))
(Column (Nullable PGInt4 ))
(Column (Nullable PGInt4 ))
(Column (Nullable PGFloat8))
type NodeNodeNgrams =
NodeNodeNgramsPoly CorpusId DocId NgramsId NgramsTypeId Double
$(makeAdaptorAndInstance "pNodeNodeNgrams" ''NodeNodeNgramsPoly)
makeLenses ''NodeNodeNgramsPoly
nodeNodeNgramsTable :: Table NodeNodeNgramsWrite NodeNodeNgramsRead
nodeNodeNgramsTable = Table "node_node_ngrams"
( pNodeNodeNgrams NodeNodeNgrams
{ _nnng_node1_id = required "node1_id"
, _nnng_node2_id = required "node2_id"
, _nnng_ngrams_id = required "ngrams_id"
, _nnng_ngramsType = required "ngrams_type"
, _nnng_weight = required "weight"
}
)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Schema/NodeNodeNgrams2.hs 0000664 0000000 0000000 00000004031 14124644201 0033350 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Schema.NodeNodeNgrams2
Description : TODO: remove this module and table in database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NodeNodeNgrams2
where
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Schema.NodeNgrams (NodeNgramsId)
import Gargantext.Database.Admin.Types.Node
import Prelude
data NodeNodeNgrams2Poly node_id nodengrams_id w
= NodeNodeNgrams2 { _nnng2_node_id :: !node_id
, _nnng2_nodengrams_id :: !nodengrams_id
, _nnng2_weight :: !w
} deriving (Show)
type NodeNodeNgrams2Write =
NodeNodeNgrams2Poly (Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
type NodeNodeNgrams2Read =
NodeNodeNgrams2Poly (Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
type NodeNodeNgrams2ReadNull =
NodeNodeNgrams2Poly (Column (Nullable PGInt4 ))
(Column (Nullable PGInt4 ))
(Column (Nullable PGFloat8))
type NodeNodeNgrams2 =
NodeNodeNgrams2Poly DocId NodeNgramsId Double
$(makeAdaptorAndInstance "pNodeNodeNgrams2" ''NodeNodeNgrams2Poly)
makeLenses ''NodeNodeNgrams2Poly
nodeNodeNgrams2Table :: Table NodeNodeNgrams2Write NodeNodeNgrams2Read
nodeNodeNgrams2Table = Table "node_node_ngrams2"
( pNodeNodeNgrams2 NodeNodeNgrams2
{ _nnng2_node_id = required "node_id"
, _nnng2_nodengrams_id = required "nodengrams_id"
, _nnng2_weight = required "weight"
}
)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Schema/Node_NodeNgramsNodeNgrams.hs 0000664 0000000 0000000 00000005511 14124644201 0035407 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Schema.Node_NodeNgrams_NodeNgrams
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
lgrams: listed ngrams
The Node_NodeNgrams_NodeNgrams table is used to group ngrams:
- first NodeId :: referential / space node (corpus)
- NodeNgrams where the Node is a List
- lgrams1_id, lgrams2_id: all lgrams2_id are grouped under (added to) lgrams1_id
- weight: scores the relation
Next step, benchmark:
- recursive queries in PostgreSQL
- grouping with: https://en.wikipedia.org/wiki/Nested_set_model
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
where
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Schema.Node()
import Gargantext.Prelude
data Node_NodeNgrams_NodeNgrams_Poly node_id nng1_id nng2_id weight =
Node_NodeNgrams_NodeNgrams { _nnn_node_id :: !node_id
, _nnn_nng1_id :: !nng1_id
, _nnn_nng2_id :: !nng2_id
, _nnn_weight :: !weight
} deriving (Show)
type Node_NodeNgrams_NodeNgrams_Write =
Node_NodeNgrams_NodeNgrams_Poly
(Column PGInt4 )
(Maybe (Column PGInt4 ))
(Column PGInt4 )
(Maybe (Column PGFloat8))
type Node_NodeNgrams_NodeNgrams_Read =
Node_NodeNgrams_NodeNgrams_Poly
(Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
type ListNgramsId = Int
type Node_NodeNgrams_NodeNgrams =
Node_NodeNgrams_NodeNgrams_Poly CorpusId (Maybe ListNgramsId) ListNgramsId (Maybe Double)
$(makeAdaptorAndInstance "pNode_NodeNgrams_NodeNgrams"
''Node_NodeNgrams_NodeNgrams_Poly)
$(makeLensesWith abbreviatedFields
''Node_NodeNgrams_NodeNgrams_Poly)
node_NodeNgrams_NodeNgrams_Table :: Table Node_NodeNgrams_NodeNgrams_Write Node_NodeNgrams_NodeNgrams_Read
node_NodeNgrams_NodeNgrams_Table =
Table "node_nodengrams_nodengrams"
( pNode_NodeNgrams_NodeNgrams Node_NodeNgrams_NodeNgrams
{ _nnn_node_id = required "node_id"
, _nnn_nng1_id = optional "node_ngrams1_id"
, _nnn_nng2_id = required "node_ngrams2_id"
, _nnn_weight = optional "weight"
}
)
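-- A hedged sketch of a grouping row (the ngrams ids 42 and 57 and the weight
-- are hypothetical): inside a given corpus, listed ngrams 57 is grouped under
-- listed ngrams 42.
--
-- @
-- exampleGroup :: CorpusId -> Node_NodeNgrams_NodeNgrams
-- exampleGroup corpusId = Node_NodeNgrams_NodeNgrams corpusId (Just 42) 57 (Just 1.0)
-- @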
instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Schema/NodesNgramsRepo.hs 0000664 0000000 0000000 00000003325 14124644201 0033476 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Schema.NodesNgramsRepo
Description : NodeNgram for Ngram indexation or Lists
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NodesNgramsRepo
where
import Data.Map.Strict.Patch (PatchMap)
import Gargantext.Database.Schema.Prelude
import Gargantext.API.Ngrams.Types (NgramsStatePatch, NgramsTablePatch)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Prelude
data RepoDbPoly version patches
= RepoDbNgrams { _rdp_version :: !version
, _rdp_patches :: !patches
} deriving (Show)
type RepoDbWrite
= RepoDbPoly (Column PGInt4)
(Column PGJsonb)
type RepoDbRead
= RepoDbPoly (Column PGInt4)
(Column PGJsonb)
type RepoDbNgrams = RepoDbPoly Int NgramsStatePatch
$(makeAdaptorAndInstance "pRepoDbNgrams" ''RepoDbPoly)
makeLenses ''RepoDbPoly
instance QueryRunnerColumnDefault PGJsonb
(PatchMap NgramsType
(PatchMap NodeId NgramsTablePatch))
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
repoTable :: Table RepoDbWrite RepoDbRead
repoTable = Table "nodes_ngrams_repo"
(pRepoDbNgrams RepoDbNgrams
{ _rdp_version = required "version"
, _rdp_patches = required "patches"
}
)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Schema/Prelude.hs 0000664 0000000 0000000 00000003221 14124644201 0032023 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Schema.Prelude
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Database.Schema.Prelude
( module Control.Arrow
, module Control.Lens.TH
, module Data.Aeson.TH
, module Data.Profunctor.Product.TH
, module Data.Swagger
, module Database.PostgreSQL.Simple.FromField
, module Database.PostgreSQL.Simple.FromRow
, module Database.PostgreSQL.Simple.SqlQQ
, module Database.PostgreSQL.Simple.ToField
, module Database.PostgreSQL.Simple.ToRow
, module Database.PostgreSQL.Simple.Types
, module GHC.Generics
, module Gargantext.Core.Utils.Prefix
, module Opaleye
, module Opaleye.Internal.QueryArr
, module Test.QuickCheck.Arbitrary
)
where
import Control.Arrow (returnA)
import Control.Lens.TH (makeLenses, makeLensesWith, abbreviatedFields)
import Data.Aeson.TH (deriveJSON)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Swagger hiding (required, in_)
import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Opaleye hiding (FromField, readOnly)
import Opaleye.Internal.QueryArr (Query)
import Test.QuickCheck.Arbitrary hiding (vector)
import Database.PostgreSQL.Simple.FromField (FromField, fromField)
import Database.PostgreSQL.Simple.FromRow (FromRow, fromRow, field)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.ToField (toField, ToField)
import Database.PostgreSQL.Simple.ToRow (toRow)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Schema/User.hs 0000664 0000000 0000000 00000011242 14124644201 0031422 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Schema.User
Description : User Database management tools
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Functions to deal with users, database side.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.User where
import Data.Text (Text)
import Data.Time (UTCTime)
import Gargantext.Prelude
import GHC.Generics (Generic)
import Database.PostgreSQL.Simple.FromField (FromField, fromField)
import Data.Aeson.TH (deriveJSON)
import Gargantext.Database.Prelude (fromField')
import Gargantext.Core.Utils.Prefix (unPrefix)
-- FIXME PLZ : the import below leads to an error, why ?
-- import Gargantext.Database.Schema.Prelude hiding (makeLensesWith, abbreviatedFields, makeAdaptorAndInstance)
-- When FIXED : Imports to remove:
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Opaleye hiding (FromField)
------------------------------------------------------------------------
data UserLight = UserLight { userLight_id :: !Int
, userLight_username :: !Text
, userLight_email :: !Text
, userLight_password :: !Text
} deriving (Show, Generic)
toUserLight :: UserDB -> UserLight
toUserLight (UserDB id p _ _ u _ _ e _ _ _ ) = UserLight id u e p
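{- A hedged sketch (all values hypothetical, OverloadedStrings assumed, 'joined'
   a placeholder UTCTime) of the projection performed by 'toUserLight': only the
   id, username, email and password fields survive.

     toUserLight (UserDB 1 "hash" Nothing False "alice" "Alice" "A"
                         "alice@example.org" False True joined)

   gives: UserLight 1 "alice" "alice@example.org" "hash"
-}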
data UserPoly id pass llogin suser
uname fname lname
mail staff active djoined =
UserDB { user_id :: !id
, user_password :: !pass
, user_lastLogin :: !llogin
, user_isSuperUser :: !suser
, user_username :: !uname
, user_firstName :: !fname
, user_lastName :: !lname
, user_email :: !mail
, user_isStaff :: !staff
, user_isActive :: !active
, user_dateJoined :: !djoined
} deriving (Show, Generic)
type UserWrite = UserPoly (Maybe (Column PGInt4)) (Column PGText)
(Maybe (Column PGTimestamptz)) (Column PGBool)
(Column PGText) (Column PGText)
(Column PGText) (Column PGText)
(Column PGBool) (Column PGBool)
(Maybe (Column PGTimestamptz))
type UserRead = UserPoly (Column PGInt4) (Column PGText)
(Column PGTimestamptz) (Column PGBool)
(Column PGText) (Column PGText)
(Column PGText) (Column PGText)
(Column PGBool) (Column PGBool)
(Column PGTimestamptz)
type UserReadNull = UserPoly (Column (Nullable PGInt4)) (Column (Nullable PGText))
(Column (Nullable PGTimestamptz)) (Column (Nullable PGBool))
(Column (Nullable PGText)) (Column (Nullable PGText))
(Column (Nullable PGText)) (Column (Nullable PGText))
(Column (Nullable PGBool)) (Column (Nullable PGBool))
(Column (Nullable PGTimestamptz))
type UserDB = UserPoly Int Text (Maybe UTCTime) Bool Text Text Text Text Bool Bool UTCTime
$(makeAdaptorAndInstance "pUserDB" ''UserPoly)
$(makeLensesWith abbreviatedFields ''UserPoly)
userTable :: Table UserWrite UserRead
userTable = Table "auth_user"
(pUserDB UserDB { user_id = optional "id"
, user_password = required "password"
, user_lastLogin = optional "last_login"
, user_isSuperUser = required "is_superuser"
, user_username = required "username"
, user_firstName = required "first_name"
, user_lastName = required "last_name"
, user_email = required "email"
, user_isStaff = required "is_staff"
, user_isActive = required "is_active"
, user_dateJoined = optional "date_joined"
}
)
instance FromField UserLight where
fromField = fromField'
instance FromField UserDB where
fromField = fromField'
$(deriveJSON (unPrefix "userLight_") ''UserLight)
$(deriveJSON (unPrefix "user_") ''UserPoly)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Database/Types.hs 0000664 0000000 0000000 00000002110 14124644201 0030402 0 ustar 00root root 0000000 0000000 {-|
Module : Gargantext.Database.Types
Description : Specific types to manage core Gargantext types with the database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Types
where
import Data.Hashable (Hashable)
import Gargantext.Core.Text (HasText(..))
import Gargantext.Database.Schema.Prelude
import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as PGS
-- | Index memory of any type in Gargantext
data Indexed i a =
Indexed { _index :: !i
, _unIndex :: !a
}
deriving (Show, Generic, Eq, Ord)
makeLenses ''Indexed
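-- An illustrative example (values hypothetical): 'Indexed' simply pairs a
-- database index with a value, and the record selectors project it back.
--
-- >>> _index (Indexed 3 (7 :: Int))
-- 3
-- >>> _unIndex (Indexed 3 (7 :: Int))
-- 7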
----------------------------------------------------------------------
-- | Main instances
instance (FromField i, FromField a) => PGS.FromRow (Indexed i a) where
fromRow = Indexed <$> field <*> field
instance HasText a => HasText (Indexed i a)
where
hasText (Indexed _ a) = hasText a
instance (Hashable a, Hashable b) => Hashable (Indexed a b)
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Utils/ 0000775 0000000 0000000 00000000000 14124644201 0026344 5 ustar 00root root 0000000 0000000 haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/src/Gargantext/Utils/Aeson.hs 0000664 0000000 0000000 00000000363 14124644201 0027747 0 ustar 00root root 0000000 0000000 module Gargantext.Utils.Aeson where
import Data.Aeson.Types
-- This is what PureScript's Simple.JSON generics assume.
defaultTaggedObject :: SumEncoding
defaultTaggedObject = TaggedObject { tagFieldName = "type", contentsFieldName = "value" }
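-- A hedged usage sketch (the 'Status' type is hypothetical and would need
-- DeriveGeneric plus an import of GHC.Generics): plug this SumEncoding into
-- the generic Aeson options so that, e.g., @Failed 404@ encodes as
-- {"type":"Failed","value":404}, which is what Simple.JSON expects.
--
-- @
-- data Status = Ok | Failed Int deriving (Generic)
--
-- instance ToJSON Status where
--   toJSON = genericToJSON defaultOptions { sumEncoding = defaultTaggedObject }
-- instance FromJSON Status where
--   parseJSON = genericParseJSON defaultOptions { sumEncoding = defaultTaggedObject }
-- @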
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/stack.yaml 0000664 0000000 0000000 00000012440 14124644201 0024343 0 ustar 00root root 0000000 0000000 resolver:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/4.yaml
flags: {}
extra-package-dbs: []
packages:
- .
#- 'deps/patches-class'
#- 'deps/patches-map'
#- 'deps/accelerate'
#- 'deps/accelerate-utility'
docker:
enable: false
repo: 'cgenie/stack-build:lts-17.13-garg'
run-args:
- '--publish=8008:8008'
nix:
enable: false
add-gc-roots: true
shell-file: nix/stack-shell.nix
allow-newer: true
#ghc-options:
# "$everything": -haddock
extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit: 3e32ec3aca71eb326805355d3a99b9288dc342ee
# Data Mining Libs
- git: https://github.com/delanoe/data-time-segment.git
commit: 10a416b9f6c443866b36479c3441ebb3bcdeb7ef
- git: https://gitlab.iscpif.fr/gargantext/hlcm.git
commit: 6f0595d2421005837d59151a8b26eee83ebb67b5
- git: https://github.com/delanoe/hstatistics.git
commit: 90eef7604bb230644c2246eccd094d7bfefcb135
- git: https://github.com/paulrzcz/HSvm.git
commit: 3fe28b683aba5ddf05e3b5f8eced0bd05c5a29f9
# API libs
- git: https://github.com/delanoe/servant-static-th.git
commit: 8cb8aaf2962ad44d319fcea48442e4397b3c49e8
# Databases libs
- git: https://github.com/delanoe/haskell-opaleye.git #- opaleye-0.6.7002.0
commit: 63ee65d974e9d20eaaf17a2e83652175988cbb79
- git: https://github.com/delanoe/hsparql.git
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- git: https://github.com/robstewart57/rdf4h.git
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
# External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: a9d8e08a7ef82f90e29dfaced4071704a3163394
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: daeae80365250c4bd539f0a65e271f9aa37f731f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: 020f5f9b308f5c23c925aedf5fb11f8b4728fb19
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
# NP libs
#- git: https://github.com/np/servant-job.git # waiting for PR
- git: https://github.com/delanoe/servant-job.git
commit: 5b15f6ffbff6bc8e26c46206d6d227213fc1821f
#- git: https://github.com/np/patches-map
- git: https://github.com/delanoe/patches-map
commit: 76cae88f367976ff091e661ee69a5c3126b94694
#- git: https://gitlab.com/npouillard/patches-class.git
- git: https://gitlab.iscpif.fr/gargantext/patches-class.git
commit: d3e971d4e78d1dfcc853f2fb86bde1995faf22ae
# Graph libs
#- git: https://github.com/kaizhang/haskell-igraph.git
- git: https://github.com/alpmestan/haskell-igraph.git
commit: 9f55eb36639c8e0965c8bc539a57738869f33e9a
# Accelerate Linear Algebra and specific instances
# (UndecidableInstances for newer GHC version)
- git: https://gitlab.iscpif.fr/anoe/accelerate.git
commit: f5c0e0071ec7b6532f9a9cd3eb33d14f340fbcc9
- git: https://gitlab.iscpif.fr/anoe/accelerate-utility.git
commit: 83ada76e78ac10d9559af8ed6bd4064ec81308e4
- accelerate-arithmetic-1.0.0.1@sha256:555639232aa5cad411e89247b27871d09352b987a754230a288c690b6de6d888,2096
# Other dependencies (using stack resolver)
- constraints-extras-0.3.1.0@sha256:12016ebb91ad5ed2c82bf7e48c6bd6947d164d33c9dca5ac3965de1bb6c780c0,1777
- KMP-0.2.0.0@sha256:6dfbac03ef00ebd9347234732cb86a40f62ab5a80c0cc6bedb8eb51766f7df28,2562
- Unique-0.4.7.8@sha256:9661f45aa31dde119a2114566166ea38b011a45653337045ee4ced75636533c0,2067
- dependent-sum-0.7.1.0@sha256:0e419237f5b86da3659772afff9cab355c0f8d5b3fdb15a5b30e673d8dc83941,2147
- duckling-0.2.0.0@sha256:84becd4e48ee3676cdd6fe5745a77ee60e365ea730cd759610c8a7738f3eb4a6,60543
- fclabels-2.0.5@sha256:817006077632bd29e637956154aa33d3c10a59be0791c308cef955eb951b2675,4473
- full-text-search-0.2.1.4@sha256:81f6df3327e5b604f99b15e78635e5d6ca996e504c21d268a6d751d7d131aa36,6032
- fullstop-0.1.4@sha256:80a3e382ef53551bb936e7da8b2825621df0ea169af1212debcb0a90010b30c8,2044
- hgal-2.0.0.2@sha256:13d58afd0668b9cb881c612eff8488a0e289edd4bbffa893df4beee60cfeb73b,653
- json-stream-0.4.2.4@sha256:8b7f17d54a6e1e6311756270f8bcf51e91bab4300945400de66118470dcf51b9,4716
- located-base-0.1.1.1@sha256:7c6395f2b6fbf2d5f76c3514f774423838c0ea94e1c6a5530dd3c94b30c9d1c8,1904
- logging-effect-1.3.12@sha256:72d168dd09887649ba9501627219b6027cbec2d5541931555b7885b133785ce3,1679
- MissingH-1.4.3.0@sha256:32f9892ec98cd21df4f4d3ed8d95a3831ae74287ea0641d6f09b2dc6ef061d39,4859
- monoid-extras-0.5.1@sha256:438dbfd7b4dce47d8f0ca577f56caf94bd1e21391afa545cad09fe7cf2e5793d,2333
- rake-0.0.1@sha256:3380f6567fb17505d1095b7f32222c0b631fa04126ad39726c84262da99c08b3,2025
- servant-cassava-0.10.1@sha256:07e7b6ca67cf57dcb4a0041a399a25d058844505837c6479e01d62be59d01fdf,1665
- servant-flatten-0.2@sha256:276896f7c5cdec5b8f8493f6205fded0cc602d050b58fdb09a6d7c85c3bb0837,1234
- servant-mock-0.8.7@sha256:64cb3e52bbd51ab6cb25e3f412a99ea712c6c26f1efd117f01a8d1664df49c67,2306
- servant-xml-1.0.1.4@sha256:6c9f2986ac42e72fe24b794c660763a1966a18d696b34cd4f4ed15165edd4aa0,851
- stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
- xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
- xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
# need Vector.uncons
- vector-0.12.3.0@sha256:0ae2c1ba86f0077910be242ec6802cc3d7725fe7b2bea6987201aa3737b239b5,7953
haskell-gargantext-852fae2806bbb66a0fbe90c081aa562c807b4e69/version 0000775 0000000 0000000 00000001342 14124644201 0023764 0 ustar 00root root 0000000 0000000 #!/bin/bash
# Prompt
git tag
echo "Which version ?"
read VERSION
echo "Change to: ${VERSION}"
#################################################################
# Haskell
#################################################################
YAML="package.yaml"
sed -i "s/version:.*/version: \'$VERSION\'/" $YAML
git add -u
git commit -m "[VERSION] +1 to ${VERSION}"
git tag $VERSION
#################################################################
# Purescript
#################################################################
JSON="purescript-gargantext/package.json"
sed -i "s/\"version\": \".*\"/\"version\": \"${VERSION}\"/" $JSON
cd purescript-gargantext
git add -u
git commit -m "[VERSION] +1 to ${VERSION}"
git tag $VERSION
cd ..