Commit da3a6888 authored by qlobbe's avatar qlobbe

Merge branch 'dev' into dev-phylo

parents 3e2a42ab db0f6f08
...@@ -13,7 +13,11 @@ Institute of Paris Île-de-France (ISC-PIF) and its partners. ...@@ -13,7 +13,11 @@ Institute of Paris Île-de-France (ISC-PIF) and its partners.
## Installation ## Installation
curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/install | sh ### Docker
curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devops/docker-install | sh
### Debian
curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devops/debian-install | sh
## Use Cases ## Use Cases
......
...@@ -24,14 +24,13 @@ import qualified Data.Vector as V ...@@ -24,14 +24,13 @@ import qualified Data.Vector as V
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Search import Gargantext.Text.Search
import Gargantext.Text.Parsers.CSV import qualified Gargantext.Text.Parsers.CSV as CSV
------------------------------------------------------------------------ ------------------------------------------------------------------------
type Query = [S.Term] type Query = [S.Term]
filterDocs :: [DocId] -> Vector Doc -> Vector Doc filterDocs :: [DocId] -> Vector CSV.CsvGargV3 -> Vector CSV.CsvGargV3
filterDocs docIds = V.filter (\doc -> S.member (d_docId doc) $ S.fromList docIds ) filterDocs docIds = V.filter (\doc -> S.member (CSV.d_docId doc) $ S.fromList docIds )
main :: IO () main :: IO ()
...@@ -41,17 +40,17 @@ main = do ...@@ -41,17 +40,17 @@ main = do
--let q = ["water", "scarcity", "morocco", "shortage","flood"] --let q = ["water", "scarcity", "morocco", "shortage","flood"]
let q = ["gratuit", "gratuité", "culture", "culturel"] let q = ["gratuit", "gratuité", "culture", "culturel"]
(h,csvDocs) <- readCsv rPath (h,csvDocs) <- CSV.readFile rPath
putStrLn $ "Number of documents before:" <> show (V.length csvDocs) putStrLn $ "Number of documents before:" <> show (V.length csvDocs)
putStrLn $ "Mean size of docs:" <> show ( docsSize csvDocs) putStrLn $ "Mean size of docs:" <> show ( CSV.docsSize csvDocs)
let docs = toDocs csvDocs let docs = CSV.toDocs csvDocs
let engine = insertDocs docs initialDocSearchEngine let engine = insertDocs docs initialDocSearchEngine
let docIds = S.query engine (map pack q) let docIds = S.query engine (map pack q)
let docs' = fromDocs $ filterDocs docIds (V.fromList docs) let docs' = CSV.fromDocs $ filterDocs docIds (V.fromList docs)
putStrLn $ "Number of documents after:" <> show (V.length docs') putStrLn $ "Number of documents after:" <> show (V.length docs')
putStrLn $ "Mean size of docs:" <> show (docsSize docs') putStrLn $ "Mean size of docs:" <> show (CSV.docsSize docs')
writeCsv wPath (h, docs') CSV.writeFile wPath (h, docs')
...@@ -57,7 +57,7 @@ import Gargantext.Core.Types ...@@ -57,7 +57,7 @@ import Gargantext.Core.Types
import Gargantext.Text.Terms import Gargantext.Text.Terms
import Gargantext.Text.Context import Gargantext.Text.Context
import Gargantext.Text.Terms.WithList import Gargantext.Text.Terms.WithList
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year) import Gargantext.Text.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.List.CSV (csvGraphTermList) import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Terms (terms) import Gargantext.Text.Terms (terms)
import Gargantext.Text.Metrics.Count (coocOnContexts, Coocs) import Gargantext.Text.Metrics.Count (coocOnContexts, Coocs)
...@@ -105,7 +105,7 @@ main = do ...@@ -105,7 +105,7 @@ main = do
. DV.toList . DV.toList
. DV.map (\n -> (csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)])) . DV.map (\n -> (csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)]))
. snd . snd
<$> readCsv corpusFile <$> readFile corpusFile
-- termListMap :: [Text] -- termListMap :: [Text]
termList <- csvGraphTermList termListFile termList <- csvGraphTermList termListFile
......
...@@ -27,7 +27,7 @@ import Options.Generic ...@@ -27,7 +27,7 @@ import Options.Generic
import Data.Text (unpack) import Data.Text (unpack)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.API (startGargantext, startGargantextMock) import Gargantext.API (startGargantext) -- , startGargantextMock)
-------------------------------------------------------- --------------------------------------------------------
-- Graph Tests -- Graph Tests
...@@ -73,7 +73,9 @@ main = do ...@@ -73,7 +73,9 @@ main = do
myIniFile' = case myIniFile of myIniFile' = case myIniFile of
Nothing -> panic "[ERROR] gargantext.ini needed" Nothing -> panic "[ERROR] gargantext.ini needed"
Just i -> i Just i -> i
_ -> startGargantextMock myPort' Dev -> panic "[ERROR] Dev mode unsupported"
Mock -> panic "[ERROR] Mock mode unsupported"
-- _ -> startGargantextMock myPort'
putStrLn $ "Starting with " <> show myMode <> " mode." putStrLn $ "Starting with " <> show myMode <> " mode."
start start
......
#!/bin/bash
# Build the Haddock API documentation for this project's own packages.
# --no-haddock-deps skips (re)generating docs for every dependency,
# which keeps the build fast.
stack haddock --no-haddock-deps
#!/bin/bash
# Debian installation script for haskell-gargantext:
# installs system dependencies, the Stack toolchain, clones the project
# and its in-house dependency forks, builds it, then applies server-side
# locale/timezone and PostgreSQL configuration.

# Make sure git is available before cloning anything.
if git --version;
then
echo "git installed, ok"
else
sudo apt update && sudo apt install git
fi

sudo apt update
# Native libraries required to build the Haskell dependencies
# (lzma, pcre, BLAS/LAPACK, GSL, bz2), plus PostgreSQL 9.6 and nginx.
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-9.6 nginx

# Optional dedicated-user setup, kept disabled.
#echo "Which user?"
#read USER
#sudo adduser --disabled-password --gecos "" $USER
#sudo su $USER

# Install and refresh the Stack build tool.
curl -sSL https://get.haskellstack.org/ | sh
stack update
stack upgrade

# Fetch the backend, the frontend, and the locally patched forks that the
# build expects under ./deps.
git clone https://gitlab.iscpif.fr/gargantext/haskell-gargantext.git
cd haskell-gargantext
git clone https://gitlab.iscpif.fr/gargantext/purescript-gargantext
mkdir deps
cd deps
git clone https://gitlab.iscpif.fr/gargantext/clustering-louvain.git
git clone https://github.com/np/servant-job.git
git clone https://github.com/np/patches-map
git clone https://gitlab.com/npouillard/patches-class.git
git clone https://github.com/delanoe/haskell-opaleye
git clone -b next --single-branch https://github.com/delanoe/hsparql
cd ..

# Build and install the project binaries.
stack setup
stack build
stack install

# Specific to our servers
### Configure timezone and locale ###################################
echo "########### LOCALES & TZ #################"
echo "Europe/Paris" > /etc/timezone
dpkg-reconfigure --frontend=noninteractive tzdata
#ENV TZ "Europe/Paris"

# Enable the en_GB/fr_FR UTF-8 locales and default to fr_FR.
# NOTE: the next six lines are ONE &&-chained command (trailing
# backslashes) — do not insert anything between them.
sed -i -e 's/# en_GB.UTF-8 UTF-8/en_GB.UTF-8 UTF-8/' /etc/locale.gen && \
sed -i -e 's/# fr_FR.UTF-8 UTF-8/fr_FR.UTF-8 UTF-8/' /etc/locale.gen && \
locale-gen && \
update-locale LANG=fr_FR.UTF-8 && \
update-locale LANGUAGE=fr_FR.UTF-8 && \
update-locale LC_ALL=fr_FR.UTF-8

################################################################
# Database configuration
# CREATE USER gargantua WITH PASSWORD $(grep DB_PASS gargantext.ini)
# GRANT ALL PRIVILEGES ON DATABASE gargandbV4 to gargantua
#######################################################################
## POSTGRESQL DATA (as ROOT)
#######################################################################
# Relocate PostgreSQL's data directory to /srv/gargandata and open the
# server to remote, password-authenticated (md5) connections.
sed -iP "s%^data_directory.*%data_directory = \'\/srv\/gargandata\'%" /etc/postgresql/9.6/main/postgresql.conf
echo "host all all 0.0.0.0/0 md5" >> /etc/postgresql/9.6/main/pg_hba.conf
echo "listen_addresses='*'" >> /etc/postgresql/9.6/main/postgresql.conf
#!/bin/bash
# Docker-oriented installation helper: installs git (if missing) and the
# native libraries required to build haskell-gargantext.
if git --version;
then
echo "git installed, ok"
else
sudo apt update && sudo apt install git
fi
sudo apt update
# Same dependency list as the Debian installer, with libpq-dev added for
# the PostgreSQL client library headers.
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-9.6 nginx libpq-dev
jq < repo.json '.state |= map_values(map_values(length)) | .history |= length'
name: gargantext name: gargantext
version: '4.0.0.4' version: '4.0.0.5'
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -40,7 +40,6 @@ library: ...@@ -40,7 +40,6 @@ library:
- Gargantext.Database - Gargantext.Database
- Gargantext.Database.Flow - Gargantext.Database.Flow
- Gargantext.Database.Schema.Node - Gargantext.Database.Schema.Node
- Gargantext.Database.Cooc
- Gargantext.Database.Tree - Gargantext.Database.Tree
- Gargantext.Database.Types.Node - Gargantext.Database.Types.Node
- Gargantext.Database.Utils - Gargantext.Database.Utils
...@@ -102,6 +101,7 @@ library: ...@@ -102,6 +101,7 @@ library:
- conduit-extra - conduit-extra
- containers - containers
- contravariant - contravariant
- crawlerPubMed
- data-time-segment - data-time-segment
- directory - directory
- duckling - duckling
...@@ -169,6 +169,7 @@ library: ...@@ -169,6 +169,7 @@ library:
- servant-swagger - servant-swagger
- servant-swagger-ui - servant-swagger-ui
- servant-static-th - servant-static-th
- servant-cassava
- serialise - serialise
- split - split
- stemmer - stemmer
......
...@@ -56,7 +56,7 @@ import Network.Wai.Handler.Warp hiding (defaultSettings) ...@@ -56,7 +56,7 @@ import Network.Wai.Handler.Warp hiding (defaultSettings)
import Servant import Servant
import Servant.HTML.Blaze (HTML) import Servant.HTML.Blaze (HTML)
import Servant.Mock (mock) --import Servant.Mock (mock)
--import Servant.Job.Server (WithCallbacks) --import Servant.Job.Server (WithCallbacks)
import Servant.Static.TH.Internal.Server (fileTreeToServer) import Servant.Static.TH.Internal.Server (fileTreeToServer)
import Servant.Static.TH.Internal.FileTree (fileTypeToFileTree, FileType(FileTypeFile)) import Servant.Static.TH.Internal.FileTree (fileTypeToFileTree, FileType(FileTypeFile))
...@@ -66,23 +66,24 @@ import Servant.Swagger.UI ...@@ -66,23 +66,24 @@ import Servant.Swagger.UI
import Text.Blaze.Html (Html) import Text.Blaze.Html (Html)
--import Gargantext.API.Swagger --import Gargantext.API.Swagger
import Gargantext.Prelude
import Gargantext.Core.Types (HasInvalidError(..))
import Gargantext.API.FrontEnd (FrontEndAPI, frontEndServer)
--import Gargantext.Database.Node.Contact (HyperdataContact)
import Gargantext.API.Auth (AuthRequest, AuthResponse, auth) import Gargantext.API.Auth (AuthRequest, AuthResponse, auth)
import Gargantext.API.Ngrams (HasRepo(..), HasRepoSaver(..), saveRepo) import Gargantext.API.Count ( CountAPI, count, Query)
import Gargantext.API.Types import Gargantext.API.FrontEnd (FrontEndAPI, frontEndServer)
import Gargantext.API.Ngrams (HasRepo(..), HasRepoSaver(..), saveRepo, TableNgramsApi, apiNgramsTableDoc)
import Gargantext.API.Node import Gargantext.API.Node
import Gargantext.API.Search ( SearchAPI, search, SearchQuery)
import Gargantext.API.Types
import Gargantext.API.Upload
import Gargantext.Core.Types (HasInvalidError(..))
import Gargantext.Database.Facet
import Gargantext.Database.Schema.Node (HasNodeError(..), NodeError) import Gargantext.Database.Schema.Node (HasNodeError(..), NodeError)
--import Gargantext.Database.Node.Contact (HyperdataContact)
import Gargantext.Database.Types.Node
import Gargantext.Database.Utils (HasConnection)
import Gargantext.Database.Tree (HasTreeError(..), TreeError) import Gargantext.Database.Tree (HasTreeError(..), TreeError)
import Gargantext.Database.Types.Node
import Gargantext.Database.Types.Node (NodeId, CorpusId, AnnuaireId) import Gargantext.Database.Types.Node (NodeId, CorpusId, AnnuaireId)
import Gargantext.API.Count ( CountAPI, count, Query) import Gargantext.Database.Utils (HasConnection)
import Gargantext.API.Search ( SearchAPI, search, SearchQuery) import Gargantext.Prelude
import Gargantext.Database.Facet
import Gargantext.Viz.Graph.API import Gargantext.Viz.Graph.API
--import Gargantext.API.Orchestrator --import Gargantext.API.Orchestrator
...@@ -144,7 +145,7 @@ fireWall req fw = do ...@@ -144,7 +145,7 @@ fireWall req fw = do
then pure True then pure True
else pure False else pure False
{-
-- makeMockApp :: Env -> IO (Warp.Settings, Application) -- makeMockApp :: Env -> IO (Warp.Settings, Application)
makeMockApp :: MockEnv -> IO Application makeMockApp :: MockEnv -> IO Application
makeMockApp env = do makeMockApp env = do
...@@ -177,7 +178,7 @@ makeMockApp env = do ...@@ -177,7 +178,7 @@ makeMockApp env = do
--pure (warpS, logWare $ checkOriginAndHost $ corsMiddleware $ serverApp) --pure (warpS, logWare $ checkOriginAndHost $ corsMiddleware $ serverApp)
pure $ logStdoutDev $ checkOriginAndHost $ corsMiddleware $ serverApp pure $ logStdoutDev $ checkOriginAndHost $ corsMiddleware $ serverApp
-}
makeDevMiddleware :: IO Middleware makeDevMiddleware :: IO Middleware
...@@ -248,6 +249,10 @@ type GargAPI' = ...@@ -248,6 +249,10 @@ type GargAPI' =
:<|> "annuaire":> Summary "Annuaire endpoint" :<|> "annuaire":> Summary "Annuaire endpoint"
:> Capture "id" AnnuaireId :> NodeAPI HyperdataAnnuaire :> Capture "id" AnnuaireId :> NodeAPI HyperdataAnnuaire
-- Document endpoint
:<|> "document":> Summary "Document endpoint"
:> Capture "id" DocId :> "ngrams" :> TableNgramsApi
-- Corpus endpoint -- Corpus endpoint
:<|> "nodes" :> Summary "Nodes endpoint" :<|> "nodes" :> Summary "Nodes endpoint"
:> ReqBody '[JSON] [NodeId] :> NodesAPI :> ReqBody '[JSON] [NodeId] :> NodesAPI
...@@ -274,6 +279,8 @@ type GargAPI' = ...@@ -274,6 +279,8 @@ type GargAPI' =
:<|> "tree" :> Summary "Tree endpoint" :<|> "tree" :> Summary "Tree endpoint"
:> Capture "id" NodeId :> TreeAPI :> Capture "id" NodeId :> TreeAPI
:<|> "upload" :> ApiUpload
-- :<|> "scraper" :> WithCallbacks ScraperAPI -- :<|> "scraper" :> WithCallbacks ScraperAPI
...@@ -310,11 +317,13 @@ serverGargAPI -- orchestrator ...@@ -310,11 +317,13 @@ serverGargAPI -- orchestrator
:<|> nodeAPI (Proxy :: Proxy HyperdataAny) fakeUserId :<|> nodeAPI (Proxy :: Proxy HyperdataAny) fakeUserId
:<|> nodeAPI (Proxy :: Proxy HyperdataCorpus) fakeUserId :<|> nodeAPI (Proxy :: Proxy HyperdataCorpus) fakeUserId
:<|> nodeAPI (Proxy :: Proxy HyperdataAnnuaire) fakeUserId :<|> nodeAPI (Proxy :: Proxy HyperdataAnnuaire) fakeUserId
:<|> apiNgramsTableDoc
:<|> nodesAPI :<|> nodesAPI
:<|> count -- TODO: undefined :<|> count -- TODO: undefined
:<|> search :<|> search
:<|> graphAPI -- TODO: mock :<|> graphAPI -- TODO: mock
:<|> treeAPI :<|> treeAPI
:<|> upload
-- :<|> orchestrator -- :<|> orchestrator
where where
fakeUserId = 1 -- TODO fakeUserId = 1 -- TODO
...@@ -331,16 +340,16 @@ swaggerFront :: Server SwaggerFrontAPI ...@@ -331,16 +340,16 @@ swaggerFront :: Server SwaggerFrontAPI
swaggerFront = schemaUiServer swaggerDoc swaggerFront = schemaUiServer swaggerDoc
:<|> frontEndServer :<|> frontEndServer
gargMock :: Server GargAPI --gargMock :: Server GargAPI
gargMock = mock apiGarg Proxy --gargMock = mock apiGarg Proxy
--------------------------------------------------------------------- ---------------------------------------------------------------------
makeApp :: (HasConnection env, HasRepo env, HasSettings env) makeApp :: (HasConnection env, HasRepo env, HasSettings env)
=> env -> IO Application => env -> IO Application
makeApp = fmap (serve api) . server makeApp = fmap (serve api) . server
appMock :: Application --appMock :: Application
appMock = serve api (swaggerFront :<|> gargMock :<|> serverStatic) --appMock = serve api (swaggerFront :<|> gargMock :<|> serverStatic)
--------------------------------------------------------------------- ---------------------------------------------------------------------
api :: Proxy API api :: Proxy API
...@@ -405,9 +414,10 @@ startGargantext port file = do ...@@ -405,9 +414,10 @@ startGargantext port file = do
mid <- makeDevMiddleware mid <- makeDevMiddleware
run port (mid app) `finally` stopGargantext env run port (mid app) `finally` stopGargantext env
{-
startGargantextMock :: PortNumber -> IO () startGargantextMock :: PortNumber -> IO ()
startGargantextMock port = do startGargantextMock port = do
portRouteInfo port portRouteInfo port
application <- makeMockApp . MockEnv $ FireWall False application <- makeMockApp . MockEnv $ FireWall False
run port application run port application
-}
...@@ -35,6 +35,9 @@ import Gargantext.Core.Utils.Prefix (unPrefix) ...@@ -35,6 +35,9 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Utils import Gargantext.Database.Utils
import Gargantext.Core.Types (CorpusId) import Gargantext.Core.Types (CorpusId)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.NTree
import Gargantext.Database.Flow
import Gargantext.Viz.Chart import Gargantext.Viz.Chart
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
...@@ -66,7 +69,6 @@ instance Arbitrary Metric ...@@ -66,7 +69,6 @@ instance Arbitrary Metric
deriveJSON (unPrefix "metrics_") ''Metrics deriveJSON (unPrefix "metrics_") ''Metrics
deriveJSON (unPrefix "m_") ''Metric deriveJSON (unPrefix "m_") ''Metric
------------------------------------------------------------- -------------------------------------------------------------
data ChartMetrics a = ChartMetrics { chartMetrics_data :: a } data ChartMetrics a = ChartMetrics { chartMetrics_data :: a }
...@@ -89,24 +91,30 @@ instance Arbitrary Histo ...@@ -89,24 +91,30 @@ instance Arbitrary Histo
deriveJSON (unPrefix "histo_") ''Histo deriveJSON (unPrefix "histo_") ''Histo
instance ToSchema MyTree
instance Arbitrary MyTree
where
arbitrary = MyTree <$> arbitrary <*> arbitrary <*> arbitrary
-- TODO add start / end -- TODO add start / end
getChart :: CorpusId -> Maybe UTCTime -> Maybe UTCTime -> Cmd err (ChartMetrics Histo) getChart :: CorpusId -> Maybe UTCTime -> Maybe UTCTime -> Cmd err (ChartMetrics Histo)
getChart cId _start _end = do getChart cId _start _end = do
h <- histoData cId h <- histoData cId
pure (ChartMetrics h) pure (ChartMetrics h)
getPie :: FlowCmdM env err m => CorpusId -> Maybe UTCTime -> Maybe UTCTime -> TabType -> m (ChartMetrics Histo)
getPie cId _start _end tt = do
p <- pieData cId (ngramsTypeFromTabType tt) GraphTerm
pure (ChartMetrics p)
getTree :: FlowCmdM env err m => CorpusId -> Maybe UTCTime -> Maybe UTCTime -> TabType -> ListType -> m (ChartMetrics [MyTree])
getTree cId _start _end tt lt = do
p <- treeData cId (ngramsTypeFromTabType tt) lt
pure (ChartMetrics p)
{-
data FacetChart = FacetChart { facetChart_time :: UTCTime'
, facetChart_count :: Double
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "facetChart_") ''FacetChart)
instance ToSchema FacetChart
instance Arbitrary FacetChart where
arbitrary = FacetChart <$> arbitrary <*> arbitrary
-}
This diff is collapsed.
{-|
Module : Gargantext.API.Ngrams.NTree
Description : Tree of Ngrams
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Ngrams.NTree
where
import Data.Text (Text)
import Gargantext.Prelude
import GHC.Generics (Generic)
import Data.Aeson.TH (deriveJSON)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Core.Types (ListType(..), NodeId)
import Gargantext.API.Ngrams
import Data.Tree
import Data.Maybe (catMaybes)
import Data.Map (Map)
import Data.Set (Set)
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List
-- | Label of a child node.
type Children = Text
-- | Label of a root node.
type Root = Text

-- | A labelled rose tree with a numeric weight on every node, used as
-- the API's tree payload. JSON instances are derived with the "mt_"
-- prefix stripped (see the 'deriveJSON' splice in this module).
data MyTree = MyTree { mt_label    :: Text
                     , mt_value    :: Double
                     , mt_children :: [MyTree]
                     } deriving (Generic, Show)
-- | Convert a 'Data.Tree' rose tree of (label, weight) pairs into the
-- 'MyTree' representation served by the API.
toMyTree :: Tree (Text,Double) -> MyTree
toMyTree (Node (label, weight) children) =
  MyTree label weight (toMyTree <$> children)
deriveJSON (unPrefix "mt_") ''MyTree
-- | Build the forest of terms whose root terms belong to list type 'lt'.
--
--   * 'vs' maps a term to the set of 'NodeId's associated with it; a
--     term's weight is the size of that set (0 when absent).
--   * 'm' maps a term to its 'NgramsRepoElement', whose '_nre_root',
--     '_nre_list' and '_nre_children' fields are read here.
toTree :: ListType -> Map Text (Set NodeId) -> Map Text NgramsRepoElement -> [MyTree]
toTree lt vs m = map toMyTree $ unfoldForest buildNode roots
  where
    -- Seed for 'unfoldForest': a term's (label, weight) plus the labels
    -- of its children; no children when the term is absent from 'm'.
    buildNode r = maybe ((r, value r),[]) (\x -> ((r, value r), mSetToList $ _nre_children x)) (Map.lookup r m)
    -- Weight of a term: number of associated nodes, 0 when unknown.
    value l = maybe 0 (fromIntegral . Set.size) $ Map.lookup l vs
    -- Every term contributes a root candidate: its own root if it has
    -- one, otherwise itself (deduplicated).
    rootsCandidates = catMaybes
                    $ List.nub
                    $ map (\(c,c') -> case _nre_root c' of
                        Nothing -> Just c
                        _ -> _nre_root c' ) (Map.toList m)
    -- Keep only the candidates whose list type matches 'lt'.
    roots = map fst
          $ filter (\(_,l) -> l == lt)
          $ catMaybes
          $ map (\c -> (,) <$> Just c <*> (_nre_list <$> Map.lookup c m)) rootsCandidates
...@@ -58,7 +58,6 @@ mapTermListRoot nodeIds ngramsType = do ...@@ -58,7 +58,6 @@ mapTermListRoot nodeIds ngramsType = do
| (t, nre) <- Map.toList ngrams | (t, nre) <- Map.toList ngrams
] ]
filterListWithRoot :: ListType -> Map Text (ListType, Maybe Text) filterListWithRoot :: ListType -> Map Text (ListType, Maybe Text)
-> Map Text (Maybe RootTerm) -> Map Text (Maybe RootTerm)
filterListWithRoot lt m = Map.fromList filterListWithRoot lt m = Map.fromList
...@@ -71,7 +70,6 @@ filterListWithRoot lt m = Map.fromList ...@@ -71,7 +70,6 @@ filterListWithRoot lt m = Map.fromList
Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r
Just (l',_) -> l' == lt Just (l',_) -> l' == lt
groupNodesByNgrams :: Map Text (Maybe RootTerm) groupNodesByNgrams :: Map Text (Maybe RootTerm)
-> Map Text (Set NodeId) -> Map Text (Set NodeId)
-> Map Text (Set NodeId) -> Map Text (Set NodeId)
...@@ -97,4 +95,3 @@ getCoocByNgrams (Diagonal diag) m = ...@@ -97,4 +95,3 @@ getCoocByNgrams (Diagonal diag) m =
False -> listToCombi identity (Map.keys m) False -> listToCombi identity (Map.keys m)
] ]
...@@ -45,11 +45,11 @@ import Data.Text (Text()) ...@@ -45,11 +45,11 @@ import Data.Text (Text())
import Data.Time (UTCTime) import Data.Time (UTCTime)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.API.Metrics import Gargantext.API.Metrics
import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, TableNgramsApiGet, tableNgramsPatch, getTableNgrams, QueryParamR) import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, apiNgramsTableCorpus, QueryParamR)
import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery) import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery)
import Gargantext.API.Types import Gargantext.API.Types
import Gargantext.Core.Types (Offset, Limit) import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Types.Main (Tree, NodeTree) import Gargantext.Core.Types.Main (Tree, NodeTree, ListType)
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),runViewAuthorsDoc) import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),runViewAuthorsDoc)
import Gargantext.Database.Node.Children (getChildren) import Gargantext.Database.Node.Children (getChildren)
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..)) import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..))
...@@ -61,6 +61,7 @@ import Gargantext.Prelude ...@@ -61,6 +61,7 @@ import Gargantext.Prelude
import Gargantext.Text.Metrics (Scored(..)) import Gargantext.Text.Metrics (Scored(..))
import Gargantext.Viz.Phylo.API (PhyloAPI, phyloAPI) import Gargantext.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Gargantext.Viz.Chart import Gargantext.Viz.Chart
import Gargantext.API.Ngrams.NTree (MyTree)
import Servant import Servant
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
...@@ -121,11 +122,9 @@ type NodeAPI a = Get '[JSON] (Node a) ...@@ -121,11 +122,9 @@ type NodeAPI a = Get '[JSON] (Node a)
-- TODO gather it -- TODO gather it
:<|> "table" :> TableApi :<|> "table" :> TableApi
:<|> "list" :> TableNgramsApi :<|> "ngrams" :> TableNgramsApi
:<|> "listGet" :> TableNgramsApiGet
:<|> "pairing" :> PairingApi :<|> "pairing" :> PairingApi
:<|> "favorites" :> FavApi :<|> "favorites" :> FavApi
:<|> "documents" :> DocsApi :<|> "documents" :> DocsApi
:<|> "search":> Summary "Node Search" :<|> "search":> Summary "Node Search"
...@@ -138,6 +137,8 @@ type NodeAPI a = Get '[JSON] (Node a) ...@@ -138,6 +137,8 @@ type NodeAPI a = Get '[JSON] (Node a)
-- VIZ -- VIZ
:<|> "metrics" :> MetricsAPI :<|> "metrics" :> MetricsAPI
:<|> "chart" :> ChartApi :<|> "chart" :> ChartApi
:<|> "pie" :> PieApi
:<|> "tree" :> TreeApi
:<|> "phylo" :> PhyloAPI :<|> "phylo" :> PhyloAPI
-- TODO-ACCESS: check userId CanRenameNode nodeId -- TODO-ACCESS: check userId CanRenameNode nodeId
...@@ -168,9 +169,9 @@ nodeAPI p uId id ...@@ -168,9 +169,9 @@ nodeAPI p uId id
-- TODO gather it -- TODO gather it
:<|> getTable id :<|> getTable id
:<|> tableNgramsPatch id :<|> apiNgramsTableCorpus id
:<|> getTableNgrams id
:<|> getPairing id :<|> getPairing id
-- :<|> getTableNgramsDoc id
:<|> favApi id :<|> favApi id
:<|> delDocs id :<|> delDocs id
...@@ -178,9 +179,11 @@ nodeAPI p uId id ...@@ -178,9 +179,11 @@ nodeAPI p uId id
:<|> getMetrics id :<|> getMetrics id
:<|> getChart id :<|> getChart id
:<|> getPie id
:<|> getTree id
:<|> phyloAPI id :<|> phyloAPI id
-- Annuaire -- Annuaire
-- :<|> upload
-- :<|> query -- :<|> query
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -264,6 +267,21 @@ type ChartApi = Summary " Chart API" ...@@ -264,6 +267,21 @@ type ChartApi = Summary " Chart API"
:> QueryParam "to" UTCTime :> QueryParam "to" UTCTime
:> Get '[JSON] (ChartMetrics Histo) :> Get '[JSON] (ChartMetrics Histo)
type PieApi = Summary " Chart API"
:> QueryParam "from" UTCTime
:> QueryParam "to" UTCTime
:> QueryParamR "ngramsType" TabType
:> Get '[JSON] (ChartMetrics Histo)
type TreeApi = Summary " Tree API"
:> QueryParam "from" UTCTime
:> QueryParam "to" UTCTime
:> QueryParamR "ngramsType" TabType
:> QueryParamR "listType" ListType
:> Get '[JSON] (ChartMetrics [MyTree])
-- Depending on the Type of the Node, we could post -- Depending on the Type of the Node, we could post
-- New documents for a corpus -- New documents for a corpus
-- New map list terms -- New map list terms
...@@ -276,7 +294,7 @@ type ChartApi = Summary " Chart API" ...@@ -276,7 +294,7 @@ type ChartApi = Summary " Chart API"
instance HasNodeError ServantErr where instance HasNodeError ServantErr where
_NodeError = prism' mk (const Nothing) -- $ panic "HasNodeError ServantErr: not a prism") _NodeError = prism' mk (const Nothing) -- panic "HasNodeError ServantErr: not a prism")
where where
e = "Gargantext NodeError: " e = "Gargantext NodeError: "
mk NoListFound = err404 { errBody = e <> "No list found" } mk NoListFound = err404 { errBody = e <> "No list found" }
...@@ -294,7 +312,7 @@ instance HasNodeError ServantErr where ...@@ -294,7 +312,7 @@ instance HasNodeError ServantErr where
-- TODO(orphan): There should be a proper APIError data type with a case TreeError. -- TODO(orphan): There should be a proper APIError data type with a case TreeError.
instance HasTreeError ServantErr where instance HasTreeError ServantErr where
_TreeError = prism' mk (const Nothing) -- $ panic "HasTreeError ServantErr: not a prism") _TreeError = prism' mk (const Nothing) -- panic "HasTreeError ServantErr: not a prism")
where where
e = "TreeError: " e = "TreeError: "
mk NoRoot = err404 { errBody = e <> "Root node not found" } mk NoRoot = err404 { errBody = e <> "Root node not found" }
...@@ -340,25 +358,7 @@ query :: Monad m => Text -> m Text ...@@ -340,25 +358,7 @@ query :: Monad m => Text -> m Text
query s = pure s query s = pure s
-- | Upload files -------------------------------------------------------------
-- TODO Is it possible to adapt the function according to iValue input ?
--upload :: MultipartData -> Handler Text
--upload multipartData = do
-- liftIO $ do
-- putStrLn "Inputs:"
-- forM_ (inputs multipartData) $ \input ->
-- putStrLn $ " " <> show (iName input)
-- <> " -> " <> show (iValue input)
--
-- forM_ (files multipartData) $ \file -> do
-- content <- readFile (fdFilePath file)
-- putStrLn $ "Content of " <> show (fdFileName file)
-- <> " at " <> fdFilePath file
-- putStrLn content
-- pure (pack "Data loaded")
-------------------------------------------------------------------------------
type MetricsAPI = Summary "SepGen IncExc metrics" type MetricsAPI = Summary "SepGen IncExc metrics"
:> QueryParam "list" ListId :> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType :> QueryParamR "ngramsType" TabType
...@@ -370,12 +370,11 @@ getMetrics cId maybeListId tabType maybeLimit = do ...@@ -370,12 +370,11 @@ getMetrics cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics' cId maybeListId tabType maybeLimit (ngs', scores) <- Metrics.getMetrics' cId maybeListId tabType maybeLimit
let let
metrics = map (\(Scored t s1 s2) -> Metric t s1 s2 (listType t ngs')) scores metrics = map (\(Scored t s1 s2) -> Metric t (log' 5 s1) (log' 2 s2) (listType t ngs')) scores
log' n x = 1 + (if x <= 0 then 0 else (log $ (10^(n::Int)) * x))
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
errorMsg = "API.Node.metrics: key absent" errorMsg = "API.Node.metrics: key absent"
pure $ Metrics metrics pure $ Metrics metrics
...@@ -71,13 +71,13 @@ data SendEmailType = SendEmailViaAws ...@@ -71,13 +71,13 @@ data SendEmailType = SendEmailViaAws
data Settings = Settings data Settings = Settings
{ _allowedOrigin :: ByteString -- ^ allowed origin for CORS { _allowedOrigin :: ByteString -- allowed origin for CORS
, _allowedHost :: ByteString -- ^ allowed host for CORS , _allowedHost :: ByteString -- allowed host for CORS
, _appPort :: PortNumber , _appPort :: PortNumber
, _logLevelLimit :: LogLevel -- ^ log level from the monad-logger package , _logLevelLimit :: LogLevel -- log level from the monad-logger package
-- , _dbServer :: Text -- , _dbServer :: Text
-- ^ this is not used yet -- ^ this is not used yet
, _jwtSecret :: Jose.Jwk -- ^ key from the jose-jwt package , _jwtSecret :: Jose.Jwk -- key from the jose-jwt package
, _sendLoginEmails :: SendEmailType , _sendLoginEmails :: SendEmailType
, _scrapydUrl :: BaseUrl , _scrapydUrl :: BaseUrl
, _fileFolder :: FilePath , _fileFolder :: FilePath
...@@ -195,22 +195,22 @@ mkRepoSaver repo_var = mkDebounce settings ...@@ -195,22 +195,22 @@ mkRepoSaver repo_var = mkDebounce settings
settings = defaultDebounceSettings settings = defaultDebounceSettings
{ debounceFreq = 1000000 -- 1 second { debounceFreq = 1000000 -- 1 second
, debounceAction = withMVar repo_var repoSaverAction , debounceAction = withMVar repo_var repoSaverAction
-- ^ Here this not only `readMVar` but `takeMVar`. -- Here this not only `readMVar` but `takeMVar`.
-- Namely while repoSaverAction is saving no other change -- Namely while repoSaverAction is saving no other change
-- can be made to the MVar. -- can be made to the MVar.
-- This might be not efficent and thus reconsidered later. -- This might be not efficent and thus reconsidered later.
-- However this enables to safely perform a *final* save. -- However this enables to safely perform a *final* save.
-- See `cleanEnv`. -- See `cleanEnv`.
-- Future work: -- Future work:
-- * Add a new MVar just for saving. -- Add a new MVar just for saving.
} }
readRepoEnv :: IO RepoEnv readRepoEnv :: IO RepoEnv
readRepoEnv = do readRepoEnv = do
-- | Does file exist ? :: Bool -- Does file exist ? :: Bool
repoFile <- doesFileExist repoSnapshot repoFile <- doesFileExist repoSnapshot
-- | Is file not empty ? :: Bool -- Is file not empty ? :: Bool
repoExists <- if repoFile repoExists <- if repoFile
then (>0) <$> getFileSize repoSnapshot then (>0) <$> getFileSize repoSnapshot
else pure False else pure False
......
{-|
Module : Gargantext.API.Upload
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Upload
where
import qualified Data.Text as Text
import GHC.Generics (Generic)
import Gargantext.Prelude
import Data.Text (Text)
import Data.Aeson
import Servant
import Servant.Multipart
--import Servant.Mock (HasMock(mock))
import Servant.Swagger (HasSwagger(toSwagger))
-- import qualified Data.ByteString.Lazy as LBS
import Control.Monad
import Control.Monad.IO.Class
import Gargantext.API.Types
--import Servant.CSV.Cassava (CSV'(..))
--import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
--import Data.Swagger
--import Gargantext.API.Ngrams (TODO)
-- | Upload files
-- TODO Is it possible to adapt the function according to iValue input ?
--type API = MultipartForm Mem (MultipartData Mem) :> Post '[JSON] Integer
-- instance Generic Mem
--instance ToSchema Mem
--instance Arbitrary Mem
--instance ToSchema (MultipartData Mem)
--instance Arbitrary ( MultipartData Mem)
-- | Swagger documentation for routes containing a multipart form.
-- NOTE(review): 'toSwagger' is left 'undefined' -- rendering the swagger
-- spec of any API that includes 'MultipartForm' will crash at runtime.
-- Replace with a real (even minimal) schema before exposing /swagger.
instance HasSwagger (MultipartForm tag a :> sub) where
  -- TODO
  toSwagger _ = undefined -- toSwagger (Proxy :: Proxy (TODO :> Post '[JSON] ()))
  --declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
--instance Arbitrary (MultipartForm Mem (MultipartData Mem))
{-
instance (FromMultipart tag a, MultipartBackend tag, Servant.Multipart.LookupContext context (MultipartOptions tag))
=> HasMock (MultipartForm tag a :> sub) context where
mock _ _ = undefined
instance HasMock (MultipartForm Mem (MultipartData Mem) :> sub) context where
mock _ _ = undefined
-}
-- | Response payload of the upload endpoint: the names of the textual
-- inputs that were received in the multipart form.
data Upload = Upload { up :: [Text] }
  deriving (Generic)

instance ToJSON Upload
-- | Upload route: accepts an in-memory ('Mem') multipart form and
-- answers with a JSON-encoded 'Text'.
type ApiUpload = MultipartForm Mem (MultipartData Mem) :> Post '[JSON] Text
-- MultipartData consists in textual inputs,
-- accessible through its "inputs" field, as well
-- as files, accessible through its "files" field.
-- | Handler for 'ApiUpload'.
--
-- Logs every textual input (name and value) and every file (name and
-- payload) of the multipart form, then returns the concatenation of the
-- input names. File payloads are logged but not stored.
upload :: GargServer ApiUpload
upload multipartData = do
  -- Collect the names of the textual inputs, logging each one.
  is <- liftIO $ do
    putStrLn ("Inputs:" :: Text)
    forM (inputs multipartData) $ \input -> do
      putStrLn $ ("iName " :: Text) <> (iName input)
            <> ("iValue " :: Text) <> (iValue input)
      pure $ iName input

  -- Log the uploaded files; results are deliberately discarded, so use
  -- forM_ instead of the original `_ <- forM` (same effects, clearer intent).
  forM_ (files multipartData) $ \file -> do
    let content = fdPayload file
    putStrLn $ ("XXX " :: Text) <> (fdFileName file)
    putStrLn $ ("YYY " :: Text) <> cs content
    --pure $ cs content

  pure $ Text.concat $ map cs is
-------------------------------------------------------------------------------
...@@ -148,38 +148,3 @@ put = U.update ...@@ -148,38 +148,3 @@ put = U.update
-- type Name = Text -- type Name = Text
--mkCorpus :: ToJSON a => Name -> (a -> Text) -> [a] -> Cmd err NewNode
--mkCorpus name title ns = do
-- pid <- home
--
-- let pid' = case lastMay pid of
-- Nothing -> printDebug "No home for" name
-- Just p -> p
--
-- let uid = 1
-- postNode uid (Just pid') ( Node' NodeCorpus name emptyObject
-- (map (\n -> Node' Document (title n) (toJSON n) []) ns)
-- )
--
---- |
---- import IMTClient as C
---- postAnnuaire "Annuaire IMT" (\n -> (maybe "" identity (C.prenom n)) <> " " <> (maybe "" identity (C.nom n))) (take 30 annuaire)
--mkAnnuaire :: ToJSON a => Name -> (a -> Text) -> [a] -> Cmd err NewNode
--mkAnnuaire name title ns = do
-- pid <- lastMay <$> home
-- let pid' = case lastMay pid of
-- Nothing -> printDebug "No home for" name
-- Just p -> p
-- let uid = 1
-- postNode uid (Just pid') ( Node' Annuaire name emptyObject
-- (map (\n -> Node' UserPage (title n) (toJSON n) []) ns)
-- )
--------------------------------------------------------------
-- |
-- myCorpus <- Prelude.map doc2hyperdataDocument <$> toDocs <$> snd <$> readCsv "doc/corpus_imt/Gargantext_Corpus_small.csv"
-- There is an error in the CSV parsing...
-- let myCorpus' = Prelude.filter (\n -> T.length (maybe "" identity (hyperdataDocument_title n)) > 30) myCorpus
-- corporaOf :: Username -> IO [Corpus]
{-|
Module : Gargantext.Database.Cooc
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
--{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.Database.Cooc where
import Database.PostgreSQL.Simple.SqlQQ
import Gargantext.Prelude
import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.API.Settings (runCmdDevNoErr, DevEnv)
-- | Identifier aliases documenting the roles of the ids fed to 'dBcooc'.
type CorpusId = Int
type MainListId = Int
type GroupListId = Int

-- | Dev-environment smoke test for 'dBcooc'.
-- NOTE(review): the three ids are hard-coded and presumably only exist in
-- one development database -- confirm before relying on this.
coocTest :: DevEnv -> IO [(Int, Int, Int)]
coocTest env = runCmdDevNoErr env $ dBcooc 421968 446602 446599
-- | Count cooccurrences of (grouped) ngrams inside a corpus.
--
-- For each document (typename 4) of @corpus@, the query pairs the ngrams
-- occurring in it, where each axis is (1) restricted to the main/white
-- list @mainList@ and (2) rewritten to its group head via @groupList@
-- (COALESCE on nodes_ngrams_ngrams.ngram1_id). Only the upper triangle
-- (ngA <= ngB) with score >= 3 is returned as (ngA, ngB, score).
-- NOTE(review): @set work_mem='1GB'@ applies to the whole session, not
-- just this statement -- confirm this is acceptable on shared servers.
dBcooc :: CorpusId -> MainListId -> GroupListId -> Cmd err [(Int, Int, Int)]
dBcooc corpus mainList groupList = runPGSQuery [sql|
  set work_mem='1GB';
  --EXPLAIN ANALYZE
  WITH COOC as (
    SELECT
      COALESCE(grA.ngram1_id, wlA.ngram_id) as ngA,
      COALESCE(grB.ngram1_id, wlB.ngram_id) as ngB,
      COUNT(*) AS score
    FROM
      nodes AS n
      -- / -- X Y
      -- SQL graph for getting the cooccurrences
      -- STEP 1: X axis of the matrix
      INNER JOIN nodes_ngrams
      AS ngA ON ngA.node_id = n.id
      -- \--> get the occurrences node/ngram of the corpus
      INNER JOIN nodes_ngrams
      AS wlA ON ngA.ngram_id = wlA.ngram_id
      AND wlA.node_id = ?
      -- \--> filter with white/main list (typename 7)
      LEFT JOIN nodes_ngrams_ngrams
      AS grA ON wlA.ngram_id = grA.ngram1_id
      AND grA.node_id = ?
      -- \--> adding (joining) ngrams that are grouped (typename 6)
      LEFT JOIN nodes_ngrams
      AS wlAA ON grA.ngram2_id = wlAA.ngram_id
      AND wlAA.node_id = wlA.node_id
      -- \--> adding (joining) ngrams that are not grouped
      --LEFT JOIN ngrams AS wlAA ON grA.ngram2_id = wlAA.id
      -- \--> for joining all synonyms even if they are not in the main list (white list)
      -- STEP 2: Y axi of the matrix
      INNER JOIN nodes_ngrams
      AS ngB ON ngB.node_id = n.id
      -- \--> get the occurrences node/ngram of the corpus
      INNER JOIN nodes_ngrams
      AS wlB ON ngB.ngram_id = wlB.ngram_id
      AND wlB.node_id = ?
      -- \--> filter with white/main list
      LEFT JOIN nodes_ngrams_ngrams
      AS grB ON wlB.ngram_id = grB.ngram1_id
      AND grB.node_id = ?
      -- \--> adding (joining) ngrams that are grouped
      LEFT JOIN nodes_ngrams
      AS wlBB ON grB.ngram2_id = wlBB.ngram_id
      AND wlBB.node_id = wlB.node_id
      -- \--> adding (joining) ngrams that are not grouped
      -- LEFT JOIN ngrams AS wlBB ON grB.ngram2_id = wlBB.id
      -- \--> for joining all synonyms even if they are not in the main list (white list)
    WHERE
      n.typename = 4
      AND n.parent_id = ?
    GROUP BY 1,2
    -- ==
    -- GROUP BY ngA, ngB
  )
  SELECT ngA, ngB, score
  FROM COOC --> from the query above
  WHERE score >= 3
  AND
  ngA <= ngB
  |] (mainList, groupList, mainList, groupList, corpus)
...@@ -23,6 +23,7 @@ Portability : POSIX ...@@ -23,6 +23,7 @@ Portability : POSIX
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
------------------------------------------------------------------------ ------------------------------------------------------------------------
module Gargantext.Database.Facet module Gargantext.Database.Facet
where where
...@@ -171,8 +172,8 @@ type Trash = Bool ...@@ -171,8 +172,8 @@ type Trash = Bool
data OrderBy = DateAsc | DateDesc data OrderBy = DateAsc | DateDesc
| TitleAsc | TitleDesc | TitleAsc | TitleDesc
| ScoreDesc | ScoreAsc | ScoreDesc | ScoreAsc
| SourceAsc | SourceDesc
deriving (Generic, Enum, Bounded, Read, Show) deriving (Generic, Enum, Bounded, Read, Show)
-- | NgramCoun
instance FromHttpApiData OrderBy instance FromHttpApiData OrderBy
where where
...@@ -182,6 +183,8 @@ instance FromHttpApiData OrderBy ...@@ -182,6 +183,8 @@ instance FromHttpApiData OrderBy
parseUrlPiece "TitleDesc" = pure TitleDesc parseUrlPiece "TitleDesc" = pure TitleDesc
parseUrlPiece "ScoreAsc" = pure ScoreAsc parseUrlPiece "ScoreAsc" = pure ScoreAsc
parseUrlPiece "ScoreDesc" = pure ScoreDesc parseUrlPiece "ScoreDesc" = pure ScoreDesc
parseUrlPiece "SourceAsc" = pure SourceAsc
parseUrlPiece "SourceDesc" = pure SourceDesc
parseUrlPiece _ = Left "Unexpected value of OrderBy" parseUrlPiece _ = Left "Unexpected value of OrderBy"
instance ToParamSchema OrderBy instance ToParamSchema OrderBy
...@@ -251,7 +254,7 @@ viewDocuments cId t ntId = proc () -> do ...@@ -251,7 +254,7 @@ viewDocuments cId t ntId = proc () -> do
------------------------------------------------------------------------ ------------------------------------------------------------------------
filterWith :: (PGOrd date, PGOrd title, PGOrd score) => filterWith :: (PGOrd date, PGOrd title, PGOrd score, hyperdata ~ Column SqlJsonb) =>
Maybe Gargantext.Core.Types.Offset Maybe Gargantext.Core.Types.Offset
-> Maybe Gargantext.Core.Types.Limit -> Maybe Gargantext.Core.Types.Limit
-> Maybe OrderBy -> Maybe OrderBy
...@@ -260,14 +263,24 @@ filterWith :: (PGOrd date, PGOrd title, PGOrd score) => ...@@ -260,14 +263,24 @@ filterWith :: (PGOrd date, PGOrd title, PGOrd score) =>
filterWith o l order q = limit' l $ offset' o $ orderBy (orderWith order) q filterWith o l order q = limit' l $ offset' o $ orderBy (orderWith order) q
orderWith :: (PGOrd b1, PGOrd b2, PGOrd b3) => Maybe OrderBy -> Order (Facet id (Column b1) (Column b2) hyperdata (Column b3) score) orderWith :: (PGOrd b1, PGOrd b2, PGOrd b3)
orderWith order = case order of => Maybe OrderBy
(Just DateAsc) -> asc facetDoc_created -> Order (Facet id (Column b1) (Column b2) (Column SqlJsonb) (Column b3) score)
orderWith (Just DateAsc) = asc facetDoc_created
orderWith (Just DateDesc) = desc facetDoc_created
(Just TitleAsc) -> asc facetDoc_title orderWith (Just TitleAsc) = asc facetDoc_title
(Just TitleDesc) -> desc facetDoc_title orderWith (Just TitleDesc) = desc facetDoc_title
(Just ScoreAsc) -> asc facetDoc_favorite orderWith (Just ScoreAsc) = asc facetDoc_favorite
(Just ScoreDesc) -> desc facetDoc_favorite orderWith (Just ScoreDesc) = desc facetDoc_favorite
_ -> desc facetDoc_created
orderWith (Just SourceAsc) = asc facetDoc_source
orderWith (Just SourceDesc) = desc facetDoc_source
orderWith _ = asc facetDoc_created
facetDoc_source :: PGIsJson a
=> Facet id created title (Column a) favorite ngramCount
-> Column (Nullable PGText)
facetDoc_source x = toNullable (facetDoc_hyperdata x) .->> pgString "source"
...@@ -12,6 +12,10 @@ Portability : POSIX ...@@ -12,6 +12,10 @@ Portability : POSIX
-- check userId CanFillUserCorpus userCorpusId -- check userId CanFillUserCorpus userCorpusId
-- check masterUserId CanFillMasterCorpus masterCorpusId -- check masterUserId CanFillMasterCorpus masterCorpusId
-- TODO-ACCESS: check uId CanInsertDoc pId && checkDocType nodeType
-- TODO-EVENTS: InsertedNodes
-} -}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
...@@ -24,14 +28,6 @@ Portability : POSIX ...@@ -24,14 +28,6 @@ Portability : POSIX
module Gargantext.Database.Flow -- (flowDatabase, ngrams2list) module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
where where
--import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId)
--import Gargantext.Database.Metrics.TFICF (getTficf)
--import Gargantext.Database.Node.Contact (HyperdataContact(..))
--import Gargantext.Database.Schema.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
--import Gargantext.Database.Schema.NodeNgramsNgrams (NodeNgramsNgramsPoly(..), insertNodeNgramsNgramsNew)
--import Gargantext.Database.Schema.User (insertUsers, simpleUser, gargantuaUser)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
--import Gargantext.Text.Metrics.TFICF (Tficf(..))
--import Debug.Trace (trace) --import Debug.Trace (trace)
import Control.Lens ((^.), view, Lens', _Just) import Control.Lens ((^.), view, Lens', _Just)
import Control.Monad (mapM_) import Control.Monad (mapM_)
...@@ -43,35 +39,37 @@ import Data.Monoid ...@@ -43,35 +39,37 @@ import Data.Monoid
import Data.Text (Text, splitOn, intercalate) import Data.Text (Text, splitOn, intercalate)
import GHC.Show (Show) import GHC.Show (Show)
import Gargantext.API.Ngrams (HasRepoVar) import Gargantext.API.Ngrams (HasRepoVar)
import Gargantext.API.Ngrams (NgramsElement, putListNgrams, RepoCmdM) import Gargantext.API.Ngrams (NgramsElement(..), putListNgrams, RepoCmdM)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (NodePoly(..), Terms(..)) import Gargantext.Core.Types (NodePoly(..), Terms(..))
import Gargantext.Core.Types.Individu (Username) import Gargantext.Core.Types.Individu (Username)
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Database.TextSearch (searchInDatabase)
import Gargantext.Database.Config (userMaster, corpusMasterName) import Gargantext.Database.Config (userMaster, corpusMasterName)
import Gargantext.Database.Flow.Utils (insertToNodeNgrams) import Gargantext.Database.Flow.Utils (insertDocNgrams)
import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Node.Contact -- (HyperdataContact(..), ContactWho(..)) import Gargantext.Database.Node.Contact -- (HyperdataContact(..), ContactWho(..))
import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Root (getRoot) import Gargantext.Database.Root (getRoot)
import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId) import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError) import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
import Gargantext.Database.Schema.User (getUser, UserLight(..)) import Gargantext.Database.Schema.User (getUser, UserLight(..))
import Gargantext.Database.TextSearch (searchInDatabase)
import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId) import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Utils (Cmd, CmdM) import Gargantext.Database.Utils (Cmd, CmdM)
import Gargantext.Ext.IMT (toSchoolName) import Gargantext.Ext.IMT (toSchoolName)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.List (buildNgramsLists,StopSize(..)) import Gargantext.Text.List (buildNgramsLists,StopSize(..))
import Gargantext.Text.Parsers (parseDocs, FileFormat) import Gargantext.Text.Parsers (parseFile, FileFormat)
import Gargantext.Text.Terms (TermType(..), tt_lang) import Gargantext.Text.Terms (TermType(..), tt_lang)
import Gargantext.Text.Terms (extractTerms) import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
import qualified Gargantext.Text.Parsers.GrandDebat as GD
import Servant (ServantErr) import Servant (ServantErr)
import System.FilePath (FilePath) import System.FilePath (FilePath)
import qualified Data.Map as DM --import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Gargantext.Database.Node.Document.Add as Doc (add) import qualified Gargantext.Database.Node.Document.Add as Doc (add)
import qualified Gargantext.Text.Parsers.GrandDebat as GD
type FlowCmdM env err m = type FlowCmdM env err m =
( CmdM env err m ( CmdM env err m
...@@ -110,13 +108,13 @@ flowCorpusDebat u n l fp = do ...@@ -110,13 +108,13 @@ flowCorpusDebat u n l fp = do
flowCorpusFile :: FlowCmdM env ServantErr m flowCorpusFile :: FlowCmdM env ServantErr m
=> Username -> CorpusName => Username -> CorpusName
-> Limit -- ^ Limit the number of docs (for dev purpose) -> Limit -- Limit the number of docs (for dev purpose)
-> TermType Lang -> FileFormat -> FilePath -> TermType Lang -> FileFormat -> FilePath
-> m CorpusId -> m CorpusId
flowCorpusFile u n l la ff fp = do flowCorpusFile u n l la ff fp = do
docs <- liftIO ( splitEvery 500 docs <- liftIO ( splitEvery 500
<$> take l <$> take l
<$> parseDocs ff fp <$> parseFile ff fp
) )
flowCorpus u n la (map (map toHyperdataDocument) docs) flowCorpus u n la (map (map toHyperdataDocument) docs)
...@@ -130,10 +128,6 @@ flowCorpusSearchInDatabase u la q = do ...@@ -130,10 +128,6 @@ flowCorpusSearchInDatabase u la q = do
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO-ACCESS: check uId CanInsertDoc pId && checkDocType nodeType
-- TODO-EVENTS: InsertedNodes
flow :: (FlowCmdM env ServantErr m, FlowCorpus a, MkCorpus c) flow :: (FlowCmdM env ServantErr m, FlowCorpus a, MkCorpus c)
=> Maybe c -> Username -> CorpusName -> TermType Lang -> [[a]] -> m CorpusId => Maybe c -> Username -> CorpusName -> TermType Lang -> [[a]] -> m CorpusId
flow c u cn la docs = do flow c u cn la docs = do
...@@ -182,15 +176,14 @@ insertMasterDocs c lang hs = do ...@@ -182,15 +176,14 @@ insertMasterDocs c lang hs = do
-- TODO Type NodeDocumentUnicised -- TODO Type NodeDocumentUnicised
let hs' = map addUniqId hs let hs' = map addUniqId hs
ids <- insertDb masterUserId masterCorpusId hs' ids <- insertDb masterUserId masterCorpusId hs'
let documentsWithId = mergeData (toInserted ids) (DM.fromList $ map viewUniqId' hs') let documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' hs')
docsWithNgrams <- documentIdWithNgrams (extractNgramsT lang) documentsWithId
let maps = mapNodeIdNgrams docsWithNgrams maps <- mapNodeIdNgrams <$> documentIdWithNgrams (extractNgramsT lang) documentsWithId
terms2id <- insertNgrams $ Map.keys maps
let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps
terms2id <- insertNgrams $ DM.keys maps lId <- getOrMkList masterCorpusId masterUserId
let indexedNgrams = DM.mapKeys (indexNgrams terms2id) maps _ <- insertDocNgrams lId indexedNgrams
_ <- insertToNodeNgrams indexedNgrams
pure $ map reId ids pure $ map reId ids
...@@ -255,7 +248,7 @@ viewUniqId' d = maybe err (\h -> (h,d)) (view uniqId d) ...@@ -255,7 +248,7 @@ viewUniqId' d = maybe err (\h -> (h,d)) (view uniqId d)
toInserted :: [ReturnId] -> Map HashId ReturnId toInserted :: [ReturnId] -> Map HashId ReturnId
toInserted = DM.fromList . map (\r -> (reUniqId r, r) ) toInserted = Map.fromList . map (\r -> (reUniqId r, r) )
. filter (\r -> reInserted r == True) . filter (\r -> reInserted r == True)
data DocumentWithId a = DocumentWithId data DocumentWithId a = DocumentWithId
...@@ -266,7 +259,7 @@ data DocumentWithId a = DocumentWithId ...@@ -266,7 +259,7 @@ data DocumentWithId a = DocumentWithId
mergeData :: Map HashId ReturnId mergeData :: Map HashId ReturnId
-> Map HashId a -> Map HashId a
-> [DocumentWithId a] -> [DocumentWithId a]
mergeData rs = catMaybes . map toDocumentWithId . DM.toList mergeData rs = catMaybes . map toDocumentWithId . Map.toList
where where
toDocumentWithId (hash,hpd) = toDocumentWithId (hash,hpd) =
DocumentWithId <$> fmap reId (lookup hash rs) DocumentWithId <$> fmap reId (lookup hash rs)
...@@ -296,7 +289,7 @@ instance ExtractNgramsT HyperdataContact ...@@ -296,7 +289,7 @@ instance ExtractNgramsT HyperdataContact
$ maybe ["Nothing"] (\a -> [a]) $ maybe ["Nothing"] (\a -> [a])
$ view (hc_who . _Just . cw_lastName) hc' $ view (hc_who . _Just . cw_lastName) hc'
pure $ DM.fromList $ [(a', DM.singleton Authors 1) | a' <- authors ] pure $ Map.fromList $ [(a', Map.singleton Authors 1) | a' <- authors ]
...@@ -333,15 +326,15 @@ extractNgramsT' lang hd = filterNgramsT 255 <$> extractNgramsT'' lang hd ...@@ -333,15 +326,15 @@ extractNgramsT' lang hd = filterNgramsT 255 <$> extractNgramsT'' lang hd
<$> concat <$> concat
<$> liftIO (extractTerms lang' leText) <$> liftIO (extractTerms lang' leText)
pure $ DM.fromList $ [(source, DM.singleton Sources 1)] pure $ Map.fromList $ [(source, Map.singleton Sources 1)]
<> [(i', DM.singleton Institutes 1) | i' <- institutes ] <> [(i', Map.singleton Institutes 1) | i' <- institutes ]
<> [(a', DM.singleton Authors 1) | a' <- authors ] <> [(a', Map.singleton Authors 1) | a' <- authors ]
<> [(t', DM.singleton NgramsTerms 1) | t' <- terms' ] <> [(t', Map.singleton NgramsTerms 1) | t' <- terms' ]
filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int) filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int)
-> Map Ngrams (Map NgramsType Int) -> Map Ngrams (Map NgramsType Int)
filterNgramsT s ms = DM.fromList $ map (\a -> filter' s a) $ DM.toList ms filterNgramsT s ms = Map.fromList $ map (\a -> filter' s a) $ Map.toList ms
where where
filter' s' (ng@(Ngrams t n),y) = case (Text.length t) < s' of filter' s' (ng@(Ngrams t n),y) = case (Text.length t) < s' of
True -> (ng,y) True -> (ng,y)
...@@ -365,11 +358,11 @@ documentIdWithNgrams f = mapM toDocumentIdWithNgrams ...@@ -365,11 +358,11 @@ documentIdWithNgrams f = mapM toDocumentIdWithNgrams
-- | TODO check optimization -- | TODO check optimization
mapNodeIdNgrams :: [DocumentIdWithNgrams a] mapNodeIdNgrams :: [DocumentIdWithNgrams a]
-> Map Ngrams (Map NgramsType (Map NodeId Int)) -> Map Ngrams (Map NgramsType (Map NodeId Int))
mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f mapNodeIdNgrams = Map.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap f
where where
f :: DocumentIdWithNgrams a f :: DocumentIdWithNgrams a
-> Map Ngrams (Map NgramsType (Map NodeId Int)) -> Map Ngrams (Map NgramsType (Map NodeId Int))
f d = fmap (fmap (DM.singleton nId)) $ document_ngrams d f d = fmap (fmap (Map.singleton nId)) $ document_ngrams d
where where
nId = documentId $ documentWithId d nId = documentId $ documentWithId d
...@@ -388,5 +381,6 @@ flowList uId cId ngs = do ...@@ -388,5 +381,6 @@ flowList uId cId ngs = do
lId <- getOrMkList cId uId lId <- getOrMkList cId uId
printDebug "listId flowList" lId printDebug "listId flowList" lId
listInsert lId ngs listInsert lId ngs
--trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
pure lId pure lId
...@@ -23,6 +23,8 @@ import Gargantext.Database.Schema.Ngrams ...@@ -23,6 +23,8 @@ import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Types.Node (NodeId, Node, NodePoly(..), Hyperdata) import Gargantext.Database.Types.Node (NodeId, Node, NodePoly(..), Hyperdata)
import Gargantext.Database.Utils (Cmd) import Gargantext.Database.Utils (Cmd)
import Gargantext.Database.Schema.NodeNgram import Gargantext.Database.Schema.NodeNgram
import Gargantext.Database.Schema.NodeNodeNgrams
import Gargantext.Database.Types.Node
import Gargantext.Core.Types.Main (ListType(..), listTypeId) import Gargantext.Core.Types.Main (ListType(..), listTypeId)
toMaps :: Hyperdata a => (a -> Map (NgramsT Ngrams) Int) -> [Node a] -> Map (NgramsT Ngrams) (Map NodeId Int) toMaps :: Hyperdata a => (a -> Map (NgramsT Ngrams) Int) -> [Node a] -> Map (NgramsT Ngrams) (Map NodeId Int)
...@@ -56,6 +58,7 @@ data DocumentIdWithNgrams a = ...@@ -56,6 +58,7 @@ data DocumentIdWithNgrams a =
-- | TODO for now, list Type is CandidateTerm because Graph Terms -- | TODO for now, list Type is CandidateTerm because Graph Terms
-- have to be detected in next step in the flow -- have to be detected in next step in the flow
-- TODO remove this
insertToNodeNgrams :: Map NgramsIndexed (Map NgramsType (Map NodeId Int)) -> Cmd err Int insertToNodeNgrams :: Map NgramsIndexed (Map NgramsType (Map NodeId Int)) -> Cmd err Int
insertToNodeNgrams m = insertNodeNgrams [ NodeNgram n (_ngramsId ng) Nothing (ngramsTypeId t) (listTypeId CandidateTerm) (fromIntegral i) insertToNodeNgrams m = insertNodeNgrams [ NodeNgram n (_ngramsId ng) Nothing (ngramsTypeId t) (listTypeId CandidateTerm) (fromIntegral i)
| (ng, t2n2i) <- DM.toList m | (ng, t2n2i) <- DM.toList m
...@@ -63,3 +66,23 @@ insertToNodeNgrams m = insertNodeNgrams [ NodeNgram n (_ngramsId ng) Nothing (ng ...@@ -63,3 +66,23 @@ insertToNodeNgrams m = insertNodeNgrams [ NodeNgram n (_ngramsId ng) Nothing (ng
, (n, i) <- DM.toList n2i , (n, i) <- DM.toList n2i
] ]
-- | Convert an internal 'DocNgrams' row into the 'NodeNodeNgrams' shape
-- expected by the insert query (row id left to the database: 'Nothing').
docNgrams2nodeNodeNgrams :: CorpusId -> DocNgrams -> NodeNodeNgrams
docNgrams2nodeNodeNgrams cId (DocNgrams d n nt w) = NodeNodeNgrams Nothing cId d n nt w

-- | One (document, ngram) occurrence to insert for a corpus.
data DocNgrams = DocNgrams { dn_doc_id :: DocId        -- document node id
                           , dn_ngrams_id :: Int       -- ngrams table id
                           , dn_ngrams_type :: NgramsTypeId
                           , dn_weight :: Double       -- occurrence count/weight
                           }
-- | Insert the given doc/ngrams rows for corpus @cId@; returns the number
-- of rows inserted.
insertDocNgramsOn :: CorpusId -> [DocNgrams] -> Cmd err Int
insertDocNgramsOn cId dn = insertNodeNodeNgrams $ (map (docNgrams2nodeNodeNgrams cId) dn)

-- | Flatten the nested ngrams -> type -> node-count map into 'DocNgrams'
-- rows and insert them for corpus @cId@; returns the number of rows inserted.
insertDocNgrams :: CorpusId -> Map NgramsIndexed (Map NgramsType (Map NodeId Int)) -> Cmd err Int
insertDocNgrams cId m = insertDocNgramsOn cId [ DocNgrams n (_ngramsId ng) (ngramsTypeId t) (fromIntegral i)
                                              | (ng, t2n2i) <- DM.toList m
                                              , (t, n2i) <- DM.toList t2n2i
                                              , (n, i) <- DM.toList n2i
                                              ]
...@@ -22,9 +22,10 @@ import Data.Map (Map) ...@@ -22,9 +22,10 @@ import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import Gargantext.API.Ngrams (TabType(..), ngramsTypeFromTabType) import Gargantext.API.Ngrams (TabType(..), ngramsTypeFromTabType)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm) import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm)
import Gargantext.Core.Types (ListType(..), Limit) import Gargantext.Core.Types (ListType(..), Limit, NodeType(..))
import Gargantext.Database.Flow (FlowCmdM) import Gargantext.Database.Flow (FlowCmdM)
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser, getTficfWith) import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser, getTficfWith)
import Gargantext.Database.Node.Select
import Gargantext.Database.Schema.Node (defaultList) import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Types.Node (ListId, CorpusId, HyperdataCorpus) import Gargantext.Database.Types.Node (ListId, CorpusId, HyperdataCorpus)
import Gargantext.Database.Flow (getOrMkRootWithCorpus) import Gargantext.Database.Flow (getOrMkRootWithCorpus)
...@@ -51,7 +52,10 @@ getMetrics cId maybeListId tabType maybeLimit = do ...@@ -51,7 +52,10 @@ getMetrics cId maybeListId tabType maybeLimit = do
(_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster "" (Nothing :: Maybe HyperdataCorpus) (_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster "" (Nothing :: Maybe HyperdataCorpus)
metrics' <- getTficfWith cId masterCorpusId (ngramsTypeFromTabType tabType) ngs' lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster
metrics' <- getTficfWith cId masterCorpusId (lIds <> [lId]) (ngramsTypeFromTabType tabType) ngs'
pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics']) pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics'])
...@@ -80,9 +84,12 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do ...@@ -80,9 +84,12 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do
take' Nothing xs = xs take' Nothing xs = xs
take' (Just n) xs = take n xs take' (Just n) xs = take n xs
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster
myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal True) myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs <$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (ngramsTypeFromTabType tabType) <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) (ngramsTypeFromTabType tabType)
(take' maybeLimit $ Map.keys ngs) (take' maybeLimit $ Map.keys ngs)
pure $ (ngs', ngs, myCooc) pure $ (ngs', ngs, myCooc)
......
{-|
Module : Gargantext.Database.Metrics.Count
Description : Ngram connection to the Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Count Ngrams by Context
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.Database.Metrics.Count where
import Control.Arrow (returnA)
import Control.Lens (view)
import Data.Map.Strict (Map, fromListWith, elems)
import Data.Monoid (mempty)
import Data.Text (Text)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement)
import Gargantext.Core.Types.Main (listTypeId, ListType(..))
import Gargantext.Database.Access
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Queries.Join (leftJoin4, leftJoin5, leftJoin3)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsType(..), ngramsTypeId, Ngrams(..), NgramsIndexed(..), ngrams, ngramsTerms, fromNgramsTypeId)
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Node (HasNodeError(..))
import Gargantext.Database.Schema.NodeNgram
import Gargantext.Database.Schema.NodeNode
import Gargantext.Database.Schema.NodeNodeNgrams
import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Utils
import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Prelude hiding (sum)
import Gargantext.Text.Metrics.Count (Coocs, coocOn)
import Opaleye
import Safe (headMay)
import qualified Database.PostgreSQL.Simple as PGS
-- | Dev variant of 'getCoocByDoc': cooccurrence counts keyed by the
-- (singleton) list of ngram terms instead of by 'NgramsIndexed'.
getCoocByDocDev :: HasNodeError err => CorpusId -> ListId -> Cmd err (Map ([Text], [Text]) Int)
getCoocByDocDev cId lId =
  coocOn termsOf <$> getNgramsByDoc cId lId
  where
    -- Project each indexed ngram to a singleton list of its terms.
    termsOf n = [view (ngrams . ngramsTerms) n]
-- | Cooccurrence counts of indexed ngrams within the documents of a corpus,
-- keyed by the ngrams themselves.
getCoocByDoc :: CorpusId -> ListId -> Cmd err (Map (NgramsIndexed, NgramsIndexed) Coocs)
getCoocByDoc cId lId = fmap (coocOn identity) (getNgramsByDoc cId lId)
-- | Group the raw rows of 'getNgramsByDocDb' by document id, yielding one
-- list of indexed ngrams per document.
getNgramsByDoc :: CorpusId -> ListId -> Cmd err [[NgramsIndexed]]
getNgramsByDoc cId lId = perDocument <$> getNgramsByDocDb cId lId
  where
    -- Build (docId, [ngram]) singletons and merge them per document.
    perDocument rows =
      elems $ fromListWith (<>)
        [ (nId, [NgramsIndexed (Ngrams nt n) ngId])
        | (nId, ngId, nt, n) <- rows
        ]
-- | Raw (document id, ngrams id, terms, n) rows: GraphTerm ngrams of list
-- @lId@ occurring in the documents of corpus @cId@.
getNgramsByDocDb :: CorpusId -> ListId -> Cmd err [(NodeId, NgramsId, Text, Int)]
getNgramsByDocDb cId lId = runPGSQuery query params
  where
    -- Fills the four '?' placeholders of the query, in order.
    params = (cId, lId, listTypeId GraphTerm, ngramsTypeId NgramsTerms)
    query = [sql|
    -- TODO add CTE
    SELECT n.id, ng.id, ng.terms, ng.n -- , list.parent_id
    FROM nodes n
    JOIN nodes_nodes nn ON nn.node2_id = n.id
    JOIN nodes_ngrams nng ON nng.node_id = nn.node2_id
    JOIN nodes_ngrams list ON list.ngrams_id = nng.ngrams_id
    JOIN ngrams ng ON ng.id = nng.ngrams_id
    WHERE nn.node1_id = ? -- CorpusId
    AND list.node_id = ? -- ListId
    AND list.list_type = ? -- GraphListId
    AND list.ngrams_type = ? -- NgramsTypeId
    |]
-- | Ngram terms grouped per node, for documents linked to @nId@ and
-- ngrams of type @nt@.
getNgramsByNode :: NodeId -> NgramsType -> Cmd err [[Text]]
getNgramsByNode nId nt = regroup <$> getNgramsByNodeNodeIndexed nId nt
  where
    -- Merge the flat (node, term) pairs into one term list per node.
    regroup pairs = elems (fromListWith (<>) [ (i, [t]) | (i, t) <- pairs ])
-- | TODO add join with nodeNodeNgram (if it exists)
-- (node, term) pairs for the documents linked to node @nId@, restricted to
-- ngrams of type @nt@ and excluding soft-deleted links (Opaleye arrow query).
getNgramsByNodeNodeIndexed :: NodeId -> NgramsType -> Cmd err [(NodeId, Text)]
getNgramsByNodeNodeIndexed nId nt = runOpaQuery (select' nId)
  where
    select' nId' = proc () -> do
      (ng,(nng,(nn,n))) <- getNgramsByNodeNodeIndexedJoin -< ()
      -- keep only rows attached to the requested parent node
      restrict -< _node_id n .== toNullable (pgNodeId nId')
      -- keep only the requested ngrams type
      restrict -< nng_ngramsType nng .== toNullable (pgNgramsTypeId $ ngramsTypeId nt)
      -- skip soft-deleted node-node links
      restrict -< nn_delete nn ./= (toNullable . pgBool) True
      returnA -< (nng_node_id nng, ngrams_terms ng)
{-
getNgramsByNodeIndexed' :: NodeId -> NgramsType -> Cmd err [(NodeId, Maybe Text)]
getNgramsByNodeIndexed' nId nt = runOpaQuery (select' nId)
where
select' nId' = proc () -> do
(nnng,(ng,(nng,(_,n)))) <- getNgramsByNodeIndexedJoin5 -< ()
restrict -< _node_id n .== toNullable (pgNodeId nId')
restrict -< nng_ngramsType nng .== toNullable (pgNgramsTypeId $ ngramsTypeId nt)
let node_id' = ifThenElse (isNull $ toNullable $ nnng_node1_id nnng)
(nng_node_id nng)
(nnng_node2_id nng)
let t1 = ifThenElse (isNull $ toNullable $ nnng_node1_id nnng)
(ngrams_terms ng)
(nnng_terms nng)
returnA -< (n1, t1)
--}
-- | 4-way left join: nodes <- nodes_nodes <- nodes_ngrams <- ngrams.
-- The result tuple nests from the outermost (ngrams) to the innermost
-- (node) table, matching the leftJoin4 stacking order.
getNgramsByNodeNodeIndexedJoin :: Query ( NgramsRead
, (NodeNgramReadNull
, (NodeNodeReadNull
, NodeReadNull
)
)
)
getNgramsByNodeNodeIndexedJoin = leftJoin4 queryNodeTable
queryNodeNodeTable
queryNodeNgramTable
queryNgramsTable
c1 c2 c3
where
-- Link a nodes_nodes row to its source node.
c1 :: (NodeNodeRead, NodeRead) -> Column PGBool
c1 (nn,n) = nn_node1_id nn .== _node_id n
-- Link a nodes_ngrams row to the target node of the link.
c2 :: ( NodeNgramRead
, (NodeNodeRead
, NodeReadNull
)
) -> Column PGBool
c2 (nng,(nn',_)) = (nng_node_id nng) .== nn_node2_id nn'
-- Link the ngrams row to its nodes_ngrams entry.
c3 :: ( NgramsRead
, ( NodeNgramRead
, ( NodeNodeReadNull
, NodeReadNull
)
)
) -> Column PGBool
c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng'
-- | 5-way left join extending 'getNgramsByNodeNodeIndexedJoin' with the
-- node_node_ngrams table (outermost component of the result tuple).
-- NOTE(review): currently unreferenced from live code -- the only caller
-- visible in this file is commented out.
getNgramsByNodeNodeIndexedJoin5 :: Query ( NodeNodeNgramsRead
, (NgramsReadNull
, (NodeNgramReadNull
, (NodeNodeReadNull
, NodeReadNull
)
)
)
)
getNgramsByNodeNodeIndexedJoin5 = leftJoin5 queryNodeTable
queryNodeNodeTable
queryNodeNgramTable
queryNgramsTable
queryNodeNodeNgramsTable
c1 c2 c3 c4
where
-- Link a nodes_nodes row to its source node.
c1 :: (NodeNodeRead, NodeRead) -> Column PGBool
c1 (nn,n) = nn_node1_id nn .== _node_id n
-- Link a nodes_ngrams row to the target node of the link.
c2 :: ( NodeNgramRead
, (NodeNodeRead
, NodeReadNull
)
) -> Column PGBool
c2 (nng,(nn',_)) = (nng_node_id nng) .== nn_node2_id nn'
-- Link the ngrams row to its nodes_ngrams entry.
c3 :: ( NgramsRead
, ( NodeNgramRead
, ( NodeNodeReadNull
, NodeReadNull
)
)
) -> Column PGBool
c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng'
-- Match node_node_ngrams on both endpoints of the nodes_nodes link.
c4 :: ( NodeNodeNgramsRead
, (NgramsRead
, ( NodeNgramReadNull
, ( NodeNodeReadNull
, NodeReadNull
)
)
)
) -> Column PGBool
c4 (nnng,(_,(_,(nn,_)))) = (toNullable $ nnng_node1_id nnng) .== (nn_node1_id nn)
.&& (toNullable $ nnng_node2_id nnng) .== (nn_node2_id nn)
--}
--{-
-- | Ngrams found under parent node @nId@, grouped by ngrams type, each
-- term wrapped as a candidate 'NgramsElement' with no parent/children.
-- Panics if a stored ngrams-type id cannot be decoded.
getNgramsElementsWithParentNodeId :: NodeId -> Cmd err (Map NgramsType [NgramsElement])
getNgramsElementsWithParentNodeId nId = do
  ns <- getNgramsWithParentNodeId nId
  let mkEntry (_, (nt, ng)) =
        ( maybe (panic "error") identity (fromNgramsTypeId nt)
        , [mkNgramsElement ng CandidateTerm Nothing mempty]
        )
  pure $ fromListWith (<>) (map mkEntry ns)
-------------------------------------------------------------------------
-- | (doc id, (ngrams type id, terms)) for every ngrams attached to a
-- document whose parent node is @nId@.
getNgramsWithParentNodeId :: NodeId -> Cmd err [(NodeId, (NgramsTypeId, Text))]
getNgramsWithParentNodeId nId = runOpaQuery (select nId)
where
select nId' = proc () -> do
(ng,(nng,n)) <- getNgramsWithParentNodeIdJoin -< ()
-- Documents directly under the given parent node...
restrict -< _node_parentId n .== (toNullable $ pgNodeId nId')
-- ...and only nodes of type NodeDocument.
restrict -< _node_typename n .== (toNullable $ pgInt4 $ nodeTypeId NodeDocument)
returnA -< (nng_node_id nng, (nng_ngramsType nng, ngrams_terms ng))
--}
-- | 3-way left join: nodes <- nodes_ngrams <- ngrams, used by
-- 'getNgramsWithParentNodeId'.
getNgramsWithParentNodeIdJoin :: Query ( NgramsRead
, ( NodeNgramReadNull
, NodeReadNull
)
)
getNgramsWithParentNodeIdJoin = leftJoin3 queryNodeTable queryNodeNgramTable queryNgramsTable on1 on2
where
-- Link a nodes_ngrams row to its node.
on1 :: (NodeNgramRead, NodeRead) -> Column PGBool
on1 (nng,n) = nng_node_id nng .== _node_id n
-- Link the ngrams row to its nodes_ngrams entry.
on2 :: (NgramsRead, (NodeNgramRead, NodeReadNull))-> Column PGBool
on2 (ng, (nng,_)) = ngrams_id ng .== nng_ngrams_id nng
-- | Count the documents of a corpus.  For 'RoleUser' documents are
-- reached through non-deleted nodes_nodes links; for 'RoleMaster' they
-- are direct children of the corpus node with type NodeDocument.
-- Returns 0 when the query yields no row.
countCorpusDocuments :: Roles -> Int -> Cmd err Int
countCorpusDocuments r cId = do
  counts <- runQuery' r cId
  pure $ case counts of
    (PGS.Only n : _) -> n
    _                -> 0
  where
    runQuery' RoleUser cId' = runPGSQuery
      "SELECT count(*) from nodes_nodes nn WHERE nn.node1_id = ? AND nn.delete = False"
      (PGS.Only cId')
    runQuery' RoleMaster cId' = runPGSQuery
      "SELECT count(*) from nodes n WHERE n.parent_id = ? AND n.typename = ?"
      (cId', nodeTypeId NodeDocument)
...@@ -69,11 +69,11 @@ getTficf' u m nt f = do ...@@ -69,11 +69,11 @@ getTficf' u m nt f = do
(countNodesByNgramsWith f m') (countNodesByNgramsWith f m')
--{- --{-
getTficfWith :: UserCorpusId -> MasterCorpusId getTficfWith :: UserCorpusId -> MasterCorpusId -> [ListId]
-> NgramsType -> Map Text (Maybe Text) -> NgramsType -> Map Text (Maybe Text)
-> Cmd err (Map Text (Double, Set Text)) -> Cmd err (Map Text (Double, Set Text))
getTficfWith u m nt mtxt = do getTficfWith u m ls nt mtxt = do
u' <- getNodesByNgramsOnlyUser u nt (Map.keys mtxt) u' <- getNodesByNgramsOnlyUser u ls nt (Map.keys mtxt)
m' <- getNodesByNgramsMaster u m m' <- getNodesByNgramsMaster u m
let f x = case Map.lookup x mtxt of let f x = case Map.lookup x mtxt of
...@@ -126,33 +126,34 @@ getNodesByNgramsUser :: CorpusId -> NgramsType ...@@ -126,33 +126,34 @@ getNodesByNgramsUser :: CorpusId -> NgramsType
getNodesByNgramsUser cId nt = getNodesByNgramsUser cId nt =
fromListWith (<>) <$> map (\(n,t) -> (t, Set.singleton n)) fromListWith (<>) <$> map (\(n,t) -> (t, Set.singleton n))
<$> selectNgramsByNodeUser cId nt <$> selectNgramsByNodeUser cId nt
where
selectNgramsByNodeUser :: CorpusId -> NgramsType selectNgramsByNodeUser :: CorpusId -> NgramsType
-> Cmd err [(NodeId, Text)] -> Cmd err [(NodeId, Text)]
selectNgramsByNodeUser cId nt = selectNgramsByNodeUser cId' nt' =
runPGSQuery queryNgramsByNodeUser runPGSQuery queryNgramsByNodeUser
( cId ( cId'
, nodeTypeId NodeDocument , nodeTypeId NodeDocument
, ngramsTypeId nt , ngramsTypeId nt'
, 1000 :: Int -- limit -- , 100 :: Int -- limit
, 0 :: Int -- offset -- , 0 :: Int -- offset
) )
queryNgramsByNodeUser :: DPS.Query queryNgramsByNodeUser :: DPS.Query
queryNgramsByNodeUser = [sql| queryNgramsByNodeUser = [sql|
SELECT nng.node_id, ng.terms FROM nodes_ngrams nng SELECT nng.node2_id, ng.terms FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN nodes_nodes nn ON nn.node2_id = nng.node_id JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False AND nn.delete = False
GROUP BY nng.node_id, ng.terms GROUP BY nng.node2_id, ng.terms
ORDER BY (nng.node_id, ng.terms) DESC ORDER BY (nng.node2_id, ng.terms) DESC
LIMIT ? -- LIMIT ?
OFFSET ? -- OFFSET ?
|] |]
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO add groups -- TODO add groups
...@@ -162,17 +163,21 @@ getOccByNgramsOnlyFast cId nt ngs = ...@@ -162,17 +163,21 @@ getOccByNgramsOnlyFast cId nt ngs =
fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
-- just slower than getOccByNgramsOnlyFast -- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow :: CorpusId -> NgramsType -> [Text] getOccByNgramsOnlySlow :: NodeType -> CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text Int) -> Cmd err (Map Text Int)
getOccByNgramsOnlySlow cId nt ngs = getOccByNgramsOnlySlow t cId ls nt ngs =
Map.map Set.size <$> getNodesByNgramsOnlyUser cId nt ngs Map.map Set.size <$> getScore' t cId ls nt ngs
where
getScore' NodeCorpus = getNodesByNgramsOnlyUser
getScore' NodeDocument = getNgramsByDocOnlyUser
getScore' _ = getNodesByNgramsOnlyUser
getOccByNgramsOnlySafe :: CorpusId -> NgramsType -> [Text] getOccByNgramsOnlySafe :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text Int) -> Cmd err (Map Text Int)
getOccByNgramsOnlySafe cId nt ngs = do getOccByNgramsOnlySafe cId ls nt ngs = do
printDebug "getOccByNgramsOnlySafe" (cId, nt, length ngs) printDebug "getOccByNgramsOnlySafe" (cId, nt, length ngs)
fast <- getOccByNgramsOnlyFast cId nt ngs fast <- getOccByNgramsOnlyFast cId nt ngs
slow <- getOccByNgramsOnlySlow cId nt ngs slow <- getOccByNgramsOnlySlow NodeCorpus cId ls nt ngs
when (fast /= slow) $ when (fast /= slow) $
printDebug "getOccByNgramsOnlySafe: difference" (diff slow fast :: PatchMap Text (Replace (Maybe Int))) printDebug "getOccByNgramsOnlySafe: difference" (diff slow fast :: PatchMap Text (Replace (Maybe Int)))
pure slow pure slow
...@@ -197,29 +202,30 @@ queryNgramsOccurrencesOnlyByNodeUser :: DPS.Query ...@@ -197,29 +202,30 @@ queryNgramsOccurrencesOnlyByNodeUser :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser = [sql| queryNgramsOccurrencesOnlyByNodeUser = [sql|
WITH input_rows(terms) AS (?) WITH input_rows(terms) AS (?)
SELECT ng.terms, COUNT(nng.node_id) FROM nodes_ngrams nng SELECT ng.terms, COUNT(nng.node2_id) FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node_id JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False AND nn.delete = False
GROUP BY nng.node_id, ng.terms GROUP BY nng.node2_id, ng.terms
|] |]
getNodesByNgramsOnlyUser :: CorpusId -> NgramsType -> [Text] getNodesByNgramsOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId)) -> Cmd err (Map Text (Set NodeId))
getNodesByNgramsOnlyUser cId nt ngs = Map.unionsWith (<>) getNodesByNgramsOnlyUser cId ls nt ngs = Map.unionsWith (<>)
. map (fromListWith (<>) . map (second Set.singleton)) . map (fromListWith (<>) . map (second Set.singleton))
<$> mapM (selectNgramsOnlyByNodeUser cId nt) (splitEvery 1000 ngs) <$> mapM (selectNgramsOnlyByNodeUser cId ls nt) (splitEvery 1000 ngs)
selectNgramsOnlyByNodeUser :: CorpusId -> NgramsType -> [Text] selectNgramsOnlyByNodeUser :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err [(Text, NodeId)] -> Cmd err [(Text, NodeId)]
selectNgramsOnlyByNodeUser cId nt tms = selectNgramsOnlyByNodeUser cId ls nt tms =
runPGSQuery queryNgramsOnlyByNodeUser runPGSQuery queryNgramsOnlyByNodeUser
( Values fields (DPS.Only <$> tms) ( Values fields (DPS.Only <$> tms)
, Values [QualifiedIdentifier Nothing "int4"] (DPS.Only <$> (map (\(NodeId n) -> n) ls))
, cId , cId
, nodeTypeId NodeDocument , nodeTypeId NodeDocument
, ngramsTypeId nt , ngramsTypeId nt
...@@ -230,19 +236,58 @@ selectNgramsOnlyByNodeUser cId nt tms = ...@@ -230,19 +236,58 @@ selectNgramsOnlyByNodeUser cId nt tms =
queryNgramsOnlyByNodeUser :: DPS.Query queryNgramsOnlyByNodeUser :: DPS.Query
queryNgramsOnlyByNodeUser = [sql| queryNgramsOnlyByNodeUser = [sql|
WITH input_rows(terms) AS (?) WITH input_rows(terms) AS (?),
SELECT ng.terms, nng.node_id FROM nodes_ngrams nng input_list(id) AS (?)
SELECT ng.terms, nng.node2_id FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node_id JOIN input_list il ON il.id = nng.node1_id
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False AND nn.delete = False
GROUP BY nng.node_id, ng.terms GROUP BY ng.terms, nng.node2_id
|] |]
getNgramsByDocOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId))
getNgramsByDocOnlyUser cId ls nt ngs = Map.unionsWith (<>)
. map (fromListWith (<>) . map (second Set.singleton))
<$> mapM (selectNgramsOnlyByDocUser cId ls nt) (splitEvery 1000 ngs)
selectNgramsOnlyByDocUser :: DocId -> [ListId] -> NgramsType -> [Text]
-> Cmd err [(Text, NodeId)]
selectNgramsOnlyByDocUser dId ls nt tms =
runPGSQuery queryNgramsOnlyByDocUser
( Values fields (DPS.Only <$> tms)
, Values [QualifiedIdentifier Nothing "int4"] (DPS.Only <$> (map (\(NodeId n) -> n) ls))
, dId
, ngramsTypeId nt
)
where
fields = [QualifiedIdentifier Nothing "text"]
queryNgramsOnlyByDocUser :: DPS.Query
queryNgramsOnlyByDocUser = [sql|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.node2_id FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = nng.node1_id
WHERE nng.node2_id = ? -- DocId
AND nng.ngrams_type = ? -- NgramsTypeId
GROUP BY ng.terms, nng.node2_id
|]
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO filter by language, database, any social field -- | TODO filter by language, database, any social field
getNodesByNgramsMaster :: UserCorpusId -> MasterCorpusId -> Cmd err (Map Text (Set NodeId)) getNodesByNgramsMaster :: UserCorpusId -> MasterCorpusId -> Cmd err (Map Text (Set NodeId))
...@@ -272,6 +317,7 @@ selectNgramsByNodeMaster n ucId mcId p = runPGSQuery ...@@ -272,6 +317,7 @@ selectNgramsByNodeMaster n ucId mcId p = runPGSQuery
, ngramsTypeId NgramsTerms , ngramsTypeId NgramsTerms
) )
-- | TODO fix node_node_ngrams relation
queryNgramsByNodeMaster' :: DPS.Query queryNgramsByNodeMaster' :: DPS.Query
queryNgramsByNodeMaster' = [sql| queryNgramsByNodeMaster' = [sql|
...@@ -279,7 +325,7 @@ WITH nodesByNgramsUser AS ( ...@@ -279,7 +325,7 @@ WITH nodesByNgramsUser AS (
SELECT n.id, ng.terms FROM nodes n SELECT n.id, ng.terms FROM nodes n
JOIN nodes_nodes nn ON n.id = nn.node2_id JOIN nodes_nodes nn ON n.id = nn.node2_id
JOIN nodes_ngrams nng ON nn.node2_id = n.id JOIN node_node_ngrams nng ON nng.node2_id = n.id
JOIN ngrams ng ON nng.ngrams_id = ng.id JOIN ngrams ng ON nng.ngrams_id = ng.id
WHERE nn.node1_id = ? -- UserCorpusId WHERE nn.node1_id = ? -- UserCorpusId
-- AND n.typename = ? -- NodeTypeId -- AND n.typename = ? -- NodeTypeId
...@@ -294,7 +340,7 @@ SELECT n.id, ng.terms FROM nodes n ...@@ -294,7 +340,7 @@ SELECT n.id, ng.terms FROM nodes n
nodesByNgramsMaster AS ( nodesByNgramsMaster AS (
SELECT n.id, ng.terms FROM nodes n TABLESAMPLE SYSTEM_ROWS(?) SELECT n.id, ng.terms FROM nodes n TABLESAMPLE SYSTEM_ROWS(?)
JOIN nodes_ngrams nng ON n.id = nng.node_id JOIN node_node_ngrams nng ON n.id = nng.node2_id
JOIN ngrams ng ON ng.id = nng.ngrams_id JOIN ngrams ng ON ng.id = nng.ngrams_id
WHERE n.parent_id = ? -- Master Corpus NodeTypeId WHERE n.parent_id = ? -- Master Corpus NodeTypeId
......
{-|
Module : Gargantext.Database.Ngrams
Description : Deal with ngrams in the Gargantext database.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.Database.Ngrams
where
import Data.Text (Text)
import Gargantext.Core.Types
import Gargantext.Database.Utils (runOpaQuery, Cmd)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.NodeNodeNgrams
import Gargantext.Database.Schema.Node
import Gargantext.Prelude
import Opaleye
import Control.Arrow (returnA)
-- | Terms of the ngrams attached (via node_node_ngrams) to document
-- @dId@ inside one of the corpora @cIds@, for ngrams type @nt@.
-- With an empty @cIds@ the disjunction is False, so no row matches.
selectNgramsByDoc :: [CorpusId] -> DocumentId -> NgramsType -> Cmd err [Text]
selectNgramsByDoc cIds dId nt = runOpaQuery (query cIds dId nt)
  where
    join :: Query (NgramsRead, NodeNodeNgramsReadNull)
    join = leftJoin queryNgramsTable queryNodeNodeNgramsTable on1
      where
        on1 (ng,nnng) = ngrams_id ng .== nnng_ngrams_id nnng

    query cIds' dId' nt' = proc () -> do
      (ng,nnng) <- join -< ()
      -- Keep rows whose corpus (node1) is one of cIds'.  BUGFIX: the
      -- fold is a disjunction over corpus ids and must be seeded with
      -- False; seeding with True accepted every row, disabling the
      -- corpus filter entirely.
      restrict -< foldl (\b cId -> ((toNullable $ pgNodeId cId) .== nnng_node1_id nnng) .|| b) (pgBool False) cIds'
      restrict -< (toNullable $ pgNodeId dId') .== nnng_node2_id nnng
      restrict -< (toNullable $ pgNgramsType nt') .== nnng_ngramsType nnng
      returnA -< ngrams_terms ng
-- | TODO: not implemented.  Calling this raises a runtime error
-- ('undefined'); implement before wiring into any API route.
postNgrams :: CorpusId -> DocumentId -> [Text] -> Cmd err Int
postNgrams = undefined
...@@ -63,6 +63,7 @@ import Control.Lens.Prism ...@@ -63,6 +63,7 @@ import Control.Lens.Prism
import Control.Lens.Cons import Control.Lens.Cons
import Data.Aeson (toJSON) import Data.Aeson (toJSON)
import Data.Maybe (maybe) import Data.Maybe (maybe)
import Data.Time.Segment (jour)
import Data.Text (Text) import Data.Text (Text)
import Database.PostgreSQL.Simple (FromRow, Query, Only(..)) import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
import Database.PostgreSQL.Simple.FromRow (fromRow, field) import Database.PostgreSQL.Simple.FromRow (fromRow, field)
...@@ -120,6 +121,7 @@ instance InsertDb HyperdataDocument ...@@ -120,6 +121,7 @@ instance InsertDb HyperdataDocument
, toField u , toField u
, toField p , toField p
, toField $ maybe "No Title" (DT.take 255) (_hyperdataDocument_title h) , toField $ maybe "No Title" (DT.take 255) (_hyperdataDocument_title h)
, toField $ _hyperdataDocument_publication_date h -- TODO USE UTCTime
, (toField . toJSON) h , (toField . toJSON) h
] ]
...@@ -129,6 +131,7 @@ instance InsertDb HyperdataContact ...@@ -129,6 +131,7 @@ instance InsertDb HyperdataContact
, toField u , toField u
, toField p , toField p
, toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h) , toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
, toField $ jour 2010 1 1 -- TODO put default date
, (toField . toJSON) h , (toField . toJSON) h
] ]
...@@ -147,14 +150,14 @@ insertDocuments_Debug uId pId hs = formatPGSQuery queryInsert (Only $ Values fie ...@@ -147,14 +150,14 @@ insertDocuments_Debug uId pId hs = formatPGSQuery queryInsert (Only $ Values fie
-- | Input Tables: types of the tables -- | Input Tables: types of the tables
inputSqlTypes :: [Text] inputSqlTypes :: [Text]
inputSqlTypes = map DT.pack ["int4","int4","int4","text","jsonb"] inputSqlTypes = map DT.pack ["int4","int4","int4","text","date","jsonb"]
-- | SQL query to insert documents inside the database -- | SQL query to insert documents inside the database
queryInsert :: Query queryInsert :: Query
queryInsert = [sql| queryInsert = [sql|
WITH input_rows(typename,user_id,parent_id,name,hyperdata) AS (?) WITH input_rows(typename,user_id,parent_id,name,date,hyperdata) AS (?)
, ins AS ( , ins AS (
INSERT INTO nodes (typename,user_id,parent_id,name,hyperdata) INSERT INTO nodes (typename,user_id,parent_id,name,date,hyperdata)
SELECT * FROM input_rows SELECT * FROM input_rows
ON CONFLICT ((hyperdata ->> 'uniqIdBdd')) DO NOTHING -- on unique index ON CONFLICT ((hyperdata ->> 'uniqIdBdd')) DO NOTHING -- on unique index
-- ON CONFLICT (typename, parent_id, (hyperdata ->> 'uniqId')) DO NOTHING -- on unique index -- ON CONFLICT (typename, parent_id, (hyperdata ->> 'uniqId')) DO NOTHING -- on unique index
...@@ -180,10 +183,10 @@ queryInsert = [sql| ...@@ -180,10 +183,10 @@ queryInsert = [sql|
-- | When documents are inserted -- | When documents are inserted
-- ReturnType after insertion -- ReturnType after insertion
data ReturnId = ReturnId { reInserted :: Bool -- ^ if the document is inserted (True: is new, False: is not new) data ReturnId = ReturnId { reInserted :: Bool -- if the document is inserted (True: is new, False: is not new)
, reId :: NodeId -- ^ always return the id of the document (even new or not new) , reId :: NodeId -- always return the id of the document (even new or not new)
-- this is the uniq id in the database -- this is the uniq id in the database
, reUniqId :: Text -- ^ Hash Id with concatenation of hash parameters , reUniqId :: Text -- Hash Id with concatenation of hash parameters
} deriving (Show, Generic) } deriving (Show, Generic)
instance FromRow ReturnId where instance FromRow ReturnId where
......
{-|
Module : Gargantext.Database.Node.Select
Description : Main requests of Node to the database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.Database.Node.Select where
import Opaleye
import Gargantext.Core.Types
import Gargantext.Database.Schema.Node
import Gargantext.Database.Utils
import Gargantext.Database.Config
import Gargantext.Database.Schema.User
import Gargantext.Core.Types.Individu (Username)
import Control.Arrow (returnA)
-- | Ids of the nodes of type @nt@ owned by user @u@ (matched by
-- username through a left join on auth_user).
selectNodesWithUsername :: NodeType -> Username -> Cmd err [NodeId]
selectNodesWithUsername nt u = runOpaQuery (q u)
where
q u' = proc () -> do
(n,usrs) <- join -< ()
-- Only nodes whose (possibly NULL after the left join) user matches.
restrict -< user_username usrs .== (toNullable $ pgStrictText u')
-- Only nodes of the requested type.
restrict -< _node_typename n .== (pgInt4 $ nodeTypeId nt)
returnA -< _node_id n
join :: Query (NodeRead, UserReadNull)
join = leftJoin queryNodeTable queryUserTable on1
where
-- Link each node to its owning user row.
on1 (n,us) = _node_userId n .== user_id us
This diff is collapsed.
...@@ -276,6 +276,8 @@ selectNode id = proc () -> do ...@@ -276,6 +276,8 @@ selectNode id = proc () -> do
restrict -< _node_id row .== id restrict -< _node_id row .== id
returnA -< row returnA -< row
runGetNodes :: Query NodeRead -> Cmd err [NodeAny] runGetNodes :: Query NodeRead -> Cmd err [NodeAny]
runGetNodes = runOpaQuery runGetNodes = runOpaQuery
...@@ -306,7 +308,6 @@ selectNodesWith' parentId maybeNodeType = proc () -> do ...@@ -306,7 +308,6 @@ selectNodesWith' parentId maybeNodeType = proc () -> do
returnA -< row ) -< () returnA -< row ) -< ()
returnA -< node returnA -< node
deleteNode :: NodeId -> Cmd err Int deleteNode :: NodeId -> Cmd err Int
deleteNode n = mkCmd $ \conn -> deleteNode n = mkCmd $ \conn ->
fromIntegral <$> runDelete conn nodeTable fromIntegral <$> runDelete conn nodeTable
...@@ -593,7 +594,6 @@ defaultList cId = ...@@ -593,7 +594,6 @@ defaultList cId =
mkList :: HasNodeError err => ParentId -> UserId -> Cmd err [NodeId] mkList :: HasNodeError err => ParentId -> UserId -> Cmd err [NodeId]
mkList p u = insertNodesR [nodeListW Nothing Nothing p u] mkList p u = insertNodesR [nodeListW Nothing Nothing p u]
mkGraph :: ParentId -> UserId -> Cmd err [GraphId] mkGraph :: ParentId -> UserId -> Cmd err [GraphId]
mkGraph p u = insertNodesR [nodeGraphW Nothing Nothing p u] mkGraph p u = insertNodesR [nodeGraphW Nothing Nothing p u]
......
...@@ -116,6 +116,7 @@ nodeNgramTable = Table "nodes_ngrams" ...@@ -116,6 +116,7 @@ nodeNgramTable = Table "nodes_ngrams"
queryNodeNgramTable :: Query NodeNgramRead queryNodeNgramTable :: Query NodeNgramRead
queryNodeNgramTable = queryTable nodeNgramTable queryNodeNgramTable = queryTable nodeNgramTable
--{-
insertNodeNgrams :: [NodeNgram] -> Cmd err Int insertNodeNgrams :: [NodeNgram] -> Cmd err Int
insertNodeNgrams = insertNodeNgramW insertNodeNgrams = insertNodeNgramW
. map (\(NodeNgram n g p ngt lt w) -> . map (\(NodeNgram n g p ngt lt w) ->
...@@ -126,7 +127,6 @@ insertNodeNgrams = insertNodeNgramW ...@@ -126,7 +127,6 @@ insertNodeNgrams = insertNodeNgramW
(pgInt4 lt) (pgInt4 lt)
(pgDouble w) (pgDouble w)
) )
insertNodeNgramW :: [NodeNgramWrite] -> Cmd err Int insertNodeNgramW :: [NodeNgramWrite] -> Cmd err Int
insertNodeNgramW nns = insertNodeNgramW nns =
mkCmd $ \c -> fromIntegral <$> runInsert_ c insertNothing mkCmd $ \c -> fromIntegral <$> runInsert_ c insertNothing
...@@ -136,7 +136,7 @@ insertNodeNgramW nns = ...@@ -136,7 +136,7 @@ insertNodeNgramW nns =
, iReturning = rCount , iReturning = rCount
, iOnConflict = (Just DoNothing) , iOnConflict = (Just DoNothing)
}) })
--}
type NgramsText = Text type NgramsText = Text
updateNodeNgrams' :: ListId -> [(NgramsTypeId, NgramsText, ListTypeId)] -> Cmd err () updateNodeNgrams' :: ListId -> [(NgramsTypeId, NgramsText, ListTypeId)] -> Cmd err ()
...@@ -166,138 +166,3 @@ ON CONFLICT (node_id, ngrams_id, ngrams_type) DO ...@@ -166,138 +166,3 @@ ON CONFLICT (node_id, ngrams_id, ngrams_type) DO
UPDATE SET list_type = excluded.list_type UPDATE SET list_type = excluded.list_type
; ;
|] |]
-- | Grouping operation on ngrams: add or remove a parent/child link.
data Action = Del | Add
-- Parent/child terms of a grouping, both plain ngrams text.
type NgramsParent = Text
type NgramsChild = Text
{-
ngramsGroup :: Action -> ListId -> [(NgramsTypeId, NgramsParent, NgramsChild)] -> Cmd err ()
ngramsGroup _ _ [] = pure ()
ngramsGroup a lid input = void $ trace (show input) $ execPGSQuery (ngramsGroupQuery a) (DPS.Only $ Values fields input')
where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4","text","text"]
input' = map (\(ntpid,p,c) -> (lid, nodeTypeId NodeList, userMaster, ntpid, p,c)) input
-}
-- | SQL for grouping ngrams inside a user list.  'Add' links a child
-- term under a parent term; 'Del' resets the parent link to NULL.
-- Both variants first import the parent row from the master list into
-- the user list (the list_user CTE) so the upsert on
-- (node_id, ngrams_id, ngrams_type) has a row to target.
-- Input tuple shape:
-- (lid, listTypeId, masterUsername, ntype, parent_terms, child_terms).
-- NOTE(review): the final WHERE of each branch uses a bare "ntype"
-- (not "input.ntype"); it resolves to the input CTE column only while
-- no joined table carries a column of that name -- confirm intended.
ngramsGroupQuery :: Action -> DPS.Query
ngramsGroupQuery a = case a of
-- Attach child under parent within the user list.
Add -> [sql|
WITH input(lid,listTypeId,masterUsername,ntype,parent_terms,child_terms)
AS (?),
-- (VALUES (15::"int4",5::"int4",'gargantua'::"text",4::"int4",'input'::"text",'designer'::"text"))),
list_master AS (
SELECT n.id from nodes n
JOIN auth_user u ON n.user_id = u.id
JOIN input ON n.typename = input.listTypeId
WHERE u.username = input.masterUsername
LIMIT 1
),
list_user AS(
-- FIRST import parent from master to user list
INSERT INTO nodes_ngrams (node_id,ngrams_id,parent_id,ngrams_type,list_type,weight)
SELECT input.lid,nn.ngrams_id,nn.parent_id,nn.ngrams_type,nn.list_type, nn.weight
FROM INPUT
JOIN ngrams ng ON ng.terms = input.parent_terms
JOIN nodes_ngrams nn ON nn.ngrams_id = ng.id
JOIN list_master ON nn.node_id = list_master.id
WHERE
nn.ngrams_id = ng.id
ON CONFLICT (node_id,ngrams_id,ngrams_type) DO NOTHING
)
INSERT INTO nodes_ngrams (node_id,ngrams_id,parent_id,ngrams_type,list_type,weight)
SELECT input.lid, nc.id, nnpu.id, input.ntype, nnmaster.list_type, nnmaster.weight
FROM input
JOIN ngrams np ON np.terms = input.parent_terms
JOIN ngrams nc ON nc.terms = input.child_terms
JOIN nodes_ngrams nnpu ON nnpu.ngrams_id = np.id
JOIN nodes_ngrams nnmaster ON nnmaster.ngrams_id = nc.id
JOIN list_master ON nnmaster.node_id = list_master.id
WHERE
nnpu.node_id = input.lid
AND nnpu.ngrams_type = input.ntype
AND nnmaster.ngrams_id = nc.id
AND nnmaster.ngrams_type = ntype
ON CONFLICT (node_id,ngrams_id,ngrams_type) DO
UPDATE SET parent_id = excluded.parent_id
|]
-- Detach: on conflict the parent_id is reset to NULL.
Del -> [sql|
WITH input(lid,listTypeId,masterUsername,ntype,parent_terms,child_terms)
AS (?),
-- (VALUES (15::"int4",5::"int4",'gargantua'::"text",4::"int4",'input'::"text",'designer'::"text"))),
list_master AS (
SELECT n.id from nodes n
JOIN auth_user u ON n.user_id = u.id
JOIN input ON n.typename = input.listTypeId
WHERE u.username = input.masterUsername
LIMIT 1
),
list_user AS(
-- FIRST import parent from master to user list
INSERT INTO nodes_ngrams (node_id,ngrams_id,parent_id,ngrams_type,list_type,weight)
SELECT input.lid,nn.ngrams_id,nn.parent_id,nn.ngrams_type,nn.list_type, nn.weight
FROM INPUT
JOIN ngrams ng ON ng.terms = input.parent_terms
JOIN nodes_ngrams nn ON nn.ngrams_id = ng.id
JOIN list_master ON nn.node_id = list_master.id
WHERE
nn.ngrams_id = ng.id
ON CONFLICT (node_id,ngrams_id,ngrams_type) DO NOTHING
)
INSERT INTO nodes_ngrams (node_id,ngrams_id,parent_id,ngrams_type,list_type,weight)
SELECT input.lid, nc.id, NULL, input.ntype, nnmaster.list_type, nnmaster.weight
FROM input
JOIN ngrams np ON np.terms = input.parent_terms
JOIN ngrams nc ON nc.terms = input.child_terms
JOIN nodes_ngrams nnmaster ON nnmaster.ngrams_id = nc.id
JOIN list_master ON nnmaster.node_id = list_master.id
WHERE
nnmaster.ngrams_id = nc.id
AND nnmaster.ngrams_type = ntype
ON CONFLICT (node_id,ngrams_id,ngrams_type) DO
UPDATE SET parent_id = NULL
|]
-- | Batch of updates to apply to a user's ngrams list in one go.
data NodeNgramsUpdate = NodeNgramsUpdate
{ _nnu_user_list_id :: ListId -- ^ the user list to update
, _nnu_lists_update :: [(NgramsTypeId, NgramsText, ListTypeId)] -- ^ list-type reclassifications
, _nnu_add_children :: [(NgramsTypeId, NgramsParent, NgramsChild)] -- ^ groupings to add
, _nnu_rem_children :: [(NgramsTypeId, NgramsParent, NgramsChild)] -- ^ groupings to remove
}
-- TODO wrap these updates in a transaction.
-- TODO-ACCESS:
-- * check userId CanUpdateNgrams userListId
{-
updateNodeNgrams :: NodeNgramsUpdate -> Cmd err ()
updateNodeNgrams nnu = do
updateNodeNgrams' userListId $ _nnu_lists_update nnu
ngramsGroup Del userListId $ _nnu_rem_children nnu
ngramsGroup Add userListId $ _nnu_add_children nnu
-- TODO remove duplicate line (fix SQL query)
ngramsGroup Add userListId $ _nnu_add_children nnu
where
userListId = _nnu_user_list_id nnu
-}
...@@ -9,14 +9,15 @@ Portability : POSIX ...@@ -9,14 +9,15 @@ Portability : POSIX
-} -}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NodeNodeNgrams module Gargantext.Database.Schema.NodeNodeNgrams
where where
...@@ -24,61 +25,92 @@ module Gargantext.Database.Schema.NodeNodeNgrams ...@@ -24,61 +25,92 @@ module Gargantext.Database.Schema.NodeNodeNgrams
import Prelude import Prelude
import Data.Maybe (Maybe) import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance) import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields) --import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Gargantext.Database.Utils (Cmd, runOpaQuery) import Gargantext.Database.Utils (Cmd, mkCmd)
import Gargantext.Database.Schema.Ngrams (NgramsTypeId, pgNgramsTypeId, NgramsId)
import Gargantext.Database.Schema.Node (pgNodeId)
import Gargantext.Database.Types.Node
import Opaleye import Opaleye
data NodeNodeNgramsPoly node1_id node2_id ngram_id score
= NodeNodeNgrams { nnng_node1_id :: node1_id data NodeNodeNgramsPoly id' n1 n2 ngrams_id ngt w
, nnng_node2_id :: node2_id = NodeNodeNgrams { nnng_id :: id'
, nnng_ngrams_id :: ngram_id , nnng_node1_id :: n1
, nnng_score :: score , nnng_node2_id :: n2
, nnng_ngrams_id :: ngrams_id
, nnng_ngramsType :: ngt
, nnng_weight :: w
} deriving (Show) } deriving (Show)
type NodeNodeNgramsWrite = NodeNodeNgramsPoly (Column PGInt4 ) type NodeNodeNgramsWrite =
NodeNodeNgramsPoly (Maybe (Column PGInt4 ))
(Column PGInt4 ) (Column PGInt4 )
(Column PGInt4 ) (Column PGInt4 )
(Maybe (Column PGFloat8)) (Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
type NodeNodeNgramsRead = NodeNodeNgramsPoly (Column PGInt4 ) type NodeNodeNgramsRead =
NodeNodeNgramsPoly (Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 ) (Column PGInt4 )
(Column PGInt4 ) (Column PGInt4 )
(Column PGFloat8) (Column PGFloat8)
type NodeNodeNgramsReadNull = NodeNodeNgramsPoly (Column (Nullable PGInt4 )) type NodeNodeNgramsReadNull =
NodeNodeNgramsPoly (Column (Nullable PGInt4 ))
(Column (Nullable PGInt4 ))
(Column (Nullable PGInt4 ))
(Column (Nullable PGInt4 )) (Column (Nullable PGInt4 ))
(Column (Nullable PGInt4 )) (Column (Nullable PGInt4 ))
(Column (Nullable PGFloat8)) (Column (Nullable PGFloat8))
type NodeNodeNgrams = NodeNodeNgramsPoly Int type NodeNodeNgrams =
Int NodeNodeNgramsPoly (Maybe Int) CorpusId DocId NgramsId NgramsTypeId Double
Int
(Maybe Double)
--{-
$(makeAdaptorAndInstance "pNodeNodeNgrams" ''NodeNodeNgramsPoly) $(makeAdaptorAndInstance "pNodeNodeNgrams" ''NodeNodeNgramsPoly)
$(makeLensesWith abbreviatedFields ''NodeNodeNgramsPoly) -- $(makeLensesWith abbreviatedFields ''NodeNodeNgramsPoly)
nodeNodeNgramsTable :: Table NodeNodeNgramsWrite NodeNodeNgramsRead nodeNodeNgramsTable :: Table NodeNodeNgramsWrite NodeNodeNgramsRead
nodeNodeNgramsTable = Table "nodes_nodes_ngrams" nodeNodeNgramsTable = Table "node_node_ngrams"
( pNodeNodeNgrams NodeNodeNgrams ( pNodeNodeNgrams NodeNodeNgrams
{ nnng_node1_id = required "node1_id" { nnng_id = optional "id"
, nnng_node1_id = required "node1_id"
, nnng_node2_id = required "node2_id" , nnng_node2_id = required "node2_id"
, nnng_ngrams_id = required "ngram_id" , nnng_ngrams_id = required "ngrams_id"
, nnng_score = optional "score" , nnng_ngramsType = required "ngrams_type"
, nnng_weight = required "weight"
} }
) )
queryNodeNodeNgramsTable :: Query NodeNodeNgramsRead queryNodeNodeNgramsTable :: Query NodeNodeNgramsRead
queryNodeNodeNgramsTable = queryTable nodeNodeNgramsTable queryNodeNodeNgramsTable = queryTable nodeNodeNgramsTable
-- | not optimized (get all ngrams without filters)
nodeNodeNgrams :: Cmd err [NodeNodeNgrams]
nodeNodeNgrams = runOpaQuery queryNodeNodeNgramsTable
instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where -- | Insert utils
queryRunnerColumnDefault = fieldQueryRunnerColumn insertNodeNodeNgrams :: [NodeNodeNgrams] -> Cmd err Int
insertNodeNodeNgrams = insertNodeNodeNgramsW
. map (\(NodeNodeNgrams id'' n1 n2 ng nt w) ->
NodeNodeNgrams (pgInt4 <$> id'')
(pgNodeId n1)
(pgNodeId n2)
(pgInt4 ng)
(pgNgramsTypeId nt)
(pgDouble w)
)
insertNodeNodeNgramsW :: [NodeNodeNgramsWrite] -> Cmd err Int
insertNodeNodeNgramsW nnnw =
mkCmd $ \c -> fromIntegral <$> runInsert_ c insertNothing
where
insertNothing = (Insert { iTable = nodeNodeNgramsTable
, iRows = nnnw
, iReturning = rCount
, iOnConflict = (Just DoNothing)
})
...@@ -43,7 +43,6 @@ import Opaleye ...@@ -43,7 +43,6 @@ import Opaleye
------------------------------------------------------------------------ ------------------------------------------------------------------------
type UserId = Int type UserId = Int
data UserLight = UserLight { userLight_id :: Int data UserLight = UserLight { userLight_id :: Int
, userLight_username :: Text , userLight_username :: Text
, userLight_email :: Text , userLight_email :: Text
...@@ -83,6 +82,16 @@ type UserRead = UserPoly (Column PGInt4) (Column PGText) ...@@ -83,6 +82,16 @@ type UserRead = UserPoly (Column PGInt4) (Column PGText)
(Column PGBool) (Column PGBool) (Column PGBool) (Column PGBool)
(Column PGTimestamptz) (Column PGTimestamptz)
type UserReadNull = UserPoly (Column (Nullable PGInt4)) (Column (Nullable PGText))
(Column (Nullable PGTimestamptz)) (Column (Nullable PGBool))
(Column (Nullable PGText)) (Column (Nullable PGText))
(Column (Nullable PGText)) (Column (Nullable PGText))
(Column (Nullable PGBool)) (Column (Nullable PGBool))
(Column (Nullable PGTimestamptz))
type User = UserPoly Int Text (Maybe UTCTime) Bool Text Text Text Text Bool Bool UTCTime type User = UserPoly Int Text (Maybe UTCTime) Bool Text Text Text Text Bool Bool UTCTime
$(makeAdaptorAndInstance "pUser" ''UserPoly) $(makeAdaptorAndInstance "pUser" ''UserPoly)
......
...@@ -49,36 +49,26 @@ CREATE TABLE public.ngrams ( ...@@ -49,36 +49,26 @@ CREATE TABLE public.ngrams (
); );
ALTER TABLE public.ngrams OWNER TO gargantua; ALTER TABLE public.ngrams OWNER TO gargantua;
--------------------------------------------------------------
-------------------------------------------------------------- --------------------------------------------------------------
-- TODO: delete delete this table -- TODO: delete delete this table
CREATE TABLE public.nodes_ngrams ( --CREATE TABLE public.nodes_ngrams (
id SERIAL, -- id SERIAL,
node_id integer NOT NULL, -- node_id integer NOT NULL,
ngrams_id integer NOT NULL, -- ngrams_id integer NOT NULL,
parent_id integer REFERENCES public.nodes_ngrams(id) ON DELETE SET NULL, -- parent_id integer REFERENCES public.nodes_ngrams(id) ON DELETE SET NULL,
ngrams_type integer, -- ngrams_type integer,
list_type integer, -- list_type integer,
weight double precision, -- weight double precision,
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE, -- FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE, -- FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE,
PRIMARY KEY (id) -- PRIMARY KEY (id)
-- PRIMARY KEY (node_id,ngrams_id) --);
); --ALTER TABLE public.nodes_ngrams OWNER TO gargantua;
ALTER TABLE public.nodes_ngrams OWNER TO gargantua;
-------------------------------------------------------------- --------------------------------------------------------------
CREATE TABLE public.nodes_ngrams_repo (
version integer NOT NULL,
patches jsonb DEFAULT '{}'::jsonb NOT NULL,
PRIMARY KEY (version)
);
ALTER TABLE public.nodes_ngrams_repo OWNER TO gargantua;
-------------------------------------------------------------- --------------------------------------------------------------
-- --
-- --
-- TODO: delete delete this table
--CREATE TABLE public.nodes_ngrams_ngrams ( --CREATE TABLE public.nodes_ngrams_ngrams (
-- node_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE, -- node_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
-- ngram1_id integer NOT NULL REFERENCES public.ngrams(id) ON DELETE CASCADE, -- ngram1_id integer NOT NULL REFERENCES public.ngrams(id) ON DELETE CASCADE,
...@@ -89,16 +79,38 @@ ALTER TABLE public.nodes_ngrams_repo OWNER TO gargantua; ...@@ -89,16 +79,38 @@ ALTER TABLE public.nodes_ngrams_repo OWNER TO gargantua;
-- --
--ALTER TABLE public.nodes_ngrams_ngrams OWNER TO gargantua; --ALTER TABLE public.nodes_ngrams_ngrams OWNER TO gargantua;
--------------------------------------------------------- ---------------------------------------------------------------
CREATE TABLE public.nodes_nodes ( CREATE TABLE public.nodes_nodes (
node1_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE, node1_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
node2_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE, node2_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
score real, score real,
favorite boolean, favorite boolean,
delete boolean, delete boolean,
PRIMARY KEY (node1_id, node2_id) PRIMARY KEY (node1_id,node2_id)
); );
ALTER TABLE public.nodes_nodes OWNER TO gargantua; ALTER TABLE public.nodes_nodes OWNER TO gargantua;
---------------------------------------------------------------
-- TODO should reference "id" of nodes_nodes (instead of node1_id, node2_id)
CREATE TABLE public.node_node_ngrams (
id SERIAL,
node1_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
node2_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
ngrams_type INTEGER,
weight double precision,
PRIMARY KEY (id)
);
ALTER TABLE public.node_node_ngrams OWNER TO gargantua;
--------------------------------------------------------------
--CREATE TABLE public.nodes_ngrams_repo (
-- version integer NOT NULL,
-- patches jsonb DEFAULT '{}'::jsonb NOT NULL,
-- PRIMARY KEY (version)
--);
--ALTER TABLE public.nodes_ngrams_repo OWNER TO gargantua;
--------------------------------------------------------- ---------------------------------------------------------
-- If needed for rights management at row level -- If needed for rights management at row level
...@@ -113,7 +125,6 @@ CREATE TABLE public.rights ( ...@@ -113,7 +125,6 @@ CREATE TABLE public.rights (
ALTER TABLE public.rights OWNER TO gargantua; ALTER TABLE public.rights OWNER TO gargantua;
------------------------------------------------------------ ------------------------------------------------------------
-- INDEXES -- INDEXES
...@@ -130,14 +141,10 @@ CREATE UNIQUE INDEX ON public.nodes USING btree (typename, parent_id, ((hyperdat ...@@ -130,14 +141,10 @@ CREATE UNIQUE INDEX ON public.nodes USING btree (typename, parent_id, ((hyperdat
CREATE UNIQUE INDEX ON public.ngrams (terms); -- TEST GIN CREATE UNIQUE INDEX ON public.ngrams (terms); -- TEST GIN
CREATE INDEX ON public.nodes_ngrams USING btree (ngrams_id);
CREATE UNIQUE INDEX ON public.nodes_ngrams USING btree (node_id,ngrams_id);
CREATE UNIQUE INDEX ON public.nodes_ngrams USING btree (node_id,ngrams_id,ngrams_type);
CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, delete); CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, delete);
CREATE UNIQUE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id); CREATE UNIQUE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id);
--CREATE INDEX ON public.nodes_nodes_ngrams USING btree (node1_id,nod2_id); CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type);
-- TRIGGERS -- TRIGGERS
-- TODO user haskell-postgresql-simple to create this function -- TODO user haskell-postgresql-simple to create this function
......
...@@ -129,23 +129,6 @@ joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNgramTable queryNgr ...@@ -129,23 +129,6 @@ joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNgramTable queryNgr
cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn_node2_id nn cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn_node2_id nn
{-
queryGraphCorpusAuthors' :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, NgramsReadNull)))
queryGraphCorpusAuthors' = leftJoin4 queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34
where
cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
cond23 (ng2, (nng2, _)) = nodeNgram_ngrams_id nng2 .== ngrams_id ng2
cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nodeNgram_ngrams_id nng
cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
cond45 (nn, (nng, (_,(_,_)))) = nodeNgram_node_id nng .== nodeNode_node2_id nn
cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nodeNode_node2_id nn
-}
newtype TSQuery = UnsafeTSQuery [Text] newtype TSQuery = UnsafeTSQuery [Text]
......
...@@ -65,6 +65,7 @@ newtype NodeId = NodeId Int ...@@ -65,6 +65,7 @@ newtype NodeId = NodeId Int
instance ToField NodeId where instance ToField NodeId where
toField (NodeId n) = toField n toField (NodeId n) = toField n
instance FromField NodeId where instance FromField NodeId where
fromField field mdata = do fromField field mdata = do
n <- fromField field mdata n <- fromField field mdata
...@@ -428,13 +429,16 @@ data NodeType = NodeUser ...@@ -428,13 +429,16 @@ data NodeType = NodeUser
| NodeFolder | NodeFolder
| NodeCorpus | NodeCorpusV3 | NodeDocument | NodeCorpus | NodeCorpusV3 | NodeDocument
| NodeAnnuaire | NodeContact | NodeAnnuaire | NodeContact
-- | NodeOccurrences
| NodeGraph | NodeGraph
| NodeDashboard | NodeChart | NodeDashboard | NodeChart
-- | Classification | NodeList | NodeListModel deriving (Show, Read, Eq, Generic, Bounded, Enum)
| NodeList | NodeListModel
{-
-- | Metrics -- | Metrics
deriving (Show, Read, Eq, Generic, Bounded, Enum) -- | NodeOccurrences
-- | Classification
-}
allNodeTypes :: [NodeType] allNodeTypes :: [NodeType]
allNodeTypes = [minBound ..] allNodeTypes = [minBound ..]
......
...@@ -101,7 +101,7 @@ mapIdSchool :: Map Text Text ...@@ -101,7 +101,7 @@ mapIdSchool :: Map Text Text
mapIdSchool = M.fromList $ Gargantext.Prelude.map (\(School n _ i) -> (i,n)) schools mapIdSchool = M.fromList $ Gargantext.Prelude.map (\(School n _ i) -> (i,n)) schools
hal_data :: IO (DV.Vector CsvHal) hal_data :: IO (DV.Vector CsvHal)
hal_data = snd <$> CSV.readHal "doc/corpus_imt/Gargantext_Corpus.csv" hal_data = snd <$> CSV.readCsvHal "doc/corpus_imt/Gargantext_Corpus.csv"
names :: S.Set Text names :: S.Set Text
names = S.fromList $ Gargantext.Prelude.map (\s -> school_id s) schools names = S.fromList $ Gargantext.Prelude.map (\s -> school_id s) schools
......
...@@ -13,24 +13,86 @@ Text gathers terms in unit of contexts. ...@@ -13,24 +13,86 @@ Text gathers terms in unit of contexts.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Gargantext.Text module Gargantext.Text
where where
import Data.Text (Text, split) import Data.Text (Text, split)
import Gargantext.Prelude hiding (filter)
import NLP.FullStop (segment)
import qualified Data.Text as DT import qualified Data.Text as DT
import NLP.FullStop (segment)
----------------------------------------------------------------- -----------------------------------------------------------------
import Gargantext.Core -- | Why not use data ?
import Gargantext.Prelude hiding (filter) data Niveau = NiveauTexte Texte
| NiveauParagraphe Paragraphe
| NiveauPhrase Phrase
| NiveauMultiTerme MultiTerme
| NiveauMot Mot
| NiveauLettre Lettre
deriving (Show)
-- | Why use newtype ?
newtype Texte = Texte Text
newtype Paragraphe = Paragraphe Text
newtype Phrase = Phrase Text
newtype MultiTerme = MultiTerme Text
newtype Mot = Mot Text
newtype Lettre = Lettre Text
-- | Type syn seems obvious
type Titre = Phrase
----------------------------------------------------------------- -----------------------------------------------------------------
type Config = Lang -> Context instance Show Texte where
type Context = Text -> [Text] show (Texte t) = show t
data Viz = Graph | Phylo | Chart
instance Show Paragraphe where
show (Paragraphe p) = show p
instance Show Phrase where
show (Phrase p) = show p
instance Show MultiTerme where
show (MultiTerme mt) = show mt
instance Show Mot where
show (Mot t) = show t
instance Show Lettre where
show (Lettre l) = show l
----------------------------------------------------------------- -----------------------------------------------------------------
class Collage sup inf where
dec :: sup -> [inf]
inc :: [inf] -> sup
instance Collage Texte Paragraphe where
dec (Texte t) = map Paragraphe $ DT.splitOn "\n" t
inc = Texte . DT.intercalate "\n" . map (\(Paragraphe t) -> t)
instance Collage Paragraphe Phrase where
dec (Paragraphe t) = map Phrase $ sentences t
inc = Paragraphe . DT.unwords . map (\(Phrase p) -> p)
instance Collage Phrase MultiTerme where
dec (Phrase t) = map MultiTerme $ DT.words t
inc = Phrase . DT.unwords . map (\(MultiTerme p) -> p)
instance Collage MultiTerme Mot where
dec (MultiTerme mt) = map Mot $ DT.words mt
inc = MultiTerme . DT.intercalate " " . map (\(Mot m) -> m)
-- | We could use Type Classes but we lose the Sum Type classification
toMultiTerme :: Niveau -> [MultiTerme]
toMultiTerme (NiveauTexte (Texte _t)) = undefined
toMultiTerme (NiveauPhrase p) = dec p
toMultiTerme (NiveauMultiTerme mt) = [mt]
toMultiTerme (NiveauMot _m) = undefined
toMultiTerme _ = undefined
------------------------------------------------------------------- -------------------------------------------------------------------
-- Contexts of text -- Contexts of text
sentences :: Text -> [Text] sentences :: Text -> [Text]
......
{-|
Module : Gargantext.Text.Convert
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Format Converter.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Convert (risPress2csvWrite)
where
import System.FilePath (FilePath()) -- , takeExtension)
import Gargantext.Prelude
import Gargantext.Text.Parsers.CSV (writeDocs2Csv)
import Gargantext.Text.Parsers (parseFile, FileFormat(..))
risPress2csvWrite :: FilePath -> IO ()
risPress2csvWrite f = parseFile RisPresse (f <> ".ris")
>>= \hs -> writeDocs2Csv (f <> ".csv") hs
...@@ -60,23 +60,21 @@ buildNgramsOthersList :: UserCorpusId -> (Text -> Text) -> NgramsType ...@@ -60,23 +60,21 @@ buildNgramsOthersList :: UserCorpusId -> (Text -> Text) -> NgramsType
buildNgramsOthersList uCid groupIt nt = do buildNgramsOthersList uCid groupIt nt = do
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid nt ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid nt
pure $ Map.fromList [(nt, [ mkNgramsElement t CandidateTerm Nothing (mSetFromList []) let
| (t,_ns) <- Map.toList ngs all' = Map.toList ngs
pure $ (toElements GraphTerm $ take 10 all') <> (toElements CandidateTerm $ drop 10 all')
where
toElements nType x = Map.fromList [(nt, [ mkNgramsElement t nType Nothing (mSetFromList [])
| (t,_ns) <- x
] ]
) )
] ]
-- TODO remove hard coded parameters
buildNgramsTermsList :: Lang -> Int -> Int -> StopSize -> UserCorpusId -> MasterCorpusId buildNgramsTermsList :: Lang -> Int -> Int -> StopSize -> UserCorpusId -> MasterCorpusId
-> Cmd err (Map NgramsType [NgramsElement]) -> Cmd err (Map NgramsType [NgramsElement])
buildNgramsTermsList l n m s uCid mCid = do buildNgramsTermsList l n m s uCid mCid = do
candidates <- sortTficf <$> getTficf' uCid mCid NgramsTerms (ngramsGroup l n m) candidates <- sortTficf <$> getTficf' uCid mCid NgramsTerms (ngramsGroup l n m)
--printDebug "candidate" (length candidates)
let termList = toTermList ((isStopTerm s) . fst) candidates let termList = toTermList ((isStopTerm s) . fst) candidates
--let termList = toTermList ((\_ -> False) . fst) candidates
--printDebug "termlist" (length termList)
let ngs = List.concat $ map toNgramsElement termList let ngs = List.concat $ map toNgramsElement termList
pure $ Map.fromList [(NgramsTerms, ngs)] pure $ Map.fromList [(NgramsTerms, ngs)]
...@@ -114,7 +112,7 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs ...@@ -114,7 +112,7 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
zs = drop b $ drop a ns zs = drop b $ drop a ns
a = 3 a = 3
b = 5000 b = 500
isStopTerm :: StopSize -> Text -> Bool isStopTerm :: StopSize -> Text -> Bool
isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x) isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x)
......
...@@ -22,40 +22,36 @@ please follow the types. ...@@ -22,40 +22,36 @@ please follow the types.
{-# LANGUAGE PackageImports #-} {-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers (parse, FileFormat(..), clean, parseDocs) module Gargantext.Text.Parsers (FileFormat(..), clean, parseFile)
where where
import System.FilePath (FilePath(), takeExtension) --import Data.ByteString (ByteString)
import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries) import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Monad (join) import Control.Monad (join)
import qualified Data.Time as DT import qualified Data.ByteString.Char8 as DBC
import Data.Attoparsec.ByteString (parseOnly, Parser)
import Data.Either(Either(..))
import Data.Either.Extra (partitionEithers) import Data.Either.Extra (partitionEithers)
import Data.Time (UTCTime(..))
import Data.List (concat) import Data.List (concat)
import qualified Data.Map as DM import Data.List (lookup)
import qualified Data.ByteString as DB
import Data.Ord() import Data.Ord()
import Data.String (String())
import Data.String() import Data.String()
import Data.Either(Either(..))
import Data.Attoparsec.ByteString (parseOnly, Parser)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as DT
-- Activate Async for to parse in parallel
import Control.Concurrent.Async as CCA (mapConcurrently)
import Data.Text.Encoding (decodeUtf8) import Data.Text.Encoding (decodeUtf8)
import Data.String (String()) import Data.Tuple.Extra (both, first, second)
import Data.List (lookup) import System.FilePath (FilePath(), takeExtension)
import qualified Data.ByteString as DB
------------------------------------------------------------------------ import qualified Data.Map as DM
import qualified Data.Text as DT
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Types.Node (HyperdataDocument(..)) import Gargantext.Database.Types.Node (HyperdataDocument(..))
import Gargantext.Text.Parsers.WOS (wosParser) import qualified Gargantext.Text.Parsers.WOS as WOS
import Gargantext.Text.Parsers.Date (parseDate) import qualified Gargantext.Text.Parsers.RIS as RIS
import Gargantext.Text.Parsers.RIS.Presse (presseEnrich)
import qualified Gargantext.Text.Parsers.Date as Date
import Gargantext.Text.Parsers.CSV (parseHal) import Gargantext.Text.Parsers.CSV (parseHal)
import Gargantext.Text.Terms.Stop (detectLang) import Gargantext.Text.Terms.Stop (detectLang)
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -71,7 +67,8 @@ type ParseError = String ...@@ -71,7 +67,8 @@ type ParseError = String
-- | According to the format of Input file, -- | According to the format of Input file,
-- different parser are available. -- different parser are available.
data FileFormat = WOS | CsvHalFormat-- | CsvGargV3 data FileFormat = WOS | RIS | RisPresse
| CsvGargV3 | CsvHalFormat
deriving (Show) deriving (Show)
-- Implemented (ISI Format) -- Implemented (ISI Format)
...@@ -79,42 +76,33 @@ data FileFormat = WOS | CsvHalFormat-- | CsvGargV3 ...@@ -79,42 +76,33 @@ data FileFormat = WOS | CsvHalFormat-- | CsvGargV3
-- | ODT -- Not Implemented / import Pandoc -- | ODT -- Not Implemented / import Pandoc
-- | PDF -- Not Implemented / pdftotext and import Pandoc ? -- | PDF -- Not Implemented / pdftotext and import Pandoc ?
-- | XML -- Not Implemented / see : -- | XML -- Not Implemented / see :
-- -- > http://chrisdone.com/posts/fast-haskell-c-parsing-xml
-- TODO: to debug maybe add the filepath in error message
{-
parseFormat :: FileFormat -> ByteString -> [HyperdataDocument]
parseFormat = undefined
-}
-- | Parse file into documents -- | Parse file into documents
-- TODO manage errors here -- TODO manage errors here
parseDocs :: FileFormat -> FilePath -> IO [HyperdataDocument] -- TODO: to debug maybe add the filepath in error message
parseDocs WOS path = join $ mapM (toDoc WOS) <$> snd <$> parse WOS path parseFile :: FileFormat -> FilePath -> IO [HyperdataDocument]
parseDocs CsvHalFormat p = parseHal p parseFile CsvHalFormat p = parseHal p
parseFile RisPresse p = join $ mapM (toDoc RIS) <$> snd <$> enrichWith RisPresse <$> readFileWith RIS p
type Year = Int parseFile WOS p = join $ mapM (toDoc WOS) <$> snd <$> enrichWith WOS <$> readFileWith WOS p
type Month = Int parseFile ff p = join $ mapM (toDoc ff) <$> snd <$> enrichWith ff <$> readFileWith ff p
type Day = Int
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
parseDate' :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
parseDate' _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
parseDate' l (Just txt) = do
utcTime <- parseDate l txt
let (UTCTime day _) = utcTime
let (y,m,d) = DT.toGregorian day
pure (Just utcTime, (Just (fromIntegral y), Just m,Just d))
toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument
toDoc WOS d = do -- TODO use language for RIS
toDoc ff d = do
let abstract = lookup "abstract" d let abstract = lookup "abstract" d
let lang = maybe EN identity (join $ detectLang <$> (fmap (DT.take 50) abstract)) let lang = maybe EN identity (join $ detectLang <$> (fmap (DT.take 50) abstract))
let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d
(utcTime, (pub_year, pub_month, pub_day)) <- parseDate' lang dateToParse (utcTime, (pub_year, pub_month, pub_day)) <- Date.split lang dateToParse
pure $ HyperdataDocument (Just $ DT.pack $ show WOS) pure $ HyperdataDocument (Just $ DT.pack $ show ff)
(lookup "doi" d) (lookup "doi" d)
(lookup "URL" d) (lookup "URL" d)
Nothing Nothing
...@@ -133,26 +121,35 @@ toDoc WOS d = do ...@@ -133,26 +121,35 @@ toDoc WOS d = do
Nothing Nothing
Nothing Nothing
(Just $ (DT.pack . show) lang) (Just $ (DT.pack . show) lang)
toDoc _ _ = undefined
parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]]) enrichWith :: FileFormat
parse format path = do -> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]])
enrichWith RisPresse = enrichWith' presseEnrich
enrichWith WOS = enrichWith' (map (first WOS.keys))
enrichWith _ = enrichWith' identity
enrichWith' :: ([(DB.ByteString, DB.ByteString)] -> [(DB.ByteString, DB.ByteString)])
-> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]])
enrichWith' f = second (map both' . map f . concat)
where
both' = map (both decodeUtf8)
readFileWith :: FileFormat -> FilePath
-> IO ([ParseError], [[[(DB.ByteString, DB.ByteString)]]])
readFileWith format path = do
files <- case takeExtension path of files <- case takeExtension path of
".zip" -> openZip path ".zip" -> openZip path
_ -> pure <$> DB.readFile path _ -> pure <$> clean <$> DB.readFile path
(as, bs) <- partitionEithers <$> mapConcurrently (runParser format) files partitionEithers <$> mapConcurrently (runParser format) files
pure (as, map toText $ concat bs)
where
-- TODO : decode with bayesian inference on encodings
toText = map (\(a,b) -> (decodeUtf8 a, decodeUtf8 b))
-- | withParser: -- | withParser:
-- According to the format of the text, choose the right parser. -- According to the format of the text, choose the right parser.
-- TODO withParser :: FileFormat -> Parser [Document] -- TODO withParser :: FileFormat -> Parser [Document]
withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]] withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
withParser WOS = wosParser withParser WOS = WOS.parser
--withParser DOC = docParser withParser RIS = RIS.parser
--withParser ODT = odtParser --withParser ODT = odtParser
--withParser XML = xmlParser --withParser XML = xmlParser
withParser _ = panic "[ERROR] Parser not implemented yet" withParser _ = panic "[ERROR] Parser not implemented yet"
...@@ -167,9 +164,9 @@ openZip fp = do ...@@ -167,9 +164,9 @@ openZip fp = do
bs <- mapConcurrently (\s -> withArchive fp (getEntry s)) entries bs <- mapConcurrently (\s -> withArchive fp (getEntry s)) entries
pure bs pure bs
clean :: Text -> Text clean :: DB.ByteString -> DB.ByteString
clean txt = DT.map clean' txt clean txt = DBC.map clean' txt
where where
clean' '’' = '\'' clean' '’' = '\''
clean' '\r' = ' '
clean' c = c clean' c = c
...@@ -17,25 +17,23 @@ CSV parser for Gargantext corpus files. ...@@ -17,25 +17,23 @@ CSV parser for Gargantext corpus files.
module Gargantext.Text.Parsers.CSV where module Gargantext.Text.Parsers.CSV where
import GHC.Real (round)
import GHC.IO (FilePath)
import Control.Applicative import Control.Applicative
import Data.Char (ord) import Data.Char (ord)
import Data.Csv import Data.Csv
import Data.Either (Either(Left, Right)) import Data.Either (Either(Left, Right))
import Data.Text (Text, pack, length, intercalate) import Data.Text (Text, pack, length, intercalate)
import qualified Data.ByteString.Lazy as BL
import Data.Time.Segment (jour) import Data.Time.Segment (jour)
import Data.Vector (Vector) import Data.Vector (Vector)
import qualified Data.Vector as V import GHC.IO (FilePath)
import GHC.Real (round)
import Gargantext.Database.Types.Node (HyperdataDocument(..)) import GHC.Word (Word8)
import Gargantext.Database.Types.Node -- (HyperdataDocument(..))
import Gargantext.Prelude hiding (length)
import Gargantext.Text import Gargantext.Text
import Gargantext.Text.Context import Gargantext.Text.Context
import Gargantext.Prelude hiding (length) import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString as BS
import qualified Data.Vector as V
--------------------------------------------------------------- ---------------------------------------------------------------
headerCsvGargV3 :: Header headerCsvGargV3 :: Header
...@@ -48,7 +46,7 @@ headerCsvGargV3 = header [ "title" ...@@ -48,7 +46,7 @@ headerCsvGargV3 = header [ "title"
, "authors" , "authors"
] ]
--------------------------------------------------------------- ---------------------------------------------------------------
data Doc = Doc data CsvGargV3 = CsvGargV3
{ d_docId :: !Int { d_docId :: !Int
, d_title :: !Text , d_title :: !Text
, d_source :: !Text , d_source :: !Text
...@@ -61,9 +59,8 @@ data Doc = Doc ...@@ -61,9 +59,8 @@ data Doc = Doc
deriving (Show) deriving (Show)
--------------------------------------------------------------- ---------------------------------------------------------------
-- | Doc 2 HyperdataDocument -- | Doc 2 HyperdataDocument
doc2hyperdataDocument :: Doc -> HyperdataDocument toDoc :: CsvGargV3 -> HyperdataDocument
--doc2hyperdataDocument (Doc did dt ds dpy dpm dpd dab dau) = toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
doc2hyperdataDocument (Doc did dt _ dpy dpm dpd dab dau) =
HyperdataDocument (Just "CSV") HyperdataDocument (Just "CSV")
(Just . pack . show $ did) (Just . pack . show $ did)
Nothing Nothing
...@@ -83,22 +80,23 @@ doc2hyperdataDocument (Doc did dt _ dpy dpm dpd dab dau) = ...@@ -83,22 +80,23 @@ doc2hyperdataDocument (Doc did dt _ dpy dpm dpd dab dau) =
Nothing Nothing
Nothing Nothing
Nothing Nothing
--------------------------------------------------------------- ---------------------------------------------------------------
-- | Types Conversions -- | Types Conversions
toDocs :: Vector CsvDoc -> [Doc] toDocs :: Vector CsvDoc -> [CsvGargV3]
toDocs v = V.toList toDocs v = V.toList
$ V.zipWith (\nId (CsvDoc t s py pm pd abst auth) $ V.zipWith (\nId (CsvDoc t s py pm pd abst auth)
-> Doc nId t s py pm pd abst auth ) -> CsvGargV3 nId t s py pm pd abst auth )
(V.enumFromN 1 (V.length v'')) v'' (V.enumFromN 1 (V.length v'')) v''
where where
v'' = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps v'' = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
seps= (V.fromList [Paragraphs 1, Sentences 3, Chars 3]) seps= (V.fromList [Paragraphs 1, Sentences 3, Chars 3])
--------------------------------------------------------------- ---------------------------------------------------------------
fromDocs :: Vector Doc -> Vector CsvDoc fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
fromDocs docs = V.map fromDocs' docs fromDocs docs = V.map fromDocs' docs
where where
fromDocs' (Doc _ t s py pm pd abst auth) = (CsvDoc t s py pm pd abst auth) fromDocs' (CsvGargV3 _ t s py pm pd abst auth) = (CsvDoc t s py pm pd abst auth)
--------------------------------------------------------------- ---------------------------------------------------------------
-- | Split a document in its context -- | Split a document in its context
...@@ -174,44 +172,88 @@ instance ToNamedRecord CsvDoc where ...@@ -174,44 +172,88 @@ instance ToNamedRecord CsvDoc where
, "authors" .= aut , "authors" .= aut
] ]
hyperdataDocument2csvDoc :: HyperdataDocument -> CsvDoc
hyperdataDocument2csvDoc h = CsvDoc (m $ _hyperdataDocument_title h)
(m $ _hyperdataDocument_source h)
(mI $ _hyperdataDocument_publication_year h)
(mI $ _hyperdataDocument_publication_month h)
(mI $ _hyperdataDocument_publication_day h)
(m $ _hyperdataDocument_abstract h)
(m $ _hyperdataDocument_authors h)
where
m = maybe "" identity
mI = maybe 0 identity
csvDecodeOptions :: DecodeOptions csvDecodeOptions :: DecodeOptions
csvDecodeOptions = (defaultDecodeOptions csvDecodeOptions = defaultDecodeOptions {decDelimiter = delimiter}
{decDelimiter = fromIntegral $ ord '\t'}
)
csvEncodeOptions :: EncodeOptions csvEncodeOptions :: EncodeOptions
csvEncodeOptions = ( defaultEncodeOptions csvEncodeOptions = defaultEncodeOptions {encDelimiter = delimiter}
{encDelimiter = fromIntegral $ ord '\t'}
)
delimiter :: Word8
delimiter = fromIntegral $ ord '\t'
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
readCsvOn :: [CsvDoc -> Text] -> FilePath -> IO [Text] readCsvOn :: [CsvDoc -> Text] -> FilePath -> IO [Text]
readCsvOn fields fp = V.toList <$> V.map (\l -> intercalate (pack " ") $ map (\field -> field l) fields) readCsvOn fields fp = V.toList
<$> V.map (\l -> intercalate (pack " ") $ map (\field -> field l) fields)
<$> snd <$> snd
<$> readCsv fp <$> readFile fp
------------------------------------------------------------------------
readFileLazy :: (FromNamedRecord a) => proxy a -> FilePath -> IO (Header, Vector a)
readFileLazy f = fmap (readByteStringLazy f) . BL.readFile
readFileStrict :: (FromNamedRecord a) => proxy a -> FilePath -> IO (Header, Vector a)
readFileStrict f = fmap (readByteStringStrict f) . BS.readFile
readByteStringLazy :: (FromNamedRecord a) => proxy a -> BL.ByteString -> (Header, Vector a)
readByteStringLazy _f bs = case decodeByNameWith csvDecodeOptions bs of
Left e -> panic (pack e)
Right csvDocs -> csvDocs
readByteStringStrict :: (FromNamedRecord a) => proxy a -> BS.ByteString -> (Header, Vector a)
readByteStringStrict ff = (readByteStringLazy ff) . BL.fromStrict
------------------------------------------------------------------------ ------------------------------------------------------------------------
readCsv :: FilePath -> IO (Header, Vector CsvDoc) -- | TODO use readFileLazy
readCsv fp = do readFile :: FilePath -> IO (Header, Vector CsvDoc)
csvData <- BL.readFile fp readFile = fmap readCsvLazyBS . BL.readFile
case decodeByNameWith csvDecodeOptions csvData of
-- | TODO use readByteStringLazy
readCsvLazyBS :: BL.ByteString -> (Header, Vector CsvDoc)
readCsvLazyBS bs = case decodeByNameWith csvDecodeOptions bs of
Left e -> panic (pack e) Left e -> panic (pack e)
Right csvDocs -> pure csvDocs Right csvDocs -> csvDocs
------------------------------------------------------------------------
-- | TODO use readFileLazy
readCsvHal :: FilePath -> IO (Header, Vector CsvHal)
readCsvHal = fmap readCsvHalLazyBS . BL.readFile
readHal :: FilePath -> IO (Header, Vector CsvHal) -- | TODO use readByteStringLazy
readHal fp = do readCsvHalLazyBS :: BL.ByteString -> (Header, Vector CsvHal)
csvData <- BL.readFile fp readCsvHalLazyBS bs = case decodeByNameWith csvDecodeOptions bs of
case decodeByNameWith csvDecodeOptions csvData of
Left e -> panic (pack e) Left e -> panic (pack e)
Right csvDocs -> pure csvDocs Right csvDocs -> csvDocs
readCsvHalBSStrict :: BS.ByteString -> (Header, Vector CsvHal)
readCsvHalBSStrict = readCsvHalLazyBS . BL.fromStrict
------------------------------------------------------------------------ ------------------------------------------------------------------------
writeCsv :: FilePath -> (Header, Vector CsvDoc) -> IO () writeFile :: FilePath -> (Header, Vector CsvDoc) -> IO ()
writeCsv fp (h, vs) = BL.writeFile fp $ writeFile fp (h, vs) = BL.writeFile fp $
encodeByNameWith csvEncodeOptions h (V.toList vs) encodeByNameWith csvEncodeOptions h (V.toList vs)
writeDocs2Csv :: FilePath -> [HyperdataDocument] -> IO ()
writeDocs2Csv fp hs = BL.writeFile fp $ hyperdataDocument2csv hs
hyperdataDocument2csv :: [HyperdataDocument] -> BL.ByteString
hyperdataDocument2csv hs = encodeByNameWith csvEncodeOptions headerCsvGargV3 (map hyperdataDocument2csvDoc hs)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Hal Format -- Hal Format
...@@ -321,7 +363,6 @@ csvHal2doc (CsvHal title source ...@@ -321,7 +363,6 @@ csvHal2doc (CsvHal title source
------------------------------------------------------------------------ ------------------------------------------------------------------------
parseHal :: FilePath -> IO [HyperdataDocument] parseHal :: FilePath -> IO [HyperdataDocument]
parseHal fp = map csvHal2doc <$> V.toList <$> snd <$> readHal fp parseHal fp = map csvHal2doc <$> V.toList <$> snd <$> readCsvHal fp
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -18,12 +18,12 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00" ...@@ -18,12 +18,12 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.Date (parseDate, parseDateRaw) where module Gargantext.Text.Parsers.Date (parse, parseRaw, split) where
import Data.HashMap.Strict as HM hiding (map) import Data.HashMap.Strict as HM hiding (map)
import Data.Text (Text, unpack, splitOn, pack) import Data.Text (Text, unpack, splitOn, pack)
import Data.Time (parseTimeOrError, defaultTimeLocale) import Data.Time (parseTimeOrError, defaultTimeLocale, toGregorian)
import Data.Time.Clock (UTCTime, getCurrentTime) import Data.Time.Clock (UTCTime(..), getCurrentTime)
import Data.Time.LocalTime (utc) import Data.Time.LocalTime (utc)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime) import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Duckling.Api (analyze) import Duckling.Api (analyze)
...@@ -37,40 +37,45 @@ import qualified Data.Aeson as Json ...@@ -37,40 +37,45 @@ import qualified Data.Aeson as Json
import qualified Data.HashSet as HashSet import qualified Data.HashSet as HashSet
import qualified Duckling.Core as DC import qualified Duckling.Core as DC
-- | Unused import (to parse Date Format, keeping it for maybe next steps)
-- import Control.Monad ((=<<))
-- import Data.Either (Either)
-- import Data.Fixed (Fixed (MkFixed))
-- import Data.Foldable (length)
-- import Data.String (String)
-- import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
-- import Data.Time.Calendar (Day, fromGregorian)
-- import Duckling.Debug as DB
-- import Duckling.Engine (parseAndResolve)
-- import Duckling.Rules (rulesFor)
-- import Prelude (toInteger, div, otherwise, (++))
-- import Text.Parsec.Error (ParseError)
-- import Text.Parsec.Prim (Stream, ParsecT)
-- import Text.Parsec.String (Parser)
-- import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
-- import Text.XML.HXT.DOM.Util (decimalStringToInt)
-- import qualified Text.ParserCombinators.Parsec (parse)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
split :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
split _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
split l (Just txt) = do
utcTime <- parse l txt
let (y, m, d) = split' utcTime
pure (Just utcTime, (Just y, Just m,Just d))
split' :: UTCTime -> (Year, Month, Day)
split' utcTime = (fromIntegral y, m, d)
where
(UTCTime day _) = utcTime
(y,m,d) = toGregorian day
type Year = Int
type Month = Int
type Day = Int
------------------------------------------------------------------------
-- | Date Parser -- | Date Parser
-- Parses dates mentions in full text given the language. -- Parses dates mentions in full text given the language.
-- >>> parseDate FR (pack "10 avril 1979 à 19H") -- >>> parseDate FR (pack "10 avril 1979 à 19H")
-- 1979-04-10 19:00:00 UTC -- 1979-04-10 19:00:00 UTC
-- >>> parseDate EN (pack "April 10 1979") -- >>> parseDate EN (pack "April 10 1979")
-- 1979-04-10 00:00:00 UTC -- 1979-04-10 00:00:00 UTC
parseDate :: Lang -> Text -> IO UTCTime parse :: Lang -> Text -> IO UTCTime
parseDate lang s = do parse lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s
dateStr' <- parseDateRaw lang s
let format = "%Y-%m-%dT%T" type DateFormat = Text
let dateStr = unpack $ maybe "0-0-0T0:0:0" identity type DateDefault = Text
$ head $ splitOn "." dateStr'
pure $ parseTimeOrError True defaultTimeLocale format dateStr
parseDate' :: DateFormat -> DateDefault -> Lang -> Text -> IO UTCTime
parseDate' format def lang s = do
dateStr' <- parseRaw lang s
let dateStr = unpack $ maybe def identity
$ head $ splitOn "." dateStr'
pure $ parseTimeOrError True defaultTimeLocale (unpack format) dateStr
-- TODO add Paris at Duckling.Locale Region datatype -- TODO add Paris at Duckling.Locale Region datatype
...@@ -85,19 +90,19 @@ parserLang EN = DC.EN ...@@ -85,19 +90,19 @@ parserLang EN = DC.EN
-- IO can be avoided here: -- IO can be avoided here:
-- currentContext :: Lang -> IO Context -- currentContext :: Lang -> IO Context
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseDateRaw :: Context -> Text -> SomeErrorHandling Text -- parseRaw :: Context -> Text -> SomeErrorHandling Text
-- TODO error handling -- TODO error handling
parseDateRaw :: Lang -> Text -> IO (Text) parseRaw :: Lang -> Text -> IO (Text)
parseDateRaw lang text = do parseRaw lang text = do
maybeJson <- map jsonValue <$> parseDateWithDuckling lang text maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
case headMay maybeJson of case headMay maybeJson of
Just (Json.Object object) -> case HM.lookup "value" object of Just (Json.Object object) -> case HM.lookup "value" object of
Just (Json.String date) -> pure date Just (Json.String date) -> pure date
Just _ -> panic "ParseDateRaw ERROR: should be a json String" Just _ -> panic "ParseRaw ERROR: should be a json String"
Nothing -> panic $ "ParseDateRaw ERROR: no date found" <> (pack . show) lang <> " " <> text Nothing -> panic $ "ParseRaw ERROR: no date found" <> (pack . show) lang <> " " <> text
_ -> panic $ "ParseDateRaw ERROR: type error" <> (pack . show) lang <> " " <> text _ -> panic $ "ParseRaw ERROR: type error" <> (pack . show) lang <> " " <> text
-- | Current Time in DucklingTime format -- | Current Time in DucklingTime format
...@@ -116,64 +121,3 @@ parseDateWithDuckling lang input = do ...@@ -116,64 +121,3 @@ parseDateWithDuckling lang input = do
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
pure $ analyze input contxt $ HashSet.fromList [(This Time)] pure $ analyze input contxt $ HashSet.fromList [(This Time)]
-- | Permit to transform a String to an Int in a monadic context
--wrapDST :: Monad m => String -> m Int
--wrapDST = (return . decimalStringToInt)
-- | Generic parser which take at least one element not given in argument
--many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
--many1NoneOf = (many1 . noneOf)
--getMultiplicator :: Int -> Int
--getMultiplicator a
-- | 0 >= a = 1
-- | otherwise = 10 * (getMultiplicator $ div a 10)
-- | Parser for date format y-m-d
--parseGregorian :: Parser Day
--parseGregorian = do
-- y <- wrapDST =<< many1NoneOf ['-']
-- _ <- char '-'
-- m <- wrapDST =<< many1NoneOf ['-']
-- _ <- char '-'
-- d <- wrapDST =<< many1NoneOf ['T']
-- _ <- char 'T'
-- return $ fromGregorian (toInteger y) m d
--
---- | Parser for time format h:m:s
--parseTimeOfDay :: Parser TimeOfDay
--parseTimeOfDay = do
-- h <- wrapDST =<< many1NoneOf [':']
-- _ <- char ':'
-- m <- wrapDST =<< many1NoneOf [':']
-- _ <- char ':'
-- r <- many1NoneOf ['.']
-- _ <- char '.'
-- dec <- many1NoneOf ['+', '-']
-- let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
-- seconds = nb * 10^(12-l)
-- return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
--
--
-- | Parser for timezone format +hh:mm
--parseTimeZone :: Parser TimeZone
--parseTimeZone = do
-- sign <- oneOf ['+', '-']
-- h <- wrapDST =<< many1NoneOf [':']
-- _ <- char ':'
-- m <- wrapDST =<< (many1 $ anyChar)
-- let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
-- in return $ TimeZone timeInMinute False "CET"
--
---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
--parseZonedTime :: Parser ZonedTime
--parseZonedTime= do
-- d <- parseGregorian
-- tod <- parseTimeOfDay
-- tz <- parseTimeZone
-- return $ ZonedTime (LocalTime d (tod)) tz
--
---- | Opposite of toRFC3339
--fromRFC3339 :: Text -> Either ParseError ZonedTime
--fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
-- where input = unpack t
{-|
Module : Gargantext.Text.Parsers.Date.Attoparsec
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.Date.Attoparsec
where
import Control.Applicative ((<*))
import Data.Attoparsec.ByteString (Parser, take)
import Data.ByteString (ByteString)
import Data.Monoid ((<>))
import Data.Tuple.Extra (first)
import Gargantext.Prelude hiding (takeWhile, take)
-------------------------------------------------------------
-- | Parse a date of the form @DD\<sep\>MM\<sep\>YYYY@ and expand it into the
-- @publication_*@ fields expected downstream (day, month, year, and an
-- ISO-like @publication_date@ rebuilt from the three parts).
parserWith :: Parser ByteString -> Parser [(ByteString, ByteString)]
parserWith sep = do
  dd <- take 2 <* sep
  mm <- take 2 <* sep
  yy <- take 4
  let prefixed k = "publication_" <> k
  pure [ (prefixed "day",   dd)
       , (prefixed "month", mm)
       , (prefixed "year",  yy)
       , (prefixed "date",  yy <> "-" <> mm <> "-" <> dd <> "T0:0:0")
       ]
{-|
Module : Gargantext.Text.Parsers.Date
Description : Some utils to parse dates
Copyright : (c) CNRS 2017-present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.Date.Parsec
where
import Control.Monad ((=<<))
import Data.Either (Either)
import Data.Fixed (Fixed (MkFixed))
import Data.Foldable (length)
import Data.String (String)
import Data.Text (Text, unpack)
import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
import Data.Time.Calendar (Day, fromGregorian)
import Gargantext.Prelude
import Prelude (toInteger, div, otherwise, (++))
import Text.Parsec.Error (ParseError)
import Text.Parsec.Prim (Stream, ParsecT)
import Text.Parsec.String (Parser)
import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
import Text.XML.HXT.DOM.Util (decimalStringToInt)
import qualified Text.ParserCombinators.Parsec (parse)
-- | Lift the decimal-string-to-'Int' conversion into any monadic context
-- (used to feed parser results straight into numeric bindings).
wrapDST :: Monad m => String -> m Int
wrapDST = pure . decimalStringToInt
-- | One or more characters, none of which may belong to the given set.
many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
many1NoneOf excluded = many1 (noneOf excluded)
-- | Power of ten with as many zeroes as @a@ has decimal digits
-- (e.g. @42 -> 100@, @123 -> 1000@); returns 1 for non-positive input.
getMultiplicator :: Int -> Int
getMultiplicator a
  | a <= 0    = 1
  | otherwise = 10 * getMultiplicator (a `div` 10)
-- | Parser for a date written as @y-m-d@ followed by a literal \'T\'.
parseGregorian :: Parser Day
parseGregorian =
  fromGregorian
    <$> (toInteger <$> numberUpTo '-')
    <*> numberUpTo '-'
    <*> numberUpTo 'T'
  where
    -- Digits up to (and consuming) the given terminator character.
    numberUpTo :: Char -> Parser Int
    numberUpTo c = (decimalStringToInt <$> many1NoneOf [c]) <* char c
---- | Parser for time format h:m:s followed by a fractional part,
-- e.g. @19:00:00.000@; the fraction runs until a timezone sign (@+@/@-@).
parseTimeOfDay :: Parser TimeOfDay
parseTimeOfDay = do
  h <- wrapDST =<< many1NoneOf [':']
  _ <- char ':'
  m <- wrapDST =<< many1NoneOf [':']
  _ <- char ':'
  -- Whole seconds (up to the '.') and the fractional digits (up to the
  -- timezone sign) are concatenated into one integer, then rescaled.
  r <- many1NoneOf ['.']
  _ <- char '.'
  dec <- many1NoneOf ['+', '-']
  let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
      -- Scale to the resolution of 'MkFixed' — the exponent 12 suggests
      -- picoseconds (Fixed E12) — NOTE(review): confirm against the Pico
      -- type used by 'TimeOfDay'.
      seconds = nb * 10^(12-l)
  return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
-- | Parser for a timezone offset of the form @+hh:mm@ or @-hh:mm@.
-- The zone is labelled "CET" regardless of the actual offset.
parseTimeZone :: Parser TimeZone
parseTimeZone = do
  sgn  <- oneOf ['+', '-']
  hrs  <- wrapDST =<< many1NoneOf [':']
  _    <- char ':'
  mins <- wrapDST =<< many1 anyChar
  let absOffset = hrs * 60 + mins
      offset    = if sgn == '+' then absOffset else negate absOffset
  return $ TimeZone offset False "CET"
---- | Combine 'parseGregorian', 'parseTimeOfDay' and 'parseTimeZone'
-- into a full 'ZonedTime' parser.
parseZonedTime :: Parser ZonedTime
parseZonedTime =
  (\d t tz -> ZonedTime (LocalTime d t) tz)
    <$> parseGregorian
    <*> parseTimeOfDay
    <*> parseTimeZone
---- | Opposite of @toRFC3339@: parse an RFC 3339 timestamp string into a
-- 'ZonedTime', reporting a 'ParseError' on failure.
fromRFC3339 :: Text -> Either ParseError ZonedTime
fromRFC3339 =
  Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." . unpack
...@@ -28,7 +28,7 @@ import Data.Text (Text, unpack) ...@@ -28,7 +28,7 @@ import Data.Text (Text, unpack)
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude import Gargantext.Prelude
import System.IO (FilePath) import System.IO (FilePath)
import Gargantext.Text.Parsers.CSV (CsvDoc(..), writeCsv, headerCsvGargV3) import Gargantext.Text.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
import Data.Vector (fromList) import Data.Vector (fromList)
data Patent = Patent { _patent_title :: Text data Patent = Patent { _patent_title :: Text
...@@ -48,7 +48,7 @@ type FilePathOut = FilePath ...@@ -48,7 +48,7 @@ type FilePathOut = FilePath
json2csv :: FilePathIn -> FilePathOut -> IO () json2csv :: FilePathIn -> FilePathOut -> IO ()
json2csv fin fout = do json2csv fin fout = do
patents <- maybe (panic "json2csv error") identity <$> readPatents fin patents <- maybe (panic "json2csv error") identity <$> readPatents fin
writeCsv fout (headerCsvGargV3, fromList $ map patent2csvDoc patents) writeFile fout (headerCsvGargV3, fromList $ map patent2csvDoc patents)
patent2csvDoc :: Patent -> CsvDoc patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc (Patent title abstract year _) = patent2csvDoc (Patent title abstract year _) =
......
{-|
Module : Gargantext.Text.Parsers.PubMed
Description : Parser for PubMed XML exports
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This version of the parser fixes the date of publication for the
Gargantext (V3) PubMed parser. We can rely neither on the Journal
Publication Date nor on the Article Publication Date, which are
incomplete both structurally and in their interpretation. To simplify
and make the data uniform, the date of insertion into the database is
used instead.
TODO:
- Add authors
- factorize
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Parsers.PubMed where
import Data.Conduit.List as CL hiding (catMaybes, head)
import Control.Monad (join)
import GHC.IO (FilePath)
import Prelude (read)
import Gargantext.Prelude
import Control.Monad.Catch (MonadThrow)
import Data.Maybe (Maybe)
import Data.Monoid (mconcat)
import Text.XML.Stream.Parse
import Data.Conduit (runConduit, (.|), ConduitT)
import Data.Text (Text, unpack)
import Data.XML.Types (Event)
import Data.Time.Segment (jour)
import Data.Time (UTCTime(..))
import qualified Data.ByteString.Lazy as DBL
import Gargantext.Text.Parsers.Wikimedia
-- | One record from a PubMed XML export: the article payload together
-- with the date attached to it.
data PubMed =
  PubMed { pubmed_article :: PubMedArticle  -- parsed <MedlineCitation>/<Article>
         , pubmed_date :: PubMedData        -- date taken from <PubmedData>/<History>
         } deriving Show

-- | Article fields extracted from the XML; each is optional because the
-- export may omit it.
data PubMedArticle =
  PubMedArticle { pubmed_title :: Maybe Text
                , pubmed_journal :: Maybe Text
                , pubmed_abstract :: Maybe [Text]
                }
                deriving (Show)

-- | A date carried both as a 'UTCTime' and as its year/month/day parts.
data PubMedData =
  PubMedData { pubmedData_date :: UTCTime
             , pubmedData_year :: Integer
             , pubmedData_month :: Int
             , pubmedData_day :: Int
           } deriving (Show)
-- | Read a PubMed XML file from disk and parse it into records.
readPubMedFile :: FilePath -> IO [PubMed]
readPubMedFile fp = DBL.readFile fp >>= pubMedParser
-- | Parse a lazy bytestring of PubMed XML, collecting every article.
pubMedParser :: DBL.ByteString -> IO [PubMed]
pubMedParser raw =
  runConduit (parseLBS def raw .| parseArticleSet .| CL.consume)
-- | Stream every article found under the mandatory <PubmedArticleSet> root.
parseArticleSet :: MonadThrow m => ConduitT Event PubMed m ()
parseArticleSet =
  force "force" (tagIgnoreAttrs "PubmedArticleSet" (manyYield parsePubMedArticle))
-- | Parse one optional <PubmedArticle> element, if present at the cursor.
parsePubMedArticle :: MonadThrow m => ConduitT Event o m (Maybe PubMed)
parsePubMedArticle =
  tagIgnoreAttrs "PubmedArticle" parsePubMedArticle'
-- | Parse the body of one <PubmedArticle>: the mandatory <MedlineCitation>
-- payload plus the (y, m, d) triples recorded in <PubmedData>/<History>.
parsePubMedArticle' :: MonadThrow m => ConduitT Event o m (PubMed)
parsePubMedArticle' = do
  article <- force "MedlineCitation" $ tagIgnoreAttrs "MedlineCitation" parseMedlineCitation
  dates <- tagIgnoreAttrs "PubmedData" $ do
    dates' <- tagIgnoreAttrs "History" $ many $ tagIgnoreAttrs "PubMedPubDate" $ do
      y' <- force "Year" $ tagIgnoreAttrs "Year" content
      m' <- force "Month" $ tagIgnoreAttrs "Month" content
      d' <- force "Day" $ tagIgnoreAttrs "Day" content
      _ <- many $ ignoreAnyTreeContent
      -- NOTE(review): 'read' is partial — non-numeric Year/Month/Day text
      -- would crash here; confirm the input is always well-formed.
      return (read $ unpack y', read $ unpack m', read $ unpack d')
    _ <- many $ ignoreAnyTreeContent
    return dates'
  _ <- many $ ignoreAnyTreeContent
  -- Take the last date listed in <History> (hence the 'reverse');
  -- default to (1,1,1) when no date is present at all.
  let (y,m,d) = maybe (1,1,1) identity $ join $ fmap head $ reverse <$> join dates
  return $ PubMed (article) (PubMedData (jour y m d) y m d)
-- | Parse the mandatory <Article> child of a <MedlineCitation>,
-- draining any remaining sibling content afterwards.
parseMedlineCitation :: MonadThrow m => ConduitT Event o m PubMedArticle
parseMedlineCitation = do
  citation <- force "article" (manyTagsUntil "Article" parseArticle)
  _        <- many ignoreAnyTreeContent
  pure citation
-- | Parse the contents of an <Article> element: the journal title, the
-- article title, and the (possibly multi-part) abstract.
parseArticle :: MonadThrow m => ConduitT Event o m PubMedArticle
parseArticle = do
  -- Journal name comes from <Journal>/<Title>; the rest of <Journal> is skipped.
  journal <- force "journal" $ manyTagsUntil "Journal" $ do
    j <- manyTagsUntil "Title" content
    _ <- many $ ignoreAnyTreeContent
    return j
  title <- do
    t <- manyTagsUntil "ArticleTitle" content
    return t
  -- An <Abstract> may hold several <AbstractText> chunks; collect them all.
  abstracts <- do
    as <- manyTagsUntil "Abstract" $ many $ do
      txt <- tagIgnoreAttrs "AbstractText" $ do
        c <- content
        _ <- many $ ignoreAnyTreeContent
        return c
      _ <- many $ ignoreAnyTreeContent
      return txt
    return as
  -- TODO add authors
  _ <- many $ ignoreAnyTreeContent
  return $ PubMedArticle title journal abstracts
-- | Inline sample of a PubMed XML export, kept as a fixture for exercising
-- 'pubMedParser' by hand. Mirrors the structure the parsers above expect.
pubMedData :: DBL.ByteString
pubMedData = mconcat
  [ "<?xml version=\"1.0\"?>\n"
  , "<!DOCTYPE PubmedArticleSet PUBLIC \"-//NLM//DTD PubMedArticle, 1st June 2018//EN\" \"https://dtd.nlm.nih.gov/ncbi/pubmed/out/pubmed_180601.dtd\">\n"
  , "<PubmedArticleSet>\n"
  , "<PubmedArticle>\n"
  , " <MedlineCitation Status=\"Publisher\" Owner=\"NLM\">\n"
  , " <PMID Version=\"1\">30357468</PMID>\n"
  , " <DateRevised>\n"
  , " <Year>2018</Year>\n"
  , " </DateRevised>\n"
  , " <Article PubModel=\"Print-Electronic\">\n"
  , " <Journal>\n"
  , " <ISSN IssnType=\"Electronic\">1432-1076</ISSN>\n"
  , " <Title>European journal of pediatrics</Title>\n"
  , " </Journal>\n"
  , " <ArticleTitle>Title of the Article</ArticleTitle>\n"
  , " <ELocationID EIdType=\"doi\" ValidYN=\"Y\">10.1007/s00431-018-3270-3</ELocationID>\n"
  , " <Abstract>\n"
  , " <AbstractText>Abstract Text.</AbstractText>\n"
  , " </Abstract>\n"
  , " <AuthorList>\n"
  , " </AuthorList>\n"
  , " </Article>\n"
  , " </MedlineCitation>\n"
  , " <PubmedData>\n"
  , " <History>\n"
  , " </History>\n"
  , " </PubmedData>\n"
  , "</PubmedArticle>\n"
  , "</PubmedArticleSet>\n"
  ]
{-|
Module : Gargantext.Text.Parsers.RIS
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
RIS is a standardized tag format developed by Research Information
Systems, Incorporated (the format name refers to the company) to enable
citation programs to exchange data.
[More](https://en.wikipedia.org/wiki/RIS_(file_format))
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.RIS (parser, onField, fieldWith, lines) where
import Data.List (lookup)
import Control.Applicative
import Data.Attoparsec.ByteString (Parser, try, takeTill, take, many1)
import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
import Data.ByteString (ByteString, concat)
import Data.Monoid ((<>))
import Gargantext.Prelude hiding (takeWhile, take)
import qualified Data.List as DL
-------------------------------------------------------------
-- | A RIS file is one notice introduced by the first type tag, followed by
-- at least one more notice each introduced by a newline and a type tag.
parser :: Parser [[(ByteString, ByteString)]]
parser = (:) <$> notice "TY -" <*> many1 (notice "\nTY -")
-- | One RIS notice: a header line recognised by @s@, then any number of
-- tagged fields, terminated by the end-of-record line.
notice :: Parser ByteString -> Parser [(ByteString, ByteString)]
notice s = header *> body <* footer
  where
    body :: Parser [(ByteString, ByteString)]
    body = many (fieldWith tag)
    tag :: Parser ByteString
    tag = "\n" *> take 2 <* " - "
    header :: Parser ByteString
    header = s *> takeTill isEndOfLine
    footer :: Parser ByteString
    footer = "\nER -" *> takeTill isEndOfLine
-- | Parse one tagged field: the tag parser yields the field name, the rest
-- of the line is the value, and any continuation lines (see 'lines') are
-- appended to it.
fieldWith :: Parser ByteString -> Parser (ByteString, ByteString)
fieldWith n = do
  name <- n
  txt  <- takeTill isEndOfLine
  txts <- try lines              -- continuation lines; [] when none
  -- The previous version guarded on @length txts > 0@ before using @txts@,
  -- but both branches were the identity (an empty list is already empty),
  -- so the check is dropped.
  pure (name, concat (txt : txts))
-- | Continuation lines of a field value: each starts with a newline plus a
-- space and runs to the end of the line.
lines :: Parser [ByteString]
lines = many continuation
  where
    continuation :: Parser ByteString
    continuation = "\n " *> takeTill isEndOfLine
-------------------------------------------------------------
-- | Look up key @k@ in the association list; when present, append the
-- fields derived from its value (the original entry is kept untouched).
onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
        -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
onField k f m = case lookup k m of
  Nothing -> m
  Just v  -> m <> f v
{-|
Module : Gargantext.Text.Parsers.RIS.Presse
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Presse RIS format parser for Europresse Database.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.RIS.Presse (presseEnrich) where
import Data.List (lookup)
import Data.Either (either)
import Data.Tuple.Extra (first, both, uncurry)
import Data.Attoparsec.ByteString (parseOnly)
import Data.ByteString (ByteString, length)
import Gargantext.Prelude hiding (takeWhile, take, length)
import Gargantext.Text.Parsers.RIS (onField)
import Gargantext.Core (Lang(..))
import qualified Gargantext.Text.Parsers.Date.Attoparsec as Date
-- | Enrich a Europresse RIS record: normalise the field names, then expand
-- the language ("LA") and date ("DA") fields into Gargantext's keys.
presseEnrich :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
presseEnrich fields =
  onField "DA" parseDate (onField "LA" parseLang (fixFields fields))
-- | Expand a slash-separated date into publication_* fields;
-- yields no fields when the date does not parse.
parseDate :: ByteString -> [(ByteString, ByteString)]
parseDate bs = case parseOnly (Date.parserWith "/") bs of
  Left  _      -> []
  Right fields -> fields
-- | Map the raw language field to a 'Lang' code where recognised,
-- passing unknown values through unchanged.
parseLang :: ByteString -> [(ByteString, ByteString)]
parseLang l = [(langField, normalise l)]
  where
    normalise "Français" = cs $ show FR
    normalise "English"  = cs $ show EN
    normalise other      = other
-- | Key under which the detected language is stored in the field list.
langField :: ByteString
langField = "language"
-- | Rename raw RIS tags to Gargantext field names. When the "TI" (title)
-- value is longer than the "N2" (abstract) value, the two are swapped,
-- since Europresse sometimes puts the abstract under the title tag.
fixFields :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
fixFields ns = map (first rename) ns
  where
    rename
      | titleLongerThanAbstract = fixFields' "abstract" "title"
      | otherwise               = fixFields' "title" "abstract"
    -- Only swap when both fields are present and TI is strictly longer.
    titleLongerThanAbstract =
      case (lookup "TI" ns, lookup "N2" ns) of
        (Just ti, Just n2) -> length ti > length n2
        _                  -> False
-- | Translate one raw RIS tag to its Gargantext field name; the names used
-- for "TI" and "N2" are supplied by the caller so they can be swapped.
fixFields' :: ByteString -> ByteString
           -> ByteString -> ByteString
fixFields' title abstract champs = case lookup champs replacements of
  Just replacement -> replacement
  Nothing          -> champs
  where
    replacements = [ ("AU", "authors")
                   , ("TI", title)
                   , ("JF", "source")
                   , ("DI", "doi")
                   , ("UR", "url")
                   , ("N2", abstract)
                   ]
...@@ -14,27 +14,21 @@ commentary with @some markup@. ...@@ -14,27 +14,21 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.WOS (wosParser) where module Gargantext.Text.Parsers.WOS (parser, keys) where
-- TOFIX : Should import Gargantext.Prelude here import Control.Applicative
import Prelude hiding (takeWhile, take, concat, readFile, lines, concat) import Data.Attoparsec.ByteString (Parser, string, takeTill, take, manyTill, many1)
import qualified Data.List as DL
import Data.Monoid ((<>))
import Data.Attoparsec.ByteString (Parser, try, string
, takeTill, take
, manyTill, many1)
import Data.Attoparsec.ByteString.Char8 (anyChar, isEndOfLine) import Data.Attoparsec.ByteString.Char8 (anyChar, isEndOfLine)
import Data.ByteString (ByteString, concat) import Data.ByteString (ByteString)
import Data.ByteString.Char8 (pack) import Data.ByteString.Char8 (pack)
import Control.Applicative import Gargantext.Text.Parsers.RIS (fieldWith)
import Prelude hiding (takeWhile, take, concat, readFile, lines, concat)
------------------------------------------------------------- -------------------------------------------------------------
-- | wosParser parses ISI format from -- | wosParser parses ISI format from
-- Web Of Science Database -- Web Of Science Database
wosParser :: Parser [[(ByteString, ByteString)]] parser :: Parser [[(ByteString, ByteString)]]
wosParser = do parser = do
-- TODO Warning if version /= 1.0 -- TODO Warning if version /= 1.0
-- FIXME anyChar (string ..) /= exact string "\nVR 1.0" ? -- FIXME anyChar (string ..) /= exact string "\nVR 1.0" ?
_ <- manyTill anyChar (string $ pack "\nVR 1.0") _ <- manyTill anyChar (string $ pack "\nVR 1.0")
...@@ -42,8 +36,11 @@ wosParser = do ...@@ -42,8 +36,11 @@ wosParser = do
pure ns pure ns
notice :: Parser [(ByteString, ByteString)] notice :: Parser [(ByteString, ByteString)]
notice = start *> fields <* end notice = start *> many (fieldWith field) <* end
where where
field :: Parser ByteString
field = "\n" *> take 2 <* " "
start :: Parser ByteString start :: Parser ByteString
start = "\nPT " *> takeTill isEndOfLine start = "\nPT " *> takeTill isEndOfLine
...@@ -51,28 +48,8 @@ notice = start *> fields <* end ...@@ -51,28 +48,8 @@ notice = start *> fields <* end
end = manyTill anyChar (string $ pack "\nER\n") end = manyTill anyChar (string $ pack "\nER\n")
fields :: Parser [(ByteString, ByteString)] keys :: ByteString -> ByteString
fields = many field keys champs
where
field :: Parser (ByteString, ByteString)
field = do
name <- "\n" *> take 2 <* " "
txt <- takeTill isEndOfLine
txts <- try lines
let txts' = case DL.length txts > 0 of
True -> txts
False -> []
pure (translate name, concat ([txt] <> txts'))
lines :: Parser [ByteString]
lines = many line
where
line :: Parser ByteString
line = "\n " *> takeTill isEndOfLine
translate :: ByteString -> ByteString
translate champs
| champs == "AF" = "authors" | champs == "AF" = "authors"
| champs == "TI" = "title" | champs == "TI" = "title"
| champs == "SO" = "source" | champs == "SO" = "source"
...@@ -80,5 +57,3 @@ translate champs ...@@ -80,5 +57,3 @@ translate champs
| champs == "PD" = "publication_date" | champs == "PD" = "publication_date"
| champs == "AB" = "abstract" | champs == "AB" = "abstract"
| otherwise = champs | otherwise = champs
-------------------------------------------------------------
...@@ -35,7 +35,7 @@ import Gargantext.Text.Parsers.CSV ...@@ -35,7 +35,7 @@ import Gargantext.Text.Parsers.CSV
type DocId = Int type DocId = Int
type DocSearchEngine = SearchEngine type DocSearchEngine = SearchEngine
Doc CsvGargV3
DocId DocId
DocField DocField
NoFeatures NoFeatures
...@@ -48,7 +48,7 @@ initialDocSearchEngine :: DocSearchEngine ...@@ -48,7 +48,7 @@ initialDocSearchEngine :: DocSearchEngine
initialDocSearchEngine = initialDocSearchEngine =
initSearchEngine docSearchConfig defaultSearchRankParameters initSearchEngine docSearchConfig defaultSearchRankParameters
docSearchConfig :: SearchConfig Doc DocId DocField NoFeatures docSearchConfig :: SearchConfig CsvGargV3 DocId DocField NoFeatures
docSearchConfig = docSearchConfig =
SearchConfig { SearchConfig {
documentKey = d_docId, documentKey = d_docId,
...@@ -57,7 +57,7 @@ docSearchConfig = ...@@ -57,7 +57,7 @@ docSearchConfig =
documentFeatureValue = const noFeatures documentFeatureValue = const noFeatures
} }
where where
extractTerms :: Doc -> DocField -> [Text] extractTerms :: CsvGargV3 -> DocField -> [Text]
extractTerms doc TitleField = monoTexts (d_title doc) extractTerms doc TitleField = monoTexts (d_title doc)
extractTerms doc AbstractField = monoTexts (d_abstract doc) extractTerms doc AbstractField = monoTexts (d_abstract doc)
......
...@@ -27,13 +27,13 @@ group :: [TokenTag] -> [TokenTag] ...@@ -27,13 +27,13 @@ group :: [TokenTag] -> [TokenTag]
group [] = [] group [] = []
group ntags = group2 NP NP group ntags = group2 NP NP
$ group2 NP VB $ group2 NP VB
-- $ group2 NP IN -- group2 NP IN
-- - $ group2 IN DT -- group2 IN DT
$ group2 VB NP $ group2 VB NP
$ group2 JJ NP $ group2 JJ NP
$ group2 NP JJ $ group2 NP JJ
$ group2 JJ JJ $ group2 JJ JJ
-- - $ group2 JJ CC -- group2 JJ CC
$ ntags $ ntags
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
...@@ -13,6 +13,7 @@ Portability : POSIX ...@@ -13,6 +13,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleContexts #-}
module Gargantext.Viz.Chart module Gargantext.Viz.Chart
where where
...@@ -23,10 +24,27 @@ import Data.List (unzip, sortOn) ...@@ -23,10 +24,27 @@ import Data.List (unzip, sortOn)
import Data.Map (toList) import Data.Map (toList)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Config
import Gargantext.Database.Schema.NodeNode (selectDocsDates) import Gargantext.Database.Schema.NodeNode (selectDocsDates)
import Gargantext.Database.Utils import Gargantext.Database.Utils
import Gargantext.Database.Types.Node (CorpusId) import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Database.Node.Select
import Gargantext.Text.Metrics.Count (occurrencesWith) import Gargantext.Text.Metrics.Count (occurrencesWith)
import Gargantext.Core.Types.Main
-- Pie Chart
import Data.Maybe (catMaybes)
import qualified Data.Map as Map
import qualified Data.List as List
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.NTree
import Gargantext.Database.Metrics.NgramsByNode
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node
import Gargantext.Core.Types
import Gargantext.Database.Flow
import Servant
data Chart = ChartHisto | ChartScatter | ChartPie data Chart = ChartHisto | ChartScatter | ChartPie
...@@ -47,3 +65,62 @@ histoData cId = do ...@@ -47,3 +65,62 @@ histoData cId = do
$ occurrencesWith identity dates $ occurrencesWith identity dates
pure (Histo ls css) pure (Histo ls css)
pieData :: FlowCmdM env err m
=> CorpusId -> NgramsType -> ListType
-> m Histo
pieData cId nt lt = do
ls' <- selectNodesWithUsername NodeList userMaster
ls <- map (_node_id) <$> getListsWithParentId cId
ts <- mapTermListRoot ls nt
let
dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ Map.toList dico
group dico' x = case Map.lookup x dico' of
Nothing -> x
Just x' -> maybe x identity x'
(_total,mapTerms) <- countNodesByNgramsWith (group dico)
<$> getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms
let (dates, count) = unzip $ map (\(t,(d,_)) -> (t, d)) $ Map.toList mapTerms
pure (Histo dates (map round count))
treeData :: FlowCmdM env err m
=> CorpusId -> NgramsType -> ListType
-> m [MyTree]
treeData cId nt lt = do
ls' <- selectNodesWithUsername NodeList userMaster
ls <- map (_node_id) <$> getListsWithParentId cId
ts <- mapTermListRoot ls nt
let
dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ Map.toList dico
cs' <- getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms
m <- getListNgrams ls nt
pure $ toTree lt cs' m
treeData' :: FlowCmdM env ServantErr m
=> CorpusId -> NgramsType -> ListType
-> m [MyTree]
treeData' cId nt lt = do
ls' <- selectNodesWithUsername NodeList userMaster
ls <- map (_node_id) <$> getListsWithParentId cId
ts <- mapTermListRoot ls nt
let
dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ Map.toList dico
cs' <- getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms
m <- getListNgrams ls nt
pure $ toTree lt cs' m
...@@ -28,8 +28,10 @@ import Control.Monad.IO.Class (liftIO) ...@@ -28,8 +28,10 @@ import Control.Monad.IO.Class (liftIO)
import Gargantext.API.Ngrams.Tools import Gargantext.API.Ngrams.Tools
import Gargantext.API.Types import Gargantext.API.Types
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Database.Config
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser) import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Schema.Ngrams import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Node.Select
import Gargantext.Database.Schema.Node (getNode) import Gargantext.Database.Schema.Node (getNode)
import Gargantext.Database.Schema.Node (defaultList) import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Types.Node hiding (node_id) -- (GraphId, ListId, CorpusId, NodeId) import Gargantext.Database.Types.Node hiding (node_id) -- (GraphId, ListId, CorpusId, NodeId)
...@@ -66,12 +68,13 @@ getGraph nId = do ...@@ -66,12 +68,13 @@ getGraph nId = do
-- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10]) -- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10])
let cId = maybe (panic "no parentId") identity $ _node_parentId nodeGraph let cId = maybe (panic "no parentId") identity $ _node_parentId nodeGraph
lIds <- selectNodesWithUsername NodeList userMaster
lId <- defaultList cId lId <- defaultList cId
ngs <- filterListWithRoot GraphTerm <$> mapTermListRoot [lId] NgramsTerms ngs <- filterListWithRoot GraphTerm <$> mapTermListRoot [lId] NgramsTerms
myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal False) myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal False)
<$> groupNodesByNgrams ngs <$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId NgramsTerms (Map.keys ngs) <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) NgramsTerms (Map.keys ngs)
graph <- liftIO $ cooc2graph myCooc graph <- liftIO $ cooc2graph myCooc
pure $ set graph_metadata (Just metadata) pure $ set graph_metadata (Just metadata)
......
...@@ -82,6 +82,7 @@ getPhylo _phyloId _lId l f b l' ms x y z ts s o e d b' = do ...@@ -82,6 +82,7 @@ getPhylo _phyloId _lId l f b l' ms x y z ts s o e d b' = do
q = initPhyloQueryView l f b l' ms fs' ts so e d b' q = initPhyloQueryView l f b l' ms fs' ts so e d b'
-- | TODO remove phylo for real data here -- | TODO remove phylo for real data here
pure (toPhyloView q phylo) pure (toPhyloView q phylo)
-- TODO remove phylo for real data here
------------------------------------------------------------------------ ------------------------------------------------------------------------
{- {-
......
resolver: lts-12.10 resolver: lts-12.26
flags: {} flags: {}
extra-package-dbs: [] extra-package-dbs: []
packages: packages:
- . - .
- 'deps/servant-job'
- 'deps/clustering-louvain'
- 'deps/patches-map'
- 'deps/patches-class'
- 'deps/haskell-opaleye'
- 'deps/hsparql'
allow-newer: true allow-newer: true
extra-deps: extra-deps:
- time-units-1.0.0
- git: https://github.com/delanoe/data-time-segment.git - git: https://github.com/delanoe/data-time-segment.git
commit: 4e3d57d80e9dfe6624c8eeaa8595fc8fe64d8723 commit: 10a416b9f6c443866b36479c3441ebb3bcdeb7ef
- git: https://gitlab.iscpif.fr/gargantext/hlcm.git - git: https://gitlab.iscpif.fr/gargantext/hlcm.git
commit: 6f0595d2421005837d59151a8b26eee83ebb67b5 commit: 6f0595d2421005837d59151a8b26eee83ebb67b5
- git: https://github.com/delanoe/servant-static-th.git - git: https://github.com/delanoe/servant-static-th.git
...@@ -23,7 +16,22 @@ extra-deps: ...@@ -23,7 +16,22 @@ extra-deps:
commit: 90eef7604bb230644c2246eccd094d7bfefcb135 commit: 90eef7604bb230644c2246eccd094d7bfefcb135
- git: https://github.com/paulrzcz/HSvm.git - git: https://github.com/paulrzcz/HSvm.git
commit: 3fe28b683aba5ddf05e3b5f8eced0bd05c5a29f9 commit: 3fe28b683aba5ddf05e3b5f8eced0bd05c5a29f9
#- opaleye-0.6.7002.0 - git: https://github.com/robstewart57/rdf4h.git
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed
commit: dcaa0f5dd53f20648f4f5a615d29163582a4219c
- git: https://gitlab.iscpif.fr/gargantext/patches-class
commit: 746b4ce0af8f9e600d555ad7e5b2973a940cdad9
- git: https://github.com/np/servant-job.git
commit: ac4227441bbca30c44235582b5ec31340c569021
- git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git
commit: 1c636112b151110408e7c5a28cec39e46657358e
- git: https://github.com/np/patches-map
commit: 8c6f38c4844ead53e664cf9c82ba461715dbe445
- git: https://github.com/delanoe/haskell-opaleye.git #- opaleye-0.6.7002.0
commit: 53385de076be09f728a1b58c035a18e9ff9bcfd6
- git: https://github.com/delanoe/hsparql.git
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- KMP-0.1.0.2 - KMP-0.1.0.2
- accelerate-1.2.0.0 - accelerate-1.2.0.0
- aeson-lens-0.5.0.0 - aeson-lens-0.5.0.0
...@@ -35,10 +43,10 @@ extra-deps: ...@@ -35,10 +43,10 @@ extra-deps:
- multiset-0.3.4.1 # stack test - multiset-0.3.4.1 # stack test
- probable-0.1.3 - probable-0.1.3
- rake-0.0.1 - rake-0.0.1
- rdf4h-3.1.1
- json-stream-0.4.2.4 # Text.Parsers (JSON) - json-stream-0.4.2.4 # Text.Parsers (JSON)
- serialise-0.2.0.0 - serialise-0.2.0.0
- servant-flatten-0.2 - servant-flatten-0.2
- servant-multipart-0.11.2 - servant-multipart-0.11.2
- stemmer-0.5.2 - stemmer-0.5.2
- time-units-1.0.0
- validity-0.9.0.0 # patches-{map,class} - validity-0.9.0.0 # patches-{map,class}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment