Commit f525a787 authored by qlobbe's avatar qlobbe

Merge branch 'dev' into dev-phylo

parents 6fc1eb02 8f864992
...@@ -13,12 +13,37 @@ Institute of Paris Île-de-France (ISC-PIF) and its partners. ...@@ -13,12 +13,37 @@ Institute of Paris Île-de-France (ISC-PIF) and its partners.
## Installation ## Installation
### Docker Disclaimer: this project is still on development, this is work in
curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devops/docker-install | sh progress. Please report and improve this documentation if you encounter
issues.
### Debian ### Build Core Code
curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devops/debian-install | sh #### Docker
curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devops/docker-install | sh
#### Debian
curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devops/debian-install | sh
### Add dependencies
1. CoreNLP is needed (EN and FR); This dependency will not be needed
soon.
- wget https://dl.gargantext.org/coreNLP.tar.bz2
- tar xvjf coreNLP.tar.bz2
- ./startServer.sh
2. Louvain C++ needed to draw the socio-semantic graphs
- git clone https://gitlab.iscpif.fr/gargantext/clustering-louvain-cplusplus.git
- cd clustering-louvain-cplusplus
- ./install
### Initialization
Users has to be created first
1. stack ghci
2. runCmd insertUsersDemo
Then you can log in with user1:1resu
## Use Cases ## Use Cases
...@@ -33,21 +58,3 @@ curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devo ...@@ -33,21 +58,3 @@ curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devo
stack --docker exec gargantext-cli -- CorpusFromGarg.csv ListFromGarg.csv Ouput.json stack --docker exec gargantext-cli -- CorpusFromGarg.csv ListFromGarg.csv Ouput.json
```sql
INSERT INTO auth_user (password, is_superuser, username, first_name, last_name, email, is_staff, is_active)
VALUES ('1resu', true, 'user1', 'user', '1', 'a@localhost', true, true);
-- nodetype NodeUser has id 1
-- inserted user_id = 3
INSERT INTO nodes (typename, user_id, name)
VALUES (1, 3, 'user1');
-- same for master user -- 'gargantua'
INSERT INTO auth_user (password, is_superuser, username, first_name, last_name, email, is_staff, is_active)
VALUES ('autnagrag, true, 'gargantua, 'gargantua, '1', 'g@localhost', true, true);
-- nodetype NodeUser has id 1
-- inserted user_id = 5
INSERT INTO nodes (typename, user_id, name)
VALUES (1, 5, 'gargantua);
```
...@@ -19,21 +19,23 @@ Import a corpus binary. ...@@ -19,21 +19,23 @@ Import a corpus binary.
module Main where module Main where
import Data.Either
import Prelude (read) import Prelude (read)
import Control.Exception (finally) import Control.Exception (finally)
import Servant (ServantErr)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Flow (FlowCmdM, flowCorpusFile) import Gargantext.Database.Flow (FlowCmdM, flowCorpusFile)
import Gargantext.Text.Corpus.Parsers (FileFormat(CsvHalFormat)) import Gargantext.Text.Corpus.Parsers (FileFormat(..))
import Gargantext.Database.Utils (Cmd, ) import Gargantext.Database.Utils (Cmd, )
import Gargantext.Database.Types.Node (CorpusId, toHyperdataDocument) import Gargantext.Database.Types.Node (CorpusId, toHyperdataDocument)
import Gargantext.Database.Schema.User (insertUsersDemo) import Gargantext.Database.Schema.User (insertUsersDemo)
import Gargantext.Text.Terms (TermType(..)) import Gargantext.Text.Terms (TermType(..))
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.API -- (GargError)
import Gargantext.API.Node () -- instances import Gargantext.API.Node () -- instances
import Gargantext.API.Settings (withDevEnv, runCmdDev, DevEnv) import Gargantext.API.Settings (withDevEnv, runCmdDev, DevEnv)
import System.Environment (getArgs) import System.Environment (getArgs)
--import Gargantext.Text.Corpus.Parsers.GrandDebat (readFile, GrandDebatReference(..)) --import Gargantext.Text.Corpus.Parsers.GrandDebat (readFile, GrandDebatReference(..))
import Data.Text (Text)
import qualified Data.Text as Text import qualified Data.Text as Text
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
...@@ -42,16 +44,17 @@ main = do ...@@ -42,16 +44,17 @@ main = do
[userCreate, user, name, iniPath, limit, corpusPath] <- getArgs [userCreate, user, name, iniPath, limit, corpusPath] <- getArgs
--{- --{-
let createUsers :: Cmd ServantErr Int64 let createUsers :: Cmd GargError Int64
createUsers = insertUsersDemo createUsers = insertUsersDemo
let let
--tt = (Unsupervised EN 5 1 Nothing) --tt = (Unsupervised EN 6 0 Nothing)
tt = (Mono EN) tt = (Multi EN)
cmd :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId format = CsvGargV3 -- CsvHalFormat --WOS
cmd = flowCorpusFile (cs user) (cs name) (read limit :: Int) tt CsvHalFormat corpusPath cmd :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
cmd = flowCorpusFile (cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath
{- {-
let debatCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
debatCorpus = do debatCorpus = do
docs <- liftIO ( splitEvery 500 docs <- liftIO ( splitEvery 500
<$> take (read limit :: Int) <$> take (read limit :: Int)
......
from fpco/stack-build:lts-12.26
RUN apt-get update && \
apt-get install -y git libigraph0-dev && \
rm -rf /var/lib/apt/lists/*
RUN mkdir -v /deps && \
cd /deps && \
git clone https://gitlab.iscpif.fr/gargantext/clustering-louvain-cplusplus && \
cd clustering-louvain-cplusplus && \
./install
version: '3'
services:
postgres:
image: 'postgres:latest'
ports:
- 5432:5432
environment:
POSTGRES_USER: gargantua
POSTGRES_PASSWORD: C8kdcUrAQy66U
POSTGRES_DB: gargandbV5
volumes:
- pgdata:/var/lib/postgresql/data
- ../:/gargantext
- ../dbs:/dbs
volumes:
pgdata:
#!/bin/bash
sudo su postgres
PW="password"
DB="gargandbV5"
USER="gargantua"
psql -c "CREATE USER \"${USER}\"
psql -c "ALTER USER \"${USER}\" with PASSWORD \"${PW}\""
psql -c "DROP DATABASE IF EXISTS \"${DB}\""
createdb "${DB}"
psql "${DB}" < schema.sql
psql -c "ALTER DATABASE \"${DB}\" OWNER to \"${USER}\" ;"
#!/bin/bash #!/bin/bash
DB="gargandbV5" DB="gargandbV5"
rm ../../tmp*
rm ../../repo*
psql -c "drop database IF EXISTS \"${DB}\"" psql -c "drop database IF EXISTS \"${DB}\""
createdb "${DB}" createdb "${DB}"
......
...@@ -84,8 +84,7 @@ CREATE TABLE public.nodes_nodes ( ...@@ -84,8 +84,7 @@ CREATE TABLE public.nodes_nodes (
node1_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE, node1_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
node2_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE, node2_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
score real, score real,
favorite boolean, category integer,
delete boolean,
PRIMARY KEY (node1_id,node2_id) PRIMARY KEY (node1_id,node2_id)
); );
ALTER TABLE public.nodes_nodes OWNER TO gargantua; ALTER TABLE public.nodes_nodes OWNER TO gargantua;
...@@ -141,7 +140,7 @@ CREATE UNIQUE INDEX ON public.nodes USING btree (typename, parent_id, ((hyperdat ...@@ -141,7 +140,7 @@ CREATE UNIQUE INDEX ON public.nodes USING btree (typename, parent_id, ((hyperdat
CREATE UNIQUE INDEX ON public.ngrams (terms); -- TEST GIN CREATE UNIQUE INDEX ON public.ngrams (terms); -- TEST GIN
CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, delete); CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, category);
CREATE UNIQUE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id); CREATE UNIQUE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id);
CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type); CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type);
......
...@@ -30,7 +30,7 @@ library: ...@@ -30,7 +30,7 @@ library:
- Gargantext.API.FrontEnd - Gargantext.API.FrontEnd
- Gargantext.API.Ngrams - Gargantext.API.Ngrams
- Gargantext.API.Node - Gargantext.API.Node
- Gargantext.API.Orchestrator # - Gargantext.API.Orchestrator
- Gargantext.API.Search - Gargantext.API.Search
- Gargantext.API.Settings - Gargantext.API.Settings
- Gargantext.Core - Gargantext.Core
...@@ -48,6 +48,7 @@ library: ...@@ -48,6 +48,7 @@ library:
- Gargantext.Text - Gargantext.Text
- Gargantext.Text.Context - Gargantext.Text.Context
- Gargantext.Text.Corpus.Parsers - Gargantext.Text.Corpus.Parsers
- Gargantext.Text.Corpus.API
- Gargantext.Text.Corpus.Parsers.CSV - Gargantext.Text.Corpus.Parsers.CSV
- Gargantext.Text.Examples - Gargantext.Text.Examples
- Gargantext.Text.List.CSV - Gargantext.Text.List.CSV
...@@ -57,7 +58,6 @@ library: ...@@ -57,7 +58,6 @@ library:
- Gargantext.Text.Metrics.Count - Gargantext.Text.Metrics.Count
- Gargantext.Text.Search - Gargantext.Text.Search
- Gargantext.Text.Terms - Gargantext.Text.Terms
- Gargantext.Text.Terms.Stop
- Gargantext.Text.Terms.Mono - Gargantext.Text.Terms.Mono
- Gargantext.Text.Terms.Multi.Lang.En - Gargantext.Text.Terms.Multi.Lang.En
- Gargantext.Text.Terms.Multi.Lang.Fr - Gargantext.Text.Terms.Multi.Lang.Fr
...@@ -102,6 +102,7 @@ library: ...@@ -102,6 +102,7 @@ library:
- crawlerPubMed - crawlerPubMed
- crawlerIsidore - crawlerIsidore
- crawlerHAL - crawlerHAL
- crawlerISTEX
- data-time-segment - data-time-segment
- deepseq - deepseq
- directory - directory
...@@ -170,7 +171,7 @@ library: ...@@ -170,7 +171,7 @@ library:
- servant-auth - servant-auth
- servant-blaze - servant-blaze
- servant-client - servant-client
- servant-job # - servant-job
- servant-mock - servant-mock
- servant-multipart - servant-multipart
- servant-server - servant-server
...@@ -206,6 +207,7 @@ library: ...@@ -206,6 +207,7 @@ library:
- zip - zip
- zlib - zlib
# - utc # - utc
# API external connections
executables: executables:
gargantext-server: gargantext-server:
......
...@@ -126,7 +126,7 @@ instance HasInvalidError GargError where ...@@ -126,7 +126,7 @@ instance HasInvalidError GargError where
instance HasTreeError GargError where instance HasTreeError GargError where
_TreeError = _GargTreeError _TreeError = _GargTreeError
showAsServantErr :: Show a => a -> ServantErr showAsServantErr :: Show a => a -> ServerError
showAsServantErr a = err500 { errBody = BL8.pack $ show a } showAsServantErr a = err500 { errBody = BL8.pack $ show a }
fireWall :: Applicative f => Request -> FireWall -> f Bool fireWall :: Applicative f => Request -> FireWall -> f Bool
...@@ -321,7 +321,7 @@ serverGargAPI -- orchestrator ...@@ -321,7 +321,7 @@ serverGargAPI -- orchestrator
:<|> New.info fakeUserId :<|> New.info fakeUserId
-- :<|> orchestrator -- :<|> orchestrator
where where
fakeUserId = 1 -- TODO fakeUserId = 2 -- TODO, byDefault user1 (if users automatically generated with inserUsersDemo)
serverStatic :: Server (Get '[HTML] Html) serverStatic :: Server (Get '[HTML] Html)
serverStatic = $(do serverStatic = $(do
......
...@@ -24,26 +24,28 @@ New corpus means either: ...@@ -24,26 +24,28 @@ New corpus means either:
module Gargantext.API.Corpus.New module Gargantext.API.Corpus.New
where where
import Data.Either
import Control.Monad.IO.Class (liftIO)
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Swagger import Data.Swagger
import Data.Text (Text) import Data.Text (Text)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Flow (flowCorpusSearchInDatabase) import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
import Gargantext.Database.Types.Node (CorpusId) import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Text.Terms (TermType(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Utils (hash)
import Servant import Servant
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary import Test.QuickCheck.Arbitrary
import Gargantext.Database.Flow (FlowCmdM) import Gargantext.Core (Lang(..))
import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
import qualified Gargantext.Text.Corpus.API as API import qualified Gargantext.Text.Corpus.API as API
import Gargantext.Database.Types.Node (UserId) import Gargantext.Database.Types.Node (UserId)
data Query = Query { query_query :: Text data Query = Query { query_query :: Text
, query_corpus_id :: Int , query_corpus_id :: Int
, query_files_id :: [Text] , query_databases :: [API.ExternalAPIs]
} }
deriving (Eq, Show, Generic) deriving (Eq, Show, Generic)
...@@ -54,7 +56,7 @@ instance Arbitrary Query where ...@@ -54,7 +56,7 @@ instance Arbitrary Query where
arbitrary = elements [ Query q n fs arbitrary = elements [ Query q n fs
| q <- ["a","b"] | q <- ["a","b"]
, n <- [0..10] , n <- [0..10]
, fs <- map (map hash) [["a","b"], ["c","d"]] , fs <- take 3 $ repeat API.externalAPIs
] ]
instance ToSchema Query where instance ToSchema Query where
...@@ -62,20 +64,24 @@ instance ToSchema Query where ...@@ -62,20 +64,24 @@ instance ToSchema Query where
genericDeclareNamedSchema genericDeclareNamedSchema
defaultSchemaOptions {fieldLabelModifier = \fieldLabel -> drop 6 fieldLabel} defaultSchemaOptions {fieldLabelModifier = \fieldLabel -> drop 6 fieldLabel}
type Api = Summary "New Corpus endpoint" type Api = Summary "New Corpus endpoint"
:> ReqBody '[JSON] Query :> ReqBody '[JSON] Query
:> Post '[JSON] CorpusId :> Post '[JSON] CorpusId
:<|> Get '[JSON] ApiInfo :<|> Get '[JSON] ApiInfo
-- | TODO manage several apis
api :: (FlowCmdM env err m) => Query -> m CorpusId
api (Query q _ as) = do
cId <- case head as of
Nothing -> flowCorpusSearchInDatabase "user1" EN q
Just API.All -> flowCorpusSearchInDatabase "user1" EN q
Just a -> do
docs <- liftIO $ API.get a q (Just 1000)
cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
pure cId'
api :: FlowCmdM env err m => Query -> m CorpusId
api (Query q _ _) = do
cId <- flowCorpusSearchInDatabase "user1" EN q
pure cId pure cId
------------------------------------------------ ------------------------------------------------
data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]} data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
deriving (Generic) deriving (Generic)
......
...@@ -56,7 +56,7 @@ import Data.Map.Strict (Map) ...@@ -56,7 +56,7 @@ import Data.Map.Strict (Map)
import qualified Data.Set as Set import qualified Data.Set as Set
import Control.Category ((>>>)) import Control.Category ((>>>))
import Control.Concurrent import Control.Concurrent
import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~)) import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~))
import Control.Monad.Error.Class (MonadError) import Control.Monad.Error.Class (MonadError)
import Control.Monad.Reader import Control.Monad.Reader
import Control.Monad.State import Control.Monad.State
...@@ -72,7 +72,7 @@ import GHC.Generics (Generic) ...@@ -72,7 +72,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
-- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..)) -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
import Gargantext.Database.Config (userMaster) import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlySlow) import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast)
import Gargantext.Database.Schema.Ngrams (NgramsType) import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Types.Node (NodeType(..)) import Gargantext.Database.Types.Node (NodeType(..))
import Gargantext.Database.Utils (fromField', HasConnection) import Gargantext.Database.Utils (fromField', HasConnection)
...@@ -99,18 +99,22 @@ instance ToParamSchema TODO where ...@@ -99,18 +99,22 @@ instance ToParamSchema TODO where
------------------------------------------------------------------------ ------------------------------------------------------------------------
--data FacetFormat = Table | Chart --data FacetFormat = Table | Chart
data TabType = Docs | Terms | Sources | Authors | Institutes | Trash data TabType = Docs | Trash | MoreFav | MoreTrash
| Terms | Sources | Authors | Institutes
| Contacts | Contacts
deriving (Generic, Enum, Bounded) deriving (Generic, Enum, Bounded, Show)
instance FromHttpApiData TabType instance FromHttpApiData TabType
where where
parseUrlPiece "Docs" = pure Docs parseUrlPiece "Docs" = pure Docs
parseUrlPiece "Trash" = pure Trash
parseUrlPiece "MoreFav" = pure MoreFav
parseUrlPiece "MoreTrash" = pure MoreTrash
parseUrlPiece "Terms" = pure Terms parseUrlPiece "Terms" = pure Terms
parseUrlPiece "Sources" = pure Sources parseUrlPiece "Sources" = pure Sources
parseUrlPiece "Institutes" = pure Institutes parseUrlPiece "Institutes" = pure Institutes
parseUrlPiece "Authors" = pure Authors parseUrlPiece "Authors" = pure Authors
parseUrlPiece "Trash" = pure Trash
parseUrlPiece "Contacts" = pure Contacts parseUrlPiece "Contacts" = pure Contacts
...@@ -437,11 +441,11 @@ instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where ...@@ -437,11 +441,11 @@ instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
-- If they happen to be equal then the patch is Keep. -- If they happen to be equal then the patch is Keep.
instance ToSchema a => ToSchema (Replace a) where instance ToSchema a => ToSchema (Replace a) where
declareNamedSchema (_ :: proxy (Replace a)) = do declareNamedSchema (_ :: Proxy (Replace a)) = do
-- TODO Keep constructor is not supported here. -- TODO Keep constructor is not supported here.
aSchema <- declareSchemaRef (Proxy :: Proxy a) aSchema <- declareSchemaRef (Proxy :: Proxy a)
return $ NamedSchema (Just "Replace") $ mempty return $ NamedSchema (Just "Replace") $ mempty
& type_ .~ SwaggerObject & type_ ?~ SwaggerObject
& properties .~ & properties .~
InsOrdHashMap.fromList InsOrdHashMap.fromList
[ ("old", aSchema) [ ("old", aSchema)
...@@ -888,10 +892,10 @@ getTableNgrams :: forall env err m. ...@@ -888,10 +892,10 @@ getTableNgrams :: forall env err m.
-> Maybe OrderBy -> Maybe OrderBy
-> (NgramsTerm -> Bool) -> (NgramsTerm -> Bool)
-> m (Versioned NgramsTable) -> m (Versioned NgramsTable)
getTableNgrams nType nId tabType listId limit_ offset getTableNgrams _nType nId tabType listId limit_ offset
listType minSize maxSize orderBy searchQuery = do listType minSize maxSize orderBy searchQuery = do
lIds <- selectNodesWithUsername NodeList userMaster _lIds <- selectNodesWithUsername NodeList userMaster
let let
ngramsType = ngramsTypeFromTabType tabType ngramsType = ngramsTypeFromTabType tabType
offset' = maybe 0 identity offset offset' = maybe 0 identity offset
...@@ -935,11 +939,15 @@ getTableNgrams nType nId tabType listId limit_ offset ...@@ -935,11 +939,15 @@ getTableNgrams nType nId tabType listId limit_ offset
setScores False table = pure table setScores False table = pure table
setScores True table = do setScores True table = do
let ngrams_terms = (table ^.. each . ne_ngrams) let ngrams_terms = (table ^.. each . ne_ngrams)
occurrences <- getOccByNgramsOnlyFast nId
ngramsType
ngrams_terms
{-
occurrences <- getOccByNgramsOnlySlow nType nId occurrences <- getOccByNgramsOnlySlow nType nId
(lIds <> [listId]) (lIds <> [listId])
ngramsType ngramsType
ngrams_terms ngrams_terms
-}
let let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
......
...@@ -33,7 +33,6 @@ import qualified Data.Set as Set ...@@ -33,7 +33,6 @@ import qualified Data.Set as Set
type RootTerm = Text type RootTerm = Text
getListNgrams :: RepoCmdM env err m getListNgrams :: RepoCmdM env err m
=> [ListId] -> NgramsType => [ListId] -> NgramsType
-> m (Map Text NgramsRepoElement) -> m (Map Text NgramsRepoElement)
...@@ -68,7 +67,7 @@ mapTermListRoot :: RepoCmdM env err m ...@@ -68,7 +67,7 @@ mapTermListRoot :: RepoCmdM env err m
-> m (Map Text (ListType, (Maybe Text))) -> m (Map Text (ListType, (Maybe Text)))
mapTermListRoot nodeIds ngramsType = do mapTermListRoot nodeIds ngramsType = do
ngrams <- getListNgrams nodeIds ngramsType ngrams <- getListNgrams nodeIds ngramsType
pure $ Map.fromList [(t, (_nre_list nre, _nre_root nre)) pure $ Map.fromList [ (t, (_nre_list nre, _nre_root nre))
| (t, nre) <- Map.toList ngrams | (t, nre) <- Map.toList ngrams
] ]
...@@ -104,14 +103,12 @@ getCoocByNgrams = getCoocByNgrams' identity ...@@ -104,14 +103,12 @@ getCoocByNgrams = getCoocByNgrams' identity
getCoocByNgrams' :: (Ord a, Ord c) => (b -> Set c) -> Diagonal -> Map a b -> Map (a, a) Int getCoocByNgrams' :: (Ord a, Ord c) => (b -> Set c) -> Diagonal -> Map a b -> Map (a, a) Int
getCoocByNgrams' f (Diagonal diag) m = getCoocByNgrams' f (Diagonal diag) m =
Map.fromList [((t1,t2) Map.fromList [( (t1,t2)
,maybe 0 Set.size $ Set.intersection , maybe 0 Set.size $ Set.intersection
<$> (fmap f $ Map.lookup t1 m) <$> (fmap f $ Map.lookup t1 m)
<*> (fmap f $ Map.lookup t2 m) <*> (fmap f $ Map.lookup t2 m)
) | (t1,t2) <- case diag of ) | (t1,t2) <- case diag of
True -> [ (x,y) | x <- Map.keys m, y <- Map.keys m, x <= y] True -> [ (x,y) | x <- Map.keys m, y <- Map.keys m, x <= y]
False -> listToCombi identity (Map.keys m) False -> listToCombi identity (Map.keys m)
] ]
...@@ -7,7 +7,6 @@ Maintainer : team@gargantext.org ...@@ -7,7 +7,6 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
-- TODO-ACCESS: CanGetNode -- TODO-ACCESS: CanGetNode
-- TODO-EVENTS: No events as this is a read only query. -- TODO-EVENTS: No events as this is a read only query.
Node API Node API
...@@ -19,7 +18,6 @@ Node API ...@@ -19,7 +18,6 @@ Node API
-- TODO-EVENTS: DeletedNodes [NodeId] -- TODO-EVENTS: DeletedNodes [NodeId]
-- {"tag": "DeletedNodes", "nodes": [Int*]} -- {"tag": "DeletedNodes", "nodes": [Int*]}
-} -}
{-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
...@@ -38,7 +36,7 @@ Node API ...@@ -38,7 +36,7 @@ Node API
module Gargantext.API.Node module Gargantext.API.Node
where where
import Control.Lens (prism', (.~), (?~)) import Control.Lens ((.~), (?~))
import Control.Monad ((>>), forM) import Control.Monad ((>>), forM)
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Data.Aeson (FromJSON, ToJSON) import Data.Aeson (FromJSON, ToJSON)
...@@ -52,15 +50,15 @@ import Gargantext.API.Metrics ...@@ -52,15 +50,15 @@ import Gargantext.API.Metrics
import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, apiNgramsTableCorpus, QueryParamR, TODO) import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, apiNgramsTableCorpus, QueryParamR, TODO)
import Gargantext.API.Ngrams.NTree (MyTree) import Gargantext.API.Ngrams.NTree (MyTree)
import Gargantext.API.Search (SearchDocsAPI, searchDocs) import Gargantext.API.Search (SearchDocsAPI, searchDocs)
import Gargantext.API.Table
import Gargantext.API.Types import Gargantext.API.Types
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Types.Main (Tree, NodeTree, ListType) import Gargantext.Core.Types.Main (Tree, NodeTree, ListType)
import Gargantext.Database.Config (nodeTypeId) import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),runViewAuthorsDoc) import Gargantext.Database.Facet (FacetDoc, OrderBy(..))
import Gargantext.Database.Node.Children (getChildren) import Gargantext.Database.Node.Children (getChildren)
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, getNode', deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..)) import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, getNode', deleteNode, deleteNodes, mkNodeWithParent, JSONB, HasNodeError(..))
import Gargantext.Database.Schema.NodeNode (nodesToFavorite, nodesToTrash) import Gargantext.Database.Schema.NodeNode (nodeNodesCategory)
import Gargantext.Database.Tree (treeDB, HasTreeError(..), TreeError(..)) import Gargantext.Database.Tree (treeDB)
import Gargantext.Database.Types.Node import Gargantext.Database.Types.Node
import Gargantext.Database.Utils -- (Cmd, CmdM) import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -97,7 +95,7 @@ nodesAPI ids = deleteNodes ids ...@@ -97,7 +95,7 @@ nodesAPI ids = deleteNodes ids
-- TODO-EVENTS: -- TODO-EVENTS:
-- PutNode ? -- PutNode ?
-- TODO needs design discussion. -- TODO needs design discussion.
type Roots = Get '[JSON] [NodeAny] type Roots = Get '[JSON] [Node HyperdataAny]
:<|> Put '[JSON] Int -- TODO :<|> Put '[JSON] Int -- TODO
-- | TODO: access by admin only -- | TODO: access by admin only
...@@ -132,8 +130,7 @@ type NodeAPI a = Get '[JSON] (Node a) ...@@ -132,8 +130,7 @@ type NodeAPI a = Get '[JSON] (Node a)
:<|> "ngrams" :> TableNgramsApi :<|> "ngrams" :> TableNgramsApi
:<|> "pairing" :> PairingApi :<|> "pairing" :> PairingApi
:<|> "favorites" :> FavApi :<|> "category" :> CatApi
:<|> "documents" :> DocsApi
:<|> "search" :> SearchDocsAPI :<|> "search" :> SearchDocsAPI
-- VIZ -- VIZ
...@@ -163,27 +160,28 @@ type ChildrenApi a = Summary " Summary children" ...@@ -163,27 +160,28 @@ type ChildrenApi a = Summary " Summary children"
-- TODO: make the NodeId type indexed by `a`, then we no longer need the proxy. -- TODO: make the NodeId type indexed by `a`, then we no longer need the proxy.
nodeAPI :: JSONB a => proxy a -> UserId -> NodeId -> GargServer (NodeAPI a) nodeAPI :: JSONB a => proxy a -> UserId -> NodeId -> GargServer (NodeAPI a)
nodeAPI p uId id nodeAPI p uId id
= getNode id p = getNode id p
:<|> rename id :<|> rename id
:<|> postNode uId id :<|> postNode uId id
:<|> putNode id :<|> putNode id
:<|> deleteNodeApi id :<|> deleteNodeApi id
:<|> getChildren id p :<|> getChildren id p
-- TODO gather it -- TODO gather it
:<|> getTable id :<|> tableApi id
:<|> apiNgramsTableCorpus id :<|> apiNgramsTableCorpus id
:<|> getPairing id :<|> getPairing id
-- :<|> getTableNgramsDoc id -- :<|> getTableNgramsDoc id
:<|> favApi id :<|> catApi id
:<|> delDocs id
:<|> searchDocs id :<|> searchDocs id
:<|> getScatter id :<|> getScatter id
:<|> getChart id :<|> getChart id
:<|> getPie id :<|> getPie id
:<|> getTree id :<|> getTree id
:<|> phyloAPI id :<|> phyloAPI id uId
:<|> postUpload id :<|> postUpload id
where where
deleteNodeApi id' = do deleteNodeApi id' = do
...@@ -194,8 +192,6 @@ nodeAPI p uId id ...@@ -194,8 +192,6 @@ nodeAPI p uId id
-- Annuaire -- Annuaire
-- :<|> query -- :<|> query
------------------------------------------------------------------------ ------------------------------------------------------------------------
data RenameNode = RenameNode { r_name :: Text } data RenameNode = RenameNode { r_name :: Text }
deriving (Generic) deriving (Generic)
...@@ -217,55 +213,30 @@ instance Arbitrary PostNode where ...@@ -217,55 +213,30 @@ instance Arbitrary PostNode where
arbitrary = elements [PostNode "Node test" NodeCorpus] arbitrary = elements [PostNode "Node test" NodeCorpus]
------------------------------------------------------------------------ ------------------------------------------------------------------------
type DocsApi = Summary "Docs : Move to trash" type CatApi = Summary " To Categorize NodeNodes: 0 for delete, 1/null neutral, 2 favorite"
:> ReqBody '[JSON] Documents :> ReqBody '[JSON] NodesToCategory
:> Delete '[JSON] [Int]
data Documents = Documents { documents :: [NodeId]}
deriving (Generic)
instance FromJSON Documents
instance ToJSON Documents
instance ToSchema Documents
delDocs :: CorpusId -> Documents -> Cmd err [Int]
delDocs cId ds = nodesToTrash $ map (\n -> (cId, n, True)) $ documents ds
------------------------------------------------------------------------
type FavApi = Summary " Favorites label"
:> ReqBody '[JSON] Favorites
:> Put '[JSON] [Int] :> Put '[JSON] [Int]
:<|> Summary " Favorites unlabel"
:> ReqBody '[JSON] Favorites
:> Delete '[JSON] [Int]
data Favorites = Favorites { favorites :: [NodeId]} data NodesToCategory = NodesToCategory { ntc_nodesId :: [NodeId]
, ntc_category :: Int
}
deriving (Generic) deriving (Generic)
instance FromJSON Favorites instance FromJSON NodesToCategory
instance ToJSON Favorites instance ToJSON NodesToCategory
instance ToSchema Favorites instance ToSchema NodesToCategory
putFav :: CorpusId -> Favorites -> Cmd err [Int]
putFav cId fs = nodesToFavorite $ map (\n -> (cId, n, True)) $ favorites fs
delFav :: CorpusId -> Favorites -> Cmd err [Int]
delFav cId fs = nodesToFavorite $ map (\n -> (cId, n, False)) $ favorites fs
favApi :: CorpusId -> GargServer FavApi catApi :: CorpusId -> GargServer CatApi
favApi cId = putFav cId :<|> delFav cId catApi = putCat
where
putCat :: CorpusId -> NodesToCategory -> Cmd err [Int]
putCat cId cs' = nodeNodesCategory $ map (\n -> (cId, n, ntc_category cs')) (ntc_nodesId cs')
------------------------------------------------------------------------ ------------------------------------------------------------------------
type TableApi = Summary " Table API"
:> QueryParam "view" TabType
:> QueryParam "offset" Int
:> QueryParam "limit" Int
:> QueryParam "order" OrderBy
:> Get '[JSON] [FacetDoc]
-- TODO adapt FacetDoc -> ListDoc (and add type of document as column) -- TODO adapt FacetDoc -> ListDoc (and add type of document as column)
type PairingApi = Summary " Pairing API" type PairingApi = Summary " Pairing API"
:> QueryParam "view" TabType -- TODO change TabType -> DocType (CorpusId for pairing) :> QueryParam "view" TabType
-- TODO change TabType -> DocType (CorpusId for pairing)
:> QueryParam "offset" Int :> QueryParam "offset" Int
:> QueryParam "limit" Int :> QueryParam "limit" Int
:> QueryParam "order" OrderBy :> QueryParam "order" OrderBy
...@@ -290,8 +261,6 @@ type TreeApi = Summary " Tree API" ...@@ -290,8 +261,6 @@ type TreeApi = Summary " Tree API"
:> QueryParamR "listType" ListType :> QueryParamR "listType" ListType
:> Get '[JSON] (ChartMetrics [MyTree]) :> Get '[JSON] (ChartMetrics [MyTree])
-- Depending on the Type of the Node, we could post -- Depending on the Type of the Node, we could post
-- New documents for a corpus -- New documents for a corpus
-- New map list terms -- New map list terms
...@@ -302,7 +271,9 @@ type TreeApi = Summary " Tree API" ...@@ -302,7 +271,9 @@ type TreeApi = Summary " Tree API"
------------------------------------------------------------------------ ------------------------------------------------------------------------
{-
NOTE: These instances are not necessary. However, these messages could be part
of a display function for NodeError/TreeError.
instance HasNodeError ServantErr where instance HasNodeError ServantErr where
_NodeError = prism' mk (const Nothing) -- panic "HasNodeError ServantErr: not a prism") _NodeError = prism' mk (const Nothing) -- panic "HasNodeError ServantErr: not a prism")
where where
...@@ -320,7 +291,6 @@ instance HasNodeError ServantErr where ...@@ -320,7 +291,6 @@ instance HasNodeError ServantErr where
mk ManyParents = err500 { errBody = e <> "Too many parents" } mk ManyParents = err500 { errBody = e <> "Too many parents" }
mk ManyNodeUsers = err500 { errBody = e <> "Many userNode/user" } mk ManyNodeUsers = err500 { errBody = e <> "Many userNode/user" }
-- TODO(orphan): There should be a proper APIError data type with a case TreeError.
instance HasTreeError ServantErr where instance HasTreeError ServantErr where
_TreeError = prism' mk (const Nothing) -- panic "HasTreeError ServantErr: not a prism") _TreeError = prism' mk (const Nothing) -- panic "HasTreeError ServantErr: not a prism")
where where
...@@ -328,6 +298,7 @@ instance HasTreeError ServantErr where ...@@ -328,6 +298,7 @@ instance HasTreeError ServantErr where
mk NoRoot = err404 { errBody = e <> "Root node not found" } mk NoRoot = err404 { errBody = e <> "Root node not found" }
mk EmptyRoot = err500 { errBody = e <> "Root node should not be empty" } mk EmptyRoot = err500 { errBody = e <> "Root node should not be empty" }
mk TooManyRoots = err500 { errBody = e <> "Too many root nodes" } mk TooManyRoots = err500 { errBody = e <> "Too many root nodes" }
-}
type TreeAPI = Get '[JSON] (Tree NodeTree) type TreeAPI = Get '[JSON] (Tree NodeTree)
-- TODO-ACCESS: CanTree or CanGetNode -- TODO-ACCESS: CanTree or CanGetNode
...@@ -340,24 +311,6 @@ treeAPI = treeDB ...@@ -340,24 +311,6 @@ treeAPI = treeDB
rename :: NodeId -> RenameNode -> Cmd err [Int] rename :: NodeId -> RenameNode -> Cmd err [Int]
rename nId (RenameNode name') = U.update (U.Rename nId name') rename nId (RenameNode name') = U.update (U.Rename nId name')
getTable :: NodeId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
-> Maybe OrderBy -> Cmd err [FacetDoc]
getTable cId ft o l order =
case ft of
(Just Docs) -> runViewDocuments cId False o l order
(Just Trash) -> runViewDocuments cId True o l order
_ -> panic "not implemented"
getPairing :: ContactId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
-> Maybe OrderBy -> Cmd err [FacetDoc]
getPairing cId ft o l order =
case ft of
(Just Docs) -> runViewAuthorsDoc cId False o l order
(Just Trash) -> runViewAuthorsDoc cId True o l order
_ -> panic "not implemented"
postNode :: HasNodeError err => UserId -> NodeId -> PostNode -> Cmd err [NodeId] postNode :: HasNodeError err => UserId -> NodeId -> PostNode -> Cmd err [NodeId]
postNode uId pId (PostNode nodeName nt) = mkNodeWithParent nt (Just pId) uId nodeName postNode uId pId (PostNode nodeName nt) = mkNodeWithParent nt (Just pId) uId nodeName
......
...@@ -56,7 +56,6 @@ instance Arbitrary SearchQuery where ...@@ -56,7 +56,6 @@ instance Arbitrary SearchQuery where
arbitrary = elements [SearchQuery ["electrodes"]] arbitrary = elements [SearchQuery ["electrodes"]]
----------------------------------------------------------------------- -----------------------------------------------------------------------
data SearchDocResults = SearchDocResults { sdr_results :: [FacetDoc]} data SearchDocResults = SearchDocResults { sdr_results :: [FacetDoc]}
deriving (Generic) deriving (Generic)
$(deriveJSON (unPrefix "sdr_") ''SearchDocResults) $(deriveJSON (unPrefix "sdr_") ''SearchDocResults)
...@@ -102,7 +101,7 @@ searchPairs pId (SearchQuery q) o l order = ...@@ -102,7 +101,7 @@ searchPairs pId (SearchQuery q) o l order =
searchDocs :: NodeId -> GargServer SearchDocsAPI searchDocs :: NodeId -> GargServer SearchDocsAPI
searchDocs nId (SearchQuery q) o l order = searchDocs nId (SearchQuery q) o l order =
SearchDocResults <$> searchInCorpus nId q o l order SearchDocResults <$> searchInCorpus nId False q o l order
--SearchResults <$> searchInCorpusWithContacts nId q o l order --SearchResults <$> searchInCorpusWithContacts nId q o l order
...@@ -46,7 +46,7 @@ import qualified Data.ByteString.Lazy as L ...@@ -46,7 +46,7 @@ import qualified Data.ByteString.Lazy as L
import Servant import Servant
import Servant.Client (BaseUrl, parseBaseUrl) import Servant.Client (BaseUrl, parseBaseUrl)
import Servant.Job.Async (newJobEnv, defaultSettings) --import Servant.Job.Async (newJobEnv, defaultSettings)
import Web.HttpApiData (parseUrlPiece) import Web.HttpApiData (parseUrlPiece)
import qualified Jose.Jwk as Jose import qualified Jose.Jwk as Jose
import qualified Jose.Jwa as Jose import qualified Jose.Jwa as Jose
...@@ -60,7 +60,7 @@ import Control.Lens ...@@ -60,7 +60,7 @@ import Control.Lens
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Utils (databaseParameters, HasConnection(..), Cmd', runCmd) import Gargantext.Database.Utils (databaseParameters, HasConnection(..), Cmd', runCmd)
import Gargantext.API.Ngrams (NgramsRepo, HasRepoVar(..), HasRepoSaver(..), HasRepo(..), RepoEnv(..), r_version, saveRepo, initRepo, renv_var, renv_lock) import Gargantext.API.Ngrams (NgramsRepo, HasRepoVar(..), HasRepoSaver(..), HasRepo(..), RepoEnv(..), r_version, saveRepo, initRepo, renv_var, renv_lock)
import Gargantext.API.Orchestrator.Types --import Gargantext.API.Orchestrator.Types
type PortNumber = Int type PortNumber = Int
...@@ -147,7 +147,7 @@ data Env = Env ...@@ -147,7 +147,7 @@ data Env = Env
, _env_repo :: !RepoEnv , _env_repo :: !RepoEnv
, _env_manager :: !Manager , _env_manager :: !Manager
, _env_self_url :: !BaseUrl , _env_self_url :: !BaseUrl
, _env_scrapers :: !ScrapersEnv --, _env_scrapers :: !ScrapersEnv
} }
deriving (Generic) deriving (Generic)
...@@ -243,7 +243,7 @@ newEnv port file = do ...@@ -243,7 +243,7 @@ newEnv port file = do
param <- databaseParameters file param <- databaseParameters file
conn <- connect param conn <- connect param
repo <- readRepoEnv repo <- readRepoEnv
scrapers_env <- newJobEnv defaultSettings manager --scrapers_env <- newJobEnv defaultSettings manager
logger <- newStderrLoggerSet defaultBufSize logger <- newStderrLoggerSet defaultBufSize
pure $ Env pure $ Env
...@@ -252,7 +252,7 @@ newEnv port file = do ...@@ -252,7 +252,7 @@ newEnv port file = do
, _env_conn = conn , _env_conn = conn
, _env_repo = repo , _env_repo = repo
, _env_manager = manager , _env_manager = manager
, _env_scrapers = scrapers_env --, _env_scrapers = scrapers_env
, _env_self_url = self_url , _env_self_url = self_url
} }
...@@ -305,7 +305,7 @@ withDevEnv iniPath k = do ...@@ -305,7 +305,7 @@ withDevEnv iniPath k = do
runCmdRepl :: Show err => Cmd' DevEnv err a -> IO a runCmdRepl :: Show err => Cmd' DevEnv err a -> IO a
runCmdRepl f = withDevEnv "gargantext.ini" $ \env -> runCmdDev env f runCmdRepl f = withDevEnv "gargantext.ini" $ \env -> runCmdDev env f
runCmdReplServantErr :: Cmd' DevEnv ServantErr a -> IO a runCmdReplServantErr :: Cmd' DevEnv ServerError a -> IO a
runCmdReplServantErr = runCmdRepl runCmdReplServantErr = runCmdRepl
-- Use only for dev -- Use only for dev
...@@ -324,5 +324,5 @@ runCmdDevNoErr :: DevEnv -> Cmd' DevEnv () a -> IO a ...@@ -324,5 +324,5 @@ runCmdDevNoErr :: DevEnv -> Cmd' DevEnv () a -> IO a
runCmdDevNoErr = runCmdDev runCmdDevNoErr = runCmdDev
-- Use only for dev -- Use only for dev
runCmdDevServantErr :: DevEnv -> Cmd' DevEnv ServantErr a -> IO a runCmdDevServantErr :: DevEnv -> Cmd' DevEnv ServerError a -> IO a
runCmdDevServantErr = runCmdDev runCmdDevServantErr = runCmdDev
{-|
Module : Gargantext.API.Node
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-- TODO-ACCESS: CanGetNode
-- TODO-EVENTS: No events as this is a read only query.
Node API
-------------------------------------------------------------------
-- TODO-ACCESS: access by admin only.
-- At first let's just have an isAdmin check.
-- Later: check userId CanDeleteNodes Nothing
-- TODO-EVENTS: DeletedNodes [NodeId]
-- {"tag": "DeletedNodes", "nodes": [Int*]}
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Table
where
import Data.Aeson.TH (deriveJSON)
import Data.Maybe
import Data.Swagger
import Data.Text (Text())
import GHC.Generics (Generic)
import Gargantext.API.Ngrams (TabType(..))
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),runViewAuthorsDoc)
import Gargantext.Database.Learn (FavOrTrash(..), moreLike)
import Gargantext.Database.TextSearch
import Gargantext.Database.Types.Node
import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
------------------------------------------------------------------------
type TableApi = Summary " Table API"
:> ReqBody '[JSON] TableQuery
:> Post '[JSON] [FacetDoc]
--{-
data TableQuery = TableQuery
{ tq_offset :: Int
, tq_limit :: Int
, tq_orderBy :: OrderBy
, tq_view :: TabType
, tq_query :: Text
} deriving (Generic)
$(deriveJSON (unPrefix "tq_") ''TableQuery)
instance ToSchema TableQuery where
declareNamedSchema =
genericDeclareNamedSchema
defaultSchemaOptions {fieldLabelModifier = drop 3}
instance Arbitrary TableQuery where
arbitrary = elements [TableQuery 0 10 DateAsc Docs "electrodes"]
tableApi :: NodeId -> TableQuery -> Cmd err [FacetDoc]
tableApi cId (TableQuery o l order ft "") = getTable cId (Just ft) (Just o) (Just l) (Just order)
tableApi cId (TableQuery o l order ft q) = case ft of
Docs -> searchInCorpus cId False [q] (Just o) (Just l) (Just order)
Trash -> searchInCorpus cId True [q] (Just o) (Just l) (Just order)
x -> panic $ "not implemented in tableApi " <> (cs $ show x)
getTable :: NodeId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
-> Maybe OrderBy -> Cmd err [FacetDoc]
getTable cId ft o l order =
case ft of
(Just Docs) -> runViewDocuments cId False o l order
(Just Trash) -> runViewDocuments cId True o l order
(Just MoreFav) -> moreLike cId o l order IsFav
(Just MoreTrash) -> moreLike cId o l order IsTrash
x -> panic $ "not implemented in getTable: " <> (cs $ show x)
getPairing :: ContactId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
-> Maybe OrderBy -> Cmd err [FacetDoc]
getPairing cId ft o l order =
case ft of
(Just Docs) -> runViewAuthorsDoc cId False o l order
(Just Trash) -> runViewAuthorsDoc cId True o l order
_ -> panic $ "not implemented: get Pairing" <> (cs $ show ft)
{-|
Module : Gargantext.API.Utils
Description : Server API main Types
Copyright : (c) CNRS, 2017-Present
License : BSD3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mainly copied from Servant.Job.Utils (Thanks)
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.API.Utils
where
import Gargantext.Prelude
import Data.Maybe (Maybe, fromMaybe)
import Prelude (String)
import qualified Data.Text as T
import Data.Swagger
import Data.Text (Text)
swaggerOptions :: Text -> SchemaOptions
swaggerOptions pref = defaultSchemaOptions
{ Data.Swagger.fieldLabelModifier = modifier pref
, Data.Swagger.unwrapUnaryRecords = False
}
modifier :: Text -> String -> String
modifier pref field = T.unpack
$ T.stripPrefix pref (T.pack field) ?! "Expecting prefix " <> T.unpack pref
infixr 4 ?|
-- Reverse infix form of "fromMaybe"
(?|) :: Maybe a -> a -> a
(?|) = flip fromMaybe
infixr 4 ?!
-- Reverse infix form of "fromJust" with a custom error message
(?!) :: Maybe a -> String -> a
(?!) ma' msg = ma' ?| panic (T.pack msg)
...@@ -12,6 +12,8 @@ Portability : POSIX ...@@ -12,6 +12,8 @@ Portability : POSIX
module Gargantext.Core module Gargantext.Core
where where
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Language of a Text -- | Language of a Text
-- For simplicity, we suppose text has an homogenous language -- For simplicity, we suppose text has an homogenous language
......
{-|
Module : Gargantext.Core.Flow
Description : Core Flow main Types
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ConstrainedClassMethods #-}
module Gargantext.Core.Flow where
import Control.Lens (Lens')
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Text.Terms (TermType)
import Gargantext.Core (Lang)
import Gargantext.Database.Schema.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types.Main (HashId)
import Gargantext.Database.Types.Node -- (HyperdataDocument(..))
import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
import Gargantext.Database.Node.Document.Insert (AddUniqId, InsertDb)
import Gargantext.Database.Utils (Cmd)
type FlowCorpus a = ( AddUniqId a
, UniqId a
, InsertDb a
, ExtractNgramsT a
, HasText a
)
class UniqId a
where
uniqId :: Lens' a (Maybe HashId)
class ExtractNgramsT h
where
extractNgramsT :: HasText h => TermType Lang -> h -> Cmd err (Map Ngrams (Map NgramsType Int))
class HasText h
where
hasText :: h -> [Text]
------------------------------------------------------------------------
instance UniqId HyperdataDocument
where
uniqId = hyperdataDocument_uniqId
instance UniqId HyperdataContact
where
uniqId = hc_uniqId
...@@ -21,6 +21,7 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main ...@@ -21,6 +21,7 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, TokenTag(..), POS(..), NER(..) , TokenTag(..), POS(..), NER(..)
, Label, Stems , Label, Stems
, HasInvalidError(..), assertValid , HasInvalidError(..), assertValid
, Name
) where ) where
import Control.Lens (Prism', (#)) import Control.Lens (Prism', (#))
...@@ -42,6 +43,7 @@ import Gargantext.Prelude ...@@ -42,6 +43,7 @@ import Gargantext.Prelude
import GHC.Generics import GHC.Generics
------------------------------------------------------------------------ ------------------------------------------------------------------------
type Name = Text
type Term = Text type Term = Text
type Stems = Set Text type Stems = Set Text
type Label = [Text] type Label = [Text]
......
...@@ -114,39 +114,16 @@ fromListTypeId i = lookup i $ fromList [ (listTypeId l, l) | l <- [minBound..max ...@@ -114,39 +114,16 @@ fromListTypeId i = lookup i $ fromList [ (listTypeId l, l) | l <- [minBound..max
-- | TfidfCorpus | TfidfGlobal | TirankLocal | TirankGlobal -- | TfidfCorpus | TfidfGlobal | TirankLocal | TirankGlobal
-- | Community Manager Use Case -- | Community Manager Use Case
type Annuaire = NodeCorpus
-- | Favorites Node enable Swap Node with some synonyms for clarity -- | Favorites Node enable Swap Node with some synonyms for clarity
type NodeSwap = Node HyperdataResource
-- | Then a Node can be a List which has some synonyms -- | Then a Node can be a List which has some synonyms
type List = Node HyperdataList
type StopList = List
type MainList = List
type MapList = List
type GroupList = List
-- | Then a Node can be a Score which has some synonyms -- | Then a Node can be a Score which has some synonyms
type Score = Node HyperdataScore
type Occurrences = Score
type Cooccurrences = Score
type Specclusion = Score
type Genclusion = Score
type Cvalue = Score
type Tficf = Score
---- TODO All these Tfidf* will be replaced with TFICF
type TfidfCorpus = Tficf
type TfidfGlobal = Tficf
type TirankLocal = Tficf
type TirankGlobal = Tficf
--
-- Temporary types to be removed
type ErrorMessage = Text
-- Queries -- Queries
type Limit = Int type Limit = Int
type Offset = Int type Offset = Int
type IsTrash = Bool
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- All the Database is structred like a hierarchical Tree -- All the Database is structred like a hierarchical Tree
......
...@@ -98,7 +98,7 @@ mv :: NodeId -> ParentId -> Cmd err [Int] ...@@ -98,7 +98,7 @@ mv :: NodeId -> ParentId -> Cmd err [Int]
mv n p = U.update $ U.Move n p mv n p = U.update $ U.Move n p
-- | TODO get Children or Node -- | TODO get Children or Node
get :: PWD -> Cmd err [NodeAny] get :: PWD -> Cmd err [Node HyperdataAny]
get [] = pure [] get [] = pure []
get pwd = runOpaQuery $ selectNodesWithParentID (last pwd) get pwd = runOpaQuery $ selectNodesWithParentID (last pwd)
...@@ -107,10 +107,10 @@ home :: Cmd err PWD ...@@ -107,10 +107,10 @@ home :: Cmd err PWD
home = map _node_id <$> getNodesWithParentId 0 Nothing home = map _node_id <$> getNodesWithParentId 0 Nothing
-- | ls == get Children -- | ls == get Children
ls :: PWD -> Cmd err [NodeAny] ls :: PWD -> Cmd err [Node HyperdataAny]
ls = get ls = get
tree :: PWD -> Cmd err [NodeAny] tree :: PWD -> Cmd err [Node HyperdataAny]
tree p = do tree p = do
ns <- get p ns <- get p
children <- mapM (\n -> get [_node_id n]) ns children <- mapM (\n -> get [_node_id n]) ns
......
...@@ -46,6 +46,7 @@ nodeTypeId n = ...@@ -46,6 +46,7 @@ nodeTypeId n =
NodeCorpusV3 -> 3 NodeCorpusV3 -> 3
NodeCorpus -> 30 NodeCorpus -> 30
NodeAnnuaire -> 31 NodeAnnuaire -> 31
NodeTexts -> 40
NodeDocument -> 4 NodeDocument -> 4
NodeContact -> 41 NodeContact -> 41
--NodeSwap -> 19 --NodeSwap -> 19
...@@ -60,6 +61,7 @@ nodeTypeId n = ...@@ -60,6 +61,7 @@ nodeTypeId n =
NodePhylo -> 90 NodePhylo -> 90
NodeDashboard -> 7 NodeDashboard -> 7
NodeChart -> 51 NodeChart -> 51
NodeNoteBook -> 88
-- Cooccurrences -> 9 -- Cooccurrences -> 9
-- --
......
...@@ -65,11 +65,11 @@ import qualified Opaleye.Internal.Unpackspec() ...@@ -65,11 +65,11 @@ import qualified Opaleye.Internal.Unpackspec()
--instance FromJSON Facet --instance FromJSON Facet
--instance ToJSON Facet --instance ToJSON Facet
type Favorite = Bool type Favorite = Int
type Title = Text type Title = Text
-- TODO remove Title -- TODO remove Title
type FacetDoc = Facet NodeId UTCTime Title HyperdataDocument Favorite Int type FacetDoc = Facet NodeId UTCTime Title HyperdataDocument (Maybe Favorite) (Maybe Double)
type FacetSources = FacetDoc type FacetSources = FacetDoc
type FacetAuthors = FacetDoc type FacetAuthors = FacetDoc
type FacetTerms = FacetDoc type FacetTerms = FacetDoc
...@@ -146,12 +146,12 @@ instance ToSchema FacetDoc ...@@ -146,12 +146,12 @@ instance ToSchema FacetDoc
-- | Mock and Quickcheck instances -- | Mock and Quickcheck instances
instance Arbitrary FacetDoc where instance Arbitrary FacetDoc where
arbitrary = elements [ FacetDoc id' (jour year 01 01) t hp fav ngramCount arbitrary = elements [ FacetDoc id' (jour year 01 01) t hp (Just cat) (Just ngramCount)
| id' <- [1..10] | id' <- [1..10]
, year <- [1990..2000] , year <- [1990..2000]
, t <- ["title", "another title"] , t <- ["title", "another title"]
, hp <- arbitraryHyperdataDocuments , hp <- arbitraryHyperdataDocuments
, fav <- [True, False] , cat <- [0..2]
, ngramCount <- [3..100] , ngramCount <- [3..100]
] ]
...@@ -164,12 +164,11 @@ type FacetDocRead = Facet (Column PGInt4 ) ...@@ -164,12 +164,11 @@ type FacetDocRead = Facet (Column PGInt4 )
(Column PGTimestamptz) (Column PGTimestamptz)
(Column PGText ) (Column PGText )
(Column PGJsonb ) (Column PGJsonb )
(Column PGBool) (Column (Nullable PGInt4)) -- Category
(Column PGInt4 ) (Column (Nullable PGFloat8)) -- Score
----------------------------------------------------------------------- -----------------------------------------------------------------------
----------------------------------------------------------------------- -----------------------------------------------------------------------
type Trash = Bool
data OrderBy = DateAsc | DateDesc data OrderBy = DateAsc | DateDesc
| TitleAsc | TitleDesc | TitleAsc | TitleDesc
| ScoreDesc | ScoreAsc | ScoreDesc | ScoreAsc
...@@ -197,13 +196,13 @@ instance Arbitrary OrderBy ...@@ -197,13 +196,13 @@ instance Arbitrary OrderBy
arbitrary = elements [minBound..maxBound] arbitrary = elements [minBound..maxBound]
runViewAuthorsDoc :: ContactId -> Trash -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc] runViewAuthorsDoc :: ContactId -> IsTrash -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc]
runViewAuthorsDoc cId t o l order = runOpaQuery $ filterWith o l order $ viewAuthorsDoc cId t ntId runViewAuthorsDoc cId t o l order = runOpaQuery $ filterWith o l order $ viewAuthorsDoc cId t ntId
where where
ntId = NodeDocument ntId = NodeDocument
-- TODO add delete ? -- TODO add delete ?
viewAuthorsDoc :: ContactId -> Trash -> NodeType -> Query FacetDocRead viewAuthorsDoc :: ContactId -> IsTrash -> NodeType -> Query FacetDocRead
viewAuthorsDoc cId _ nt = proc () -> do viewAuthorsDoc cId _ nt = proc () -> do
(doc,(_,(_,(_,contact)))) <- queryAuthorsDoc -< () (doc,(_,(_,(_,contact)))) <- queryAuthorsDoc -< ()
...@@ -215,7 +214,7 @@ viewAuthorsDoc cId _ nt = proc () -> do ...@@ -215,7 +214,7 @@ viewAuthorsDoc cId _ nt = proc () -> do
restrict -< _node_id contact .== (toNullable $ pgNodeId cId) restrict -< _node_id contact .== (toNullable $ pgNodeId cId)
restrict -< _node_typename doc .== (pgInt4 $ nodeTypeId nt) restrict -< _node_typename doc .== (pgInt4 $ nodeTypeId nt)
returnA -< FacetDoc (_node_id doc) (_node_date doc) (_node_name doc) (_node_hyperdata doc) (pgBool True) (pgInt4 1) returnA -< FacetDoc (_node_id doc) (_node_date doc) (_node_name doc) (_node_hyperdata doc) (toNullable $ pgInt4 1) (toNullable $ pgDouble 1)
queryAuthorsDoc :: Query (NodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) queryAuthorsDoc :: Query (NodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))
queryAuthorsDoc = leftJoin5 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeTable cond12 cond23 cond34 cond45 queryAuthorsDoc = leftJoin5 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeTable cond12 cond23 cond34 cond45
...@@ -237,21 +236,22 @@ queryAuthorsDoc = leftJoin5 queryNodeTable queryNodeNgramTable queryNgramsTable ...@@ -237,21 +236,22 @@ queryAuthorsDoc = leftJoin5 queryNodeTable queryNodeNgramTable queryNgramsTable
------------------------------------------------------------------------ ------------------------------------------------------------------------
runViewDocuments :: CorpusId -> Trash -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc] runViewDocuments :: CorpusId -> IsTrash -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc]
runViewDocuments cId t o l order = runViewDocuments cId t o l order =
runOpaQuery $ filterWith o l order $ viewDocuments cId t ntId runOpaQuery $ filterWith o l order $ viewDocuments cId t ntId
where where
ntId = nodeTypeId NodeDocument ntId = nodeTypeId NodeDocument
viewDocuments :: CorpusId -> Trash -> NodeTypeId -> Query FacetDocRead viewDocuments :: CorpusId -> IsTrash -> NodeTypeId -> Query FacetDocRead
viewDocuments cId t ntId = proc () -> do viewDocuments cId t ntId = proc () -> do
n <- queryNodeTable -< () n <- queryNodeTable -< ()
nn <- queryNodeNodeTable -< () nn <- queryNodeNodeTable -< ()
restrict -< _node_id n .== nn_node2_id nn restrict -< _node_id n .== nn_node2_id nn
restrict -< nn_node1_id nn .== (pgNodeId cId) restrict -< nn_node1_id nn .== (pgNodeId cId)
restrict -< _node_typename n .== (pgInt4 ntId) restrict -< _node_typename n .== (pgInt4 ntId)
restrict -< nn_delete nn .== (pgBool t) restrict -< if t then nn_category nn .== (pgInt4 0)
returnA -< FacetDoc (_node_id n) (_node_date n) (_node_name n) (_node_hyperdata n) (nn_favorite nn) (pgInt4 1) else nn_category nn .>= (pgInt4 1)
returnA -< FacetDoc (_node_id n) (_node_date n) (_node_name n) (_node_hyperdata n) (toNullable $ nn_category nn) (toNullable $ nn_score nn)
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
This diff is collapsed.
{-|
Module : Gargantext.Database.Learn
Description : Learn Small Data Analytics with big data connection (DB)
opyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.Database.Learn where
import Data.Text (Text)
import Data.Tuple (snd)
import Data.Maybe
import Gargantext.Database.Facet
import Gargantext.Database.Types.Node
import Gargantext.Prelude
import Gargantext.Text.Learn
import qualified Data.List as List
import qualified Data.Text as Text
--import Gargantext.Database.Schema.NodeNode (nodeNodesCategory)
import Gargantext.Database.Utils (Cmd)
import Gargantext.Core.Types (Offset, Limit)
data FavOrTrash = IsFav | IsTrash
deriving (Eq)
moreLike :: CorpusId -> Maybe Offset -> Maybe Limit -> Maybe OrderBy
-> FavOrTrash -> Cmd err [FacetDoc]
moreLike cId o l order ft = do
priors <- getPriors ft cId
moreLikeWith cId o l order ft priors
---------------------------------------------------------------------------
getPriors :: FavOrTrash -> CorpusId -> Cmd err (Events Bool)
getPriors ft cId = do
docs_fav <- filter (\(FacetDoc _ _ _ _ f _) -> f == Just 2)
<$> runViewDocuments cId False Nothing Nothing Nothing
docs_trash <- List.take (List.length docs_fav)
<$> runViewDocuments cId True Nothing Nothing Nothing
let priors = priorEventsWith text (fav2bool ft) ( List.zip (repeat False) docs_fav
<> List.zip (repeat True ) docs_trash
)
pure priors
moreLikeWith :: CorpusId -> Maybe Offset -> Maybe Limit -> Maybe OrderBy
-> FavOrTrash -> Events Bool -> Cmd err [FacetDoc]
moreLikeWith cId o l order ft priors = do
docs_test <- filter (\(FacetDoc _ _ _ _ f _) -> f == Just 1)
<$> runViewDocuments cId False o Nothing order
let results = map fst
$ filter ((==) (Just $ not $ fav2bool ft) . snd)
$ map (\f -> (f, detectDefaultWithPriors text priors f)) docs_test
pure $ List.take (maybe 10 identity l) results
---------------------------------------------------------------------------
fav2bool :: FavOrTrash -> Bool
fav2bool ft = if (==) ft IsFav then True else False
text :: FacetDoc -> Text
text (FacetDoc _ _ _ h _ _) = title <> "" <> Text.take 100 abstr
where
title = maybe "" identity (_hyperdataDocument_title h)
abstr = maybe "" identity (_hyperdataDocument_abstract h)
---------------------------------------------------------------------------
{-
apply :: (FlowCmdM env e m) => FavOrTrash -> CorpusId -> [NodeId] -> m [Int]
apply favTrash cId ns = case favTrash of
IsFav -> nodeNodesCategory $ map (\n -> (cId, n, 2)) ns
IsTrash -> nodeNodesCategory $ map (\n -> (cId, n, 0)) ns
moreLikeAndApply :: FlowCmdM DevEnv GargError m => FavOrTrash -> CorpusId -> m [Int]
moreLikeAndApply ft cId = do
priors <- getPriors ft cId
moreLikeWithAndApply priors ft cId
moreLikeWithAndApply :: FlowCmdM DevEnv GargError m => Events Bool -> FavOrTrash -> CorpusId -> m [Int]
moreLikeWithAndApply priors ft cId = do
ids <- map facetDoc_id <$> moreLikeWith cId ft priors
apply ft cId ids
-}
...@@ -35,7 +35,6 @@ import Gargantext.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-}) ...@@ -35,7 +35,6 @@ import Gargantext.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
import qualified Data.Map as Map import qualified Data.Map as Map
--import qualified Data.Vector.Storable as Vec --import qualified Data.Vector.Storable as Vec
getMetrics :: FlowCmdM env err m getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text]) -> m (Map Text (ListType, Maybe Text), [Scored Text])
...@@ -44,34 +43,6 @@ getMetrics cId maybeListId tabType maybeLimit = do ...@@ -44,34 +43,6 @@ getMetrics cId maybeListId tabType maybeLimit = do
pure (ngs, scored myCooc) pure (ngs, scored myCooc)
{- | TODO remove unused function
getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text])
getMetrics cId maybeListId tabType maybeLimit = do
(ngs, ngs', metrics) <- getLocalMetrics cId maybeListId tabType maybeLimit
(_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster "" (Nothing :: Maybe HyperdataCorpus)
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster
metrics' <- getTficfWith cId masterCorpusId (lIds <> [lId]) (ngramsTypeFromTabType tabType) ngs'
pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics'])
getLocalMetrics :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m ( Map Text (ListType, Maybe Text)
, Map Text (Maybe RootTerm)
, Map Text (Vec.Vector Double)
)
getLocalMetrics cId maybeListId tabType maybeLimit = do
(ngs, ngs', myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
pure (ngs, ngs', localMetrics myCooc)
-}
getNgramsCooc :: (FlowCmdM env err m) getNgramsCooc :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m ( Map Text (ListType, Maybe Text) -> m ( Map Text (ListType, Maybe Text)
...@@ -100,6 +71,7 @@ getNgrams :: (FlowCmdM env err m) ...@@ -100,6 +71,7 @@ getNgrams :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType => CorpusId -> Maybe ListId -> TabType
-> m (Map Text (ListType, Maybe Text), Map Text (Maybe RootTerm)) -> m (Map Text (ListType, Maybe Text), Map Text (Maybe RootTerm))
getNgrams cId maybeListId tabType = do getNgrams cId maybeListId tabType = do
lId <- case maybeListId of lId <- case maybeListId of
Nothing -> defaultList cId Nothing -> defaultList cId
Just lId' -> pure lId' Just lId' -> pure lId'
......
...@@ -149,7 +149,7 @@ getNodesByNgramsUser cId nt = ...@@ -149,7 +149,7 @@ getNodesByNgramsUser cId nt =
WHERE nn.node1_id = ? -- CorpusId WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False AND nn.category > 0
GROUP BY nng.node2_id, ng.terms GROUP BY nng.node2_id, ng.terms
ORDER BY (nng.node2_id, ng.terms) DESC ORDER BY (nng.node2_id, ng.terms) DESC
-- LIMIT ? -- LIMIT ?
...@@ -210,7 +210,7 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql| ...@@ -210,7 +210,7 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
WHERE nn.node1_id = ? -- CorpusId WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False AND nn.category > 0
GROUP BY nng.node2_id, ng.terms GROUP BY nng.node2_id, ng.terms
|] |]
...@@ -247,7 +247,7 @@ queryNgramsOnlyByNodeUser = [sql| ...@@ -247,7 +247,7 @@ queryNgramsOnlyByNodeUser = [sql|
WHERE nn.node1_id = ? -- CorpusId WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False AND nn.category > 0
GROUP BY ng.terms, nng.node2_id GROUP BY ng.terms, nng.node2_id
|] |]
...@@ -298,10 +298,6 @@ getNodesByNgramsMaster ucId mcId = Map.unionsWith (<>) ...@@ -298,10 +298,6 @@ getNodesByNgramsMaster ucId mcId = Map.unionsWith (<>)
<$> mapM (selectNgramsByNodeMaster 1000 ucId mcId) [0,500..10000] <$> mapM (selectNgramsByNodeMaster 1000 ucId mcId) [0,500..10000]
type Limit = Int
type Offset = Int
selectNgramsByNodeMaster :: Int -> UserCorpusId -> MasterCorpusId -> Int -> Cmd err [(NodeId, Text)] selectNgramsByNodeMaster :: Int -> UserCorpusId -> MasterCorpusId -> Int -> Cmd err [(NodeId, Text)]
selectNgramsByNodeMaster n ucId mcId p = runPGSQuery selectNgramsByNodeMaster n ucId mcId p = runPGSQuery
queryNgramsByNodeMaster' queryNgramsByNodeMaster'
...@@ -330,7 +326,7 @@ SELECT n.id, ng.terms FROM nodes n ...@@ -330,7 +326,7 @@ SELECT n.id, ng.terms FROM nodes n
WHERE nn.node1_id = ? -- UserCorpusId WHERE nn.node1_id = ? -- UserCorpusId
-- AND n.typename = ? -- NodeTypeId -- AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False AND nn.category > 0
AND node_pos(n.id,?) >= ? AND node_pos(n.id,?) >= ?
AND node_pos(n.id,?) < ? AND node_pos(n.id,?) < ?
GROUP BY n.id, ng.terms GROUP BY n.id, ng.terms
......
...@@ -22,12 +22,12 @@ import Gargantext.Core.Types ...@@ -22,12 +22,12 @@ import Gargantext.Core.Types
import Gargantext.Database.Utils (runOpaQuery, Cmd) import Gargantext.Database.Utils (runOpaQuery, Cmd)
import Gargantext.Database.Schema.Ngrams import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.NodeNodeNgrams import Gargantext.Database.Schema.NodeNodeNgrams
import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.Node
import Gargantext.Prelude import Gargantext.Prelude
import Opaleye import Opaleye
import Control.Arrow (returnA) import Control.Arrow (returnA)
selectNgramsByDoc :: [CorpusId] -> DocumentId -> NgramsType -> Cmd err [Text] selectNgramsByDoc :: [CorpusId] -> DocId -> NgramsType -> Cmd err [Text]
selectNgramsByDoc cIds dId nt = runOpaQuery (query cIds dId nt) selectNgramsByDoc cIds dId nt = runOpaQuery (query cIds dId nt)
where where
...@@ -44,6 +44,6 @@ selectNgramsByDoc cIds dId nt = runOpaQuery (query cIds dId nt) ...@@ -44,6 +44,6 @@ selectNgramsByDoc cIds dId nt = runOpaQuery (query cIds dId nt)
returnA -< ngrams_terms ng returnA -< ngrams_terms ng
postNgrams :: CorpusId -> DocumentId -> [Text] -> Cmd err Int postNgrams :: CorpusId -> DocId -> [Text] -> Cmd err Int
postNgrams = undefined postNgrams = undefined
...@@ -43,7 +43,7 @@ getChildren pId _ maybeNodeType maybeOffset maybeLimit = runOpaQuery ...@@ -43,7 +43,7 @@ getChildren pId _ maybeNodeType maybeOffset maybeLimit = runOpaQuery
selectChildren :: ParentId -> Maybe NodeType -> Query NodeRead selectChildren :: ParentId -> Maybe NodeType -> Query NodeRead
selectChildren parentId maybeNodeType = proc () -> do selectChildren parentId maybeNodeType = proc () -> do
row@(Node nId typeName _ parent_id _ _ _) <- queryNodeTable -< () row@(Node nId typeName _ parent_id _ _ _) <- queryNodeTable -< ()
(NodeNode n1id n2id _ _ _) <- queryNodeNodeTable -< () (NodeNode n1id n2id _ _) <- queryNodeNodeTable -< ()
let nodeType = maybe 0 nodeTypeId maybeNodeType let nodeType = maybe 0 nodeTypeId maybeNodeType
restrict -< typeName .== pgInt4 nodeType restrict -< typeName .== pgInt4 nodeType
......
...@@ -28,7 +28,8 @@ import Data.Time (UTCTime) ...@@ -28,7 +28,8 @@ import Data.Time (UTCTime)
import Database.PostgreSQL.Simple.FromField (FromField, fromField) import Database.PostgreSQL.Simple.FromField (FromField, fromField)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Schema.Node (NodeWrite, Name, node) import Gargantext.Core.Types (Name)
import Gargantext.Database.Schema.Node (NodeWrite, node)
import Gargantext.Database.Types.Node (Node,Hyperdata,NodeType(..), UserId, AnnuaireId) import Gargantext.Database.Types.Node (Node,Hyperdata,NodeType(..), UserId, AnnuaireId)
import Gargantext.Database.Utils (fromField') import Gargantext.Database.Utils (fromField')
import Gargantext.Prelude import Gargantext.Prelude
......
...@@ -40,7 +40,6 @@ import Gargantext.Prelude ...@@ -40,7 +40,6 @@ import Gargantext.Prelude
import GHC.Generics (Generic) import GHC.Generics (Generic)
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
add :: ParentId -> [NodeId] -> Cmd err [Only Int] add :: ParentId -> [NodeId] -> Cmd err [Only Int]
add pId ns = runPGSQuery queryAdd (Only $ Values fields inputData) add pId ns = runPGSQuery queryAdd (Only $ Values fields inputData)
where where
...@@ -54,17 +53,16 @@ add_debug pId ns = formatPGSQuery queryAdd (Only $ Values fields inputData) ...@@ -54,17 +53,16 @@ add_debug pId ns = formatPGSQuery queryAdd (Only $ Values fields inputData)
inputData = prepare pId ns inputData = prepare pId ns
-- | Input Tables: types of the tables -- | Input Tables: types of the tables
inputSqlTypes :: [Text] inputSqlTypes :: [Text]
inputSqlTypes = ["int4","int4","bool","bool"] inputSqlTypes = ["int4","int4","int4"]
-- | SQL query to add documents -- | SQL query to add documents
-- TODO return id of added documents only -- TODO return id of added documents only
queryAdd :: Query queryAdd :: Query
queryAdd = [sql| queryAdd = [sql|
WITH input_rows(node1_id,node2_id, favorite, delete) AS (?) WITH input_rows(node1_id,node2_id,category) AS (?)
INSERT INTO nodes_nodes (node1_id, node2_id, favorite, delete) INSERT INTO nodes_nodes (node1_id, node2_id,category)
SELECT * FROM input_rows SELECT * FROM input_rows
ON CONFLICT (node1_id, node2_id) DO NOTHING -- on unique index ON CONFLICT (node1_id, node2_id) DO NOTHING -- on unique index
RETURNING 1 RETURNING 1
...@@ -72,7 +70,7 @@ queryAdd = [sql| ...@@ -72,7 +70,7 @@ queryAdd = [sql|
|] |]
prepare :: ParentId -> [NodeId] -> [InputData] prepare :: ParentId -> [NodeId] -> [InputData]
prepare pId ns = map (\nId -> InputData pId nId False False) ns prepare pId ns = map (\nId -> InputData pId nId) ns
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- * Main Types used -- * Main Types used
...@@ -80,14 +78,11 @@ prepare pId ns = map (\nId -> InputData pId nId False False) ns ...@@ -80,14 +78,11 @@ prepare pId ns = map (\nId -> InputData pId nId False False) ns
data InputData = InputData { inNode1_id :: NodeId data InputData = InputData { inNode1_id :: NodeId
, inNode2_id :: NodeId , inNode2_id :: NodeId
, inNode_fav :: Bool
, inNode_del :: Bool
} deriving (Show, Generic, Typeable) } deriving (Show, Generic, Typeable)
instance ToRow InputData where instance ToRow InputData where
toRow inputData = [ toField (inNode1_id inputData) toRow inputData = [ toField (inNode1_id inputData)
, toField (inNode2_id inputData) , toField (inNode2_id inputData)
, toField (inNode_fav inputData) , toField (1 :: Int)
, toField (inNode_del inputData)
] ]
...@@ -21,9 +21,9 @@ import qualified Data.Text as DT ...@@ -21,9 +21,9 @@ import qualified Data.Text as DT
import Database.PostgreSQL.Simple import Database.PostgreSQL.Simple
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Types (Name)
import Gargantext.Database.Utils import Gargantext.Database.Utils
import Gargantext.Database.Types.Node (NodeId, ParentId) import Gargantext.Database.Types.Node (NodeId, ParentId)
import Gargantext.Database.Schema.Node (Name)
-- import Data.ByteString -- import Data.ByteString
--rename :: NodeId -> Text -> IO ByteString --rename :: NodeId -> Text -> IO ByteString
......
This diff is collapsed.
...@@ -43,33 +43,29 @@ import Opaleye ...@@ -43,33 +43,29 @@ import Opaleye
import Control.Arrow (returnA) import Control.Arrow (returnA)
import qualified Opaleye as O import qualified Opaleye as O
data NodeNodePoly node1_id node2_id score fav del data NodeNodePoly node1_id node2_id score cat
= NodeNode { nn_node1_id :: node1_id = NodeNode { nn_node1_id :: node1_id
, nn_node2_id :: node2_id , nn_node2_id :: node2_id
, nn_score :: score , nn_score :: score
, nn_favorite :: fav , nn_category :: cat
, nn_delete :: del
} deriving (Show) } deriving (Show)
type NodeNodeWrite = NodeNodePoly (Column (PGInt4)) type NodeNodeWrite = NodeNodePoly (Column (PGInt4))
(Column (PGInt4)) (Column (PGInt4))
(Maybe (Column (PGFloat8))) (Maybe (Column (PGFloat8)))
(Maybe (Column (PGBool))) (Maybe (Column (PGInt4)))
(Maybe (Column (PGBool)))
type NodeNodeRead = NodeNodePoly (Column (PGInt4)) type NodeNodeRead = NodeNodePoly (Column (PGInt4))
(Column (PGInt4)) (Column (PGInt4))
(Column (PGFloat8)) (Column (PGFloat8))
(Column (PGBool)) (Column (PGInt4))
(Column (PGBool))
type NodeNodeReadNull = NodeNodePoly (Column (Nullable PGInt4)) type NodeNodeReadNull = NodeNodePoly (Column (Nullable PGInt4))
(Column (Nullable PGInt4)) (Column (Nullable PGInt4))
(Column (Nullable PGFloat8)) (Column (Nullable PGFloat8))
(Column (Nullable PGBool)) (Column (Nullable PGInt4))
(Column (Nullable PGBool))
type NodeNode = NodeNodePoly Int Int (Maybe Double) (Maybe Bool) (Maybe Bool) type NodeNode = NodeNodePoly Int Int (Maybe Double) (Maybe Int)
$(makeAdaptorAndInstance "pNodeNode" ''NodeNodePoly) $(makeAdaptorAndInstance "pNodeNode" ''NodeNodePoly)
$(makeLensesWith abbreviatedFields ''NodeNodePoly) $(makeLensesWith abbreviatedFields ''NodeNodePoly)
...@@ -79,8 +75,7 @@ nodeNodeTable = Table "nodes_nodes" (pNodeNode ...@@ -79,8 +75,7 @@ nodeNodeTable = Table "nodes_nodes" (pNodeNode
NodeNode { nn_node1_id = required "node1_id" NodeNode { nn_node1_id = required "node1_id"
, nn_node2_id = required "node2_id" , nn_node2_id = required "node2_id"
, nn_score = optional "score" , nn_score = optional "score"
, nn_favorite = optional "favorite" , nn_category = optional "category"
, nn_delete = optional "delete"
} }
) )
...@@ -95,33 +90,39 @@ nodesNodes = runOpaQuery queryNodeNodeTable ...@@ -95,33 +90,39 @@ nodesNodes = runOpaQuery queryNodeNodeTable
instance QueryRunnerColumnDefault (Nullable PGInt4) Int where instance QueryRunnerColumnDefault (Nullable PGInt4) Int where
queryRunnerColumnDefault = fieldQueryRunnerColumn queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault (Nullable PGFloat8) Int where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault (Nullable PGFloat8) Double where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGBool (Maybe Bool) where instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
queryRunnerColumnDefault = fieldQueryRunnerColumn queryRunnerColumnDefault = fieldQueryRunnerColumn
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Favorite management -- | Favorite management
nodeToFavorite :: CorpusId -> DocId -> Bool -> Cmd err [Int] nodeNodeCategory :: CorpusId -> DocId -> Int -> Cmd err [Int]
nodeToFavorite cId dId b = map (\(PGS.Only a) -> a) <$> runPGSQuery favQuery (b,cId,dId) nodeNodeCategory cId dId c = map (\(PGS.Only a) -> a) <$> runPGSQuery favQuery (c,cId,dId)
where where
favQuery :: PGS.Query favQuery :: PGS.Query
favQuery = [sql|UPDATE nodes_nodes SET favorite = ? favQuery = [sql|UPDATE nodes_nodes SET category = ?
WHERE node1_id = ? AND node2_id = ? WHERE node1_id = ? AND node2_id = ?
RETURNING node2_id; RETURNING node2_id;
|] |]
nodesToFavorite :: [(CorpusId,DocId,Bool)] -> Cmd err [Int] nodeNodesCategory :: [(CorpusId,DocId,Int)] -> Cmd err [Int]
nodesToFavorite inputData = map (\(PGS.Only a) -> a) nodeNodesCategory inputData = map (\(PGS.Only a) -> a)
<$> runPGSQuery trashQuery (PGS.Only $ Values fields inputData) <$> runPGSQuery catQuery (PGS.Only $ Values fields inputData)
where where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","bool"] fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","int4"]
trashQuery :: PGS.Query catQuery :: PGS.Query
trashQuery = [sql| UPDATE nodes_nodes as old SET catQuery = [sql| UPDATE nodes_nodes as old SET
favorite = new.favorite category = new.category
from (?) as new(node1_id,node2_id,favorite) from (?) as new(node1_id,node2_id,category)
WHERE old.node1_id = new.node1_id WHERE old.node1_id = new.node1_id
AND old.node2_id = new.node2_id AND old.node2_id = new.node2_id
RETURNING new.node2_id RETURNING new.node2_id
...@@ -144,24 +145,23 @@ queryDocs :: CorpusId -> O.Query (Column PGJsonb) ...@@ -144,24 +145,23 @@ queryDocs :: CorpusId -> O.Query (Column PGJsonb)
queryDocs cId = proc () -> do queryDocs cId = proc () -> do
(n, nn) <- joinInCorpus -< () (n, nn) <- joinInCorpus -< ()
restrict -< ( nn_node1_id nn) .== (toNullable $ pgNodeId cId) restrict -< ( nn_node1_id nn) .== (toNullable $ pgNodeId cId)
restrict -< ( nn_delete nn) .== (toNullable $ pgBool False) restrict -< ( nn_category nn) .>= (toNullable $ pgInt4 1)
restrict -< (_node_typename n) .== (pgInt4 $ nodeTypeId NodeDocument) restrict -< (_node_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
returnA -< view (node_hyperdata) n returnA -< view (node_hyperdata) n
selectDocNodes :: CorpusId -> Cmd err [NodeDocument] selectDocNodes :: CorpusId -> Cmd err [Node HyperdataDocument]
selectDocNodes cId = runOpaQuery (queryDocNodes cId) selectDocNodes cId = runOpaQuery (queryDocNodes cId)
queryDocNodes :: CorpusId -> O.Query NodeRead queryDocNodes :: CorpusId -> O.Query NodeRead
queryDocNodes cId = proc () -> do queryDocNodes cId = proc () -> do
(n, nn) <- joinInCorpus -< () (n, nn) <- joinInCorpus -< ()
restrict -< ( nn_node1_id nn) .== (toNullable $ pgNodeId cId) restrict -< ( nn_node1_id nn) .== (toNullable $ pgNodeId cId)
restrict -< ( nn_delete nn) .== (toNullable $ pgBool False) restrict -< ( nn_category nn) .>= (toNullable $ pgInt4 1)
restrict -< (_node_typename n) .== (pgInt4 $ nodeTypeId NodeDocument) restrict -< (_node_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
returnA -< n returnA -< n
joinInCorpus :: O.Query (NodeRead, NodeNodeReadNull) joinInCorpus :: O.Query (NodeRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeTable queryNodeNodeTable cond joinInCorpus = leftJoin queryNodeTable queryNodeNodeTable cond
where where
......
...@@ -56,19 +56,21 @@ queryInDatabase _ q = proc () -> do ...@@ -56,19 +56,21 @@ queryInDatabase _ q = proc () -> do
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | todo add limit and offset and order -- | todo add limit and offset and order
searchInCorpus :: CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc] searchInCorpus :: CorpusId -> IsTrash -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc]
searchInCorpus cId q o l order = runOpaQuery (filterWith o l order $ queryInCorpus cId q') searchInCorpus cId t q o l order = runOpaQuery (filterWith o l order $ queryInCorpus cId t q')
where where
q' = intercalate " | " $ map stemIt q q' = intercalate " | " $ map stemIt q
queryInCorpus :: CorpusId -> Text -> O.Query FacetDocRead queryInCorpus :: CorpusId -> IsTrash -> Text -> O.Query FacetDocRead
queryInCorpus cId q = proc () -> do queryInCorpus cId t q = proc () -> do
(n, nn) <- joinInCorpus -< () (n, nn) <- joinInCorpus -< ()
restrict -< ( nn_node1_id nn) .== (toNullable $ pgNodeId cId) restrict -< ( nn_node1_id nn) .== (toNullable $ pgNodeId cId)
restrict -< ( nn_delete nn) .== (toNullable $ pgBool False) restrict -< if t
then ( nn_category nn) .== (toNullable $ pgInt4 0)
else ( nn_category nn) .>= (toNullable $ pgInt4 1)
restrict -< (_ns_search n) @@ (pgTSQuery (unpack q)) restrict -< (_ns_search n) @@ (pgTSQuery (unpack q))
restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument) restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
returnA -< FacetDoc (_ns_id n) (_ns_date n) (_ns_name n) (_ns_hyperdata n) (pgBool True) (pgInt4 1) returnA -< FacetDoc (_ns_id n) (_ns_date n) (_ns_name n) (_ns_hyperdata n) (nn_category nn) (nn_score nn)
joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull) joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
......
...@@ -98,7 +98,7 @@ dbTree rootId = map (\(nId, tId, pId, n) -> DbTreeNode nId tId pId n) <$> runPGS ...@@ -98,7 +98,7 @@ dbTree rootId = map (\(nId, tId, pId, n) -> DbTreeNode nId tId pId n) <$> runPGS
FROM nodes AS c FROM nodes AS c
INNER JOIN tree AS s ON c.parent_id = s.id INNER JOIN tree AS s ON c.parent_id = s.id
WHERE c.typename IN (2,3,30,31,7,9,90) WHERE c.typename IN (2,3,5,30,31,40,7,9,90)
) )
SELECT * from tree; SELECT * from tree;
|] (Only rootId) |] (Only rootId)
......
...@@ -19,6 +19,7 @@ Portability : POSIX ...@@ -19,6 +19,7 @@ Portability : POSIX
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-}
-- {-# LANGUAGE DuplicateRecordFields #-} -- {-# LANGUAGE DuplicateRecordFields #-}
module Gargantext.Database.Types.Node module Gargantext.Database.Types.Node
...@@ -57,6 +58,7 @@ import Test.QuickCheck (elements) ...@@ -57,6 +58,7 @@ import Test.QuickCheck (elements)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Viz.Phylo (Phylo)
--import Gargantext.Database.Utils --import Gargantext.Database.Utils
------------------------------------------------------------------------ ------------------------------------------------------------------------
newtype NodeId = NodeId Int newtype NodeId = NodeId Int
...@@ -75,6 +77,35 @@ instance FromField NodeId where ...@@ -75,6 +77,35 @@ instance FromField NodeId where
instance ToSchema NodeId instance ToSchema NodeId
type NodeTypeId = Int
type NodeName = Text
type TSVector = Text
------------------------------------------------------------------------
data NodePoly id typename userId
parentId name date
hyperdata = Node { _node_id :: id
, _node_typename :: typename
, _node_userId :: userId
, _node_parentId :: parentId
, _node_name :: name
, _node_date :: date
, _node_hyperdata :: hyperdata
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_node_") ''NodePoly)
$(makeLenses ''NodePoly)
-- | NodePoly indicates that Node has a Polymorphism Type
type Node json = NodePoly NodeId NodeTypeId UserId (Maybe ParentId) NodeName UTCTime json
------------------------------------------------------------------------
instance FromHttpApiData NodeId where instance FromHttpApiData NodeId where
parseUrlPiece n = pure $ NodeId $ (read . cs) n parseUrlPiece n = pure $ NodeId $ (read . cs) n
...@@ -86,7 +117,7 @@ type ParentId = NodeId ...@@ -86,7 +117,7 @@ type ParentId = NodeId
type CorpusId = NodeId type CorpusId = NodeId
type ListId = NodeId type ListId = NodeId
type DocumentId = NodeId type DocumentId = NodeId
type DocId = DocumentId -- todo: remove this type DocId = NodeId
type RootId = NodeId type RootId = NodeId
type MasterCorpusId = CorpusId type MasterCorpusId = CorpusId
type UserCorpusId = CorpusId type UserCorpusId = CorpusId
...@@ -126,7 +157,6 @@ $(deriveJSON (unPrefix "statusV3_") ''StatusV3) ...@@ -126,7 +157,6 @@ $(deriveJSON (unPrefix "statusV3_") ''StatusV3)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Only Hyperdata types should be member of this type class. -- Only Hyperdata types should be member of this type class.
class Hyperdata a
------------------------------------------------------------------------ ------------------------------------------------------------------------
data HyperdataDocumentV3 = HyperdataDocumentV3 { hyperdataDocumentV3_publication_day :: !(Maybe Int) data HyperdataDocumentV3 = HyperdataDocumentV3 { hyperdataDocumentV3_publication_day :: !(Maybe Int)
...@@ -149,6 +179,7 @@ data HyperdataDocumentV3 = HyperdataDocumentV3 { hyperdataDocumentV3_publication ...@@ -149,6 +179,7 @@ data HyperdataDocumentV3 = HyperdataDocumentV3 { hyperdataDocumentV3_publication
} deriving (Show, Generic) } deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataDocumentV3_") ''HyperdataDocumentV3) $(deriveJSON (unPrefix "hyperdataDocumentV3_") ''HyperdataDocumentV3)
class Hyperdata a
instance Hyperdata HyperdataDocumentV3 instance Hyperdata HyperdataDocumentV3
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -305,6 +336,14 @@ hyperdataCorpus = case decode corpusExample of ...@@ -305,6 +336,14 @@ hyperdataCorpus = case decode corpusExample of
instance Arbitrary HyperdataCorpus where instance Arbitrary HyperdataCorpus where
arbitrary = pure hyperdataCorpus -- TODO arbitrary = pure hyperdataCorpus -- TODO
------------------------------------------------------------------------
data HyperdataList = HyperdataList {hd_list :: !(Maybe Text)
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hd_") ''HyperdataList)
instance Hyperdata HyperdataList
------------------------------------------------------------------------ ------------------------------------------------------------------------
data HyperdataAnnuaire = HyperdataAnnuaire { hyperdataAnnuaire_title :: !(Maybe Text) data HyperdataAnnuaire = HyperdataAnnuaire { hyperdataAnnuaire_title :: !(Maybe Text)
, hyperdataAnnuaire_desc :: !(Maybe Text) , hyperdataAnnuaire_desc :: !(Maybe Text)
...@@ -329,14 +368,10 @@ instance Arbitrary HyperdataAny where ...@@ -329,14 +368,10 @@ instance Arbitrary HyperdataAny where
arbitrary = pure $ HyperdataAny mempty -- TODO produce arbitrary objects arbitrary = pure $ HyperdataAny mempty -- TODO produce arbitrary objects
------------------------------------------------------------------------ ------------------------------------------------------------------------
data HyperdataList = HyperdataList { hyperdataList_preferences :: !(Maybe Text) {-
} deriving (Show, Generic) instance Arbitrary HyperdataList' where
$(deriveJSON (unPrefix "hyperdataList_") ''HyperdataList) arbitrary = elements [HyperdataList' (Just "from list A")]
-}
instance Hyperdata HyperdataList
instance Arbitrary HyperdataList where
arbitrary = elements [HyperdataList (Just "from list A")]
---- ----
data HyperdataListModel = HyperdataListModel { _hlm_params :: !(Int, Int) data HyperdataListModel = HyperdataListModel { _hlm_params :: !(Int, Int)
...@@ -384,6 +419,7 @@ instance Hyperdata HyperdataGraph ...@@ -384,6 +419,7 @@ instance Hyperdata HyperdataGraph
-- TODO add the Graph Structure here -- TODO add the Graph Structure here
data HyperdataPhylo = HyperdataPhylo { hyperdataPhylo_preferences :: !(Maybe Text) data HyperdataPhylo = HyperdataPhylo { hyperdataPhylo_preferences :: !(Maybe Text)
, hyperdataPhylo_data :: !(Maybe Phylo)
} deriving (Show, Generic) } deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataPhylo_") ''HyperdataPhylo) $(deriveJSON (unPrefix "hyperdataPhylo_") ''HyperdataPhylo)
...@@ -398,41 +434,26 @@ $(deriveJSON (unPrefix "hyperdataNotebook_") ''HyperdataNotebook) ...@@ -398,41 +434,26 @@ $(deriveJSON (unPrefix "hyperdataNotebook_") ''HyperdataNotebook)
instance Hyperdata HyperdataNotebook instance Hyperdata HyperdataNotebook
-- | NodePoly indicates that Node has a Polymorphism Type -- | TODO CLEAN
type Node json = NodePoly NodeId NodeTypeId UserId (Maybe ParentId) NodeName UTCTime json data HyperData = HyperdataTexts { hd_preferences :: Maybe Text }
| HyperdataList' { hd_preferences :: Maybe Text}
-- type Node json = NodePoly NodeId NodeTypeId UserId ParentId NodeName UTCTime json deriving (Show, Generic)
type NodeTypeId = Int
type NodeName = Text
type TSVector = Text
$(deriveJSON (unPrefix "hd_") ''HyperData)
-- | Then a Node can be either a Folder or a Corpus or a Document instance Hyperdata HyperData
type NodeUser = Node HyperdataUser
type NodeFolder = Node HyperdataFolder
type NodeCorpus = Node HyperdataCorpus
type NodeCorpusV3 = Node HyperdataCorpus
type NodeDocument = Node HyperdataDocument
type NodeAnnuaire = Node HyperdataAnnuaire
-- | Any others nodes
type NodeAny = Node HyperdataAny
---- | Then a Node can be either a Graph or a Phylo or a Notebook
type NodeList = Node HyperdataList
type NodeGraph = Node HyperdataGraph
type NodePhylo = Node HyperdataPhylo
type NodeNotebook = Node HyperdataNotebook
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Then a Node can be either a Folder or a Corpus or a Document
data NodeType = NodeUser data NodeType = NodeUser
| NodeFolder | NodeFolder
| NodeCorpus | NodeCorpusV3 | NodeDocument | NodeCorpus | NodeCorpusV3 | NodeTexts | NodeDocument
| NodeAnnuaire | NodeContact | NodeAnnuaire | NodeContact
| NodeGraph | NodePhylo | NodeGraph | NodePhylo
| NodeDashboard | NodeChart | NodeDashboard | NodeChart | NodeNoteBook
| NodeList | NodeListModel deriving (Show, Read, Eq, Generic, Bounded, Enum) | NodeList | NodeListModel
deriving (Show, Read, Eq, Generic, Bounded, Enum)
{- {-
...@@ -454,23 +475,6 @@ instance FromHttpApiData NodeType ...@@ -454,23 +475,6 @@ instance FromHttpApiData NodeType
instance ToParamSchema NodeType instance ToParamSchema NodeType
instance ToSchema NodeType instance ToSchema NodeType
------------------------------------------------------------------------
data NodePoly id typename userId
parentId name date
hyperdata = Node { _node_id :: id
, _node_typename :: typename
, _node_userId :: userId
, _node_parentId :: parentId
, _node_name :: name
, _node_date :: date
, _node_hyperdata :: hyperdata
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_node_") ''NodePoly)
$(makeLenses ''NodePoly)
data NodePolySearch id typename userId data NodePolySearch id typename userId
parentId name date parentId name date
......
...@@ -304,3 +304,6 @@ tail' = listSafeN "tail" tailMay ...@@ -304,3 +304,6 @@ tail' = listSafeN "tail" tailMay
init' :: Text -> [a] -> [a] init' :: Text -> [a] -> [a]
init' = listSafeN "init" initMay init' = listSafeN "init" initMay
------------------------------------------------------------------------
...@@ -54,9 +54,9 @@ class ReadFile a where ...@@ -54,9 +54,9 @@ class ReadFile a where
readFile' :: FilePath -> IO a readFile' :: FilePath -> IO a
saveFile :: (MonadReader env m, MonadIO m, HasSettings env, SaveFile a) writeFile :: (MonadReader env m, MonadIO m, HasSettings env, SaveFile a)
=> a -> m FilePath => a -> m FilePath
saveFile a = do writeFile a = do
dataPath <- view (settings . fileFolder) <$> ask dataPath <- view (settings . fileFolder) <$> ask
(fp,fn) <- liftIO $ (toPath 3) . hash . Text.pack . show <$> newStdGen (fp,fn) <- liftIO $ (toPath 3) . hash . Text.pack . show <$> newStdGen
......
...@@ -49,9 +49,6 @@ type Corpus a = [Sentence a] -- a list of sentences ...@@ -49,9 +49,6 @@ type Corpus a = [Sentence a] -- a list of sentences
-- | Contexts definition to build/unbuild contexts. -- | Contexts definition to build/unbuild contexts.
data SplitContext = Chars Int | Sentences Int | Paragraphs Int data SplitContext = Chars Int | Sentences Int | Paragraphs Int
tag :: Text -> [Tag Text]
tag = parseTags
-- | splitBy contexts of Chars or Sentences or Paragraphs -- | splitBy contexts of Chars or Sentences or Paragraphs
-- To see some examples at a higher level (sentences and paragraph), see -- To see some examples at a higher level (sentences and paragraph), see
-- 'Gargantext.Text.Examples.ex_terms' -- 'Gargantext.Text.Examples.ex_terms'
...@@ -67,10 +64,9 @@ tag = parseTags ...@@ -67,10 +64,9 @@ tag = parseTags
splitBy :: SplitContext -> Text -> [Text] splitBy :: SplitContext -> Text -> [Text]
splitBy (Chars n) = map pack . chunkAlong (n+1) 1 . unpack splitBy (Chars n) = map pack . chunkAlong (n+1) 1 . unpack
splitBy (Sentences n) = map unsentences . chunkAlong (n+1) 1 . sentences splitBy (Sentences n) = map unsentences . chunkAlong (n+1) 1 . sentences
splitBy (Paragraphs _) = map unTag . filter isTagText . tag splitBy (Paragraphs _) = map unTag . filter isTagText . parseTags
where where
unTag :: IsString p => Tag p -> p unTag :: IsString p => Tag p -> p
unTag (TagText x) = x unTag (TagText x) = x
unTag _ = "" unTag _ = ""
...@@ -12,33 +12,61 @@ Portability : POSIX ...@@ -12,33 +12,61 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Text.Corpus.API module Gargantext.Text.Corpus.API
where where
--{-
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Data.Aeson import Data.Aeson
import Data.Text (Text) import Data.Maybe
import Gargantext.Prelude import Gargantext.Prelude
--import qualified PUBMED as PubMed import Gargantext.Core (Lang(..))
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import Test.QuickCheck.Arbitrary import Test.QuickCheck.Arbitrary
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Data.Swagger import Data.Swagger
--import qualified Gargantext.Text.Corpus.API.Isidore as Isidore
import qualified Gargantext.Text.Corpus.API.Pubmed as PUBMED
import qualified Gargantext.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Text.Corpus.API.Istex as ISTEX
data ExternalAPIs = ALL -- | Main Types
data ExternalAPIs = All
| PubMed | PubMed
| HAL
| IsTex | HAL_EN
| IsidoreQuery | IsidoreAuth | HAL_FR
| IsTex_EN
| IsTex_FR
| Isidore_EN
| Isidore_FR
-- | IsidoreAuth
deriving (Show, Eq, Enum, Bounded, Generic) deriving (Show, Eq, Enum, Bounded, Generic)
-- | Get External API metadata main function
get :: ExternalAPIs -> Query -> Maybe Limit -> IO [HyperdataDocument]
get All _ _ = undefined
get PubMed q l = PUBMED.get q l
get HAL_EN q l = HAL.get EN q l
get HAL_FR q l = HAL.get FR q l
get IsTex_EN q l = ISTEX.get EN q l
get IsTex_FR q l = ISTEX.get FR q l
get Isidore_EN q l = ISIDORE.get EN (fromIntegral <$> l) (Just q) Nothing
get Isidore_FR q l = ISIDORE.get FR (fromIntegral <$> l) (Just q) Nothing
-- | Main Instances
instance FromJSON ExternalAPIs instance FromJSON ExternalAPIs
instance ToJSON ExternalAPIs instance ToJSON ExternalAPIs
type Query = Text
externalAPIs :: [ExternalAPIs] externalAPIs :: [ExternalAPIs]
externalAPIs = [minBound..maxBound] externalAPIs = [minBound..maxBound]
...@@ -48,7 +76,8 @@ instance Arbitrary ExternalAPIs ...@@ -48,7 +76,8 @@ instance Arbitrary ExternalAPIs
arbitrary = elements externalAPIs arbitrary = elements externalAPIs
instance ToSchema ExternalAPIs instance ToSchema ExternalAPIs
{-
crawl :: Crawler -> Query -> IO [PubMed.Doc] -- | Some Sugar for the documentation
crawl Pubmed = PubMed.crawler type Query = PUBMED.Query
--} type Limit = PUBMED.Limit
{-|
Module : Gargantext.Text.Corpus.API.Hal
Description : Pubmed API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Corpus.API.Hal
where
import Data.Maybe
import Data.Text (Text, pack, intercalate)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import qualified Gargantext.Text.Corpus.Parsers.Date as Date
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import qualified HAL as HAL
import qualified HAL.Doc.Corpus as HAL
import qualified HAL.Client as HAL
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
docs <- HAL.getMetadataWith q (fromIntegral <$> ml)
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) docs
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") Just d)
pure $ HyperdataDocument (Just "Hal")
(Just $ pack $ show i)
Nothing
Nothing
Nothing
Nothing
(Just $ intercalate " " t)
(Just $ foldl (\x y -> x <> ", " <> y) "" aus)
(Just $ foldl (\x y -> x <> ", " <> y) "" affs)
(Just $ maybe "Nothing" identity s)
(Just $ intercalate " " ab)
(fmap (pack . show) utctime)
pub_year
pub_month
pub_day
Nothing
Nothing
Nothing
(Just $ (pack . show) la)
...@@ -69,9 +69,9 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do ...@@ -69,9 +69,9 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText (OnlyText t2 ) = t2 langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
(utcTime, (pub_year, pub_month, pub_day)) <- Date.split l (maybe (Just "2019") (Just) d) (utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit l (maybe (Just "2019") (Just) d)
pure $ HyperdataDocument (Just "IsidoreApi") pure $ HyperdataDocument (Just "Isidore")
Nothing Nothing
u u
Nothing Nothing
...@@ -80,7 +80,7 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do ...@@ -80,7 +80,7 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
(Just $ cleanText $ langText t) (Just $ cleanText $ langText t)
(creator2text <$> as) (creator2text <$> as)
Nothing Nothing
(_sourceName <$> s) (Just $ maybe "Nothing" identity $ _sourceName <$> s)
(cleanText <$> langText <$> a) (cleanText <$> langText <$> a)
(fmap (Text.pack . show) utcTime) (fmap (Text.pack . show) utcTime)
(pub_year) (pub_year)
......
{-|
Module : Gargantext.Text.Corpus.API.Istex
Description : Pubmed API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Corpus.API.Istex
where
import Data.Either (either)
import Data.Maybe
import Data.List (concat)
import Data.Maybe (catMaybes)
import Data.Text (Text, pack)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import qualified Gargantext.Text.Corpus.Parsers.Date as Date
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import qualified ISTEX as ISTEX
import qualified ISTEX.Client as ISTEX
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
docs <- ISTEX.getMetadataWith q (fromIntegral <$> ml)
either (panic . pack . show) (toDoc' la) docs
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la docs' = do
--printDebug "ISTEX" (ISTEX._documents_total docs')
mapM (toDoc la) (ISTEX._documents_hits docs')
-- | TODO remove dateSplit here
-- TODO current year as default
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") (Just . pack . show) d)
pure $ HyperdataDocument (Just "Istex")
(Just i)
Nothing
Nothing
Nothing
Nothing
t
(Just $ foldl (\x y -> x <> ", " <> y) "" (map ISTEX._author_name a))
(Just $ foldl (\x y -> x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a))
(Just $ foldl (\x y -> x <> ", " <> y) "" (catMaybes $ map ISTEX._source_title s))
ab
(fmap (pack . show) utctime)
pub_year
pub_month
pub_day
Nothing
Nothing
Nothing
(Just $ (pack . show) la)
{-|
Module : Gargantext.Text.Corpus.API.Pubmed
Description : Pubmed API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Corpus.API.Pubmed
where
import Data.Maybe
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import qualified Data.Text as Text
import qualified PUBMED as PubMed
import qualified PUBMED.Parser as PubMedDoc
type Query = Text
type Limit = PubMed.Limit
get :: Query -> Maybe Limit -> IO [HyperdataDocument]
get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN)) <$> PubMed.getMetadataWith q l
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
(PubMedDoc.PubMedDate a y m d)
) = HyperdataDocument (Just "PubMed")
Nothing
Nothing
Nothing
Nothing
Nothing
t
(authors aus)
Nothing
j
(abstract as)
(Just $ Text.pack $ show a)
(Just $ fromIntegral y)
(Just m)
(Just d)
Nothing
Nothing
Nothing
(Just $ (Text.pack . show) l)
where
authors :: Maybe [PubMedDoc.Author] -> Maybe Text
authors aus' = case aus' of
Nothing -> Nothing
Just au -> Just $ (Text.intercalate ", ") $ catMaybes $ map PubMedDoc.foreName au
abstract :: Maybe [Text] -> Maybe Text
abstract as' = fmap (Text.intercalate ", ") as'
...@@ -52,8 +52,8 @@ import qualified Gargantext.Text.Corpus.Parsers.WOS as WOS ...@@ -52,8 +52,8 @@ import qualified Gargantext.Text.Corpus.Parsers.WOS as WOS
import qualified Gargantext.Text.Corpus.Parsers.RIS as RIS import qualified Gargantext.Text.Corpus.Parsers.RIS as RIS
import Gargantext.Text.Corpus.Parsers.RIS.Presse (presseEnrich) import Gargantext.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
import qualified Gargantext.Text.Corpus.Parsers.Date as Date import qualified Gargantext.Text.Corpus.Parsers.Date as Date
import Gargantext.Text.Corpus.Parsers.CSV (parseHal) import Gargantext.Text.Corpus.Parsers.CSV (parseHal, parseCsv)
import Gargantext.Text.Terms.Stop (detectLang) import Gargantext.Text.Learn (detectLangDefault)
------------------------------------------------------------------------ ------------------------------------------------------------------------
type ParseError = String type ParseError = String
...@@ -88,6 +88,7 @@ parseFormat = undefined ...@@ -88,6 +88,7 @@ parseFormat = undefined
-- TODO: to debug maybe add the filepath in error message -- TODO: to debug maybe add the filepath in error message
parseFile :: FileFormat -> FilePath -> IO [HyperdataDocument] parseFile :: FileFormat -> FilePath -> IO [HyperdataDocument]
parseFile CsvHalFormat p = parseHal p parseFile CsvHalFormat p = parseHal p
parseFile CsvGargV3 p = parseCsv p
parseFile RisPresse p = join $ mapM (toDoc RIS) <$> snd <$> enrichWith RisPresse <$> readFileWith RIS p parseFile RisPresse p = join $ mapM (toDoc RIS) <$> snd <$> enrichWith RisPresse <$> readFileWith RIS p
parseFile WOS p = join $ mapM (toDoc WOS) <$> snd <$> enrichWith WOS <$> readFileWith WOS p parseFile WOS p = join $ mapM (toDoc WOS) <$> snd <$> enrichWith WOS <$> readFileWith WOS p
parseFile ff p = join $ mapM (toDoc ff) <$> snd <$> enrichWith ff <$> readFileWith ff p parseFile ff p = join $ mapM (toDoc ff) <$> snd <$> enrichWith ff <$> readFileWith ff p
...@@ -96,11 +97,11 @@ toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument ...@@ -96,11 +97,11 @@ toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument
-- TODO use language for RIS -- TODO use language for RIS
toDoc ff d = do toDoc ff d = do
let abstract = lookup "abstract" d let abstract = lookup "abstract" d
let lang = maybe EN identity (join $ detectLang <$> (fmap (DT.take 50) abstract)) let lang = maybe EN identity (join $ detectLangDefault <$> (fmap (DT.take 50) abstract))
let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d
(utcTime, (pub_year, pub_month, pub_day)) <- Date.split lang dateToParse (utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit lang dateToParse
pure $ HyperdataDocument (Just $ DT.pack $ show ff) pure $ HyperdataDocument (Just $ DT.pack $ show ff)
(lookup "doi" d) (lookup "doi" d)
......
...@@ -101,7 +101,6 @@ fromDocs docs = V.map fromDocs' docs ...@@ -101,7 +101,6 @@ fromDocs docs = V.map fromDocs' docs
--------------------------------------------------------------- ---------------------------------------------------------------
-- | Split a document in its context -- | Split a document in its context
-- TODO adapt the size of the paragraph according to the corpus average -- TODO adapt the size of the paragraph according to the corpus average
splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc m splt doc = let docSize = (length $ csv_abstract doc) in splitDoc m splt doc = let docSize = (length $ csv_abstract doc) in
if docSize > 1000 if docSize > 1000
...@@ -113,22 +112,21 @@ splitDoc m splt doc = let docSize = (length $ csv_abstract doc) in ...@@ -113,22 +112,21 @@ splitDoc m splt doc = let docSize = (length $ csv_abstract doc) in
V.fromList [doc] V.fromList [doc]
else else
V.fromList [doc] V.fromList [doc]
where
splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc splitDoc' contextSize (CsvDoc t s py pm pd abst auth) = V.fromList $ [firstDoc] <> nextDocs
splitDoc' contextSize (CsvDoc t s py pm pd abst auth) = V.fromList $ [firstDoc] <> nextDocs where
where firstDoc = CsvDoc t s py pm pd firstAbstract auth
firstDoc = CsvDoc t s py pm pd firstAbstract auth firstAbstract = head' "splitDoc'1" abstracts
firstAbstract = head' "splitDoc'1" abstracts
nextDocs = map (\txt -> CsvDoc
nextDocs = map (\txt -> CsvDoc (head' "splitDoc'2" $ sentences txt)
(head' "splitDoc'2" $ sentences txt) s py pm pd
s py pm pd (unsentences $ tail' "splitDoc'1" $ sentences txt)
(unsentences $ tail' "splitDoc'1" $ sentences txt) auth
auth ) (tail' "splitDoc'2" abstracts)
) (tail' "splitDoc'2" abstracts)
abstracts = (splitBy $ contextSize) abst
abstracts = (splitBy $ contextSize) abst
--------------------------------------------------------------- ---------------------------------------------------------------
--------------------------------------------------------------- ---------------------------------------------------------------
...@@ -196,8 +194,8 @@ delimiter :: Word8 ...@@ -196,8 +194,8 @@ delimiter :: Word8
delimiter = fromIntegral $ ord '\t' delimiter = fromIntegral $ ord '\t'
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
readCsvOn :: [CsvDoc -> Text] -> FilePath -> IO [Text] readCsvOn' :: [CsvDoc -> Text] -> FilePath -> IO [Text]
readCsvOn fields fp = V.toList readCsvOn' fields fp = V.toList
<$> V.map (\l -> intercalate (pack " ") $ map (\field -> field l) fields) <$> V.map (\l -> intercalate (pack " ") $ map (\field -> field l) fields)
<$> snd <$> snd
<$> readFile fp <$> readFile fp
...@@ -231,6 +229,7 @@ readCsvLazyBS bs = case decodeByNameWith csvDecodeOptions bs of ...@@ -231,6 +229,7 @@ readCsvLazyBS bs = case decodeByNameWith csvDecodeOptions bs of
Right csvDocs -> csvDocs Right csvDocs -> csvDocs
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO use readFileLazy -- | TODO use readFileLazy
readCsvHal :: FilePath -> IO (Header, Vector CsvHal) readCsvHal :: FilePath -> IO (Header, Vector CsvHal)
readCsvHal = fmap readCsvHalLazyBS . BL.readFile readCsvHal = fmap readCsvHalLazyBS . BL.readFile
...@@ -361,8 +360,35 @@ csvHal2doc (CsvHal title source ...@@ -361,8 +360,35 @@ csvHal2doc (CsvHal title source
Nothing Nothing
Nothing Nothing
csv2doc :: CsvDoc -> HyperdataDocument
csv2doc (CsvDoc title source
pub_year pub_month pub_day
abstract authors ) = HyperdataDocument (Just "CsvHal")
Nothing
Nothing
Nothing
Nothing
Nothing
(Just title)
(Just authors)
Nothing
(Just source)
(Just abstract)
(Just $ pack . show $ jour (fromIntegral pub_year) pub_month pub_day)
(Just $ fromIntegral pub_year)
(Just pub_month)
(Just pub_day)
Nothing
Nothing
Nothing
Nothing
------------------------------------------------------------------------ ------------------------------------------------------------------------
parseHal :: FilePath -> IO [HyperdataDocument] parseHal :: FilePath -> IO [HyperdataDocument]
parseHal fp = map csvHal2doc <$> V.toList <$> snd <$> readCsvHal fp parseHal fp = V.toList <$> V.map csvHal2doc <$> snd <$> readCsvHal fp
------------------------------------------------------------------------ ------------------------------------------------------------------------
parseCsv :: FilePath -> IO [HyperdataDocument]
parseCsv fp = V.toList <$> V.map csv2doc <$> snd <$> readFile fp
...@@ -18,7 +18,7 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00" ...@@ -18,7 +18,7 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Corpus.Parsers.Date (parse, parseRaw, split) where module Gargantext.Text.Corpus.Parsers.Date (parse, parseRaw, dateSplit, Year, Month, Day) where
import Data.HashMap.Strict as HM hiding (map) import Data.HashMap.Strict as HM hiding (map)
import Data.Text (Text, unpack, splitOn, pack) import Data.Text (Text, unpack, splitOn, pack)
...@@ -40,9 +40,9 @@ import qualified Duckling.Core as DC ...@@ -40,9 +40,9 @@ import qualified Duckling.Core as DC
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Parse date to Ints -- | Parse date to Ints
-- TODO add hours, minutes and seconds -- TODO add hours, minutes and seconds
split :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day)) dateSplit :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
split _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing)) dateSplit _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
split l (Just txt) = do dateSplit l (Just txt) = do
utcTime <- parse l txt utcTime <- parse l txt
let (y, m, d) = split' utcTime let (y, m, d) = split' utcTime
pure (Just utcTime, (Just y, Just m,Just d)) pure (Just utcTime, (Just y, Just m,Just d))
......
...@@ -19,18 +19,17 @@ TODO: create a separate Lib. ...@@ -19,18 +19,17 @@ TODO: create a separate Lib.
module Gargantext.Text.Corpus.Parsers.GrandDebat module Gargantext.Text.Corpus.Parsers.GrandDebat
where where
import GHC.IO (FilePath)
import Data.Aeson (ToJSON, FromJSON) import Data.Aeson (ToJSON, FromJSON)
import qualified Data.JsonStream.Parser as P
--import Data.Either (either)
import Data.Maybe (Maybe()) import Data.Maybe (Maybe())
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.ByteString.Lazy as DBL
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.Prelude
import Gargantext.Database.Types.Node
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Types.Node
import Gargantext.Prelude
import Gargantext.Prelude.Utils
import qualified Data.ByteString.Lazy as DBL
import qualified Data.JsonStream.Parser as P
import qualified Data.Text as Text
data GrandDebatReference = GrandDebatReference data GrandDebatReference = GrandDebatReference
...@@ -91,16 +90,12 @@ instance ToHyperdataDocument GrandDebatReference ...@@ -91,16 +90,12 @@ instance ToHyperdataDocument GrandDebatReference
True -> r' True -> r'
False -> "" False -> ""
class ReadFile a
where
readFile :: FilePath -> IO a
instance ReadFile [GrandDebatReference] instance ReadFile [GrandDebatReference]
where where
-- | read json: 3 version below are working but with increased optimization -- | read json: 3 version below are working but with increased optimization
--readFile fp = maybe [] identity <$> decode <$> DBL.readFile fp --readFile fp = maybe [] identity <$> decode <$> DBL.readFile fp
--readFile fp = either (panic . Text.pack) identity <$> P.eitherDecode <$> DBL.readFile fp --readFile fp = either (panic . Text.pack) identity <$> P.eitherDecode <$> DBL.readFile fp
readFile fp = P.parseLazyByteString (P.arrayOf P.value) <$> DBL.readFile fp readFile' fp = P.parseLazyByteString (P.arrayOf P.value) <$> DBL.readFile fp
{-|
Module : Gargantext.Text.Terms.Stop
Description : Mono Terms module
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO:
- generalize to byteString
- Stop words and (how to learn it).
- Main type here is String check if Chars on Text would be optimized
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
module Gargantext.Text.Learn -- (detectLang, detectLangs, stopList)
where
import Codec.Serialise
import qualified Data.List as DL
import Data.Maybe (maybe)
import Data.Map.Strict (Map, toList)
import qualified Data.Map.Strict as DM
import GHC.Generics
import Data.String (String)
import Data.Text (Text)
import Data.Text (pack, unpack, toLower)
import Data.Tuple.Extra (both)
import qualified Data.ByteString.Lazy as BSL
import Gargantext.Prelude
import Gargantext.Prelude.Utils
import Gargantext.Core (Lang(..), allLangs)
import Gargantext.Text.Terms.Mono (words)
import Gargantext.Text.Metrics.Count (occurrencesWith)
import qualified Gargantext.Text.Samples.FR as FR
import qualified Gargantext.Text.Samples.EN as EN
--import qualified Gargantext.Text.Samples.DE as DE
--import qualified Gargantext.Text.Samples.SP as SP
--import qualified Gargantext.Text.Samples.CH as CH
------------------------------------------------------------------------
data Candidate = Candidate { stop :: Double
, noStop :: Double
} deriving (Show)
------------------------------------------------------------------------
-- * Analyze candidate
type StringSize = Int
type TotalFreq = Int
type Freq = Int
type Word = String
data CatWord a = CatWord a Word
type CatProb a = Map a Double
type Events a = Map a EventBook
------------------------------------------------------------------------
data EventBook = EventBook { events_freq :: Map String Freq
, events_n :: Map StringSize TotalFreq
}
deriving (Show, Generic)
instance Serialise EventBook
instance (Serialise a, Ord a) => SaveFile (Events a) where
saveFile' f d = BSL.writeFile f (serialise d)
instance (Serialise a, Ord a) => ReadFile (Events a) where
readFile' filepath = deserialise <$> BSL.readFile filepath
------------------------------------------------------------------------
detectStopDefault :: Text -> Maybe Bool
detectStopDefault = undefined
detectBool :: [(Bool, Text)] -> Text -> Maybe Bool
detectBool events = detectDefault False events
detectDefault :: Ord a => a -> [(a, Text)] -> Text -> Maybe a
detectDefault = detectDefaultWith identity
detectDefaultWith :: Ord a => (b -> Text) -> a -> [(a, b)] -> b -> Maybe a
detectDefaultWith f d events = detectDefaultWithPriors f ps
where
ps = priorEventsWith f d events
detectDefaultWithPriors :: Ord b => (a -> Text) -> Events b -> a -> Maybe b
detectDefaultWithPriors f priors = detectCat 99 priors . f
priorEventsWith :: Ord a => (t -> Text) -> a -> [(a, t)] -> Events a
priorEventsWith f d e = toEvents d [0..2] 10 es
where
es = map (\(a,b) -> CatWord a (unpack $ toLower $ f b)) e
------------------------------------------------------------------------
detectLangDefault :: Text -> Maybe Lang
detectLangDefault = detectCat 99 eventLang
where
eventLang :: Events Lang
eventLang = toEvents FR [0..2] 10 [ langWord l | l <- allLangs ]
langWord :: Lang -> CatWord Lang
langWord l = CatWord l (textSample l)
textSample :: Lang -> String
textSample EN = EN.textSample
textSample FR = FR.textSample
--textSample DE = DE.textSample
--textSample SP = SP.textSample
--textSample CH = CH.textSample
------------------------------------------------------------------------
detectCat :: Ord a => Int -> Events a -> Text -> Maybe a
detectCat n es = head . map fst . (detectCat' n es) . unpack
where
detectCat' :: Ord a => Int -> Events a -> String -> [(a, Double)]
detectCat' n' es' s = DL.reverse $ DL.sortOn snd
$ toList
$ detectWith n' es' (wordsToBook [0..2] n' s)
detectWith :: Ord a => Int -> Events a -> EventBook -> CatProb a
detectWith n'' el (EventBook mapFreq _) =
DM.unionsWith (+)
$ map DM.fromList
$ map (\(s,m) -> map (\(l,f) -> (l, (fromIntegral m) * f)) $ toPrior n'' s el)
$ filter (\x -> fst x /= " ")
$ DM.toList mapFreq
-- | TODO: monoids (but proba >= 0)
toPrior :: Int -> String -> Events a -> [(a, Double)]
toPrior n'' s el = prior n'' $ pebLang s el
where
pebLang :: String -> Events a -> [(a, (Freq,TotalFreq))]
pebLang st = map (\(l,eb) -> (l, peb st eb)) . DM.toList
peb :: String -> EventBook -> (Freq, TotalFreq)
peb st (EventBook mapFreq mapN) = (fromIntegral a, fromIntegral b)
where
a = maybe 0 identity $ DM.lookup st mapFreq
b = maybe 1 identity $ DM.lookup (length st) mapN
prior :: Int -> [(a, (Freq, TotalFreq))] -> [(a, Double)]
prior i ps = zip ls $ zipWith (\x y -> x^i * y) (map (\(a,_) -> part a (sum $ map fst ps')) ps')
(map (\(a,b) -> a / b) ps')
where
(ls, ps'') = DL.unzip ps
ps' = map (both fromIntegral) ps''
part :: (Eq p, Fractional p) => p -> p -> p
part 0 _ = 0
part _ 0 = 0
part x y = x / y
{-
toProba :: (Eq b, Fractional b, Functor t, Foldable t) =>
t (a, b) -> t (a, b)
toProba xs = map (\(a,b) -> (a, part b total)) xs
where
total = sum $ map snd xs
-}
-- | TODO: monoids
toEvents :: Ord a => a -> [Int] -> Int -> [CatWord a] -> Events a
toEvents e ns n = foldl' (opEvent (+)) (emptyEvent e ns n) . map (toEvent ns n)
where
emptyEvent :: Ord a => a -> [Int] -> Int -> Events a
emptyEvent e' ns' n'= toEvent ns' n' (CatWord e' "")
toEvent :: Ord a => [Int] -> Int -> CatWord a -> Events a
toEvent ns'' n'' (CatWord l txt) = DM.fromList [(l, wordsToBook ns'' n'' txt)]
opEvent :: Ord a => (Freq -> Freq -> Freq) -> Events a -> Events a -> Events a
opEvent f = DM.unionWith (op f)
------------------------------------------------------------------------
emptyEventBook :: [Int] -> Int -> EventBook
emptyEventBook ns n = wordToBook ns n " "
wordsToBook :: [Int] -> Int -> String -> EventBook
wordsToBook ns n txt = foldl' (op (+)) (emptyEventBook ns n) eventsBook
where
ws = map unpack $ words $ pack txt
eventsBook = map (wordToBook ns n) ws
wordToBook :: [Int] -> Int -> Word -> EventBook
wordToBook ns n txt = EventBook ef en
where
chks = allChunks ns n txt
en = DM.fromList $ map (\(n',ns') -> (n', length ns')) $ zip ns chks
ef = foldl' DM.union DM.empty $ map (occurrencesWith identity) chks
op :: (Freq -> Freq -> Freq) -> EventBook -> EventBook -> EventBook
op f (EventBook ef1 en1)
(EventBook ef2 en2) = EventBook (DM.unionWith f ef1 ef2)
(DM.unionWith f en1 en2)
------------------------------------------------------------------------
------------------------------------------------------------------------
allChunks :: [Int] -> Int -> String -> [[String]]
allChunks ns m st = map (\n -> chunks n m st) ns
-- | Chunks is the same function as splitBy in Context but for Strings,
-- not Text (without pack and unpack operations that are not needed).
chunks :: Int -> Int -> String -> [String]
chunks n m = DL.take m . filter (not . all (== ' '))
. chunkAlong (n+1) 1
. DL.concat
. DL.take 1000
. DL.repeat
. blanks
-- | String preparation
blanks :: String -> String
blanks [] = []
blanks xs = [' '] <> xs <> [' ']
{-
-- Some previous tests to be removed
--import GHC.Base (Functor)
--import Numeric.Probability.Distribution ((??))
--import qualified Numeric.Probability.Distribution as D
-- | Blocks increase the size of the word to ease computations
-- some border and unexepected effects can happen, need to be tested
blockOf :: Int -> String -> String
blockOf n = DL.concat . DL.take n . DL.repeat
-- * Make the distributions
makeDist :: [String] -> D.T Double String
makeDist = D.uniform . DL.concat . map (DL.concat . allChunks [0,2] 10)
stopDist :: D.T Double String
stopDist = makeDist $ map show ([0..9]::[Int]) <> EN.stopList
candDist :: D.T Double String
candDist = makeDist candList
------------------------------------------------------------------------
sumProba :: Num a => D.T a String -> [Char] -> a
sumProba ds x = sum $ map ((~?) ds) $ DL.concat $ allChunks [0,2] 10 $ map toLower x
-- | Get probability according a distribution
(~?) :: (Num prob, Eq a) => D.T prob a -> a -> prob
(~?) ds x = (==x) ?? ds
------------------------------------------------------------------------
candidate :: [Char] -> Candidate
candidate x = Candidate (sumProba stopDist x) (sumProba candDist x)
------------------------------------------------------------------------
candList :: [String]
candList = [ "france", "alexandre", "mael", "constitution"
, "etats-unis", "associes", "car", "train", "spam"]
--}
...@@ -61,7 +61,7 @@ buildNgramsLists :: Lang -> Int -> Int -> StopSize -> UserCorpusId -> MasterCorp ...@@ -61,7 +61,7 @@ buildNgramsLists :: Lang -> Int -> Int -> StopSize -> UserCorpusId -> MasterCorp
-> Cmd err (Map NgramsType [NgramsElement]) -> Cmd err (Map NgramsType [NgramsElement])
buildNgramsLists l n m s uCid mCid = do buildNgramsLists l n m s uCid mCid = do
ngTerms <- buildNgramsTermsList l n m s uCid mCid ngTerms <- buildNgramsTermsList l n m s uCid mCid
--ngTerms <- buildNgramsTermsList' uCid (ngramsGroup l n m) (isStopTerm s . fst) 550 300 --ngTerms <- buildNgramsTermsList' uCid (ngramsGroup l n m) (isStopTerm s . fst) 500 50
othersTerms <- mapM (buildNgramsOthersList uCid identity) [Authors, Sources, Institutes] othersTerms <- mapM (buildNgramsOthersList uCid identity) [Authors, Sources, Institutes]
pure $ Map.unions $ othersTerms <> [ngTerms] pure $ Map.unions $ othersTerms <> [ngTerms]
...@@ -73,7 +73,8 @@ buildNgramsOthersList uCid groupIt nt = do ...@@ -73,7 +73,8 @@ buildNgramsOthersList uCid groupIt nt = do
let let
all' = Map.toList ngs all' = Map.toList ngs
pure $ (toElements GraphTerm $ take 10 all') <> (toElements CandidateTerm $ drop 10 all') pure $ (toElements GraphTerm all') <> (toElements CandidateTerm all')
--pure $ (toElements GraphTerm $ take 10 all') <> (toElements CandidateTerm $ drop 10 all')
where where
toElements nType x = Map.fromList [(nt, [ mkNgramsElement t nType Nothing (mSetFromList []) toElements nType x = Map.fromList [(nt, [ mkNgramsElement t nType Nothing (mSetFromList [])
| (t,_ns) <- x | (t,_ns) <- x
...@@ -123,8 +124,8 @@ buildNgramsTermsList l n m s uCid mCid = do ...@@ -123,8 +124,8 @@ buildNgramsTermsList l n m s uCid mCid = do
candidates <- sortTficf <$> getTficf' uCid mCid NgramsTerms (ngramsGroup l n m) candidates <- sortTficf <$> getTficf' uCid mCid NgramsTerms (ngramsGroup l n m)
let let
candidatesSize = 2000 candidatesSize = 2000
a = 500 a = 10
b = 500 b = 10
candidatesHead = List.take candidatesSize candidates candidatesHead = List.take candidatesSize candidates
candidatesTail = List.drop candidatesSize candidates candidatesTail = List.drop candidatesSize candidates
termList = (toTermList a b ((isStopTerm s) . fst) candidatesHead) termList = (toTermList a b ((isStopTerm s) . fst) candidatesHead)
......
...@@ -102,13 +102,26 @@ takeScored listSize incSize = both (map _scored_terms) ...@@ -102,13 +102,26 @@ takeScored listSize incSize = both (map _scored_terms)
linearTakes :: (Ord b1, Ord b2) linearTakes :: (Ord b1, Ord b2)
=> GraphListSize -> InclusionSize => GraphListSize -> InclusionSize
-> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a]) -> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a])
linearTakes gls incSize speGen incExc = (List.splitAt gls) linearTakes mls incSize speGen incExc = (List.splitAt mls)
. List.concat . List.concat
. map (take $ round . map (take $ round
$ (fromIntegral gls :: Double) $ (fromIntegral mls :: Double)
/ (fromIntegral incSize :: Double) / (fromIntegral incSize :: Double)
) )
. map (sortOn incExc) . map (sortOn speGen)
. splitEvery incSize . splitEvery incSize
. sortOn speGen . take 5000
. takePercent (0.70)
. sortOn incExc
takePercent :: Double -> [a] -> [a]
takePercent l xs = List.take l' xs
where
l' = round $ l * (fromIntegral $ List.length xs)
splitTake :: (Int, a -> Bool) -> (Int, a -> Bool) -> [a] -> ([a], [a])
splitTake (a, af) (b, bf) xs = (mpa <> mpb, ca <> cb)
where
(mpa, ca) = List.splitAt a $ List.filter af xs
(mpb, cb) = List.splitAt b $ List.filter bf xs
...@@ -22,4 +22,89 @@ import Data.String (String) ...@@ -22,4 +22,89 @@ import Data.String (String)
textSample :: String textSample :: String
textSample = "Text mining, also referred to as text data mining, roughly equivalent to text analytics, is the process of deriving high-quality information from text. High-quality information is typically derived through the devising of patterns and trends through means such as statistical pattern learning. Text mining usually involves the process of structuring the input text (usually parsing, along with the addition of some derived linguistic features and the removal of others, and subsequent insertion into a database), deriving patterns within the structured data, and finally evaluation and interpretation of the output. 'High quality' in text mining usually refers to some combination of relevance, novelty, and interestingness. Typical text mining tasks include text categorization, text clustering, concept/entity extraction, production of granular taxonomies, sentiment analysis, document summarization, and entity relation modeling (i.e., learning relations between named entities). Text analysis involves information retrieval, lexical analysis to study word frequency distributions, pattern recognition, tagging/annotation, information extraction, data mining techniques including link and association analysis, visualization, and predictive analytics. The overarching goal is, essentially, to turn text into data for analysis, via application of natural language processing (NLP) and analytical methods. A typical application is to scan a set of documents written in a natural language and either model the document set for predictive classification purposes or populate a database or search index with the information extracted." textSample = "Text mining, also referred to as text data mining, roughly equivalent to text analytics, is the process of deriving high-quality information from text. High-quality information is typically derived through the devising of patterns and trends through means such as statistical pattern learning. Text mining usually involves the process of structuring the input text (usually parsing, along with the addition of some derived linguistic features and the removal of others, and subsequent insertion into a database), deriving patterns within the structured data, and finally evaluation and interpretation of the output. 'High quality' in text mining usually refers to some combination of relevance, novelty, and interestingness. Typical text mining tasks include text categorization, text clustering, concept/entity extraction, production of granular taxonomies, sentiment analysis, document summarization, and entity relation modeling (i.e., learning relations between named entities). Text analysis involves information retrieval, lexical analysis to study word frequency distributions, pattern recognition, tagging/annotation, information extraction, data mining techniques including link and association analysis, visualization, and predictive analytics. The overarching goal is, essentially, to turn text into data for analysis, via application of natural language processing (NLP) and analytical methods. A typical application is to scan a set of documents written in a natural language and either model the document set for predictive classification purposes or populate a database or search index with the information extracted."
stopList :: [String]
stopList =
["a", "a's", "able", "about", "above", "according", "accordingly"
, "across", "actually", "after", "afterwards", "again", "against"
, "ain't", "all", "allow", "allows", "almost", "alone", "along"
, "already", "also", "although", "always", "am", "among", "amongst", "an"
, "analyze", "and", "another", "any", "anybody", "anyhow", "anyone"
, "anything", "anyway", "anyways", "anywhere", "apart", "appear"
, "apply", "appreciate", "appropriate", "are", "aren't", "around"
, "as", "aside", "ask", "asking", "associated", "at", "available"
, "away", "awfully", "b", "based", "be", "became", "because", "become"
, "becomes", "becoming", "been", "before", "beforehand", "behind"
, "being", "believe", "below", "beside", "besides", "best", "better"
, "between", "beyond", "both", "brief", "but", "by", "c", "c'mon", "c's"
, "came", "can", "can't", "cannot", "cant", "cause", "causes", "certain"
, "certainly", "changes", "clearly", "co", "com", "come", "comes"
, "common", "concerning", "consequently", "consider", "considering"
, "contain", "containing", "contains", "corresponding", "could"
, "couldn't", "course", "currently", "d", "definitely", "described"
, "despite", "detecting", "detects", "did", "didn't", "different", "do"
, "does", "doesn't", "doing", "don't", "done", "down", "downwards"
, "during", "e", "each", "edu", "eg", "eight", "either", "else"
, "elsewhere", "enough", "entirely", "especially", "et", "etc", "even"
, "ever", "every", "everybody", "everyone", "everything", "everywhere"
, "ex", "exactly", "example", "except", "f", "far", "few", "fifth"
, "find", "first", "five", "followed", "following", "follows", "for"
, "former", "formerly", "forth", "four", "from", "further", "furthermore"
, "g", "get", "gets", "getting", "gif", "given", "gives", "go", "goes"
, "going", "gone", "got", "gotten", "greetings", "h", "had", "hadn't"
, "happens", "hardly", "has", "hasn't", "have", "haven't", "having"
, "he", "he'd", "he'll", "he's", "hello", "help", "hence", "her"
, "here", "here's", "hereafter", "hereby", "herein", "hereupon", "hers"
, "herself", "hi", "him", "himself", "his", "hither", "hopefully", "how"
, "how's", "howbeit", "however", "i", "i'd", "i'll", "i'm", "i've"
, "identify", "ie", "if", "ignored", "immediate", "in", "inasmuch"
, "inc", "indeed", "indicate", "indicated", "indicates", "inner"
, "insofar", "instead", "into", "involves", "inward", "is", "isn't"
, "it", "it'd", "it'll", "it's", "its", "itself", "j", "just", "k"
, "keep", "keeps", "kept", "know", "known", "knows", "l", "last"
, "late", "lately", "later", "latter", "latterly", "least", "less"
, "lest", "let", "let's", "like", "liked", "likely", "little", "look"
, "looking", "looks", "ltd", "m", "main", "mainly", "many", "may"
, "maybe", "me", "mean", "meanwhile", "merely", "might", "min", "more"
, "moreover", "most", "mostly", "much", "must", "mustn't", "my", "myself"
, "n", "name", "namely", "nd", "near", "nearly", "necessary", "need"
, "needs", "neither", "never", "nevertheless", "new", "next", "nine"
, "no", "nobody", "non", "none", "noone", "nor", "normally", "not"
, "nothing", "novel", "now", "nowhere", "o", "obviously", "of", "off"
, "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones"
, "only", "onto", "or", "other", "others", "otherwise", "ought", "our"
, "ours", "ourselves", "out", "outside", "over", "overall", "own", "p"
, "particular", "particularly", "per", "perhaps", "placed", "please"
, "plus", "possible", "presents", "presumably", "probably", "provides"
, "q", "que", "quite", "qv", "r", "rather", "rd", "re", "really"
, "reasonably", "regarding", "regardless", "regards", "relatively"
, "respectively", "right", "s", "said", "same", "saw", "say", "saying"
, "says", "sds", "second", "secondly", "see", "seeing", "seem", "seemed"
, "seeming", "seems", "seen", "self", "selves", "sensible", "sent"
, "serious", "seriously", "seven", "several", "shall", "shan't"
, "she", "she'd", "she'll", "she's", "should", "shouldn't", "since"
, "six", "so", "some", "somebody", "somehow", "someone", "something"
, "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry"
, "specified", "specify", "specifying", "still", "sub", "such", "sup"
, "sure", "t", "t's", "take", "taken", "tell", "tends", "th", "than"
, "thank", "thanks", "thanx", "that", "that's", "thats", "the", "their"
, "theirs", "them", "themselves", "then", "thence", "there", "there's"
, "thereafter", "thereby", "therefore", "therein", "theres", "thereupon"
, "these", "they", "they'd", "they'll", "they're", "they've", "think"
, "third", "this", "thorough", "thoroughly", "those", "though", "three"
, "through", "throughout", "thru", "thus", "to", "together", "too"
, "took", "toward", "towards", "tried", "tries", "truly", "try"
, "trying", "twice", "two", "u", "un", "under", "unfortunately"
, "unless", "unlikely", "until", "unto", "up", "upon", "us", "use"
, "used", "useful", "uses", "using", "usually", "uucp", "v", "value"
, "various", "very", "via", "viz", "vs", "w", "want", "wants", "was"
, "wasn't", "way", "we", "we'd", "we'll", "we're", "we've", "welcome"
, "well", "went", "were", "weren't", "what", "what's", "whatever", "when"
, "when's", "whence", "whenever", "where", "where's", "whereafter"
, "whereas", "whereby", "wherein", "whereupon", "wherever", "whether"
, "which", "while", "whither", "who", "who's", "whoever", "whole", "whom"
, "whose", "why", "why's", "will", "willing", "wish", "with", "within"
, "without", "won't", "wonder", "would", "wouldn't", "x", "y", "yes"
, "yet", "you", "you'd", "you'll", "you're", "you've", "your", "yours"
, "yourself", "yourselves", "z", "zero"]
...@@ -120,7 +120,7 @@ termsUnsupervised (Unsupervised l n s m) = ...@@ -120,7 +120,7 @@ termsUnsupervised (Unsupervised l n s m) =
pure pure
. map (text2term l) . map (text2term l)
. List.nub . List.nub
. (List.filter (\l' -> List.length l' > s)) . (List.filter (\l' -> List.length l' >= s))
. List.concat . List.concat
. mainEleveWith (maybe (panic "no model") identity m) n . mainEleveWith (maybe (panic "no model") identity m) n
. uniText . uniText
...@@ -129,11 +129,10 @@ termsUnsupervised _ = undefined ...@@ -129,11 +129,10 @@ termsUnsupervised _ = undefined
newTries :: Int -> Text -> Tries Token () newTries :: Int -> Text -> Tries Token ()
newTries n t = buildTries n (fmap toToken $ uniText t) newTries n t = buildTries n (fmap toToken $ uniText t)
-- | TODO removing long terms > 24
uniText :: Text -> [[Text]] uniText :: Text -> [[Text]]
uniText = uniText = map (List.filter (not . isPunctuation))
-- map (map (Text.toLower))
map (List.filter (not . isPunctuation))
. map tokenize . map tokenize
. sentences -- | TODO get sentences according to lang . sentences -- | TODO get sentences according to lang
. Text.toLower . Text.toLower
{-| {-|
Module : Gargantext.Text.Ngrams.Token.Text Module : Gargantext.Text.Ngrams.Token.Text
Description : Description : Tokenizer main functions
Copyright : (c) Grzegorz Chrupała first, after: CNRS, 2018-Present Copyright : (c) Grzegorz Chrupała first, after: CNRS, 2018-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
...@@ -77,20 +77,20 @@ run :: Tokenizer -> (Text -> [Text]) ...@@ -77,20 +77,20 @@ run :: Tokenizer -> (Text -> [Text])
run f = \txt -> map T.copy $ (map unwrap . unE . f) txt run f = \txt -> map T.copy $ (map unwrap . unE . f) txt
defaultTokenizer :: Tokenizer defaultTokenizer :: Tokenizer
defaultTokenizer = whitespace defaultTokenizer = whitespace
>=> uris >=> uris
>=> punctuation >=> punctuation
>=> contractions >=> contractions
>=> negatives >=> negatives
-- | Detect common uris and freeze them -- | Detect common uris and freeze them
uris :: Tokenizer uris :: Tokenizer
uris x | isUri x = E [Left x] uris x | isUri x = E [Left x]
| True = E [Right x] | True = E [Right x]
where isUri u = any (`T.isPrefixOf` u) ["http://","ftp://","mailto:"] where isUri u = any (`T.isPrefixOf` u) ["http://","ftp://","mailto:","https://"]
-- | Split off initial and final punctuation -- | Split off initial and final punctuation
punctuation :: Tokenizer punctuation :: Tokenizer
punctuation = finalPunctuation >=> initialPunctuation punctuation = finalPunctuation >=> initialPunctuation
--hyphens :: Tokenizer --hyphens :: Tokenizer
......
...@@ -36,7 +36,7 @@ import GHC.Real (round) ...@@ -36,7 +36,7 @@ import GHC.Real (round)
import Data.Text (Text) import Data.Text (Text)
import NLP.RAKE.Text import NLP.RAKE.Text
import Gargantext.Text.Terms.Stop (stopList) import Gargantext.Text.Samples.EN (stopList)
import Gargantext.Prelude import Gargantext.Prelude
select :: Double -> [a] -> [a] select :: Double -> [a] -> [a]
......
This diff is collapsed.
...@@ -106,7 +106,7 @@ treeData cId nt lt = do ...@@ -106,7 +106,7 @@ treeData cId nt lt = do
pure $ toTree lt cs' m pure $ toTree lt cs' m
treeData' :: FlowCmdM env ServantErr m treeData' :: FlowCmdM env ServerError m
=> CorpusId -> NgramsType -> ListType => CorpusId -> NgramsType -> ListType
-> m [MyTree] -> m [MyTree]
treeData' cId nt lt = do treeData' cId nt lt = do
......
...@@ -77,7 +77,7 @@ getGraph nId = do ...@@ -77,7 +77,7 @@ getGraph nId = do
<$> groupNodesByNgrams ngs <$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) NgramsTerms (Map.keys ngs) <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) NgramsTerms (Map.keys ngs)
graph <- liftIO $ cooc2graph 1 myCooc graph <- liftIO $ cooc2graph 0 myCooc
pure $ set graph_metadata (Just metadata) graph pure $ set graph_metadata (Just metadata) graph
......
...@@ -27,22 +27,18 @@ import Data.String.Conversions ...@@ -27,22 +27,18 @@ import Data.String.Conversions
--import Control.Monad.Reader (ask) --import Control.Monad.Reader (ask)
import qualified Data.ByteString as DB import qualified Data.ByteString as DB
import qualified Data.ByteString.Lazy as DBL import qualified Data.ByteString.Lazy as DBL
import Data.Text (Text)
import Data.Map (empty)
import Data.Swagger import Data.Swagger
import Gargantext.API.Types import Gargantext.API.Types
import Gargantext.API.Utils (swaggerOptions)
import Gargantext.Database.Types.Node (PhyloId, ListId, CorpusId) import Gargantext.Database.Types.Node (PhyloId, ListId, CorpusId)
import Gargantext.Database.Schema.Node (insertNodes, nodePhyloW, getNodePhylo)
import Gargantext.Database.Types.Node -- (NodePhylo(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Main import Gargantext.Viz.Phylo.Main
import Gargantext.Viz.Phylo.Aggregates
import Gargantext.Viz.Phylo.Example import Gargantext.Viz.Phylo.Example
import Gargantext.Viz.Phylo.Tools
import Gargantext.API.Ngrams (TODO(..)) import Gargantext.API.Ngrams (TODO(..))
--import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Viz.Phylo.LevelMaker
import Servant import Servant
import Servant.Job.Utils (swaggerOptions)
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Web.HttpApiData (parseUrlPiece, readTextData) import Web.HttpApiData (parseUrlPiece, readTextData)
...@@ -56,10 +52,11 @@ type PhyloAPI = Summary "Phylo API" ...@@ -56,10 +52,11 @@ type PhyloAPI = Summary "Phylo API"
:<|> PostPhylo :<|> PostPhylo
phyloAPI :: PhyloId -> GargServer PhyloAPI phyloAPI :: PhyloId -> UserId -> GargServer PhyloAPI
phyloAPI n = getPhylo' n phyloAPI n u = getPhylo n
:<|> postPhylo n u
-- :<|> putPhylo n -- :<|> putPhylo n
:<|> postPhylo n -- :<|> deletePhylo n
newtype SVG = SVG DB.ByteString newtype SVG = SVG DB.ByteString
...@@ -82,7 +79,8 @@ instance MimeRender SVG SVG where ...@@ -82,7 +79,8 @@ instance MimeRender SVG SVG where
------------------------------------------------------------------------ ------------------------------------------------------------------------
type GetPhylo = QueryParam "listId" ListId type GetPhylo = QueryParam "listId" ListId
:> QueryParam "level" Level :> QueryParam "level" Level
:> QueryParam "filiation" Filiation :> QueryParam "minSizeBranch" MinSizeBranch
{- :> QueryParam "filiation" Filiation
:> QueryParam "childs" Bool :> QueryParam "childs" Bool
:> QueryParam "depth" Level :> QueryParam "depth" Level
:> QueryParam "metrics" [Metric] :> QueryParam "metrics" [Metric]
...@@ -95,55 +93,50 @@ type GetPhylo = QueryParam "listId" ListId ...@@ -95,55 +93,50 @@ type GetPhylo = QueryParam "listId" ListId
:> QueryParam "export" ExportMode :> QueryParam "export" ExportMode
:> QueryParam "display" DisplayMode :> QueryParam "display" DisplayMode
:> QueryParam "verbose" Bool :> QueryParam "verbose" Bool
-}
:> Get '[SVG] SVG :> Get '[SVG] SVG
-- | TODO -- | TODO
-- Add real text processing -- Add real text processing
-- Fix Filter parameters -- Fix Filter parameters
{-
getPhylo :: PhyloId -> GargServer GetPhylo getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo _phyloId _lId l f b l' ms x y z ts s o e d b' = do --getPhylo phId _lId l msb _f _b _l' _ms _x _y _z _ts _s _o _e _d _b' = do
getPhylo phId _lId l msb = do
phNode <- getNodePhylo phId
let let
fs' = maybe (Just []) (\p -> Just [p]) $ LonelyBranch <$> (LBParams <$> x <*> y <*> z) level = maybe 2 identity l
so = (,) <$> s <*> o branc = maybe 2 identity msb
q = initPhyloQueryView l f b l' ms fs' ts so e d b' maybePhylo = hyperdataPhylo_data $ _node_hyperdata phNode
-- | TODO remove phylo for real data here
pure (toPhyloView q phylo)
-- TODO remove phylo for real data here
-}
getPhylo' :: PhyloId -> GargServer GetPhylo p <- liftIO $ viewPhylo2Svg $ viewPhylo level branc $ maybe phyloFromQuery identity maybePhylo
getPhylo' _phyloId _lId _l _f _b _l' _ms _x _y _z _ts _s _o _e _d _b' = do
p <- liftIO $ viewPhylo2Svg phyloView
pure (SVG p) pure (SVG p)
------------------------------------------------------------------------ ------------------------------------------------------------------------
{-
type PutPhylo = (Put '[JSON] Phylo )
--putPhylo :: PhyloId -> Maybe ListId -> PhyloQueryBuild -> Phylo
putPhylo :: PhyloId -> GargServer PutPhylo
putPhylo = undefined
-}
------------------------------------------------------------------------
type PostPhylo = QueryParam "listId" ListId type PostPhylo = QueryParam "listId" ListId
:> ReqBody '[JSON] PhyloQueryBuild -- :> ReqBody '[JSON] PhyloQueryBuild
:> (Post '[JSON] Phylo) :> (Post '[JSON] NodeId)
postPhylo :: CorpusId -> GargServer PostPhylo postPhylo :: CorpusId -> UserId -> GargServer PostPhylo
postPhylo _n _lId q = do postPhylo n userId _lId = do
-- TODO get Reader settings -- TODO get Reader settings
-- s <- ask -- s <- ask
let let
vrs = Just ("1" :: Text) -- _vrs = Just ("1" :: Text)
sft = Just (Software "Gargantext" "4") -- _sft = Just (Software "Gargantext" "4")
prm = initPhyloParam vrs sft (Just q) -- _prm = initPhyloParam vrs sft (Just q)
pure (toPhyloBase q prm (parseDocs (initFoundationsRoots actants) corpus) termList empty) phy <- flowPhylo n
pId <- insertNodes [nodePhyloW (Just "Phylo") (Just $ HyperdataPhylo Nothing (Just phy)) n userId]
pure $ NodeId (fromIntegral pId)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | DELETE Phylo == delete a node -- | DELETE Phylo == delete a node
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------
{-
type PutPhylo = (Put '[JSON] Phylo )
--putPhylo :: PhyloId -> Maybe ListId -> PhyloQueryBuild -> Phylo
putPhylo :: PhyloId -> GargServer PutPhylo
putPhylo = undefined
-}
-- | Instances -- | Instances
...@@ -160,7 +153,6 @@ instance Arbitrary Phylo ...@@ -160,7 +153,6 @@ instance Arbitrary Phylo
where where
arbitrary = elements [phylo] arbitrary = elements [phylo]
instance ToSchema Cluster instance ToSchema Cluster
instance ToSchema EdgeType instance ToSchema EdgeType
instance ToSchema Filiation instance ToSchema Filiation
......
...@@ -19,7 +19,6 @@ module Gargantext.Viz.Phylo.Main ...@@ -19,7 +19,6 @@ module Gargantext.Viz.Phylo.Main
where where
import Control.Monad.IO.Class (liftIO)
import Data.GraphViz import Data.GraphViz
import Data.Maybe import Data.Maybe
import Data.Text (Text) import Data.Text (Text)
...@@ -39,7 +38,6 @@ import Gargantext.Viz.Phylo.LevelMaker ...@@ -39,7 +38,6 @@ import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.View.Export import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine import Gargantext.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
import Servant
import qualified Data.ByteString as DB import qualified Data.ByteString as DB
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -47,51 +45,35 @@ import qualified Data.Text as Text ...@@ -47,51 +45,35 @@ import qualified Data.Text as Text
type MinSizeBranch = Int type MinSizeBranch = Int
flowPhylo :: FlowCmdM env ServantErr m flowPhylo :: FlowCmdM env err m
=> CorpusId => CorpusId
-> Level -> MinSizeBranch -> m Phylo
-> FilePath flowPhylo cId = do
-> m FilePath
flowPhylo cId l m fp = do
list <- defaultList cId list <- defaultList cId
-- listMaster <- selectNodesWithUsername NodeList userMaster
termList <- Map.toList <$> getTermsWith Text.words [list] NgramsTerms GraphTerm termList <- Map.toList <$> getTermsWith Text.words [list] NgramsTerms GraphTerm
--printDebug "termList" termList
--x <- mapTermListRoot [list] NgramsTerms
--printDebug "mapTermListRoot" x
-- TODO optimize unwords
docs' <- catMaybes <$> map (\h -> (,) <$> _hyperdataDocument_publication_year h
<*> _hyperdataDocument_abstract h
) <$> selectDocs cId
let patterns = buildPatterns termList
let docs = map ( (\(y,t) -> Document y t) . filterTerms patterns) docs'
--printDebug "docs" docs
--printDebug "docs" termList
liftIO $ flowPhylo' (List.sortOn date docs) termList l m fp
docs' <- catMaybes
<$> map (\h -> (,) <$> _hyperdataDocument_publication_year h
<*> _hyperdataDocument_abstract h
)
<$> selectDocs cId
parse :: TermList -> [(Date, Text)] -> IO [Document] let
parse l c = do patterns = buildPatterns termList
let patterns = buildPatterns l -- | To filter the Ngrams of a document based on the termList
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) c filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
filterTerms patterns' (y,d) = (y,termsInText patterns' d)
where
-- | To filter the Ngrams of a document based on the termList --------------------------------------
filterTerms :: Patterns -> (Date, Text) -> (Date, [Text]) termsInText :: Patterns -> Text -> [Text]
filterTerms patterns (y,d) = (y,termsInText patterns d) termsInText pats txt = List.nub $ List.concat $ map (map Text.unwords) $ extractTermsWithList pats txt
where --------------------------------------
--------------------------------------
termsInText :: Patterns -> Text -> [Text] docs = map ( (\(y,t) -> Document y t) . filterTerms patterns) docs'
termsInText pats txt = List.nub $ List.concat $ map (map Text.unwords) $ extractTermsWithList pats txt
-------------------------------------- --liftIO $ flowPhylo' (List.sortOn date docs) termList l m fp
pure $ buildPhylo (List.sortOn date docs) termList
-- TODO SortedList Document -- TODO SortedList Document
......
...@@ -815,7 +815,7 @@ getProximity cluster = case cluster of ...@@ -815,7 +815,7 @@ getProximity cluster = case cluster of
-- | To initialize all the Cluster / Proximity with their default parameters -- | To initialize all the Cluster / Proximity with their default parameters
initFis :: Maybe Bool -> Maybe Support -> Maybe Int -> FisParams initFis :: Maybe Bool -> Maybe Support -> Maybe Int -> FisParams
initFis (def True -> kmf) (def 1 -> min') (def 1 -> thr) = FisParams kmf min' thr initFis (def True -> kmf) (def 2 -> min') (def 4 -> thr) = FisParams kmf min' thr
initHamming :: Maybe Double -> HammingParams initHamming :: Maybe Double -> HammingParams
initHamming (def 0.01 -> sens) = HammingParams sens initHamming (def 0.01 -> sens) = HammingParams sens
...@@ -847,7 +847,7 @@ initPhyloQueryBuild :: Text -> Text -> Maybe Int ...@@ -847,7 +847,7 @@ initPhyloQueryBuild :: Text -> Text -> Maybe Int
-> Maybe Double -> Maybe Double -> Maybe Int -> Maybe Double -> Maybe Double -> Maybe Int
-> Maybe Level -> Maybe Cluster -> PhyloQueryBuild -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain) initPhyloQueryBuild name desc (def 5 -> grain)
(def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics) (def 1 -> steps) (def defaultFis -> cluster) (def [] -> metrics)
(def [] -> filters) (def defaultWeightedLogJaccard -> matching') (def 5 -> frame) (def [] -> filters) (def defaultWeightedLogJaccard -> matching') (def 5 -> frame)
(def 0.8 -> frameThr) (def 0.5 -> reBranchThr) (def 4 -> reBranchNth) (def 0.8 -> frameThr) (def 0.5 -> reBranchThr) (def 4 -> reBranchNth)
(def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) = (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
......
resolver: lts-12.26 resolver: lts-14.1
flags: {} flags: {}
extra-package-dbs: [] extra-package-dbs: []
packages: packages:
...@@ -22,14 +22,21 @@ extra-deps: ...@@ -22,14 +22,21 @@ extra-deps:
commit: 3fe28b683aba5ddf05e3b5f8eced0bd05c5a29f9 commit: 3fe28b683aba5ddf05e3b5f8eced0bd05c5a29f9
- git: https://github.com/robstewart57/rdf4h.git - git: https://github.com/robstewart57/rdf4h.git
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4 commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
#
# External API connectin to get data
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: dcaa0f5dd53f20648f4f5a615d29163582a4219c commit: 01a6bf1e79cd5aef8628b240bbd47cb2a0864d5e
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: daeae80365250c4bd539f0a65e271f9aa37f731f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: bf57642f6b66f554fdc0a38ac391cd8200dffcb3 commit: ef9e638c97788df251f50b71fcdd9551b87f12c5
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
#
- git: https://gitlab.iscpif.fr/gargantext/patches-class - git: https://gitlab.iscpif.fr/gargantext/patches-class
commit: 746b4ce0af8f9e600d555ad7e5b2973a940cdad9 commit: 746b4ce0af8f9e600d555ad7e5b2973a940cdad9
- git: https://github.com/np/servant-job.git #- git: https://github.com/np/servant-job.git
commit: ac4227441bbca30c44235582b5ec31340c569021 # commit: ac4227441bbca30c44235582b5ec31340c569021
- git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git - git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git
commit: 1c636112b151110408e7c5a28cec39e46657358e commit: 1c636112b151110408e7c5a28cec39e46657358e
- git: https://github.com/np/patches-map - git: https://github.com/np/patches-map
...@@ -38,10 +45,8 @@ extra-deps: ...@@ -38,10 +45,8 @@ extra-deps:
commit: 53385de076be09f728a1b58c035a18e9ff9bcfd6 commit: 53385de076be09f728a1b58c035a18e9ff9bcfd6
- git: https://github.com/delanoe/hsparql.git - git: https://github.com/delanoe/hsparql.git
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: e39454101b53916e3082085ebfe922df695fc775
- KMP-0.1.0.2 - KMP-0.1.0.2
- accelerate-1.2.0.0 - accelerate-1.2.0.1
- aeson-lens-0.5.0.0 - aeson-lens-0.5.0.0
- deepseq-th-0.1.0.4 - deepseq-th-0.1.0.4
- duckling-0.1.3.0 - duckling-0.1.3.0
...@@ -56,8 +61,9 @@ extra-deps: ...@@ -56,8 +61,9 @@ extra-deps:
- json-stream-0.4.2.4 # Text.Parsers (JSON) - json-stream-0.4.2.4 # Text.Parsers (JSON)
- serialise-0.2.0.0 - serialise-0.2.0.0
- servant-flatten-0.2 - servant-flatten-0.2
- servant-multipart-0.11.2 #- servant-multipart-0.11.2
- stemmer-0.5.2 - stemmer-0.5.2
- time-units-1.0.0 - time-units-1.0.0
- validity-0.9.0.0 # patches-{map,class} - validity-0.9.0.0 # patches-{map,class}
- directory-1.3.1.5 - directory-1.3.1.5
- process-1.6.5.1@sha256:77a9afeb676357f67fe5cf1ad79aca0745fb6f7fb96b786d510af08f622643f6,2468
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment