Commit f525a787 authored by qlobbe

Merge branch 'dev' into dev-phylo

parents 6fc1eb02 8f864992
......@@ -13,12 +13,37 @@ Institute of Paris Île-de-France (ISC-PIF) and its partners.
## Installation
### Docker
curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devops/docker-install | sh
Disclaimer: this project is still under development; it is a work in
progress. Please report issues and help improve this documentation if you
encounter problems.
### Debian
curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devops/debian-install | sh
### Build Core Code
#### Docker
curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devops/docker-install | sh
#### Debian
curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devops/debian-install | sh
### Add dependencies
1. CoreNLP is needed (EN and FR); this dependency will soon no longer be
required.
- wget https://dl.gargantext.org/coreNLP.tar.bz2
- tar xvjf coreNLP.tar.bz2
- ./startServer.sh
2. The Louvain C++ library is needed to draw the socio-semantic graphs
- git clone https://gitlab.iscpif.fr/gargantext/clustering-louvain-cplusplus.git
- cd clustering-louvain-cplusplus
- ./install
### Initialization
Users have to be created first:
1. stack ghci
2. runCmd insertUsersDemo
Then you can log in with user1:1resu
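A minimal GHCi sketch of the same step, assuming a local `gargantext.ini` (the helpers come from `Gargantext.API.Settings`):

```haskell
-- Run from `stack ghci`; inserts the demo users (user1, gargantua).
-- λ> import Gargantext.API.Settings (withDevEnv, runCmdDev)
-- λ> import Gargantext.Database.Schema.User (insertUsersDemo)
-- λ> withDevEnv "gargantext.ini" $ \env -> runCmdDev env insertUsersDemo
```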
## Use Cases
......@@ -33,21 +58,3 @@ curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devo
stack --docker exec gargantext-cli -- CorpusFromGarg.csv ListFromGarg.csv Output.json
```sql
INSERT INTO auth_user (password, is_superuser, username, first_name, last_name, email, is_staff, is_active)
VALUES ('1resu', true, 'user1', 'user', '1', 'a@localhost', true, true);
-- nodetype NodeUser has id 1
-- inserted user_id = 3
INSERT INTO nodes (typename, user_id, name)
VALUES (1, 3, 'user1');
-- same for master user -- 'gargantua'
INSERT INTO auth_user (password, is_superuser, username, first_name, last_name, email, is_staff, is_active)
VALUES ('autnagrag', true, 'gargantua', 'gargantua', '1', 'g@localhost', true, true);
-- nodetype NodeUser has id 1
-- inserted user_id = 5
INSERT INTO nodes (typename, user_id, name)
VALUES (1, 5, 'gargantua');
```
......@@ -19,21 +19,23 @@ Import a corpus binary.
module Main where
import Data.Either
import Prelude (read)
import Control.Exception (finally)
import Servant (ServantErr)
import Gargantext.Prelude
import Gargantext.Database.Flow (FlowCmdM, flowCorpusFile)
import Gargantext.Text.Corpus.Parsers (FileFormat(CsvHalFormat))
import Gargantext.Text.Corpus.Parsers (FileFormat(..))
import Gargantext.Database.Utils (Cmd, )
import Gargantext.Database.Types.Node (CorpusId, toHyperdataDocument)
import Gargantext.Database.Schema.User (insertUsersDemo)
import Gargantext.Text.Terms (TermType(..))
import Gargantext.Core (Lang(..))
import Gargantext.API -- (GargError)
import Gargantext.API.Node () -- instances
import Gargantext.API.Settings (withDevEnv, runCmdDev, DevEnv)
import System.Environment (getArgs)
--import Gargantext.Text.Corpus.Parsers.GrandDebat (readFile, GrandDebatReference(..))
import Data.Text (Text)
import qualified Data.Text as Text
import Control.Monad.IO.Class (liftIO)
......@@ -42,16 +44,17 @@ main = do
[userCreate, user, name, iniPath, limit, corpusPath] <- getArgs
--{-
let createUsers :: Cmd ServantErr Int64
let createUsers :: Cmd GargError Int64
createUsers = insertUsersDemo
let
--tt = (Unsupervised EN 5 1 Nothing)
tt = (Mono EN)
cmd :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
cmd = flowCorpusFile (cs user) (cs name) (read limit :: Int) tt CsvHalFormat corpusPath
--tt = (Unsupervised EN 6 0 Nothing)
tt = (Multi EN)
format = CsvGargV3 -- CsvHalFormat --WOS
cmd :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
cmd = flowCorpusFile (cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath
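-- A hedged invocation sketch for this importer (argument order follows the
-- getArgs destructuring in main above; binary name and values are illustrative):
--   gargantext-import <userCreate> <user> <name> <iniPath> <limit> <corpusPath>
--   gargantext-import true user1 MyCorpus gargantext.ini 10000 corpus.csv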
{-
let debatCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
let debatCorpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
debatCorpus = do
docs <- liftIO ( splitEvery 500
<$> take (read limit :: Int)
......
from fpco/stack-build:lts-12.26
RUN apt-get update && \
apt-get install -y git libigraph0-dev && \
rm -rf /var/lib/apt/lists/*
RUN mkdir -v /deps && \
cd /deps && \
git clone https://gitlab.iscpif.fr/gargantext/clustering-louvain-cplusplus && \
cd clustering-louvain-cplusplus && \
./install
version: '3'
services:
postgres:
image: 'postgres:latest'
ports:
- 5432:5432
environment:
POSTGRES_USER: gargantua
POSTGRES_PASSWORD: C8kdcUrAQy66U
POSTGRES_DB: gargandbV5
volumes:
- pgdata:/var/lib/postgresql/data
- ../:/gargantext
- ../dbs:/dbs
volumes:
pgdata:
#!/bin/bash
sudo su postgres
PW="password"
DB="gargandbV5"
USER="gargantua"
psql -c "CREATE USER \"${USER}\"
psql -c "ALTER USER \"${USER}\" with PASSWORD \"${PW}\""
psql -c "DROP DATABASE IF EXISTS \"${DB}\""
createdb "${DB}"
psql "${DB}" < schema.sql
psql -c "ALTER DATABASE \"${DB}\" OWNER to \"${USER}\" ;"
#!/bin/bash
DB="gargandbV5"
rm ../../tmp*
rm ../../repo*
psql -c "drop database IF EXISTS \"${DB}\""
createdb "${DB}"
......
......@@ -84,8 +84,7 @@ CREATE TABLE public.nodes_nodes (
node1_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
node2_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
score real,
favorite boolean,
delete boolean,
category integer,
PRIMARY KEY (node1_id,node2_id)
);
ALTER TABLE public.nodes_nodes OWNER TO gargantua;
......@@ -141,7 +140,7 @@ CREATE UNIQUE INDEX ON public.nodes USING btree (typename, parent_id, ((hyperdat
CREATE UNIQUE INDEX ON public.ngrams (terms); -- TEST GIN
CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, delete);
CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, category);
CREATE UNIQUE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id);
CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type);
......
......@@ -30,7 +30,7 @@ library:
- Gargantext.API.FrontEnd
- Gargantext.API.Ngrams
- Gargantext.API.Node
- Gargantext.API.Orchestrator
# - Gargantext.API.Orchestrator
- Gargantext.API.Search
- Gargantext.API.Settings
- Gargantext.Core
......@@ -48,6 +48,7 @@ library:
- Gargantext.Text
- Gargantext.Text.Context
- Gargantext.Text.Corpus.Parsers
- Gargantext.Text.Corpus.API
- Gargantext.Text.Corpus.Parsers.CSV
- Gargantext.Text.Examples
- Gargantext.Text.List.CSV
......@@ -57,7 +58,6 @@ library:
- Gargantext.Text.Metrics.Count
- Gargantext.Text.Search
- Gargantext.Text.Terms
- Gargantext.Text.Terms.Stop
- Gargantext.Text.Terms.Mono
- Gargantext.Text.Terms.Multi.Lang.En
- Gargantext.Text.Terms.Multi.Lang.Fr
......@@ -102,6 +102,7 @@ library:
- crawlerPubMed
- crawlerIsidore
- crawlerHAL
- crawlerISTEX
- data-time-segment
- deepseq
- directory
......@@ -170,7 +171,7 @@ library:
- servant-auth
- servant-blaze
- servant-client
- servant-job
# - servant-job
- servant-mock
- servant-multipart
- servant-server
......@@ -206,6 +207,7 @@ library:
- zip
- zlib
# - utc
# API external connections
executables:
gargantext-server:
......
......@@ -126,7 +126,7 @@ instance HasInvalidError GargError where
instance HasTreeError GargError where
_TreeError = _GargTreeError
showAsServantErr :: Show a => a -> ServantErr
showAsServantErr :: Show a => a -> ServerError
showAsServantErr a = err500 { errBody = BL8.pack $ show a }
fireWall :: Applicative f => Request -> FireWall -> f Bool
......@@ -321,7 +321,7 @@ serverGargAPI -- orchestrator
:<|> New.info fakeUserId
-- :<|> orchestrator
where
fakeUserId = 1 -- TODO
fakeUserId = 2 -- TODO: by default user1 (if users were generated with insertUsersDemo)
serverStatic :: Server (Get '[HTML] Html)
serverStatic = $(do
......
......@@ -24,26 +24,28 @@ New corpus means either:
module Gargantext.API.Corpus.New
where
import Data.Either
import Control.Monad.IO.Class (liftIO)
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Text.Terms (TermType(..))
import Gargantext.Prelude
import Gargantext.Prelude.Utils (hash)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.Database.Flow (FlowCmdM)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
import qualified Gargantext.Text.Corpus.API as API
import Gargantext.Database.Types.Node (UserId)
data Query = Query { query_query :: Text
, query_corpus_id :: Int
, query_files_id :: [Text]
, query_databases :: [API.ExternalAPIs]
}
deriving (Eq, Show, Generic)
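-- A hedged example value for this record (illustrative values only; API.All is
-- the catch-all constructor handled in the api dispatch below):
exampleQuery :: Query
exampleQuery = Query { query_query     = "climate change"
                     , query_corpus_id = 1
                     , query_databases = [API.All]
                     }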
......@@ -54,7 +56,7 @@ instance Arbitrary Query where
arbitrary = elements [ Query q n fs
| q <- ["a","b"]
, n <- [0..10]
, fs <- map (map hash) [["a","b"], ["c","d"]]
, fs <- take 3 $ repeat API.externalAPIs
]
instance ToSchema Query where
......@@ -62,20 +64,24 @@ instance ToSchema Query where
genericDeclareNamedSchema
defaultSchemaOptions {fieldLabelModifier = \fieldLabel -> drop 6 fieldLabel}
type Api = Summary "New Corpus endpoint"
:> ReqBody '[JSON] Query
:> Post '[JSON] CorpusId
:<|> Get '[JSON] ApiInfo
-- | TODO manage several apis
api :: (FlowCmdM env err m) => Query -> m CorpusId
api (Query q _ as) = do
cId <- case head as of
Nothing -> flowCorpusSearchInDatabase "user1" EN q
Just API.All -> flowCorpusSearchInDatabase "user1" EN q
Just a -> do
docs <- liftIO $ API.get a q (Just 1000)
cId' <- flowCorpus "user1" (Left q) (Multi EN) [docs]
pure cId'
api :: FlowCmdM env err m => Query -> m CorpusId
api (Query q _ _) = do
cId <- flowCorpusSearchInDatabase "user1" EN q
pure cId
------------------------------------------------
data ApiInfo = ApiInfo { api_info :: [API.ExternalAPIs]}
deriving (Generic)
......
......@@ -56,7 +56,7 @@ import Data.Map.Strict (Map)
import qualified Data.Set as Set
import Control.Category ((>>>))
import Control.Concurrent
import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~))
import Control.Lens (makeLenses, makePrisms, Getter, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^..), (^?), (+~), (%~), (%=), sumOf, at, _Just, Each(..), itraverse_, both, forOf_, (%%~), (?~))
import Control.Monad.Error.Class (MonadError)
import Control.Monad.Reader
import Control.Monad.State
......@@ -72,7 +72,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix)
-- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlySlow)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Types.Node (NodeType(..))
import Gargantext.Database.Utils (fromField', HasConnection)
......@@ -99,18 +99,22 @@ instance ToParamSchema TODO where
------------------------------------------------------------------------
--data FacetFormat = Table | Chart
data TabType = Docs | Terms | Sources | Authors | Institutes | Trash
data TabType = Docs | Trash | MoreFav | MoreTrash
| Terms | Sources | Authors | Institutes
| Contacts
deriving (Generic, Enum, Bounded)
deriving (Generic, Enum, Bounded, Show)
instance FromHttpApiData TabType
where
parseUrlPiece "Docs" = pure Docs
parseUrlPiece "Trash" = pure Trash
parseUrlPiece "MoreFav" = pure MoreFav
parseUrlPiece "MoreTrash" = pure MoreTrash
parseUrlPiece "Terms" = pure Terms
parseUrlPiece "Sources" = pure Sources
parseUrlPiece "Institutes" = pure Institutes
parseUrlPiece "Authors" = pure Authors
parseUrlPiece "Trash" = pure Trash
parseUrlPiece "Contacts" = pure Contacts
......@@ -437,11 +441,11 @@ instance (Eq a, Arbitrary a) => Arbitrary (Replace a) where
-- If they happen to be equal then the patch is Keep.
instance ToSchema a => ToSchema (Replace a) where
declareNamedSchema (_ :: proxy (Replace a)) = do
declareNamedSchema (_ :: Proxy (Replace a)) = do
-- TODO Keep constructor is not supported here.
aSchema <- declareSchemaRef (Proxy :: Proxy a)
return $ NamedSchema (Just "Replace") $ mempty
& type_ .~ SwaggerObject
& type_ ?~ SwaggerObject
& properties .~
InsOrdHashMap.fromList
[ ("old", aSchema)
......@@ -888,10 +892,10 @@ getTableNgrams :: forall env err m.
-> Maybe OrderBy
-> (NgramsTerm -> Bool)
-> m (Versioned NgramsTable)
getTableNgrams nType nId tabType listId limit_ offset
getTableNgrams _nType nId tabType listId limit_ offset
listType minSize maxSize orderBy searchQuery = do
lIds <- selectNodesWithUsername NodeList userMaster
_lIds <- selectNodesWithUsername NodeList userMaster
let
ngramsType = ngramsTypeFromTabType tabType
offset' = maybe 0 identity offset
......@@ -935,11 +939,15 @@ getTableNgrams nType nId tabType listId limit_ offset
setScores False table = pure table
setScores True table = do
let ngrams_terms = (table ^.. each . ne_ngrams)
occurrences <- getOccByNgramsOnlyFast nId
ngramsType
ngrams_terms
{-
occurrences <- getOccByNgramsOnlySlow nType nId
(lIds <> [listId])
ngramsType
ngrams_terms
-}
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
......
......@@ -33,7 +33,6 @@ import qualified Data.Set as Set
type RootTerm = Text
getListNgrams :: RepoCmdM env err m
=> [ListId] -> NgramsType
-> m (Map Text NgramsRepoElement)
......@@ -68,7 +67,7 @@ mapTermListRoot :: RepoCmdM env err m
-> m (Map Text (ListType, (Maybe Text)))
mapTermListRoot nodeIds ngramsType = do
ngrams <- getListNgrams nodeIds ngramsType
pure $ Map.fromList [(t, (_nre_list nre, _nre_root nre))
pure $ Map.fromList [ (t, (_nre_list nre, _nre_root nre))
| (t, nre) <- Map.toList ngrams
]
......@@ -104,8 +103,8 @@ getCoocByNgrams = getCoocByNgrams' identity
getCoocByNgrams' :: (Ord a, Ord c) => (b -> Set c) -> Diagonal -> Map a b -> Map (a, a) Int
getCoocByNgrams' f (Diagonal diag) m =
Map.fromList [((t1,t2)
,maybe 0 Set.size $ Set.intersection
Map.fromList [( (t1,t2)
, maybe 0 Set.size $ Set.intersection
<$> (fmap f $ Map.lookup t1 m)
<*> (fmap f $ Map.lookup t2 m)
) | (t1,t2) <- case diag of
......@@ -113,5 +112,3 @@ getCoocByNgrams' f (Diagonal diag) m =
False -> listToCombi identity (Map.keys m)
]
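-- A hedged worked example of getCoocByNgrams' (qualified Map/Set as imported
-- in this module; identity from Gargantext.Prelude):
coocExample :: Map (Text, Text) Int
coocExample = getCoocByNgrams' identity (Diagonal False)
                (Map.fromList [ ("a", Set.fromList [1, 2 :: Int])
                              , ("b", Set.fromList [2, 3]) ])
-- ==> fromList [(("a","b"),1)] : "a" and "b" share exactly one document id.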
......@@ -7,7 +7,6 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-- TODO-ACCESS: CanGetNode
-- TODO-EVENTS: No events as this is a read only query.
Node API
......@@ -19,7 +18,6 @@ Node API
-- TODO-EVENTS: DeletedNodes [NodeId]
-- {"tag": "DeletedNodes", "nodes": [Int*]}
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
......@@ -38,7 +36,7 @@ Node API
module Gargantext.API.Node
where
import Control.Lens (prism', (.~), (?~))
import Control.Lens ((.~), (?~))
import Control.Monad ((>>), forM)
import Control.Monad.IO.Class (liftIO)
import Data.Aeson (FromJSON, ToJSON)
......@@ -52,15 +50,15 @@ import Gargantext.API.Metrics
import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, apiNgramsTableCorpus, QueryParamR, TODO)
import Gargantext.API.Ngrams.NTree (MyTree)
import Gargantext.API.Search (SearchDocsAPI, searchDocs)
import Gargantext.API.Table
import Gargantext.API.Types
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Types.Main (Tree, NodeTree, ListType)
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),runViewAuthorsDoc)
import Gargantext.Database.Facet (FacetDoc, OrderBy(..))
import Gargantext.Database.Node.Children (getChildren)
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, getNode', deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..))
import Gargantext.Database.Schema.NodeNode (nodesToFavorite, nodesToTrash)
import Gargantext.Database.Tree (treeDB, HasTreeError(..), TreeError(..))
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, getNode', deleteNode, deleteNodes, mkNodeWithParent, JSONB, HasNodeError(..))
import Gargantext.Database.Schema.NodeNode (nodeNodesCategory)
import Gargantext.Database.Tree (treeDB)
import Gargantext.Database.Types.Node
import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude
......@@ -97,7 +95,7 @@ nodesAPI ids = deleteNodes ids
-- TODO-EVENTS:
-- PutNode ?
-- TODO needs design discussion.
type Roots = Get '[JSON] [NodeAny]
type Roots = Get '[JSON] [Node HyperdataAny]
:<|> Put '[JSON] Int -- TODO
-- | TODO: access by admin only
......@@ -132,8 +130,7 @@ type NodeAPI a = Get '[JSON] (Node a)
:<|> "ngrams" :> TableNgramsApi
:<|> "pairing" :> PairingApi
:<|> "favorites" :> FavApi
:<|> "documents" :> DocsApi
:<|> "category" :> CatApi
:<|> "search" :> SearchDocsAPI
-- VIZ
......@@ -171,19 +168,20 @@ nodeAPI p uId id
:<|> getChildren id p
-- TODO gather it
:<|> getTable id
:<|> tableApi id
:<|> apiNgramsTableCorpus id
:<|> getPairing id
-- :<|> getTableNgramsDoc id
:<|> favApi id
:<|> delDocs id
:<|> catApi id
:<|> searchDocs id
:<|> getScatter id
:<|> getChart id
:<|> getPie id
:<|> getTree id
:<|> phyloAPI id
:<|> phyloAPI id uId
:<|> postUpload id
where
deleteNodeApi id' = do
......@@ -194,8 +192,6 @@ nodeAPI p uId id
-- Annuaire
-- :<|> query
------------------------------------------------------------------------
data RenameNode = RenameNode { r_name :: Text }
deriving (Generic)
......@@ -217,55 +213,30 @@ instance Arbitrary PostNode where
arbitrary = elements [PostNode "Node test" NodeCorpus]
------------------------------------------------------------------------
type DocsApi = Summary "Docs : Move to trash"
:> ReqBody '[JSON] Documents
:> Delete '[JSON] [Int]
data Documents = Documents { documents :: [NodeId]}
deriving (Generic)
instance FromJSON Documents
instance ToJSON Documents
instance ToSchema Documents
delDocs :: CorpusId -> Documents -> Cmd err [Int]
delDocs cId ds = nodesToTrash $ map (\n -> (cId, n, True)) $ documents ds
------------------------------------------------------------------------
type FavApi = Summary " Favorites label"
:> ReqBody '[JSON] Favorites
type CatApi = Summary " To Categorize NodeNodes: 0 for delete, 1/null neutral, 2 favorite"
:> ReqBody '[JSON] NodesToCategory
:> Put '[JSON] [Int]
:<|> Summary " Favorites unlabel"
:> ReqBody '[JSON] Favorites
:> Delete '[JSON] [Int]
data Favorites = Favorites { favorites :: [NodeId]}
data NodesToCategory = NodesToCategory { ntc_nodesId :: [NodeId]
, ntc_category :: Int
}
deriving (Generic)
instance FromJSON Favorites
instance ToJSON Favorites
instance ToSchema Favorites
putFav :: CorpusId -> Favorites -> Cmd err [Int]
putFav cId fs = nodesToFavorite $ map (\n -> (cId, n, True)) $ favorites fs
delFav :: CorpusId -> Favorites -> Cmd err [Int]
delFav cId fs = nodesToFavorite $ map (\n -> (cId, n, False)) $ favorites fs
instance FromJSON NodesToCategory
instance ToJSON NodesToCategory
instance ToSchema NodesToCategory
favApi :: CorpusId -> GargServer FavApi
favApi cId = putFav cId :<|> delFav cId
catApi :: CorpusId -> GargServer CatApi
catApi = putCat
where
putCat :: CorpusId -> NodesToCategory -> Cmd err [Int]
putCat cId cs' = nodeNodesCategory $ map (\n -> (cId, n, ntc_category cs')) (ntc_nodesId cs')
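-- A hedged helper sketch on top of nodeNodesCategory (markFavorites is an
-- illustrative name, not part of this commit):
markFavorites :: CorpusId -> [NodeId] -> Cmd err [Int]
markFavorites cId ns = nodeNodesCategory [ (cId, n, 2) | n <- ns ]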
------------------------------------------------------------------------
type TableApi = Summary " Table API"
:> QueryParam "view" TabType
:> QueryParam "offset" Int
:> QueryParam "limit" Int
:> QueryParam "order" OrderBy
:> Get '[JSON] [FacetDoc]
-- TODO adapt FacetDoc -> ListDoc (and add type of document as column)
type PairingApi = Summary " Pairing API"
:> QueryParam "view" TabType -- TODO change TabType -> DocType (CorpusId for pairing)
:> QueryParam "view" TabType
-- TODO change TabType -> DocType (CorpusId for pairing)
:> QueryParam "offset" Int
:> QueryParam "limit" Int
:> QueryParam "order" OrderBy
......@@ -290,8 +261,6 @@ type TreeApi = Summary " Tree API"
:> QueryParamR "listType" ListType
:> Get '[JSON] (ChartMetrics [MyTree])
-- Depending on the Type of the Node, we could post
-- New documents for a corpus
-- New map list terms
......@@ -302,7 +271,9 @@ type TreeApi = Summary " Tree API"
------------------------------------------------------------------------
{-
NOTE: These instances are not necessary. However, these messages could be part
of a display function for NodeError/TreeError.
instance HasNodeError ServantErr where
_NodeError = prism' mk (const Nothing) -- panic "HasNodeError ServantErr: not a prism")
where
......@@ -320,7 +291,6 @@ instance HasNodeError ServantErr where
mk ManyParents = err500 { errBody = e <> "Too many parents" }
mk ManyNodeUsers = err500 { errBody = e <> "Many userNode/user" }
-- TODO(orphan): There should be a proper APIError data type with a case TreeError.
instance HasTreeError ServantErr where
_TreeError = prism' mk (const Nothing) -- panic "HasTreeError ServantErr: not a prism")
where
......@@ -328,6 +298,7 @@ instance HasTreeError ServantErr where
mk NoRoot = err404 { errBody = e <> "Root node not found" }
mk EmptyRoot = err500 { errBody = e <> "Root node should not be empty" }
mk TooManyRoots = err500 { errBody = e <> "Too many root nodes" }
-}
type TreeAPI = Get '[JSON] (Tree NodeTree)
-- TODO-ACCESS: CanTree or CanGetNode
......@@ -340,24 +311,6 @@ treeAPI = treeDB
rename :: NodeId -> RenameNode -> Cmd err [Int]
rename nId (RenameNode name') = U.update (U.Rename nId name')
getTable :: NodeId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
-> Maybe OrderBy -> Cmd err [FacetDoc]
getTable cId ft o l order =
case ft of
(Just Docs) -> runViewDocuments cId False o l order
(Just Trash) -> runViewDocuments cId True o l order
_ -> panic "not implemented"
getPairing :: ContactId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
-> Maybe OrderBy -> Cmd err [FacetDoc]
getPairing cId ft o l order =
case ft of
(Just Docs) -> runViewAuthorsDoc cId False o l order
(Just Trash) -> runViewAuthorsDoc cId True o l order
_ -> panic "not implemented"
postNode :: HasNodeError err => UserId -> NodeId -> PostNode -> Cmd err [NodeId]
postNode uId pId (PostNode nodeName nt) = mkNodeWithParent nt (Just pId) uId nodeName
......
......@@ -56,7 +56,6 @@ instance Arbitrary SearchQuery where
arbitrary = elements [SearchQuery ["electrodes"]]
-----------------------------------------------------------------------
data SearchDocResults = SearchDocResults { sdr_results :: [FacetDoc]}
deriving (Generic)
$(deriveJSON (unPrefix "sdr_") ''SearchDocResults)
......@@ -102,7 +101,7 @@ searchPairs pId (SearchQuery q) o l order =
searchDocs :: NodeId -> GargServer SearchDocsAPI
searchDocs nId (SearchQuery q) o l order =
SearchDocResults <$> searchInCorpus nId q o l order
SearchDocResults <$> searchInCorpus nId False q o l order
--SearchResults <$> searchInCorpusWithContacts nId q o l order
......@@ -46,7 +46,7 @@ import qualified Data.ByteString.Lazy as L
import Servant
import Servant.Client (BaseUrl, parseBaseUrl)
import Servant.Job.Async (newJobEnv, defaultSettings)
--import Servant.Job.Async (newJobEnv, defaultSettings)
import Web.HttpApiData (parseUrlPiece)
import qualified Jose.Jwk as Jose
import qualified Jose.Jwa as Jose
......@@ -60,7 +60,7 @@ import Control.Lens
import Gargantext.Prelude
import Gargantext.Database.Utils (databaseParameters, HasConnection(..), Cmd', runCmd)
import Gargantext.API.Ngrams (NgramsRepo, HasRepoVar(..), HasRepoSaver(..), HasRepo(..), RepoEnv(..), r_version, saveRepo, initRepo, renv_var, renv_lock)
import Gargantext.API.Orchestrator.Types
--import Gargantext.API.Orchestrator.Types
type PortNumber = Int
......@@ -147,7 +147,7 @@ data Env = Env
, _env_repo :: !RepoEnv
, _env_manager :: !Manager
, _env_self_url :: !BaseUrl
, _env_scrapers :: !ScrapersEnv
--, _env_scrapers :: !ScrapersEnv
}
deriving (Generic)
......@@ -243,7 +243,7 @@ newEnv port file = do
param <- databaseParameters file
conn <- connect param
repo <- readRepoEnv
scrapers_env <- newJobEnv defaultSettings manager
--scrapers_env <- newJobEnv defaultSettings manager
logger <- newStderrLoggerSet defaultBufSize
pure $ Env
......@@ -252,7 +252,7 @@ newEnv port file = do
, _env_conn = conn
, _env_repo = repo
, _env_manager = manager
, _env_scrapers = scrapers_env
--, _env_scrapers = scrapers_env
, _env_self_url = self_url
}
......@@ -305,7 +305,7 @@ withDevEnv iniPath k = do
runCmdRepl :: Show err => Cmd' DevEnv err a -> IO a
runCmdRepl f = withDevEnv "gargantext.ini" $ \env -> runCmdDev env f
runCmdReplServantErr :: Cmd' DevEnv ServantErr a -> IO a
runCmdReplServantErr :: Cmd' DevEnv ServerError a -> IO a
runCmdReplServantErr = runCmdRepl
-- Use only for dev
......@@ -324,5 +324,5 @@ runCmdDevNoErr :: DevEnv -> Cmd' DevEnv () a -> IO a
runCmdDevNoErr = runCmdDev
-- Use only for dev
runCmdDevServantErr :: DevEnv -> Cmd' DevEnv ServantErr a -> IO a
runCmdDevServantErr :: DevEnv -> Cmd' DevEnv ServerError a -> IO a
runCmdDevServantErr = runCmdDev
{-|
Module : Gargantext.API.Table
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-- TODO-ACCESS: CanGetNode
-- TODO-EVENTS: No events as this is a read only query.
Node API
-------------------------------------------------------------------
-- TODO-ACCESS: access by admin only.
-- At first let's just have an isAdmin check.
-- Later: check userId CanDeleteNodes Nothing
-- TODO-EVENTS: DeletedNodes [NodeId]
-- {"tag": "DeletedNodes", "nodes": [Int*]}
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Table
where
import Data.Aeson.TH (deriveJSON)
import Data.Maybe
import Data.Swagger
import Data.Text (Text())
import GHC.Generics (Generic)
import Gargantext.API.Ngrams (TabType(..))
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),runViewAuthorsDoc)
import Gargantext.Database.Learn (FavOrTrash(..), moreLike)
import Gargantext.Database.TextSearch
import Gargantext.Database.Types.Node
import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
------------------------------------------------------------------------
type TableApi = Summary " Table API"
:> ReqBody '[JSON] TableQuery
:> Post '[JSON] [FacetDoc]
--{-
data TableQuery = TableQuery
{ tq_offset :: Int
, tq_limit :: Int
, tq_orderBy :: OrderBy
, tq_view :: TabType
, tq_query :: Text
} deriving (Generic)
$(deriveJSON (unPrefix "tq_") ''TableQuery)
instance ToSchema TableQuery where
declareNamedSchema =
genericDeclareNamedSchema
defaultSchemaOptions {fieldLabelModifier = drop 3}
instance Arbitrary TableQuery where
arbitrary = elements [TableQuery 0 10 DateAsc Docs "electrodes"]
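-- A hedged sketch of the JSON this endpoint accepts (field names follow the
-- unPrefix "tq_" derivation above; the orderBy/view encodings are assumed to
-- mirror the constructor names):
--   { "offset": 0, "limit": 10, "orderBy": "DateAsc", "view": "Docs", "query": "electrodes" }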
tableApi :: NodeId -> TableQuery -> Cmd err [FacetDoc]
tableApi cId (TableQuery o l order ft "") = getTable cId (Just ft) (Just o) (Just l) (Just order)
tableApi cId (TableQuery o l order ft q) = case ft of
Docs -> searchInCorpus cId False [q] (Just o) (Just l) (Just order)
Trash -> searchInCorpus cId True [q] (Just o) (Just l) (Just order)
x -> panic $ "not implemented in tableApi " <> (cs $ show x)
getTable :: NodeId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
-> Maybe OrderBy -> Cmd err [FacetDoc]
getTable cId ft o l order =
case ft of
(Just Docs) -> runViewDocuments cId False o l order
(Just Trash) -> runViewDocuments cId True o l order
(Just MoreFav) -> moreLike cId o l order IsFav
(Just MoreTrash) -> moreLike cId o l order IsTrash
x -> panic $ "not implemented in getTable: " <> (cs $ show x)
getPairing :: ContactId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
-> Maybe OrderBy -> Cmd err [FacetDoc]
getPairing cId ft o l order =
case ft of
(Just Docs) -> runViewAuthorsDoc cId False o l order
(Just Trash) -> runViewAuthorsDoc cId True o l order
_ -> panic $ "not implemented: get Pairing" <> (cs $ show ft)
{-|
Module : Gargantext.API.Utils
Description : Server API main Types
Copyright : (c) CNRS, 2017-Present
License : BSD3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mainly copied from Servant.Job.Utils (Thanks)
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.API.Utils
where
import Gargantext.Prelude
import Data.Maybe (Maybe, fromMaybe)
import Prelude (String)
import qualified Data.Text as T
import Data.Swagger
import Data.Text (Text)
swaggerOptions :: Text -> SchemaOptions
swaggerOptions pref = defaultSchemaOptions
{ Data.Swagger.fieldLabelModifier = modifier pref
, Data.Swagger.unwrapUnaryRecords = False
}
modifier :: Text -> String -> String
modifier pref field = T.unpack
$ T.stripPrefix pref (T.pack field) ?! "Expecting prefix " <> T.unpack pref
infixr 4 ?|
-- Reverse infix form of "fromMaybe"
(?|) :: Maybe a -> a -> a
(?|) = flip fromMaybe
infixr 4 ?!
-- Reverse infix form of "fromJust" with a custom error message
(?!) :: Maybe a -> String -> a
(?!) ma' msg = ma' ?| panic (T.pack msg)
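-- Hedged usage examples of the two operators (illustrative bindings only):
withDefault :: Int
withDefault = Nothing ?| 0            -- 0: default returned on Nothing

orPanic :: Text
orPanic = Just "x" ?! "unreachable"   -- "x": message used only on Nothing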
......@@ -12,6 +12,8 @@ Portability : POSIX
module Gargantext.Core
where
------------------------------------------------------------------------
-- | Language of a Text
-- For simplicity, we assume the text has a homogeneous language
......
{-|
Module : Gargantext.Core.Flow
Description : Core Flow main Types
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ConstrainedClassMethods #-}
module Gargantext.Core.Flow where
import Control.Lens (Lens')
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Text.Terms (TermType)
import Gargantext.Core (Lang)
import Gargantext.Database.Schema.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types.Main (HashId)
import Gargantext.Database.Types.Node -- (HyperdataDocument(..))
import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
import Gargantext.Database.Node.Document.Insert (AddUniqId, InsertDb)
import Gargantext.Database.Utils (Cmd)
type FlowCorpus a = ( AddUniqId a
, UniqId a
, InsertDb a
, ExtractNgramsT a
, HasText a
)
class UniqId a
where
uniqId :: Lens' a (Maybe HashId)
class ExtractNgramsT h
where
extractNgramsT :: HasText h => TermType Lang -> h -> Cmd err (Map Ngrams (Map NgramsType Int))
class HasText h
where
hasText :: h -> [Text]
------------------------------------------------------------------------
instance UniqId HyperdataDocument
where
uniqId = hyperdataDocument_uniqId
instance UniqId HyperdataContact
where
uniqId = hc_uniqId
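-- A hedged sketch of how these classes compose (helper name is illustrative):
previewTexts :: HasText h => h -> [Text]
previewTexts = take 2 . hasText  -- first texts of any document-like value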
......@@ -21,6 +21,7 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, TokenTag(..), POS(..), NER(..)
, Label, Stems
, HasInvalidError(..), assertValid
, Name
) where
import Control.Lens (Prism', (#))
......@@ -42,6 +43,7 @@ import Gargantext.Prelude
import GHC.Generics
------------------------------------------------------------------------
type Name = Text
type Term = Text
type Stems = Set Text
type Label = [Text]
......
......@@ -114,39 +114,16 @@ fromListTypeId i = lookup i $ fromList [ (listTypeId l, l) | l <- [minBound..max
-- | TfidfCorpus | TfidfGlobal | TirankLocal | TirankGlobal
-- | Community Manager Use Case
type Annuaire = NodeCorpus
-- | Favorites Node enable Swap Node with some synonyms for clarity
type NodeSwap = Node HyperdataResource
-- | Then a Node can be a List which has some synonyms
type List = Node HyperdataList
type StopList = List
type MainList = List
type MapList = List
type GroupList = List
-- | Then a Node can be a Score which has some synonyms
type Score = Node HyperdataScore
type Occurrences = Score
type Cooccurrences = Score
type Specclusion = Score
type Genclusion = Score
type Cvalue = Score
type Tficf = Score
---- TODO All these Tfidf* will be replaced with TFICF
type TfidfCorpus = Tficf
type TfidfGlobal = Tficf
type TirankLocal = Tficf
type TirankGlobal = Tficf
--
-- Temporary types to be removed
type ErrorMessage = Text
-- Queries
type Limit = Int
type Offset = Int
type IsTrash = Bool
------------------------------------------------------------------------
-- All the database is structured like a hierarchical tree
......
......@@ -98,7 +98,7 @@ mv :: NodeId -> ParentId -> Cmd err [Int]
mv n p = U.update $ U.Move n p
-- | TODO get Children or Node
get :: PWD -> Cmd err [NodeAny]
get :: PWD -> Cmd err [Node HyperdataAny]
get [] = pure []
get pwd = runOpaQuery $ selectNodesWithParentID (last pwd)
......@@ -107,10 +107,10 @@ home :: Cmd err PWD
home = map _node_id <$> getNodesWithParentId 0 Nothing
-- | ls == get Children
ls :: PWD -> Cmd err [NodeAny]
ls :: PWD -> Cmd err [Node HyperdataAny]
ls = get
tree :: PWD -> Cmd err [NodeAny]
tree :: PWD -> Cmd err [Node HyperdataAny]
tree p = do
ns <- get p
children <- mapM (\n -> get [_node_id n]) ns
......
......@@ -46,6 +46,7 @@ nodeTypeId n =
NodeCorpusV3 -> 3
NodeCorpus -> 30
NodeAnnuaire -> 31
NodeTexts -> 40
NodeDocument -> 4
NodeContact -> 41
--NodeSwap -> 19
......@@ -60,6 +61,7 @@ nodeTypeId n =
NodePhylo -> 90
NodeDashboard -> 7
NodeChart -> 51
NodeNoteBook -> 88
-- Cooccurrences -> 9
--
......
......@@ -65,11 +65,11 @@ import qualified Opaleye.Internal.Unpackspec()
--instance FromJSON Facet
--instance ToJSON Facet
type Favorite = Bool
type Favorite = Int
type Title = Text
-- TODO remove Title
type FacetDoc = Facet NodeId UTCTime Title HyperdataDocument Favorite Int
type FacetDoc = Facet NodeId UTCTime Title HyperdataDocument (Maybe Favorite) (Maybe Double)
type FacetSources = FacetDoc
type FacetAuthors = FacetDoc
type FacetTerms = FacetDoc
......@@ -146,12 +146,12 @@ instance ToSchema FacetDoc
-- | Mock and Quickcheck instances
instance Arbitrary FacetDoc where
arbitrary = elements [ FacetDoc id' (jour year 01 01) t hp fav ngramCount
arbitrary = elements [ FacetDoc id' (jour year 01 01) t hp (Just cat) (Just ngramCount)
| id' <- [1..10]
, year <- [1990..2000]
, t <- ["title", "another title"]
, hp <- arbitraryHyperdataDocuments
, fav <- [True, False]
, cat <- [0..2]
, ngramCount <- [3..100]
]
......@@ -164,12 +164,11 @@ type FacetDocRead = Facet (Column PGInt4 )
(Column PGTimestamptz)
(Column PGText )
(Column PGJsonb )
(Column PGBool)
(Column PGInt4 )
(Column (Nullable PGInt4)) -- Category
(Column (Nullable PGFloat8)) -- Score
-----------------------------------------------------------------------
-----------------------------------------------------------------------
type Trash = Bool
data OrderBy = DateAsc | DateDesc
| TitleAsc | TitleDesc
| ScoreDesc | ScoreAsc
......@@ -197,13 +196,13 @@ instance Arbitrary OrderBy
arbitrary = elements [minBound..maxBound]
runViewAuthorsDoc :: ContactId -> Trash -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc]
runViewAuthorsDoc :: ContactId -> IsTrash -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc]
runViewAuthorsDoc cId t o l order = runOpaQuery $ filterWith o l order $ viewAuthorsDoc cId t ntId
where
ntId = NodeDocument
-- TODO add delete ?
viewAuthorsDoc :: ContactId -> Trash -> NodeType -> Query FacetDocRead
viewAuthorsDoc :: ContactId -> IsTrash -> NodeType -> Query FacetDocRead
viewAuthorsDoc cId _ nt = proc () -> do
(doc,(_,(_,(_,contact)))) <- queryAuthorsDoc -< ()
......@@ -215,7 +214,7 @@ viewAuthorsDoc cId _ nt = proc () -> do
restrict -< _node_id contact .== (toNullable $ pgNodeId cId)
restrict -< _node_typename doc .== (pgInt4 $ nodeTypeId nt)
returnA -< FacetDoc (_node_id doc) (_node_date doc) (_node_name doc) (_node_hyperdata doc) (pgBool True) (pgInt4 1)
returnA -< FacetDoc (_node_id doc) (_node_date doc) (_node_name doc) (_node_hyperdata doc) (toNullable $ pgInt4 1) (toNullable $ pgDouble 1)
queryAuthorsDoc :: Query (NodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))
queryAuthorsDoc = leftJoin5 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeTable cond12 cond23 cond34 cond45
......@@ -237,21 +236,22 @@ queryAuthorsDoc = leftJoin5 queryNodeTable queryNodeNgramTable queryNgramsTable
------------------------------------------------------------------------
runViewDocuments :: CorpusId -> Trash -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc]
runViewDocuments :: CorpusId -> IsTrash -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc]
runViewDocuments cId t o l order =
runOpaQuery $ filterWith o l order $ viewDocuments cId t ntId
where
ntId = nodeTypeId NodeDocument
viewDocuments :: CorpusId -> Trash -> NodeTypeId -> Query FacetDocRead
viewDocuments :: CorpusId -> IsTrash -> NodeTypeId -> Query FacetDocRead
viewDocuments cId t ntId = proc () -> do
n <- queryNodeTable -< ()
nn <- queryNodeNodeTable -< ()
restrict -< _node_id n .== nn_node2_id nn
restrict -< nn_node1_id nn .== (pgNodeId cId)
restrict -< _node_typename n .== (pgInt4 ntId)
restrict -< nn_delete nn .== (pgBool t)
returnA -< FacetDoc (_node_id n) (_node_date n) (_node_name n) (_node_hyperdata n) (nn_favorite nn) (pgInt4 1)
restrict -< if t then nn_category nn .== (pgInt4 0)
else nn_category nn .>= (pgInt4 1)
returnA -< FacetDoc (_node_id n) (_node_date n) (_node_name n) (_node_hyperdata n) (toNullable $ nn_category nn) (toNullable $ nn_score nn)
------------------------------------------------------------------------
......
{-|
Module : Gargantext.Database.Learn
Description : Learn Small Data Analytics with big data connection (DB)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.Database.Learn where
import Data.Text (Text)
import Data.Tuple (snd)
import Data.Maybe
import Gargantext.Database.Facet
import Gargantext.Database.Types.Node
import Gargantext.Prelude
import Gargantext.Text.Learn
import qualified Data.List as List
import qualified Data.Text as Text
--import Gargantext.Database.Schema.NodeNode (nodeNodesCategory)
import Gargantext.Database.Utils (Cmd)
import Gargantext.Core.Types (Offset, Limit)
data FavOrTrash = IsFav | IsTrash
deriving (Eq)
moreLike :: CorpusId -> Maybe Offset -> Maybe Limit -> Maybe OrderBy
-> FavOrTrash -> Cmd err [FacetDoc]
moreLike cId o l order ft = do
priors <- getPriors ft cId
moreLikeWith cId o l order ft priors
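-- A hedged REPL sketch (corpus id and ini path are placeholders; withDevEnv
-- and runCmdDev come from Gargantext.API.Settings):
-- λ> withDevEnv "gargantext.ini" $ \env ->
--      runCmdDev env (moreLike 42 Nothing (Just 10) Nothing IsFav)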
---------------------------------------------------------------------------
getPriors :: FavOrTrash -> CorpusId -> Cmd err (Events Bool)
getPriors ft cId = do
docs_fav <- filter (\(FacetDoc _ _ _ _ f _) -> f == Just 2)
<$> runViewDocuments cId False Nothing Nothing Nothing
docs_trash <- List.take (List.length docs_fav)
<$> runViewDocuments cId True Nothing Nothing Nothing
let priors = priorEventsWith text (fav2bool ft) ( List.zip (repeat False) docs_fav
<> List.zip (repeat True ) docs_trash
)
pure priors
moreLikeWith :: CorpusId -> Maybe Offset -> Maybe Limit -> Maybe OrderBy
-> FavOrTrash -> Events Bool -> Cmd err [FacetDoc]
moreLikeWith cId o l order ft priors = do
docs_test <- filter (\(FacetDoc _ _ _ _ f _) -> f == Just 1)
<$> runViewDocuments cId False o Nothing order
let results = map fst
$ filter ((==) (Just $ not $ fav2bool ft) . snd)
$ map (\f -> (f, detectDefaultWithPriors text priors f)) docs_test
pure $ List.take (maybe 10 identity l) results
---------------------------------------------------------------------------
fav2bool :: FavOrTrash -> Bool
fav2bool ft = if (==) ft IsFav then True else False
text :: FacetDoc -> Text
text (FacetDoc _ _ _ h _ _) = title <> " " <> Text.take 100 abstr
where
title = maybe "" identity (_hyperdataDocument_title h)
abstr = maybe "" identity (_hyperdataDocument_abstract h)
---------------------------------------------------------------------------
{-
apply :: (FlowCmdM env e m) => FavOrTrash -> CorpusId -> [NodeId] -> m [Int]
apply favTrash cId ns = case favTrash of
IsFav -> nodeNodesCategory $ map (\n -> (cId, n, 2)) ns
IsTrash -> nodeNodesCategory $ map (\n -> (cId, n, 0)) ns
moreLikeAndApply :: FlowCmdM DevEnv GargError m => FavOrTrash -> CorpusId -> m [Int]
moreLikeAndApply ft cId = do
priors <- getPriors ft cId
moreLikeWithAndApply priors ft cId
moreLikeWithAndApply :: FlowCmdM DevEnv GargError m => Events Bool -> FavOrTrash -> CorpusId -> m [Int]
moreLikeWithAndApply priors ft cId = do
ids <- map facetDoc_id <$> moreLikeWith cId ft priors
apply ft cId ids
-}
......@@ -35,7 +35,6 @@ import Gargantext.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
import qualified Data.Map as Map
--import qualified Data.Vector.Storable as Vec
getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text])
......@@ -44,34 +43,6 @@ getMetrics cId maybeListId tabType maybeLimit = do
pure (ngs, scored myCooc)
{- | TODO remove unused function
getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text])
getMetrics cId maybeListId tabType maybeLimit = do
(ngs, ngs', metrics) <- getLocalMetrics cId maybeListId tabType maybeLimit
(_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster "" (Nothing :: Maybe HyperdataCorpus)
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster
metrics' <- getTficfWith cId masterCorpusId (lIds <> [lId]) (ngramsTypeFromTabType tabType) ngs'
pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics'])
getLocalMetrics :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m ( Map Text (ListType, Maybe Text)
, Map Text (Maybe RootTerm)
, Map Text (Vec.Vector Double)
)
getLocalMetrics cId maybeListId tabType maybeLimit = do
(ngs, ngs', myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
pure (ngs, ngs', localMetrics myCooc)
-}
getNgramsCooc :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m ( Map Text (ListType, Maybe Text)
......@@ -100,6 +71,7 @@ getNgrams :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType
-> m (Map Text (ListType, Maybe Text), Map Text (Maybe RootTerm))
getNgrams cId maybeListId tabType = do
lId <- case maybeListId of
Nothing -> defaultList cId
Just lId' -> pure lId'
......
......@@ -149,7 +149,7 @@ getNodesByNgramsUser cId nt =
WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False
AND nn.category > 0
GROUP BY nng.node2_id, ng.terms
ORDER BY (nng.node2_id, ng.terms) DESC
-- LIMIT ?
......@@ -210,7 +210,7 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False
AND nn.category > 0
GROUP BY nng.node2_id, ng.terms
|]
......@@ -247,7 +247,7 @@ queryNgramsOnlyByNodeUser = [sql|
WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False
AND nn.category > 0
GROUP BY ng.terms, nng.node2_id
|]
......@@ -298,10 +298,6 @@ getNodesByNgramsMaster ucId mcId = Map.unionsWith (<>)
<$> mapM (selectNgramsByNodeMaster 1000 ucId mcId) [0,500..10000]
type Limit = Int
type Offset = Int
selectNgramsByNodeMaster :: Int -> UserCorpusId -> MasterCorpusId -> Int -> Cmd err [(NodeId, Text)]
selectNgramsByNodeMaster n ucId mcId p = runPGSQuery
queryNgramsByNodeMaster'
......@@ -330,7 +326,7 @@ SELECT n.id, ng.terms FROM nodes n
WHERE nn.node1_id = ? -- UserCorpusId
-- AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False
AND nn.category > 0
AND node_pos(n.id,?) >= ?
AND node_pos(n.id,?) < ?
GROUP BY n.id, ng.terms
......
......@@ -27,7 +27,7 @@ import Gargantext.Prelude
import Opaleye
import Control.Arrow (returnA)
selectNgramsByDoc :: [CorpusId] -> DocumentId -> NgramsType -> Cmd err [Text]
selectNgramsByDoc :: [CorpusId] -> DocId -> NgramsType -> Cmd err [Text]
selectNgramsByDoc cIds dId nt = runOpaQuery (query cIds dId nt)
where
......@@ -44,6 +44,6 @@ selectNgramsByDoc cIds dId nt = runOpaQuery (query cIds dId nt)
returnA -< ngrams_terms ng
postNgrams :: CorpusId -> DocumentId -> [Text] -> Cmd err Int
postNgrams :: CorpusId -> DocId -> [Text] -> Cmd err Int
postNgrams = undefined
......@@ -43,7 +43,7 @@ getChildren pId _ maybeNodeType maybeOffset maybeLimit = runOpaQuery
selectChildren :: ParentId -> Maybe NodeType -> Query NodeRead
selectChildren parentId maybeNodeType = proc () -> do
row@(Node nId typeName _ parent_id _ _ _) <- queryNodeTable -< ()
(NodeNode n1id n2id _ _ _) <- queryNodeNodeTable -< ()
(NodeNode n1id n2id _ _) <- queryNodeNodeTable -< ()
let nodeType = maybe 0 nodeTypeId maybeNodeType
restrict -< typeName .== pgInt4 nodeType
......
......@@ -28,7 +28,8 @@ import Data.Time (UTCTime)
import Database.PostgreSQL.Simple.FromField (FromField, fromField)
import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Schema.Node (NodeWrite, Name, node)
import Gargantext.Core.Types (Name)
import Gargantext.Database.Schema.Node (NodeWrite, node)
import Gargantext.Database.Types.Node (Node,Hyperdata,NodeType(..), UserId, AnnuaireId)
import Gargantext.Database.Utils (fromField')
import Gargantext.Prelude
......
......@@ -40,7 +40,6 @@ import Gargantext.Prelude
import GHC.Generics (Generic)
---------------------------------------------------------------------------
add :: ParentId -> [NodeId] -> Cmd err [Only Int]
add pId ns = runPGSQuery queryAdd (Only $ Values fields inputData)
where
......@@ -54,17 +53,16 @@ add_debug pId ns = formatPGSQuery queryAdd (Only $ Values fields inputData)
inputData = prepare pId ns
-- | Input Tables: types of the tables
inputSqlTypes :: [Text]
inputSqlTypes = ["int4","int4","bool","bool"]
inputSqlTypes = ["int4","int4","int4"]
-- | SQL query to add documents
-- TODO return id of added documents only
queryAdd :: Query
queryAdd = [sql|
WITH input_rows(node1_id,node2_id, favorite, delete) AS (?)
INSERT INTO nodes_nodes (node1_id, node2_id, favorite, delete)
WITH input_rows(node1_id,node2_id,category) AS (?)
INSERT INTO nodes_nodes (node1_id, node2_id,category)
SELECT * FROM input_rows
ON CONFLICT (node1_id, node2_id) DO NOTHING -- on unique index
RETURNING 1
......@@ -72,7 +70,7 @@ queryAdd = [sql|
|]
prepare :: ParentId -> [NodeId] -> [InputData]
prepare pId ns = map (\nId -> InputData pId nId False False) ns
prepare pId ns = map (\nId -> InputData pId nId) ns
------------------------------------------------------------------------
-- * Main Types used
......@@ -80,14 +78,11 @@ prepare pId ns = map (\nId -> InputData pId nId False False) ns
data InputData = InputData { inNode1_id :: NodeId
, inNode2_id :: NodeId
, inNode_fav :: Bool
, inNode_del :: Bool
} deriving (Show, Generic, Typeable)
instance ToRow InputData where
toRow inputData = [ toField (inNode1_id inputData)
, toField (inNode2_id inputData)
, toField (inNode_fav inputData)
, toField (inNode_del inputData)
, toField (1 :: Int)
]
......@@ -21,9 +21,9 @@ import qualified Data.Text as DT
import Database.PostgreSQL.Simple
import Gargantext.Prelude
import Gargantext.Core.Types (Name)
import Gargantext.Database.Utils
import Gargantext.Database.Types.Node (NodeId, ParentId)
import Gargantext.Database.Schema.Node (Name)
-- import Data.ByteString
--rename :: NodeId -> Text -> IO ByteString
......
......@@ -43,33 +43,29 @@ import Opaleye
import Control.Arrow (returnA)
import qualified Opaleye as O
data NodeNodePoly node1_id node2_id score fav del
data NodeNodePoly node1_id node2_id score cat
= NodeNode { nn_node1_id :: node1_id
, nn_node2_id :: node2_id
, nn_score :: score
, nn_favorite :: fav
, nn_delete :: del
, nn_category :: cat
} deriving (Show)
type NodeNodeWrite = NodeNodePoly (Column (PGInt4))
(Column (PGInt4))
(Maybe (Column (PGFloat8)))
(Maybe (Column (PGBool)))
(Maybe (Column (PGBool)))
(Maybe (Column (PGInt4)))
type NodeNodeRead = NodeNodePoly (Column (PGInt4))
(Column (PGInt4))
(Column (PGFloat8))
(Column (PGBool))
(Column (PGBool))
(Column (PGInt4))
type NodeNodeReadNull = NodeNodePoly (Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGFloat8))
(Column (Nullable PGBool))
(Column (Nullable PGBool))
(Column (Nullable PGInt4))
type NodeNode = NodeNodePoly Int Int (Maybe Double) (Maybe Bool) (Maybe Bool)
type NodeNode = NodeNodePoly Int Int (Maybe Double) (Maybe Int)
$(makeAdaptorAndInstance "pNodeNode" ''NodeNodePoly)
$(makeLensesWith abbreviatedFields ''NodeNodePoly)
......@@ -79,8 +75,7 @@ nodeNodeTable = Table "nodes_nodes" (pNodeNode
NodeNode { nn_node1_id = required "node1_id"
, nn_node2_id = required "node2_id"
, nn_score = optional "score"
, nn_favorite = optional "favorite"
, nn_delete = optional "delete"
, nn_category = optional "category"
}
)
......@@ -95,33 +90,39 @@ nodesNodes = runOpaQuery queryNodeNodeTable
instance QueryRunnerColumnDefault (Nullable PGInt4) Int where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault (Nullable PGFloat8) Int where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault (Nullable PGFloat8) Double where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGBool (Maybe Bool) where
instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
------------------------------------------------------------------------
-- | Category management (0 = trash, 1 = neutral, 2 = favorite)
nodeToFavorite :: CorpusId -> DocId -> Bool -> Cmd err [Int]
nodeToFavorite cId dId b = map (\(PGS.Only a) -> a) <$> runPGSQuery favQuery (b,cId,dId)
nodeNodeCategory :: CorpusId -> DocId -> Int -> Cmd err [Int]
nodeNodeCategory cId dId c = map (\(PGS.Only a) -> a) <$> runPGSQuery favQuery (c,cId,dId)
where
favQuery :: PGS.Query
favQuery = [sql|UPDATE nodes_nodes SET favorite = ?
favQuery = [sql|UPDATE nodes_nodes SET category = ?
WHERE node1_id = ? AND node2_id = ?
RETURNING node2_id;
|]
nodesToFavorite :: [(CorpusId,DocId,Bool)] -> Cmd err [Int]
nodesToFavorite inputData = map (\(PGS.Only a) -> a)
<$> runPGSQuery trashQuery (PGS.Only $ Values fields inputData)
nodeNodesCategory :: [(CorpusId,DocId,Int)] -> Cmd err [Int]
nodeNodesCategory inputData = map (\(PGS.Only a) -> a)
<$> runPGSQuery catQuery (PGS.Only $ Values fields inputData)
where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","bool"]
trashQuery :: PGS.Query
trashQuery = [sql| UPDATE nodes_nodes as old SET
favorite = new.favorite
from (?) as new(node1_id,node2_id,favorite)
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","int4"]
catQuery :: PGS.Query
catQuery = [sql| UPDATE nodes_nodes as old SET
category = new.category
from (?) as new(node1_id,node2_id,category)
WHERE old.node1_id = new.node1_id
AND old.node2_id = new.node2_id
RETURNING new.node2_id
......@@ -144,24 +145,23 @@ queryDocs :: CorpusId -> O.Query (Column PGJsonb)
queryDocs cId = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< ( nn_node1_id nn) .== (toNullable $ pgNodeId cId)
restrict -< ( nn_delete nn) .== (toNullable $ pgBool False)
restrict -< ( nn_category nn) .>= (toNullable $ pgInt4 1)
restrict -< (_node_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
returnA -< view (node_hyperdata) n
selectDocNodes :: CorpusId -> Cmd err [NodeDocument]
selectDocNodes :: CorpusId -> Cmd err [Node HyperdataDocument]
selectDocNodes cId = runOpaQuery (queryDocNodes cId)
queryDocNodes :: CorpusId -> O.Query NodeRead
queryDocNodes cId = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< ( nn_node1_id nn) .== (toNullable $ pgNodeId cId)
restrict -< ( nn_delete nn) .== (toNullable $ pgBool False)
restrict -< ( nn_category nn) .>= (toNullable $ pgInt4 1)
restrict -< (_node_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
returnA -< n
joinInCorpus :: O.Query (NodeRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeTable queryNodeNodeTable cond
where
......
......@@ -56,19 +56,21 @@ queryInDatabase _ q = proc () -> do
------------------------------------------------------------------------
-- | todo add limit and offset and order
searchInCorpus :: CorpusId -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc]
searchInCorpus cId q o l order = runOpaQuery (filterWith o l order $ queryInCorpus cId q')
searchInCorpus :: CorpusId -> IsTrash -> [Text] -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc]
searchInCorpus cId t q o l order = runOpaQuery (filterWith o l order $ queryInCorpus cId t q')
where
q' = intercalate " | " $ map stemIt q
queryInCorpus :: CorpusId -> Text -> O.Query FacetDocRead
queryInCorpus cId q = proc () -> do
queryInCorpus :: CorpusId -> IsTrash -> Text -> O.Query FacetDocRead
queryInCorpus cId t q = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< ( nn_node1_id nn) .== (toNullable $ pgNodeId cId)
restrict -< ( nn_delete nn) .== (toNullable $ pgBool False)
restrict -< if t
then ( nn_category nn) .== (toNullable $ pgInt4 0)
else ( nn_category nn) .>= (toNullable $ pgInt4 1)
restrict -< (_ns_search n) @@ (pgTSQuery (unpack q))
restrict -< (_ns_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
returnA -< FacetDoc (_ns_id n) (_ns_date n) (_ns_name n) (_ns_hyperdata n) (pgBool True) (pgInt4 1)
returnA -< FacetDoc (_ns_id n) (_ns_date n) (_ns_name n) (_ns_hyperdata n) (nn_category nn) (nn_score nn)
joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
......
......@@ -98,7 +98,7 @@ dbTree rootId = map (\(nId, tId, pId, n) -> DbTreeNode nId tId pId n) <$> runPGS
FROM nodes AS c
INNER JOIN tree AS s ON c.parent_id = s.id
WHERE c.typename IN (2,3,30,31,7,9,90)
WHERE c.typename IN (2,3,5,30,31,40,7,9,90)
)
SELECT * from tree;
|] (Only rootId)
......
......@@ -19,6 +19,7 @@ Portability : POSIX
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-- {-# LANGUAGE DuplicateRecordFields #-}
module Gargantext.Database.Types.Node
......@@ -57,6 +58,7 @@ import Test.QuickCheck (elements)
import Gargantext.Prelude
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Viz.Phylo (Phylo)
--import Gargantext.Database.Utils
------------------------------------------------------------------------
newtype NodeId = NodeId Int
......@@ -75,6 +77,35 @@ instance FromField NodeId where
instance ToSchema NodeId
type NodeTypeId = Int
type NodeName = Text
type TSVector = Text
------------------------------------------------------------------------
data NodePoly id typename userId
parentId name date
hyperdata = Node { _node_id :: id
, _node_typename :: typename
, _node_userId :: userId
, _node_parentId :: parentId
, _node_name :: name
, _node_date :: date
, _node_hyperdata :: hyperdata
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_node_") ''NodePoly)
$(makeLenses ''NodePoly)
-- | NodePoly indicates that Node has a Polymorphism Type
type Node json = NodePoly NodeId NodeTypeId UserId (Maybe ParentId) NodeName UTCTime json
------------------------------------------------------------------------
instance FromHttpApiData NodeId where
parseUrlPiece n = pure $ NodeId $ (read . cs) n
......@@ -86,7 +117,7 @@ type ParentId = NodeId
type CorpusId = NodeId
type ListId = NodeId
type DocumentId = NodeId
type DocId = DocumentId -- todo: remove this
type DocId = NodeId
type RootId = NodeId
type MasterCorpusId = CorpusId
type UserCorpusId = CorpusId
......@@ -126,7 +157,6 @@ $(deriveJSON (unPrefix "statusV3_") ''StatusV3)
------------------------------------------------------------------------
-- Only Hyperdata types should be member of this type class.
class Hyperdata a
------------------------------------------------------------------------
data HyperdataDocumentV3 = HyperdataDocumentV3 { hyperdataDocumentV3_publication_day :: !(Maybe Int)
......@@ -149,6 +179,7 @@ data HyperdataDocumentV3 = HyperdataDocumentV3 { hyperdataDocumentV3_publication
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataDocumentV3_") ''HyperdataDocumentV3)
class Hyperdata a
instance Hyperdata HyperdataDocumentV3
------------------------------------------------------------------------
......@@ -305,6 +336,14 @@ hyperdataCorpus = case decode corpusExample of
instance Arbitrary HyperdataCorpus where
arbitrary = pure hyperdataCorpus -- TODO
------------------------------------------------------------------------
data HyperdataList = HyperdataList {hd_list :: !(Maybe Text)
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hd_") ''HyperdataList)
instance Hyperdata HyperdataList
------------------------------------------------------------------------
data HyperdataAnnuaire = HyperdataAnnuaire { hyperdataAnnuaire_title :: !(Maybe Text)
, hyperdataAnnuaire_desc :: !(Maybe Text)
......@@ -329,14 +368,10 @@ instance Arbitrary HyperdataAny where
arbitrary = pure $ HyperdataAny mempty -- TODO produce arbitrary objects
------------------------------------------------------------------------
data HyperdataList = HyperdataList { hyperdataList_preferences :: !(Maybe Text)
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataList_") ''HyperdataList)
instance Hyperdata HyperdataList
instance Arbitrary HyperdataList where
arbitrary = elements [HyperdataList (Just "from list A")]
{-
instance Arbitrary HyperdataList' where
arbitrary = elements [HyperdataList' (Just "from list A")]
-}
----
data HyperdataListModel = HyperdataListModel { _hlm_params :: !(Int, Int)
......@@ -384,6 +419,7 @@ instance Hyperdata HyperdataGraph
-- TODO add the Graph Structure here
data HyperdataPhylo = HyperdataPhylo { hyperdataPhylo_preferences :: !(Maybe Text)
, hyperdataPhylo_data :: !(Maybe Phylo)
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataPhylo_") ''HyperdataPhylo)
......@@ -398,41 +434,26 @@ $(deriveJSON (unPrefix "hyperdataNotebook_") ''HyperdataNotebook)
instance Hyperdata HyperdataNotebook
-- | NodePoly indicates that Node has a Polymorphism Type
type Node json = NodePoly NodeId NodeTypeId UserId (Maybe ParentId) NodeName UTCTime json
-- type Node json = NodePoly NodeId NodeTypeId UserId ParentId NodeName UTCTime json
type NodeTypeId = Int
type NodeName = Text
type TSVector = Text
-- | Then a Node can be either a Folder or a Corpus or a Document
type NodeUser = Node HyperdataUser
type NodeFolder = Node HyperdataFolder
-- | TODO CLEAN
data HyperData = HyperdataTexts { hd_preferences :: Maybe Text }
| HyperdataList' { hd_preferences :: Maybe Text}
deriving (Show, Generic)
type NodeCorpus = Node HyperdataCorpus
type NodeCorpusV3 = Node HyperdataCorpus
type NodeDocument = Node HyperdataDocument
$(deriveJSON (unPrefix "hd_") ''HyperData)
type NodeAnnuaire = Node HyperdataAnnuaire
instance Hyperdata HyperData
-- | Any other nodes
type NodeAny = Node HyperdataAny
---- | Then a Node can be either a Graph or a Phylo or a Notebook
type NodeList = Node HyperdataList
type NodeGraph = Node HyperdataGraph
type NodePhylo = Node HyperdataPhylo
type NodeNotebook = Node HyperdataNotebook
------------------------------------------------------------------------
-- | Then a Node can be either a Folder or a Corpus or a Document
data NodeType = NodeUser
| NodeFolder
| NodeCorpus | NodeCorpusV3 | NodeDocument
| NodeCorpus | NodeCorpusV3 | NodeTexts | NodeDocument
| NodeAnnuaire | NodeContact
| NodeGraph | NodePhylo
| NodeDashboard | NodeChart
| NodeList | NodeListModel deriving (Show, Read, Eq, Generic, Bounded, Enum)
| NodeDashboard | NodeChart | NodeNoteBook
| NodeList | NodeListModel
deriving (Show, Read, Eq, Generic, Bounded, Enum)
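-- Since NodeType derives Bounded and Enum, every node type can be enumerated:
-- allNodeTypes = [minBound .. maxBound] :: [NodeType]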
{-
......@@ -454,23 +475,6 @@ instance FromHttpApiData NodeType
instance ToParamSchema NodeType
instance ToSchema NodeType
------------------------------------------------------------------------
data NodePoly id typename userId
parentId name date
hyperdata = Node { _node_id :: id
, _node_typename :: typename
, _node_userId :: userId
, _node_parentId :: parentId
, _node_name :: name
, _node_date :: date
, _node_hyperdata :: hyperdata
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_node_") ''NodePoly)
$(makeLenses ''NodePoly)
data NodePolySearch id typename userId
parentId name date
......
......@@ -304,3 +304,6 @@ tail' = listSafeN "tail" tailMay
init' :: Text -> [a] -> [a]
init' = listSafeN "init" initMay
------------------------------------------------------------------------
......@@ -54,9 +54,9 @@ class ReadFile a where
readFile' :: FilePath -> IO a
saveFile :: (MonadReader env m, MonadIO m, HasSettings env, SaveFile a)
writeFile :: (MonadReader env m, MonadIO m, HasSettings env, SaveFile a)
=> a -> m FilePath
saveFile a = do
writeFile a = do
dataPath <- view (settings . fileFolder) <$> ask
(fp,fn) <- liftIO $ (toPath 3) . hash . Text.pack . show <$> newStdGen
......
......@@ -49,9 +49,6 @@ type Corpus a = [Sentence a] -- a list of sentences
-- | Context definitions to build/unbuild contexts.
data SplitContext = Chars Int | Sentences Int | Paragraphs Int
tag :: Text -> [Tag Text]
tag = parseTags
-- | splitBy splits a text into contexts of Chars, Sentences or Paragraphs.
-- For examples at a higher level (sentences and paragraphs), see
-- 'Gargantext.Text.Examples.ex_terms'
......@@ -67,10 +64,9 @@ tag = parseTags
splitBy :: SplitContext -> Text -> [Text]
splitBy (Chars n) = map pack . chunkAlong (n+1) 1 . unpack
splitBy (Sentences n) = map unsentences . chunkAlong (n+1) 1 . sentences
splitBy (Paragraphs _) = map unTag . filter isTagText . tag
splitBy (Paragraphs _) = map unTag . filter isTagText . parseTags
where
unTag :: IsString p => Tag p -> p
unTag (TagText x) = x
unTag _ = ""
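-- Worked example (assuming chunkAlong n s yields sliding windows of size n
-- with stride s): splitBy (Chars 3) "abcdef" == ["abcd","bcde","cdef"],
-- i.e. windows of n+1 characters, moved one character at a time.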
......@@ -12,33 +12,61 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Text.Corpus.API
where
--{-
import GHC.Generics (Generic)
import Data.Aeson
import Data.Text (Text)
import Data.Maybe
import Gargantext.Prelude
--import qualified PUBMED as PubMed
import Gargantext.Core (Lang(..))
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import Test.QuickCheck.Arbitrary
import Test.QuickCheck (elements)
import Data.Swagger
--import qualified Gargantext.Text.Corpus.API.Isidore as Isidore
import qualified Gargantext.Text.Corpus.API.Pubmed as PUBMED
import qualified Gargantext.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Text.Corpus.API.Istex as ISTEX
data ExternalAPIs = ALL
-- | Main Types
data ExternalAPIs = All
| PubMed
| HAL
| IsTex
| IsidoreQuery | IsidoreAuth
| HAL_EN
| HAL_FR
| IsTex_EN
| IsTex_FR
| Isidore_EN
| Isidore_FR
-- | IsidoreAuth
deriving (Show, Eq, Enum, Bounded, Generic)
-- | Main function to get metadata from an external API
get :: ExternalAPIs -> Query -> Maybe Limit -> IO [HyperdataDocument]
get All _ _ = undefined
get PubMed q l = PUBMED.get q l
get HAL_EN q l = HAL.get EN q l
get HAL_FR q l = HAL.get FR q l
get IsTex_EN q l = ISTEX.get EN q l
get IsTex_FR q l = ISTEX.get FR q l
get Isidore_EN q l = ISIDORE.get EN (fromIntegral <$> l) (Just q) Nothing
get Isidore_FR q l = ISIDORE.get FR (fromIntegral <$> l) (Just q) Nothing
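-- Usage sketch: fetch at most 100 PubMed documents matching a query.
-- docs <- get PubMed "text mining" (Just 100)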
-- | Main Instances
instance FromJSON ExternalAPIs
instance ToJSON ExternalAPIs
type Query = Text
externalAPIs :: [ExternalAPIs]
externalAPIs = [minBound..maxBound]
......@@ -48,7 +76,8 @@ instance Arbitrary ExternalAPIs
arbitrary = elements externalAPIs
instance ToSchema ExternalAPIs
{-
crawl :: Crawler -> Query -> IO [PubMed.Doc]
crawl Pubmed = PubMed.crawler
--}
-- | Some Sugar for the documentation
type Query = PUBMED.Query
type Limit = PUBMED.Limit
{-|
Module : Gargantext.Text.Corpus.API.Hal
Description : HAL API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Corpus.API.Hal
where
import Data.Maybe
import Data.Text (Text, pack, intercalate)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import qualified Gargantext.Text.Corpus.Parsers.Date as Date
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import qualified HAL as HAL
import qualified HAL.Doc.Corpus as HAL
import qualified HAL.Client as HAL
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
docs <- HAL.getMetadataWith q (fromIntegral <$> ml)
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) docs
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") Just d)
pure $ HyperdataDocument (Just "Hal")
(Just $ pack $ show i)
Nothing
Nothing
Nothing
Nothing
(Just $ intercalate " " t)
(Just $ foldl (\x y -> x <> ", " <> y) "" aus)
(Just $ foldl (\x y -> x <> ", " <> y) "" affs)
(Just $ maybe "Nothing" identity s)
(Just $ intercalate " " ab)
(fmap (pack . show) utctime)
pub_year
pub_month
pub_day
Nothing
Nothing
Nothing
(Just $ (pack . show) la)
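-- Note: the foldl above yields a leading ", " (e.g. ", a, b"); if that is
-- unintended, intercalate (already imported here) would be a cleaner sketch:
-- (Just $ intercalate ", " aus)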
......@@ -69,9 +69,9 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
(utcTime, (pub_year, pub_month, pub_day)) <- Date.split l (maybe (Just "2019") (Just) d)
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit l (maybe (Just "2019") (Just) d)
pure $ HyperdataDocument (Just "IsidoreApi")
pure $ HyperdataDocument (Just "Isidore")
Nothing
u
Nothing
......@@ -80,7 +80,7 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
(Just $ cleanText $ langText t)
(creator2text <$> as)
Nothing
(_sourceName <$> s)
(Just $ maybe "Nothing" identity $ _sourceName <$> s)
(cleanText <$> langText <$> a)
(fmap (Text.pack . show) utcTime)
(pub_year)
......
{-|
Module : Gargantext.Text.Corpus.API.Istex
Description : ISTEX API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Corpus.API.Istex
where
import Data.Either (either)
import Data.Maybe
import Data.List (concat)
import Data.Maybe (catMaybes)
import Data.Text (Text, pack)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import qualified Gargantext.Text.Corpus.Parsers.Date as Date
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import qualified ISTEX as ISTEX
import qualified ISTEX.Client as ISTEX
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
docs <- ISTEX.getMetadataWith q (fromIntegral <$> ml)
either (panic . pack . show) (toDoc' la) docs
toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la docs' = do
--printDebug "ISTEX" (ISTEX._documents_total docs')
mapM (toDoc la) (ISTEX._documents_hits docs')
-- | TODO remove dateSplit here
-- TODO current year as default
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") (Just . pack . show) d)
pure $ HyperdataDocument (Just "Istex")
(Just i)
Nothing
Nothing
Nothing
Nothing
t
(Just $ foldl (\x y -> x <> ", " <> y) "" (map ISTEX._author_name a))
(Just $ foldl (\x y -> x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a))
(Just $ foldl (\x y -> x <> ", " <> y) "" (catMaybes $ map ISTEX._source_title s))
ab
(fmap (pack . show) utctime)
pub_year
pub_month
pub_day
Nothing
Nothing
Nothing
(Just $ (pack . show) la)
{-|
Module : Gargantext.Text.Corpus.API.Pubmed
Description : Pubmed API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Corpus.API.Pubmed
where
import Data.Maybe
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import qualified Data.Text as Text
import qualified PUBMED as PubMed
import qualified PUBMED.Parser as PubMedDoc
type Query = Text
type Limit = PubMed.Limit
get :: Query -> Maybe Limit -> IO [HyperdataDocument]
get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN)) <$> PubMed.getMetadataWith q l
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
(PubMedDoc.PubMedDate a y m d)
) = HyperdataDocument (Just "PubMed")
Nothing
Nothing
Nothing
Nothing
Nothing
t
(authors aus)
Nothing
j
(abstract as)
(Just $ Text.pack $ show a)
(Just $ fromIntegral y)
(Just m)
(Just d)
Nothing
Nothing
Nothing
(Just $ (Text.pack . show) l)
where
authors :: Maybe [PubMedDoc.Author] -> Maybe Text
authors aus' = case aus' of
Nothing -> Nothing
Just au -> Just $ (Text.intercalate ", ") $ catMaybes $ map PubMedDoc.foreName au
abstract :: Maybe [Text] -> Maybe Text
abstract as' = fmap (Text.intercalate ", ") as'
......@@ -52,8 +52,8 @@ import qualified Gargantext.Text.Corpus.Parsers.WOS as WOS
import qualified Gargantext.Text.Corpus.Parsers.RIS as RIS
import Gargantext.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
import qualified Gargantext.Text.Corpus.Parsers.Date as Date
import Gargantext.Text.Corpus.Parsers.CSV (parseHal)
import Gargantext.Text.Terms.Stop (detectLang)
import Gargantext.Text.Corpus.Parsers.CSV (parseHal, parseCsv)
import Gargantext.Text.Learn (detectLangDefault)
------------------------------------------------------------------------
type ParseError = String
......@@ -88,6 +88,7 @@ parseFormat = undefined
-- TODO: to ease debugging, add the filepath to the error message
parseFile :: FileFormat -> FilePath -> IO [HyperdataDocument]
parseFile CsvHalFormat p = parseHal p
parseFile CsvGargV3 p = parseCsv p
parseFile RisPresse p = join $ mapM (toDoc RIS) <$> snd <$> enrichWith RisPresse <$> readFileWith RIS p
parseFile WOS p = join $ mapM (toDoc WOS) <$> snd <$> enrichWith WOS <$> readFileWith WOS p
parseFile ff p = join $ mapM (toDoc ff) <$> snd <$> enrichWith ff <$> readFileWith ff p
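-- Usage sketch: parse a CSV file exported from HAL into HyperdataDocuments.
-- docs <- parseFile CsvHalFormat "corpus.csv"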
......@@ -96,11 +97,11 @@ toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument
-- TODO use language for RIS
toDoc ff d = do
let abstract = lookup "abstract" d
let lang = maybe EN identity (join $ detectLang <$> (fmap (DT.take 50) abstract))
let lang = maybe EN identity (join $ detectLangDefault <$> (fmap (DT.take 50) abstract))
let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d
(utcTime, (pub_year, pub_month, pub_day)) <- Date.split lang dateToParse
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit lang dateToParse
pure $ HyperdataDocument (Just $ DT.pack $ show ff)
(lookup "doi" d)
......
......@@ -101,7 +101,6 @@ fromDocs docs = V.map fromDocs' docs
---------------------------------------------------------------
-- | Split a document in its context
-- TODO adapt the size of the paragraph according to the corpus average
splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc m splt doc = let docSize = (length $ csv_abstract doc) in
if docSize > 1000
......@@ -113,10 +112,9 @@ splitDoc m splt doc = let docSize = (length $ csv_abstract doc) in
V.fromList [doc]
else
V.fromList [doc]
splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc' contextSize (CsvDoc t s py pm pd abst auth) = V.fromList $ [firstDoc] <> nextDocs
where
splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc' contextSize (CsvDoc t s py pm pd abst auth) = V.fromList $ [firstDoc] <> nextDocs
where
firstDoc = CsvDoc t s py pm pd firstAbstract auth
firstAbstract = head' "splitDoc'1" abstracts
......@@ -196,8 +194,8 @@ delimiter :: Word8
delimiter = fromIntegral $ ord '\t'
------------------------------------------------------------------------
------------------------------------------------------------------------
readCsvOn :: [CsvDoc -> Text] -> FilePath -> IO [Text]
readCsvOn fields fp = V.toList
readCsvOn' :: [CsvDoc -> Text] -> FilePath -> IO [Text]
readCsvOn' fields fp = V.toList
<$> V.map (\l -> intercalate (pack " ") $ map (\field -> field l) fields)
<$> snd
<$> readFile fp
......@@ -231,6 +229,7 @@ readCsvLazyBS bs = case decodeByNameWith csvDecodeOptions bs of
Right csvDocs -> csvDocs
------------------------------------------------------------------------
-- | TODO use readFileLazy
readCsvHal :: FilePath -> IO (Header, Vector CsvHal)
readCsvHal = fmap readCsvHalLazyBS . BL.readFile
......@@ -361,8 +360,35 @@ csvHal2doc (CsvHal title source
Nothing
Nothing
csv2doc :: CsvDoc -> HyperdataDocument
csv2doc (CsvDoc title source
pub_year pub_month pub_day
abstract authors ) = HyperdataDocument (Just "CsvHal")
Nothing
Nothing
Nothing
Nothing
Nothing
(Just title)
(Just authors)
Nothing
(Just source)
(Just abstract)
(Just $ pack . show $ jour (fromIntegral pub_year) pub_month pub_day)
(Just $ fromIntegral pub_year)
(Just pub_month)
(Just pub_day)
Nothing
Nothing
Nothing
Nothing
------------------------------------------------------------------------
parseHal :: FilePath -> IO [HyperdataDocument]
parseHal fp = map csvHal2doc <$> V.toList <$> snd <$> readCsvHal fp
parseHal fp = V.toList <$> V.map csvHal2doc <$> snd <$> readCsvHal fp
------------------------------------------------------------------------
parseCsv :: FilePath -> IO [HyperdataDocument]
parseCsv fp = V.toList <$> V.map csv2doc <$> snd <$> readFile fp
......@@ -18,7 +18,7 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Corpus.Parsers.Date (parse, parseRaw, split) where
module Gargantext.Text.Corpus.Parsers.Date (parse, parseRaw, dateSplit, Year, Month, Day) where
import Data.HashMap.Strict as HM hiding (map)
import Data.Text (Text, unpack, splitOn, pack)
......@@ -40,9 +40,9 @@ import qualified Duckling.Core as DC
------------------------------------------------------------------------
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
split :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
split _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
split l (Just txt) = do
dateSplit :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
dateSplit _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
dateSplit l (Just txt) = do
utcTime <- parse l txt
let (y, m, d) = split' utcTime
pure (Just utcTime, (Just y, Just m,Just d))
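-- Example, per the module header: dateSplit FR (Just "12 avril 2010")
-- should yield (Just 2010-04-12 00:00:00 UTC, (Just 2010, Just 4, Just 12)).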
......
......@@ -19,18 +19,17 @@ TODO: create a separate Lib.
module Gargantext.Text.Corpus.Parsers.GrandDebat
where
import GHC.IO (FilePath)
import Data.Aeson (ToJSON, FromJSON)
import qualified Data.JsonStream.Parser as P
--import Data.Either (either)
import Data.Maybe (Maybe())
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.ByteString.Lazy as DBL
import GHC.Generics (Generic)
import Gargantext.Prelude
import Gargantext.Database.Types.Node
import Gargantext.Core (Lang(..))
import Gargantext.Database.Types.Node
import Gargantext.Prelude
import Gargantext.Prelude.Utils
import qualified Data.ByteString.Lazy as DBL
import qualified Data.JsonStream.Parser as P
import qualified Data.Text as Text
data GrandDebatReference = GrandDebatReference
......@@ -91,16 +90,12 @@ instance ToHyperdataDocument GrandDebatReference
True -> r'
False -> ""
class ReadFile a
where
readFile :: FilePath -> IO a
instance ReadFile [GrandDebatReference]
where
-- | Read JSON: the 3 versions below all work, in order of increasing optimization
--readFile fp = maybe [] identity <$> decode <$> DBL.readFile fp
--readFile fp = either (panic . Text.pack) identity <$> P.eitherDecode <$> DBL.readFile fp
readFile fp = P.parseLazyByteString (P.arrayOf P.value) <$> DBL.readFile fp
readFile' fp = P.parseLazyByteString (P.arrayOf P.value) <$> DBL.readFile fp
{-|
Module : Gargantext.Text.Terms.Stop
Description : Mono Terms module
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO:
- generalize to byteString
- Stop words (and how to learn them).
- Main type here is String; check whether Char operations on Text would be more optimized
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
module Gargantext.Text.Learn -- (detectLang, detectLangs, stopList)
where
import Codec.Serialise
import qualified Data.List as DL
import Data.Maybe (maybe)
import Data.Map.Strict (Map, toList)
import qualified Data.Map.Strict as DM
import GHC.Generics
import Data.String (String)
import Data.Text (Text)
import Data.Text (pack, unpack, toLower)
import Data.Tuple.Extra (both)
import qualified Data.ByteString.Lazy as BSL
import Gargantext.Prelude
import Gargantext.Prelude.Utils
import Gargantext.Core (Lang(..), allLangs)
import Gargantext.Text.Terms.Mono (words)
import Gargantext.Text.Metrics.Count (occurrencesWith)
import qualified Gargantext.Text.Samples.FR as FR
import qualified Gargantext.Text.Samples.EN as EN
--import qualified Gargantext.Text.Samples.DE as DE
--import qualified Gargantext.Text.Samples.SP as SP
--import qualified Gargantext.Text.Samples.CH as CH
------------------------------------------------------------------------
data Candidate = Candidate { stop :: Double
, noStop :: Double
} deriving (Show)
------------------------------------------------------------------------
-- * Analyze candidate
type StringSize = Int
type TotalFreq = Int
type Freq = Int
type Word = String
data CatWord a = CatWord a Word
type CatProb a = Map a Double
type Events a = Map a EventBook
------------------------------------------------------------------------
data EventBook = EventBook { events_freq :: Map String Freq
, events_n :: Map StringSize TotalFreq
}
deriving (Show, Generic)
instance Serialise EventBook
instance (Serialise a, Ord a) => SaveFile (Events a) where
saveFile' f d = BSL.writeFile f (serialise d)
instance (Serialise a, Ord a) => ReadFile (Events a) where
readFile' filepath = deserialise <$> BSL.readFile filepath
------------------------------------------------------------------------
detectStopDefault :: Text -> Maybe Bool
detectStopDefault = undefined
detectBool :: [(Bool, Text)] -> Text -> Maybe Bool
detectBool events = detectDefault False events
detectDefault :: Ord a => a -> [(a, Text)] -> Text -> Maybe a
detectDefault = detectDefaultWith identity
detectDefaultWith :: Ord a => (b -> Text) -> a -> [(a, b)] -> b -> Maybe a
detectDefaultWith f d events = detectDefaultWithPriors f ps
where
ps = priorEventsWith f d events
detectDefaultWithPriors :: Ord b => (a -> Text) -> Events b -> a -> Maybe b
detectDefaultWithPriors f priors = detectCat 99 priors . f
priorEventsWith :: Ord a => (t -> Text) -> a -> [(a, t)] -> Events a
priorEventsWith f d e = toEvents d [0..2] 10 es
where
es = map (\(a,b) -> CatWord a (unpack $ toLower $ f b)) e
------------------------------------------------------------------------
detectLangDefault :: Text -> Maybe Lang
detectLangDefault = detectCat 99 eventLang
where
eventLang :: Events Lang
eventLang = toEvents FR [0..2] 10 [ langWord l | l <- allLangs ]
langWord :: Lang -> CatWord Lang
langWord l = CatWord l (textSample l)
textSample :: Lang -> String
textSample EN = EN.textSample
textSample FR = FR.textSample
--textSample DE = DE.textSample
--textSample SP = SP.textSample
--textSample CH = CH.textSample
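-- Usage sketch: the detector scores a text against the samples above, so
-- detectLangDefault "Ceci est un texte écrit en français." is expected to
-- return Just FR (and Just EN for an English text).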
------------------------------------------------------------------------
detectCat :: Ord a => Int -> Events a -> Text -> Maybe a
detectCat n es = head . map fst . (detectCat' n es) . unpack
where
detectCat' :: Ord a => Int -> Events a -> String -> [(a, Double)]
detectCat' n' es' s = DL.reverse $ DL.sortOn snd
$ toList
$ detectWith n' es' (wordsToBook [0..2] n' s)
detectWith :: Ord a => Int -> Events a -> EventBook -> CatProb a
detectWith n'' el (EventBook mapFreq _) =
DM.unionsWith (+)
$ map DM.fromList
$ map (\(s,m) -> map (\(l,f) -> (l, (fromIntegral m) * f)) $ toPrior n'' s el)
$ filter (\x -> fst x /= " ")
$ DM.toList mapFreq
-- | TODO: monoids (but proba >= 0)
toPrior :: Int -> String -> Events a -> [(a, Double)]
toPrior n'' s el = prior n'' $ pebLang s el
where
pebLang :: String -> Events a -> [(a, (Freq,TotalFreq))]
pebLang st = map (\(l,eb) -> (l, peb st eb)) . DM.toList
peb :: String -> EventBook -> (Freq, TotalFreq)
peb st (EventBook mapFreq mapN) = (fromIntegral a, fromIntegral b)
where
a = maybe 0 identity $ DM.lookup st mapFreq
b = maybe 1 identity $ DM.lookup (length st) mapN
prior :: Int -> [(a, (Freq, TotalFreq))] -> [(a, Double)]
prior i ps = zip ls $ zipWith (\x y -> x^i * y) (map (\(a,_) -> part a (sum $ map fst ps')) ps')
(map (\(a,b) -> a / b) ps')
where
(ls, ps'') = DL.unzip ps
ps' = map (both fromIntegral) ps''
part :: (Eq p, Fractional p) => p -> p -> p
part 0 _ = 0
part _ 0 = 0
part x y = x / y
{-
toProba :: (Eq b, Fractional b, Functor t, Foldable t) =>
t (a, b) -> t (a, b)
toProba xs = map (\(a,b) -> (a, part b total)) xs
where
total = sum $ map snd xs
-}
-- | TODO: monoids
toEvents :: Ord a => a -> [Int] -> Int -> [CatWord a] -> Events a
toEvents e ns n = foldl' (opEvent (+)) (emptyEvent e ns n) . map (toEvent ns n)
where
emptyEvent :: Ord a => a -> [Int] -> Int -> Events a
emptyEvent e' ns' n'= toEvent ns' n' (CatWord e' "")
toEvent :: Ord a => [Int] -> Int -> CatWord a -> Events a
toEvent ns'' n'' (CatWord l txt) = DM.fromList [(l, wordsToBook ns'' n'' txt)]
opEvent :: Ord a => (Freq -> Freq -> Freq) -> Events a -> Events a -> Events a
opEvent f = DM.unionWith (op f)
------------------------------------------------------------------------
emptyEventBook :: [Int] -> Int -> EventBook
emptyEventBook ns n = wordToBook ns n " "
wordsToBook :: [Int] -> Int -> String -> EventBook
wordsToBook ns n txt = foldl' (op (+)) (emptyEventBook ns n) eventsBook
where
ws = map unpack $ words $ pack txt
eventsBook = map (wordToBook ns n) ws
wordToBook :: [Int] -> Int -> Word -> EventBook
wordToBook ns n txt = EventBook ef en
where
chks = allChunks ns n txt
en = DM.fromList $ map (\(n',ns') -> (n', length ns')) $ zip ns chks
ef = foldl' DM.union DM.empty $ map (occurrencesWith identity) chks
op :: (Freq -> Freq -> Freq) -> EventBook -> EventBook -> EventBook
op f (EventBook ef1 en1)
(EventBook ef2 en2) = EventBook (DM.unionWith f ef1 ef2)
(DM.unionWith f en1 en2)
------------------------------------------------------------------------
------------------------------------------------------------------------
allChunks :: [Int] -> Int -> String -> [[String]]
allChunks ns m st = map (\n -> chunks n m st) ns
-- | chunks is the same function as splitBy in Context but for String,
-- not Text (avoiding pack and unpack operations that are not needed here).
chunks :: Int -> Int -> String -> [String]
chunks n m = DL.take m . filter (not . all (== ' '))
. chunkAlong (n+1) 1
. DL.concat
. DL.take 1000
. DL.repeat
. blanks
-- | String preparation
blanks :: String -> String
blanks [] = []
blanks xs = [' '] <> xs <> [' ']
{-
-- Some previous tests to be removed
--import GHC.Base (Functor)
--import Numeric.Probability.Distribution ((??))
--import qualified Numeric.Probability.Distribution as D
-- | Blocks increase the size of the word to ease computations;
-- some border and unexpected effects can happen and need to be tested
blockOf :: Int -> String -> String
blockOf n = DL.concat . DL.take n . DL.repeat
-- * Make the distributions
makeDist :: [String] -> D.T Double String
makeDist = D.uniform . DL.concat . map (DL.concat . allChunks [0,2] 10)
stopDist :: D.T Double String
stopDist = makeDist $ map show ([0..9]::[Int]) <> EN.stopList
candDist :: D.T Double String
candDist = makeDist candList
------------------------------------------------------------------------
sumProba :: Num a => D.T a String -> [Char] -> a
sumProba ds x = sum $ map ((~?) ds) $ DL.concat $ allChunks [0,2] 10 $ map toLower x
-- | Get probability according a distribution
(~?) :: (Num prob, Eq a) => D.T prob a -> a -> prob
(~?) ds x = (==x) ?? ds
------------------------------------------------------------------------
candidate :: [Char] -> Candidate
candidate x = Candidate (sumProba stopDist x) (sumProba candDist x)
------------------------------------------------------------------------
candList :: [String]
candList = [ "france", "alexandre", "mael", "constitution"
, "etats-unis", "associes", "car", "train", "spam"]
--}
......@@ -61,7 +61,7 @@ buildNgramsLists :: Lang -> Int -> Int -> StopSize -> UserCorpusId -> MasterCorp
-> Cmd err (Map NgramsType [NgramsElement])
buildNgramsLists l n m s uCid mCid = do
ngTerms <- buildNgramsTermsList l n m s uCid mCid
--ngTerms <- buildNgramsTermsList' uCid (ngramsGroup l n m) (isStopTerm s . fst) 550 300
--ngTerms <- buildNgramsTermsList' uCid (ngramsGroup l n m) (isStopTerm s . fst) 500 50
othersTerms <- mapM (buildNgramsOthersList uCid identity) [Authors, Sources, Institutes]
pure $ Map.unions $ othersTerms <> [ngTerms]
......@@ -73,7 +73,8 @@ buildNgramsOthersList uCid groupIt nt = do
let
all' = Map.toList ngs
pure $ (toElements GraphTerm $ take 10 all') <> (toElements CandidateTerm $ drop 10 all')
pure $ (toElements GraphTerm all') <> (toElements CandidateTerm all')
--pure $ (toElements GraphTerm $ take 10 all') <> (toElements CandidateTerm $ drop 10 all')
where
toElements nType x = Map.fromList [(nt, [ mkNgramsElement t nType Nothing (mSetFromList [])
| (t,_ns) <- x
......@@ -123,8 +124,8 @@ buildNgramsTermsList l n m s uCid mCid = do
candidates <- sortTficf <$> getTficf' uCid mCid NgramsTerms (ngramsGroup l n m)
let
candidatesSize = 2000
a = 500
b = 500
a = 10
b = 10
candidatesHead = List.take candidatesSize candidates
candidatesTail = List.drop candidatesSize candidates
termList = (toTermList a b ((isStopTerm s) . fst) candidatesHead)
......
......@@ -102,13 +102,26 @@ takeScored listSize incSize = both (map _scored_terms)
linearTakes :: (Ord b1, Ord b2)
=> GraphListSize -> InclusionSize
-> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a])
linearTakes gls incSize speGen incExc = (List.splitAt gls)
linearTakes mls incSize speGen incExc = (List.splitAt mls)
. List.concat
. map (take $ round
$ (fromIntegral gls :: Double)
$ (fromIntegral mls :: Double)
/ (fromIntegral incSize :: Double)
)
. map (sortOn incExc)
. map (sortOn speGen)
. splitEvery incSize
. sortOn speGen
. take 5000
. takePercent (0.70)
. sortOn incExc
takePercent :: Double -> [a] -> [a]
takePercent l xs = List.take l' xs
where
l' = round $ l * (fromIntegral $ List.length xs)
splitTake :: (Int, a -> Bool) -> (Int, a -> Bool) -> [a] -> ([a], [a])
splitTake (a, af) (b, bf) xs = (mpa <> mpb, ca <> cb)
where
(mpa, ca) = List.splitAt a $ List.filter af xs
(mpb, cb) = List.splitAt b $ List.filter bf xs
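-- Numeric sketches: takePercent 0.70 [1..10] keeps the first 7 elements;
-- splitTake (2, even) (1, odd) [1..6] == ([2,4,1], [6,3,5]),
-- i.e. the first n matches of each predicate, then the leftovers of both.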
......@@ -22,4 +22,89 @@ import Data.String (String)
textSample :: String
textSample = "Text mining, also referred to as text data mining, roughly equivalent to text analytics, is the process of deriving high-quality information from text. High-quality information is typically derived through the devising of patterns and trends through means such as statistical pattern learning. Text mining usually involves the process of structuring the input text (usually parsing, along with the addition of some derived linguistic features and the removal of others, and subsequent insertion into a database), deriving patterns within the structured data, and finally evaluation and interpretation of the output. 'High quality' in text mining usually refers to some combination of relevance, novelty, and interestingness. Typical text mining tasks include text categorization, text clustering, concept/entity extraction, production of granular taxonomies, sentiment analysis, document summarization, and entity relation modeling (i.e., learning relations between named entities). Text analysis involves information retrieval, lexical analysis to study word frequency distributions, pattern recognition, tagging/annotation, information extraction, data mining techniques including link and association analysis, visualization, and predictive analytics. The overarching goal is, essentially, to turn text into data for analysis, via application of natural language processing (NLP) and analytical methods. A typical application is to scan a set of documents written in a natural language and either model the document set for predictive classification purposes or populate a database or search index with the information extracted."
stopList :: [String]
stopList =
["a", "a's", "able", "about", "above", "according", "accordingly"
, "across", "actually", "after", "afterwards", "again", "against"
, "ain't", "all", "allow", "allows", "almost", "alone", "along"
, "already", "also", "although", "always", "am", "among", "amongst", "an"
, "analyze", "and", "another", "any", "anybody", "anyhow", "anyone"
, "anything", "anyway", "anyways", "anywhere", "apart", "appear"
, "apply", "appreciate", "appropriate", "are", "aren't", "around"
, "as", "aside", "ask", "asking", "associated", "at", "available"
, "away", "awfully", "b", "based", "be", "became", "because", "become"
, "becomes", "becoming", "been", "before", "beforehand", "behind"
, "being", "believe", "below", "beside", "besides", "best", "better"
, "between", "beyond", "both", "brief", "but", "by", "c", "c'mon", "c's"
, "came", "can", "can't", "cannot", "cant", "cause", "causes", "certain"
, "certainly", "changes", "clearly", "co", "com", "come", "comes"
, "common", "concerning", "consequently", "consider", "considering"
, "contain", "containing", "contains", "corresponding", "could"
, "couldn't", "course", "currently", "d", "definitely", "described"
, "despite", "detecting", "detects", "did", "didn't", "different", "do"
, "does", "doesn't", "doing", "don't", "done", "down", "downwards"
, "during", "e", "each", "edu", "eg", "eight", "either", "else"
, "elsewhere", "enough", "entirely", "especially", "et", "etc", "even"
, "ever", "every", "everybody", "everyone", "everything", "everywhere"
, "ex", "exactly", "example", "except", "f", "far", "few", "fifth"
, "find", "first", "five", "followed", "following", "follows", "for"
, "former", "formerly", "forth", "four", "from", "further", "furthermore"
, "g", "get", "gets", "getting", "gif", "given", "gives", "go", "goes"
, "going", "gone", "got", "gotten", "greetings", "h", "had", "hadn't"
, "happens", "hardly", "has", "hasn't", "have", "haven't", "having"
, "he", "he'd", "he'll", "he's", "hello", "help", "hence", "her"
, "here", "here's", "hereafter", "hereby", "herein", "hereupon", "hers"
, "herself", "hi", "him", "himself", "his", "hither", "hopefully", "how"
, "how's", "howbeit", "however", "i", "i'd", "i'll", "i'm", "i've"
, "identify", "ie", "if", "ignored", "immediate", "in", "inasmuch"
, "inc", "indeed", "indicate", "indicated", "indicates", "inner"
, "insofar", "instead", "into", "involves", "inward", "is", "isn't"
, "it", "it'd", "it'll", "it's", "its", "itself", "j", "just", "k"
, "keep", "keeps", "kept", "know", "known", "knows", "l", "last"
, "late", "lately", "later", "latter", "latterly", "least", "less"
, "lest", "let", "let's", "like", "liked", "likely", "little", "look"
, "looking", "looks", "ltd", "m", "main", "mainly", "many", "may"
, "maybe", "me", "mean", "meanwhile", "merely", "might", "min", "more"
, "moreover", "most", "mostly", "much", "must", "mustn't", "my", "myself"
, "n", "name", "namely", "nd", "near", "nearly", "necessary", "need"
, "needs", "neither", "never", "nevertheless", "new", "next", "nine"
, "no", "nobody", "non", "none", "noone", "nor", "normally", "not"
, "nothing", "novel", "now", "nowhere", "o", "obviously", "of", "off"
, "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones"
, "only", "onto", "or", "other", "others", "otherwise", "ought", "our"
, "ours", "ourselves", "out", "outside", "over", "overall", "own", "p"
, "particular", "particularly", "per", "perhaps", "placed", "please"
, "plus", "possible", "presents", "presumably", "probably", "provides"
, "q", "que", "quite", "qv", "r", "rather", "rd", "re", "really"
, "reasonably", "regarding", "regardless", "regards", "relatively"
, "respectively", "right", "s", "said", "same", "saw", "say", "saying"
, "says", "sds", "second", "secondly", "see", "seeing", "seem", "seemed"
, "seeming", "seems", "seen", "self", "selves", "sensible", "sent"
, "serious", "seriously", "seven", "several", "shall", "shan't"
, "she", "she'd", "she'll", "she's", "should", "shouldn't", "since"
, "six", "so", "some", "somebody", "somehow", "someone", "something"
, "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry"
, "specified", "specify", "specifying", "still", "sub", "such", "sup"
, "sure", "t", "t's", "take", "taken", "tell", "tends", "th", "than"
, "thank", "thanks", "thanx", "that", "that's", "thats", "the", "their"
, "theirs", "them", "themselves", "then", "thence", "there", "there's"
, "thereafter", "thereby", "therefore", "therein", "theres", "thereupon"
, "these", "they", "they'd", "they'll", "they're", "they've", "think"
, "third", "this", "thorough", "thoroughly", "those", "though", "three"
, "through", "throughout", "thru", "thus", "to", "together", "too"
, "took", "toward", "towards", "tried", "tries", "truly", "try"
, "trying", "twice", "two", "u", "un", "under", "unfortunately"
, "unless", "unlikely", "until", "unto", "up", "upon", "us", "use"
, "used", "useful", "uses", "using", "usually", "uucp", "v", "value"
, "various", "very", "via", "viz", "vs", "w", "want", "wants", "was"
, "wasn't", "way", "we", "we'd", "we'll", "we're", "we've", "welcome"
, "well", "went", "were", "weren't", "what", "what's", "whatever", "when"
, "when's", "whence", "whenever", "where", "where's", "whereafter"
, "whereas", "whereby", "wherein", "whereupon", "wherever", "whether"
, "which", "while", "whither", "who", "who's", "whoever", "whole", "whom"
, "whose", "why", "why's", "will", "willing", "wish", "with", "within"
, "without", "won't", "wonder", "would", "wouldn't", "x", "y", "yes"
, "yet", "you", "you'd", "you'll", "you're", "you've", "your", "yours"
, "yourself", "yourselves", "z", "zero"]
......@@ -120,7 +120,7 @@ termsUnsupervised (Unsupervised l n s m) =
pure
. map (text2term l)
. List.nub
. (List.filter (\l' -> List.length l' > s))
. (List.filter (\l' -> List.length l' >= s))
. List.concat
. mainEleveWith (maybe (panic "no model") identity m) n
. uniText
......@@ -129,10 +129,9 @@ termsUnsupervised _ = undefined
newTries :: Int -> Text -> Tries Token ()
newTries n t = buildTries n (fmap toToken $ uniText t)
-- | TODO: remove long terms (> 24)
uniText :: Text -> [[Text]]
uniText =
-- map (map (Text.toLower))
map (List.filter (not . isPunctuation))
uniText = map (List.filter (not . isPunctuation))
. map tokenize
. sentences -- | TODO get sentences according to lang
. Text.toLower
......
{-|
Module : Gargantext.Text.Ngrams.Token.Text
Description :
Description : Tokenizer main functions
Copyright : (c) Grzegorz Chrupała first, after: CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
......@@ -87,7 +87,7 @@ defaultTokenizer = whitespace
uris :: Tokenizer
uris x | isUri x = E [Left x]
| True = E [Right x]
where isUri u = any (`T.isPrefixOf` u) ["http://","ftp://","mailto:"]
where isUri u = any (`T.isPrefixOf` u) ["http://","ftp://","mailto:","https://"]
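-- e.g. uris "https://gargantext.org" == E [Left "https://gargantext.org"]
-- (kept whole), while uris "hello" == E [Right "hello"] (passed on to the
-- next tokenizer).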
-- | Split off initial and final punctuation
punctuation :: Tokenizer
......
......@@ -36,7 +36,7 @@ import GHC.Real (round)
import Data.Text (Text)
import NLP.RAKE.Text
import Gargantext.Text.Terms.Stop (stopList)
import Gargantext.Text.Samples.EN (stopList)
import Gargantext.Prelude
select :: Double -> [a] -> [a]
......
......@@ -106,7 +106,7 @@ treeData cId nt lt = do
pure $ toTree lt cs' m
treeData' :: FlowCmdM env ServantErr m
treeData' :: FlowCmdM env ServerError m
=> CorpusId -> NgramsType -> ListType
-> m [MyTree]
treeData' cId nt lt = do
......
......@@ -77,7 +77,7 @@ getGraph nId = do
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) NgramsTerms (Map.keys ngs)
graph <- liftIO $ cooc2graph 1 myCooc
graph <- liftIO $ cooc2graph 0 myCooc
pure $ set graph_metadata (Just metadata) graph
......
......@@ -27,22 +27,18 @@ import Data.String.Conversions
--import Control.Monad.Reader (ask)
import qualified Data.ByteString as DB
import qualified Data.ByteString.Lazy as DBL
import Data.Text (Text)
import Data.Map (empty)
import Data.Swagger
import Gargantext.API.Types
import Gargantext.API.Utils (swaggerOptions)
import Gargantext.Database.Types.Node (PhyloId, ListId, CorpusId)
import Gargantext.Database.Schema.Node (insertNodes, nodePhyloW, getNodePhylo)
import Gargantext.Database.Types.Node -- (NodePhylo(..))
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Main
import Gargantext.Viz.Phylo.Aggregates
import Gargantext.Viz.Phylo.Example
import Gargantext.Viz.Phylo.Tools
import Gargantext.API.Ngrams (TODO(..))
--import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Viz.Phylo.LevelMaker
import Servant
import Servant.Job.Utils (swaggerOptions)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Web.HttpApiData (parseUrlPiece, readTextData)
......@@ -56,10 +52,11 @@ type PhyloAPI = Summary "Phylo API"
:<|> PostPhylo
phyloAPI :: PhyloId -> GargServer PhyloAPI
phyloAPI n = getPhylo' n
phyloAPI :: PhyloId -> UserId -> GargServer PhyloAPI
phyloAPI n u = getPhylo n
:<|> postPhylo n u
-- :<|> putPhylo n
:<|> postPhylo n
-- :<|> deletePhylo n
newtype SVG = SVG DB.ByteString
......@@ -82,7 +79,8 @@ instance MimeRender SVG SVG where
------------------------------------------------------------------------
type GetPhylo = QueryParam "listId" ListId
:> QueryParam "level" Level
:> QueryParam "filiation" Filiation
:> QueryParam "minSizeBranch" MinSizeBranch
{- :> QueryParam "filiation" Filiation
:> QueryParam "childs" Bool
:> QueryParam "depth" Level
:> QueryParam "metrics" [Metric]
......@@ -95,55 +93,50 @@ type GetPhylo = QueryParam "listId" ListId
:> QueryParam "export" ExportMode
:> QueryParam "display" DisplayMode
:> QueryParam "verbose" Bool
-}
:> Get '[SVG] SVG
-- | TODO
-- Add real text processing
-- Fix Filter parameters
{-
getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo _phyloId _lId l f b l' ms x y z ts s o e d b' = do
--getPhylo phId _lId l msb _f _b _l' _ms _x _y _z _ts _s _o _e _d _b' = do
getPhylo phId _lId l msb = do
phNode <- getNodePhylo phId
let
fs' = maybe (Just []) (\p -> Just [p]) $ LonelyBranch <$> (LBParams <$> x <*> y <*> z)
so = (,) <$> s <*> o
q = initPhyloQueryView l f b l' ms fs' ts so e d b'
-- | TODO remove phylo for real data here
pure (toPhyloView q phylo)
-- TODO remove phylo for real data here
-}
level = maybe 2 identity l
branc = maybe 2 identity msb
maybePhylo = hyperdataPhylo_data $ _node_hyperdata phNode
getPhylo' :: PhyloId -> GargServer GetPhylo
getPhylo' _phyloId _lId _l _f _b _l' _ms _x _y _z _ts _s _o _e _d _b' = do
p <- liftIO $ viewPhylo2Svg phyloView
p <- liftIO $ viewPhylo2Svg $ viewPhylo level branc $ maybe phyloFromQuery identity maybePhylo
pure (SVG p)
------------------------------------------------------------------------
{-
type PutPhylo = (Put '[JSON] Phylo )
--putPhylo :: PhyloId -> Maybe ListId -> PhyloQueryBuild -> Phylo
putPhylo :: PhyloId -> GargServer PutPhylo
putPhylo = undefined
-}
------------------------------------------------------------------------
type PostPhylo = QueryParam "listId" ListId
:> ReqBody '[JSON] PhyloQueryBuild
:> (Post '[JSON] Phylo)
-- :> ReqBody '[JSON] PhyloQueryBuild
:> (Post '[JSON] NodeId)
postPhylo :: CorpusId -> GargServer PostPhylo
postPhylo _n _lId q = do
postPhylo :: CorpusId -> UserId -> GargServer PostPhylo
postPhylo n userId _lId = do
-- TODO get Reader settings
-- s <- ask
let
vrs = Just ("1" :: Text)
sft = Just (Software "Gargantext" "4")
prm = initPhyloParam vrs sft (Just q)
pure (toPhyloBase q prm (parseDocs (initFoundationsRoots actants) corpus) termList empty)
-- _vrs = Just ("1" :: Text)
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
phy <- flowPhylo n
pId <- insertNodes [nodePhyloW (Just "Phylo") (Just $ HyperdataPhylo Nothing (Just phy)) n userId]
pure $ NodeId (fromIntegral pId)
------------------------------------------------------------------------
-- | DELETE Phylo == delete a node
------------------------------------------------------------------------
------------------------------------------------------------------------
{-
type PutPhylo = (Put '[JSON] Phylo )
--putPhylo :: PhyloId -> Maybe ListId -> PhyloQueryBuild -> Phylo
putPhylo :: PhyloId -> GargServer PutPhylo
putPhylo = undefined
-}
-- | Instances
......@@ -160,7 +153,6 @@ instance Arbitrary Phylo
where
arbitrary = elements [phylo]
instance ToSchema Cluster
instance ToSchema EdgeType
instance ToSchema Filiation
......
......@@ -19,7 +19,6 @@ module Gargantext.Viz.Phylo.Main
where
import Control.Monad.IO.Class (liftIO)
import Data.GraphViz
import Data.Maybe
import Data.Text (Text)
......@@ -39,7 +38,6 @@ import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
import Servant
import qualified Data.ByteString as DB
import qualified Data.List as List
import qualified Data.Map as Map
......@@ -47,52 +45,36 @@ import qualified Data.Text as Text
type MinSizeBranch = Int
flowPhylo :: FlowCmdM env ServantErr m
flowPhylo :: FlowCmdM env err m
=> CorpusId
-> Level -> MinSizeBranch
-> FilePath
-> m FilePath
flowPhylo cId l m fp = do
-> m Phylo
flowPhylo cId = do
list <- defaultList cId
-- listMaster <- selectNodesWithUsername NodeList userMaster
termList <- Map.toList <$> getTermsWith Text.words [list] NgramsTerms GraphTerm
--printDebug "termList" termList
--x <- mapTermListRoot [list] NgramsTerms
--printDebug "mapTermListRoot" x
-- TODO optimize unwords
docs' <- catMaybes <$> map (\h -> (,) <$> _hyperdataDocument_publication_year h
docs' <- catMaybes
<$> map (\h -> (,) <$> _hyperdataDocument_publication_year h
<*> _hyperdataDocument_abstract h
) <$> selectDocs cId
let patterns = buildPatterns termList
let docs = map ( (\(y,t) -> Document y t) . filterTerms patterns) docs'
--printDebug "docs" docs
--printDebug "docs" termList
liftIO $ flowPhylo' (List.sortOn date docs) termList l m fp
)
<$> selectDocs cId
parse :: TermList -> [(Date, Text)] -> IO [Document]
parse l c = do
let patterns = buildPatterns l
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) c
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
filterTerms patterns (y,d) = (y,termsInText patterns d)
let
patterns = buildPatterns termList
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
filterTerms patterns' (y,d) = (y,termsInText patterns' d)
where
--------------------------------------
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = List.nub $ List.concat $ map (map Text.unwords) $ extractTermsWithList pats txt
--------------------------------------
docs = map ( (\(y,t) -> Document y t) . filterTerms patterns) docs'
--liftIO $ flowPhylo' (List.sortOn date docs) termList l m fp
pure $ buildPhylo (List.sortOn date docs) termList
-- TODO SortedList Document
flowPhylo' :: [Document] -> TermList -- ^Build
......
......@@ -815,7 +815,7 @@ getProximity cluster = case cluster of
-- | To initialize all the Cluster / Proximity with their default parameters
initFis :: Maybe Bool -> Maybe Support -> Maybe Int -> FisParams
initFis (def True -> kmf) (def 1 -> min') (def 1 -> thr) = FisParams kmf min' thr
initFis (def True -> kmf) (def 2 -> min') (def 4 -> thr) = FisParams kmf min' thr
initHamming :: Maybe Double -> HammingParams
initHamming (def 0.01 -> sens) = HammingParams sens
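-- `def` is used here as a view pattern filling in defaults for Maybe
-- parameters (assumed to behave like `flip fromMaybe`), so e.g.
-- initFis Nothing Nothing Nothing == FisParams True 2 4.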
......@@ -847,7 +847,7 @@ initPhyloQueryBuild :: Text -> Text -> Maybe Int
-> Maybe Double -> Maybe Double -> Maybe Int
-> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain)
(def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics)
(def 1 -> steps) (def defaultFis -> cluster) (def [] -> metrics)
(def [] -> filters) (def defaultWeightedLogJaccard -> matching') (def 5 -> frame)
(def 0.8 -> frameThr) (def 0.5 -> reBranchThr) (def 4 -> reBranchNth)
(def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
......
resolver: lts-12.26
resolver: lts-14.1
flags: {}
extra-package-dbs: []
packages:
......@@ -22,14 +22,21 @@ extra-deps:
commit: 3fe28b683aba5ddf05e3b5f8eced0bd05c5a29f9
- git: https://github.com/robstewart57/rdf4h.git
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
#
# External API connection to get data
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: dcaa0f5dd53f20648f4f5a615d29163582a4219c
commit: 01a6bf1e79cd5aef8628b240bbd47cb2a0864d5e
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: daeae80365250c4bd539f0a65e271f9aa37f731f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: bf57642f6b66f554fdc0a38ac391cd8200dffcb3
commit: ef9e638c97788df251f50b71fcdd9551b87f12c5
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
#
- git: https://gitlab.iscpif.fr/gargantext/patches-class
commit: 746b4ce0af8f9e600d555ad7e5b2973a940cdad9
- git: https://github.com/np/servant-job.git
commit: ac4227441bbca30c44235582b5ec31340c569021
#- git: https://github.com/np/servant-job.git
# commit: ac4227441bbca30c44235582b5ec31340c569021
- git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git
commit: 1c636112b151110408e7c5a28cec39e46657358e
- git: https://github.com/np/patches-map
......@@ -38,10 +45,8 @@ extra-deps:
commit: 53385de076be09f728a1b58c035a18e9ff9bcfd6
- git: https://github.com/delanoe/hsparql.git
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: e39454101b53916e3082085ebfe922df695fc775
- KMP-0.1.0.2
- accelerate-1.2.0.0
- accelerate-1.2.0.1
- aeson-lens-0.5.0.0
- deepseq-th-0.1.0.4
- duckling-0.1.3.0
......@@ -56,8 +61,9 @@ extra-deps:
- json-stream-0.4.2.4 # Text.Parsers (JSON)
- serialise-0.2.0.0
- servant-flatten-0.2
- servant-multipart-0.11.2
#- servant-multipart-0.11.2
- stemmer-0.5.2
- time-units-1.0.0
- validity-0.9.0.0 # patches-{map,class}
- directory-1.3.1.5
- process-1.6.5.1@sha256:77a9afeb676357f67fe5cf1ad79aca0745fb6f7fb96b786d510af08f622643f6,2468