Commit 36c913d9 authored by qlobbe's avatar qlobbe

Merge branch 'dev' into dev-phylo

parents 62f57e5a c74fd659
...@@ -33,3 +33,21 @@ curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devo ...@@ -33,3 +33,21 @@ curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devo
stack --docker exec gargantext-cli -- CorpusFromGarg.csv ListFromGarg.csv Ouput.json stack --docker exec gargantext-cli -- CorpusFromGarg.csv ListFromGarg.csv Ouput.json
```sql
INSERT INTO auth_user (password, is_superuser, username, first_name, last_name, email, is_staff, is_active)
VALUES ('1resu', true, 'user1', 'user', '1', 'a@localhost', true, true);
-- nodetype NodeUser has id 1
-- inserted user_id = 3
INSERT INTO nodes (typename, user_id, name)
VALUES (1, 3, 'user1');
-- same for master user -- 'gargantua'
INSERT INTO auth_user (password, is_superuser, username, first_name, last_name, email, is_staff, is_active)
VALUES ('autnagrag, true, 'gargantua, 'gargantua, '1', 'g@localhost', true, true);
-- nodetype NodeUser has id 1
-- inserted user_id = 5
INSERT INTO nodes (typename, user_id, name)
VALUES (1, 5, 'gargantua);
```
...@@ -46,7 +46,7 @@ main = do ...@@ -46,7 +46,7 @@ main = do
createUsers = insertUsersDemo createUsers = insertUsersDemo
let cmd :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId let cmd :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
cmd = flowCorpusFile (cs user) (cs name) (read limit :: Int) (Multi EN) CsvHalFormat corpusPath cmd = flowCorpusFile (cs user) (cs name) (read limit :: Int) (Unsupervised EN 5 1 Nothing) CsvHalFormat corpusPath
{- {-
let debatCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId let debatCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
debatCorpus = do debatCorpus = do
......
...@@ -10,7 +10,7 @@ else ...@@ -10,7 +10,7 @@ else
fi fi
sudo apt update sudo apt update
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-9.6 nginx sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-9.6 nginx libigraph-dev
#echo "Which user?" #echo "Which user?"
#read USER #read USER
......
...@@ -9,5 +9,6 @@ else ...@@ -9,5 +9,6 @@ else
fi fi
sudo apt update sudo apt update
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-9.6 nginx libpq-dev sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql nginx libpq-dev
sudo apt install postgresql-server-dev-9.6
if [ "$#" -lt 3 ]; then
echo "Usage: $0 <name> <path> <limit>"
exit 1
fi
name="$1"
path="$2"
limit="$3"
stack --docker exec gargantext-import -- true "user1" "$name" gargantext.ini "$limit" "$path"
set -eu
docker stop dbgarg || :
docker rm --volumes dbgarg || :
export PGPASSWORD="$(grep DB_PASS gargantext.ini | \awk '{print $3}')"
docker run --name dbgarg -e POSTGRES_USER=gargantua -e POSTGRES_DB=gargandbV5 -e POSTGRES_PASSWORD=${PGPASSWORD} -d postgres
sleep 3
docker run -e PGPASSWORD -i --rm --link dbgarg:postgres postgres psql -h postgres -U gargantua -d gargandbV5 < src/Gargantext/Database/Schema/schema.sql
#docker run -e PGPASSWORD -i --rm --link dbgarg:postgres postgres psql -h postgres -U gargantua -d gargandbV5 < gargantext.dump
#docker run -e PGPASSWORD -i --rm --link dbgarg:postgres postgres dropdb -h postgres -U gargantua gargandbV5
stack --docker exec gargantext-server -- --run Prod --ini gargantext.ini
name: gargantext name: gargantext
version: '4.0.0.5' version: '4.0.0.6'
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -47,6 +47,7 @@ library: ...@@ -47,6 +47,7 @@ library:
- Gargantext.Prelude - Gargantext.Prelude
- Gargantext.Text - Gargantext.Text
- Gargantext.Text.Context - Gargantext.Text.Context
- Gargantext.Text.Crawlers
- Gargantext.Text.Examples - Gargantext.Text.Examples
- Gargantext.Text.List.CSV - Gargantext.Text.List.CSV
- Gargantext.Text.Metrics - Gargantext.Text.Metrics
...@@ -102,6 +103,8 @@ library: ...@@ -102,6 +103,8 @@ library:
- containers - containers
- contravariant - contravariant
- crawlerPubMed - crawlerPubMed
- crawlerIsidore
- crawlerHAL
- data-time-segment - data-time-segment
- deepseq - deepseq
- directory - directory
...@@ -110,10 +113,12 @@ library: ...@@ -110,10 +113,12 @@ library:
- filepath - filepath
- fullstop - fullstop
- fclabels - fclabels
- fgl
- fast-logger - fast-logger
- filelock - filelock
- full-text-search - full-text-search
- graphviz - graphviz
- haskell-igraph
- http-client - http-client
- http-client-tls - http-client-tls
- http-conduit - http-conduit
...@@ -153,6 +158,9 @@ library: ...@@ -153,6 +158,9 @@ library:
- protolude - protolude
- pureMD5 - pureMD5
- SHA - SHA
- simple-reflect
- cereal # (IGraph)
- singletons # (IGraph)
- random - random
- rake - rake
- regex-compat - regex-compat
......
...@@ -75,7 +75,7 @@ import Gargantext.API.Ngrams (HasRepo(..), HasRepoSaver(..), saveRepo, TableNgra ...@@ -75,7 +75,7 @@ import Gargantext.API.Ngrams (HasRepo(..), HasRepoSaver(..), saveRepo, TableNgra
import Gargantext.API.Node import Gargantext.API.Node
import Gargantext.API.Search ( SearchAPI, search, SearchQuery) import Gargantext.API.Search ( SearchAPI, search, SearchQuery)
import Gargantext.API.Types import Gargantext.API.Types
import Gargantext.API.Upload import qualified Gargantext.API.Corpus.New as New
import Gargantext.Core.Types (HasInvalidError(..)) import Gargantext.Core.Types (HasInvalidError(..))
import Gargantext.Database.Facet import Gargantext.Database.Facet
import Gargantext.Database.Schema.Node (HasNodeError(..), NodeError) import Gargantext.Database.Schema.Node (HasNodeError(..), NodeError)
...@@ -262,7 +262,7 @@ type GargAPI' = ...@@ -262,7 +262,7 @@ type GargAPI' =
:<|> "count" :> Summary "Count endpoint" :<|> "count" :> Summary "Count endpoint"
:> ReqBody '[JSON] Query :> CountAPI :> ReqBody '[JSON] Query :> CountAPI
-- Corpus endpoint -- Corpus endpoint --> TODO rename s/search/filter/g
:<|> "search":> Summary "Search endpoint" :<|> "search":> Summary "Search endpoint"
:> ReqBody '[JSON] SearchQuery :> ReqBody '[JSON] SearchQuery
:> QueryParam "offset" Int :> QueryParam "offset" Int
...@@ -279,7 +279,7 @@ type GargAPI' = ...@@ -279,7 +279,7 @@ type GargAPI' =
:<|> "tree" :> Summary "Tree endpoint" :<|> "tree" :> Summary "Tree endpoint"
:> Capture "id" NodeId :> TreeAPI :> Capture "id" NodeId :> TreeAPI
:<|> "upload" :> ApiUpload :<|> "new" :> New.Api
-- :<|> "scraper" :> WithCallbacks ScraperAPI -- :<|> "scraper" :> WithCallbacks ScraperAPI
...@@ -323,7 +323,7 @@ serverGargAPI -- orchestrator ...@@ -323,7 +323,7 @@ serverGargAPI -- orchestrator
:<|> search :<|> search
:<|> graphAPI -- TODO: mock :<|> graphAPI -- TODO: mock
:<|> treeAPI :<|> treeAPI
:<|> upload :<|> New.api
-- :<|> orchestrator -- :<|> orchestrator
where where
fakeUserId = 1 -- TODO fakeUserId = 1 -- TODO
...@@ -421,3 +421,7 @@ startGargantextMock port = do ...@@ -421,3 +421,7 @@ startGargantextMock port = do
application <- makeMockApp . MockEnv $ FireWall False application <- makeMockApp . MockEnv $ FireWall False
run port application run port application
-} -}
{-|
Module : Gargantext.API.Corpus.New
Description : New corpus API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
New corpus means either:
- new corpus
- new data in existing corpus
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.API.Corpus.New
where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Prelude
import Gargantext.Prelude.Utils (hash)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.Database.Flow (FlowCmdM)
data Query = Query { query_query :: Text
, query_corpus_id :: Int
, query_files_id :: [Text]
}
deriving (Eq, Show, Generic)
deriveJSON (unPrefix "query_") ''Query
instance Arbitrary Query where
arbitrary = elements [ Query q n fs
| q <- ["a","b"]
, n <- [0..10]
, fs <- map (map hash) [["a","b"], ["c","d"]]
]
instance ToSchema Query where
declareNamedSchema =
genericDeclareNamedSchema
defaultSchemaOptions {fieldLabelModifier = \fieldLabel -> drop 6 fieldLabel}
type Api = Summary "New Corpus endpoint"
:> ReqBody '[JSON] Query
:> Post '[JSON] CorpusId
api :: FlowCmdM env err m => Query -> m CorpusId
api (Query q _ _) = do
cId <- flowCorpusSearchInDatabase "user1" EN q
pure cId
...@@ -69,6 +69,7 @@ data QueryBool = QueryBool Text ...@@ -69,6 +69,7 @@ data QueryBool = QueryBool Text
queries :: [QueryBool] queries :: [QueryBool]
queries = [QueryBool (pack "(X OR X') AND (Y OR Y') NOT (Z OR Z')")] queries = [QueryBool (pack "(X OR X') AND (Y OR Y') NOT (Z OR Z')")]
--queries = [QueryBool (pack "(X + X') * (Y + Y') - (Z + Z')")]
instance Arbitrary QueryBool where instance Arbitrary QueryBool where
arbitrary = elements queries arbitrary = elements queries
......
...@@ -95,6 +95,7 @@ data TODO = TODO ...@@ -95,6 +95,7 @@ data TODO = TODO
deriving (Generic) deriving (Generic)
instance ToSchema TODO where instance ToSchema TODO where
instance ToParamSchema TODO where
------------------------------------------------------------------------ ------------------------------------------------------------------------
--data FacetFormat = Table | Chart --data FacetFormat = Table | Chart
...@@ -200,12 +201,12 @@ mkNgramsElement ngrams list rp children = ...@@ -200,12 +201,12 @@ mkNgramsElement ngrams list rp children =
-- TODO review -- TODO review
size = 1 + count " " ngrams size = 1 + count " " ngrams
newNgramsElement :: NgramsTerm -> NgramsElement newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
newNgramsElement ngrams = mkNgramsElement ngrams GraphTerm Nothing mempty newNgramsElement mayList ngrams = mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
instance ToSchema NgramsElement instance ToSchema NgramsElement
instance Arbitrary NgramsElement where instance Arbitrary NgramsElement where
arbitrary = elements [newNgramsElement "sport"] arbitrary = elements [newNgramsElement Nothing "sport"]
ngramsElementToRepo :: NgramsElement -> NgramsRepoElement ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
ngramsElementToRepo ngramsElementToRepo
...@@ -793,9 +794,9 @@ putListNgrams listId ngramsType nes = do ...@@ -793,9 +794,9 @@ putListNgrams listId ngramsType nes = do
where where
m = Map.fromList $ (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) <$> nes m = Map.fromList $ (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) <$> nes
tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> [NgramsTerm] -> m () tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> Maybe ListType -> [NgramsTerm] -> m ()
tableNgramsPost tabType listId = tableNgramsPost tabType listId mayList =
putListNgrams listId (ngramsTypeFromTabType tabType) . fmap newNgramsElement putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
-- Apply the given patch to the DB and returns the patch to be applied on the -- Apply the given patch to the DB and returns the patch to be applied on the
-- client. -- client.
...@@ -1008,6 +1009,7 @@ type TableNgramsApiPut = Summary " Table Ngrams API Change" ...@@ -1008,6 +1009,7 @@ type TableNgramsApiPut = Summary " Table Ngrams API Change"
type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams" type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
:> QueryParamR "ngramsType" TabType :> QueryParamR "ngramsType" TabType
:> QueryParamR "list" ListId :> QueryParamR "list" ListId
:> QueryParam "listType" ListType
:> ReqBody '[JSON] [NgramsTerm] :> ReqBody '[JSON] [NgramsTerm]
:> Post '[JSON] () :> Post '[JSON] ()
......
...@@ -24,45 +24,54 @@ Node API ...@@ -24,45 +24,54 @@ Node API
{-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-} {-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node module Gargantext.API.Node
where where
import Control.Lens (prism') import Control.Lens (prism', (.~), (?~))
import Control.Monad ((>>)) import Control.Monad ((>>), forM)
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Data.Aeson (FromJSON, ToJSON) import Data.Aeson (FromJSON, ToJSON)
import Data.Maybe
import Data.Monoid (mempty)
import Data.Swagger import Data.Swagger
import Data.Text (Text()) import Data.Text (Text())
import Data.Time (UTCTime) import Data.Time (UTCTime)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.API.Metrics import Gargantext.API.Metrics
import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, apiNgramsTableCorpus, QueryParamR) import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, apiNgramsTableCorpus, QueryParamR, TODO)
import Gargantext.API.Ngrams.NTree (MyTree)
import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery) import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery)
import Gargantext.API.Types import Gargantext.API.Types
import Gargantext.Core.Types (Offset, Limit) import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Types.Main (Tree, NodeTree, ListType) import Gargantext.Core.Types.Main (Tree, NodeTree, ListType)
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),runViewAuthorsDoc) import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),runViewAuthorsDoc)
import Gargantext.Database.Node.Children (getChildren) import Gargantext.Database.Node.Children (getChildren)
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..)) import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, getNode', deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..))
import Gargantext.Database.Schema.NodeNode (nodesToFavorite, nodesToTrash) import Gargantext.Database.Schema.NodeNode (nodesToFavorite, nodesToTrash)
import Gargantext.Database.Tree (treeDB, HasTreeError(..), TreeError(..)) import Gargantext.Database.Tree (treeDB, HasTreeError(..), TreeError(..))
import Gargantext.Database.Types.Node import Gargantext.Database.Types.Node
import Gargantext.Database.Utils -- (Cmd, CmdM) import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Utils (hash)
import Gargantext.Text.Metrics (Scored(..)) import Gargantext.Text.Metrics (Scored(..))
import Gargantext.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Gargantext.Viz.Chart import Gargantext.Viz.Chart
import Gargantext.API.Ngrams.NTree (MyTree) import Gargantext.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Servant import Servant
import Servant.Multipart
import Servant.Swagger (HasSwagger(toSwagger))
import Servant.Swagger.Internal
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -133,13 +142,14 @@ type NodeAPI a = Get '[JSON] (Node a) ...@@ -133,13 +142,14 @@ type NodeAPI a = Get '[JSON] (Node a)
:> QueryParam "limit" Int :> QueryParam "limit" Int
:> QueryParam "order" OrderBy :> QueryParam "order" OrderBy
:> SearchAPI :> SearchAPI
-- VIZ -- VIZ
:<|> "metrics" :> MetricsAPI :<|> "metrics" :> MetricsAPI
:<|> "chart" :> ChartApi :<|> "chart" :> ChartApi
:<|> "pie" :> PieApi :<|> "pie" :> PieApi
:<|> "tree" :> TreeApi :<|> "tree" :> TreeApi
:<|> "phylo" :> PhyloAPI :<|> "phylo" :> PhyloAPI
:<|> "upload" :> UploadAPI
-- TODO-ACCESS: check userId CanRenameNode nodeId -- TODO-ACCESS: check userId CanRenameNode nodeId
-- TODO-EVENTS: NodeRenamed RenameNode or re-use some more general NodeEdited... -- TODO-EVENTS: NodeRenamed RenameNode or re-use some more general NodeEdited...
...@@ -164,7 +174,7 @@ nodeAPI p uId id ...@@ -164,7 +174,7 @@ nodeAPI p uId id
:<|> rename id :<|> rename id
:<|> postNode uId id :<|> postNode uId id
:<|> putNode id :<|> putNode id
:<|> deleteNode id :<|> deleteNodeApi id
:<|> getChildren id p :<|> getChildren id p
-- TODO gather it -- TODO gather it
...@@ -182,10 +192,18 @@ nodeAPI p uId id ...@@ -182,10 +192,18 @@ nodeAPI p uId id
:<|> getPie id :<|> getPie id
:<|> getTree id :<|> getTree id
:<|> phyloAPI id :<|> phyloAPI id
:<|> postUpload id
where
deleteNodeApi id' = do
node <- getNode' id'
if _node_typename node == nodeTypeId NodeUser
then panic "not allowed" -- TODO add proper Right Management Type
else deleteNode id'
-- Annuaire -- Annuaire
-- :<|> query -- :<|> query
------------------------------------------------------------------------ ------------------------------------------------------------------------
data RenameNode = RenameNode { r_name :: Text } data RenameNode = RenameNode { r_name :: Text }
deriving (Generic) deriving (Generic)
...@@ -374,7 +392,67 @@ getMetrics cId maybeListId tabType maybeLimit = do ...@@ -374,7 +392,67 @@ getMetrics cId maybeListId tabType maybeLimit = do
log' n x = 1 + (if x <= 0 then 0 else (log $ (10^(n::Int)) * x)) log' n x = 1 + (if x <= 0 then 0 else (log $ (10^(n::Int)) * x))
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
errorMsg = "API.Node.metrics: key absent" errorMsg = "API.Node.metrics: key absent"
pure $ Metrics metrics pure $ Metrics metrics
-------------------------------------------------------------
type Hash = Text
data FileType = CSV | PresseRIS
deriving (Eq, Show, Generic)
instance ToSchema FileType
instance Arbitrary FileType
where
arbitrary = elements [CSV, PresseRIS]
instance ToParamSchema FileType
instance ToParamSchema (MultipartData Mem) where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance FromHttpApiData FileType
where
parseUrlPiece "CSV" = pure CSV
parseUrlPiece "PresseRis" = pure PresseRIS
parseUrlPiece _ = pure CSV -- TODO error here
instance (ToParamSchema a, HasSwagger sub) =>
HasSwagger (MultipartForm tag a :> sub) where
-- TODO
toSwagger _ = toSwagger (Proxy :: Proxy sub)
& addParam param
where
param = mempty
& required ?~ True
& schema .~ ParamOther sch
sch = mempty
& in_ .~ ParamFormData
& paramSchema .~ toParamSchema (Proxy :: Proxy a)
type UploadAPI = Summary "Upload file(s) to a corpus"
:> MultipartForm Mem (MultipartData Mem)
:> QueryParam "fileType" FileType
:> Post '[JSON] [Hash]
--postUpload :: NodeId -> Maybe FileType -> GargServer UploadAPI
--postUpload :: NodeId -> GargServer UploadAPI
postUpload :: NodeId -> MultipartData Mem -> Maybe FileType -> Cmd err [Hash]
postUpload _ _ Nothing = panic "fileType is a required parameter"
postUpload _ multipartData (Just fileType) = do
putStrLn $ "File Type: " <> (show fileType)
is <- liftIO $ do
putStrLn ("Inputs:" :: Text)
forM (inputs multipartData) $ \input -> do
putStrLn $ ("iName " :: Text) <> (iName input)
<> ("iValue " :: Text) <> (iValue input)
pure $ iName input
_ <- forM (files multipartData) $ \file -> do
let content = fdPayload file
putStrLn $ ("XXX " :: Text) <> (fdFileName file)
putStrLn $ ("YYY " :: Text) <> cs content
--pure $ cs content
-- is <- inputs multipartData
pure $ map (hash . cs) is
...@@ -72,7 +72,8 @@ instance Arbitrary SearchInQuery where ...@@ -72,7 +72,8 @@ instance Arbitrary SearchInQuery where
----------------------------------------------------------------------- -----------------------------------------------------------------------
data SearchResults = SearchResults { srs_results :: [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]} data SearchResults = SearchResults' { srs_resultsP :: [FacetDoc]}
| SearchResults { srs_results :: [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]}
deriving (Generic) deriving (Generic)
$(deriveJSON (unPrefix "srs_") ''SearchResults) $(deriveJSON (unPrefix "srs_") ''SearchResults)
...@@ -96,6 +97,7 @@ search (SearchQuery q pId) o l order = ...@@ -96,6 +97,7 @@ search (SearchQuery q pId) o l order =
searchIn :: NodeId -> SearchInQuery -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err SearchResults searchIn :: NodeId -> SearchInQuery -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err SearchResults
searchIn nId (SearchInQuery q ) o l order = searchIn nId (SearchInQuery q ) o l order =
SearchResults <$> searchInCorpusWithContacts nId q o l order SearchResults' <$> searchInCorpus nId q o l order
--SearchResults <$> searchInCorpusWithContacts nId q o l order
{-|
Module : Gargantext.API.Upload
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Upload
where
import qualified Data.Text as Text
import GHC.Generics (Generic)
import Gargantext.Prelude
import Data.Text (Text)
import Data.Aeson
import Servant
import Servant.Multipart
--import Servant.Mock (HasMock(mock))
import Servant.Swagger (HasSwagger(toSwagger))
-- import qualified Data.ByteString.Lazy as LBS
import Control.Monad
import Control.Monad.IO.Class
import Gargantext.API.Types
--import Servant.CSV.Cassava (CSV'(..))
--import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
--import Data.Swagger
--import Gargantext.API.Ngrams (TODO)
-- | Upload files
-- TODO Is it possible to adapt the function according to iValue input ?
--type API = MultipartForm Mem (MultipartData Mem) :> Post '[JSON] Integer
-- instance Generic Mem
--instance ToSchema Mem
--instance Arbitrary Mem
--instance ToSchema (MultipartData Mem)
--instance Arbitrary ( MultipartData Mem)
instance HasSwagger (MultipartForm tag a :> sub) where
-- TODO
toSwagger _ = undefined -- toSwagger (Proxy :: Proxy (TODO :> Post '[JSON] ()))
--declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
--instance Arbitrary (MultipartForm Mem (MultipartData Mem))
{-
instance (FromMultipart tag a, MultipartBackend tag, Servant.Multipart.LookupContext context (MultipartOptions tag))
=> HasMock (MultipartForm tag a :> sub) context where
mock _ _ = undefined
instance HasMock (MultipartForm Mem (MultipartData Mem) :> sub) context where
mock _ _ = undefined
-}
data Upload = Upload { up :: [Text] }
deriving (Generic)
instance ToJSON Upload
type ApiUpload = MultipartForm Mem (MultipartData Mem) :> Post '[JSON] Text
-- MultipartData consists in textual inputs,
-- accessible through its "inputs" field, as well
-- as files, accessible through its "files" field.
upload :: GargServer ApiUpload
upload multipartData = do
--{-
is <- liftIO $ do
putStrLn ("Inputs:" :: Text)
forM (inputs multipartData) $ \input -> do
putStrLn $ ("iName " :: Text) <> (iName input)
<> ("iValue " :: Text) <> (iValue input)
pure $ iName input
--{-
_ <- forM (files multipartData) $ \file -> do
let content = fdPayload file
putStrLn $ ("XXX " :: Text) <> (fdFileName file)
putStrLn $ ("YYY " :: Text) <> cs content
--pure $ cs content
-- is <- inputs multipartData
--}
pure $ Text.concat $ map cs is
-------------------------------------------------------------------------------
{-|
Module : Gargantext.Core.Statistics
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Core.Statistics
where
import Data.Map (Map)
import Gargantext.Prelude
import Numeric.Statistics.PCA (pcaReduceN)
import Data.Array.IArray (Array, listArray, elems)
import qualified Data.Vector.Storable as Vec
import qualified Data.List as List
import qualified Data.Map as Map
data Dimension = Dimension Int
pcaReduceTo :: Ord t
=> Dimension
-> Map t (Vec.Vector Double)
-> Map t (Vec.Vector Double)
pcaReduceTo (Dimension d) m = Map.fromList
$ zip txts
$ elems
$ pcaReduceN m'' d
where
m'' :: Array Int (Vec.Vector Double)
m'' = listArray (1, List.length m') m'
(txts,m') = List.unzip $ Map.toList m
This diff is collapsed.
...@@ -366,6 +366,11 @@ getNode nId _ = do ...@@ -366,6 +366,11 @@ getNode nId _ = do
fromMaybe (error $ "Node does node exist: " <> show nId) . headMay fromMaybe (error $ "Node does node exist: " <> show nId) . headMay
<$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId)) <$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))
getNode' :: NodeId -> Cmd err (Node Value)
getNode' nId = fromMaybe (error $ "Node does node exist: " <> show nId) . headMay
<$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))
getNodesWithType :: Column PGInt4 -> Cmd err [Node HyperdataDocument] getNodesWithType :: Column PGInt4 -> Cmd err [Node HyperdataDocument]
getNodesWithType = runOpaQuery . selectNodesWithType getNodesWithType = runOpaQuery . selectNodesWithType
...@@ -578,8 +583,6 @@ instance MkCorpus HyperdataAnnuaire ...@@ -578,8 +583,6 @@ instance MkCorpus HyperdataAnnuaire
mk n h p u = insertNodesR [nodeAnnuaireW n h p u] mk n h p u = insertNodesR [nodeAnnuaireW n h p u]
getOrMkList :: HasNodeError err => ParentId -> UserId -> Cmd err ListId getOrMkList :: HasNodeError err => ParentId -> UserId -> Cmd err ListId
getOrMkList pId uId = getOrMkList pId uId =
maybe (mkList' pId uId) (pure . view node_id) . headMay =<< getListsWithParentId pId maybe (mkList' pId uId) (pure . view node_id) . headMay =<< getListsWithParentId pId
......
...@@ -32,7 +32,7 @@ import Gargantext.Database.Facet ...@@ -32,7 +32,7 @@ import Gargantext.Database.Facet
import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Ngrams import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus) import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
import Gargantext.Database.Schema.NodeNgram import Gargantext.Database.Schema.NodeNodeNgrams
import Gargantext.Database.Queries.Join (leftJoin6) import Gargantext.Database.Queries.Join (leftJoin6)
import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery) import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
...@@ -101,31 +101,31 @@ searchInCorpusWithContacts' cId q o l order = runOpaQuery $ queryInCorpusWithCon ...@@ -101,31 +101,31 @@ searchInCorpusWithContacts' cId q o l order = runOpaQuery $ queryInCorpusWithCon
queryInCorpusWithContacts :: CorpusId -> Text -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> O.Query FacetPairedRead queryInCorpusWithContacts :: CorpusId -> Text -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> O.Query FacetPairedRead
queryInCorpusWithContacts cId q _ _ _ = proc () -> do queryInCorpusWithContacts cId q _ _ _ = proc () -> do
(docs, (corpusDoc, (docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< () (docs, (corpusDoc, (_docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q ) restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument) restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
restrict -< (nn_node1_id corpusDoc) .== (toNullable $ pgNodeId cId) restrict -< (nn_node1_id corpusDoc) .== (toNullable $ pgNodeId cId)
restrict -< (nng_listType docNgrams) .== (toNullable $ pgNgramsType Authors) -- restrict -< (nng_listType docNgrams) .== (toNullable $ pgNgramsType Authors)
restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact) restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
-- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts) -- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
returnA -< FacetPaired (_ns_id docs) (_ns_date docs) (_ns_hyperdata docs) (pgInt4 0) (Pair (_node_id contacts) (ngrams_terms ngrams')) returnA -< FacetPaired (_ns_id docs) (_ns_date docs) (_ns_hyperdata docs) (pgInt4 0) (Pair (_node_id contacts) (ngrams_terms ngrams'))
joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNodeNgramsReadNull, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull)))))
joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56 joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNodeNgramsTable queryNgramsTable queryNodeNodeNgramsTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
where where
cond12 :: (NodeNgramRead, NodeRead) -> Column PGBool cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
cond12 (ng3, n2) = _node_id n2 .== nng_node_id ng3 cond12 (ng3, n2) = _node_id n2 .== nnng_node1_id ng3
--------- ---------
cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
cond23 (ng2, (nng2, _)) = nng_ngrams_id nng2 .== ngrams_id ng2 cond23 (ng2, (nnng2, _)) = nnng_ngrams_id nnng2 .== ngrams_id ng2
cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool cond34 :: (NodeNodeNgramsRead, (NgramsRead, (NodeNodeNgramsReadNull, NodeReadNull))) -> Column PGBool
cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nng_ngrams_id nng cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nnng_ngrams_id nng
cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool cond45 :: (NodeNodeRead, (NodeNodeNgramsRead, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull)))) -> Column PGBool
cond45 (nn, (nng, (_,(_,_)))) = nng_node_id nng .== nn_node2_id nn cond45 (nn, (nng, (_,(_,_)))) = nnng_node1_id nng .== nn_node2_id nn
cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNodeNgramsReadNull, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull))))) -> Column PGBool
cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn_node2_id nn cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn_node2_id nn
......
...@@ -98,18 +98,20 @@ eavg [] = 0 ...@@ -98,18 +98,20 @@ eavg [] = 0
-- Simple Average -- Simple Average
mean :: Fractional a => [a] -> a mean :: Fractional a => [a] -> a
mean xs = if L.null xs then 0.0 mean xs = sum xs / fromIntegral (length xs)
else sum xs / fromIntegral (length xs)
sumMaybe :: Num a => [Maybe a] -> Maybe a sumMaybe :: Num a => [Maybe a] -> Maybe a
sumMaybe = fmap sum . M.sequence sumMaybe = fmap sum . M.sequence
variance :: Floating a => [a] -> a variance :: Floating a => [a] -> a
variance xs = mean $ map (\x -> (x - m) ** 2) xs where variance xs = sum ys / (fromIntegral (length xs) - 1)
where
m = mean xs m = mean xs
ys = map (\x -> (x - m) ** 2) xs
deviation :: [Double] -> Double deviation :: Floating a => [a] -> a
deviation = sqrt . variance deviation = sqrt . variance
movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b] movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
...@@ -242,8 +244,8 @@ scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs' ...@@ -242,8 +244,8 @@ scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
scaleNormalize :: [Double] -> [Double] scaleNormalize :: [Double] -> [Double]
scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs' scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
where where
v = variance xs' v = variance xs'
m = mean xs' m = mean xs'
xs' = map abs xs xs' = map abs xs
normalize :: [Double] -> [Double] normalize :: [Double] -> [Double]
......
{-|
Module : Gargantext.Text.Crawlers
Description : All crawlers of Gargantext in one file.
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Crawlers
where
{-
import Data.Text (Text)
--import Gargantext.Prelude
import qualified PUBMED as PubMed
data Crawler = PubMed | HAL | Isidore
type Query = Text
--{-
crawl :: Crawler -> Query -> IO [PubMed.Doc]
crawl Pubmed = PubMed.crawler
--}
-}
...@@ -31,7 +31,7 @@ import Gargantext.Text.Metrics.Count (occurrencesWith) ...@@ -31,7 +31,7 @@ import Gargantext.Text.Metrics.Count (occurrencesWith)
import qualified Data.IntMap as IntMap import qualified Data.IntMap as IntMap
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.SVM as SVM import qualified Data.SVM as SVM
import qualified Data.Vector as Vec import qualified Data.Vector as Vec
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
...@@ -29,29 +29,26 @@ import GHC.Real (round) ...@@ -29,29 +29,26 @@ import GHC.Real (round)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Graph.Distances.Matrice import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index import Gargantext.Viz.Graph.Index
import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
import qualified Data.Array.Accelerate as DAA import qualified Data.Array.Accelerate as DAA
import qualified Data.Array.Accelerate.Interpreter as DAA import qualified Data.Array.Accelerate.Interpreter as DAA
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map import qualified Data.Map as Map
import Numeric.Statistics.PCA (pcaReduceN)
import qualified Data.Vector.Storable as Vec import qualified Data.Vector.Storable as Vec
import Data.Array.IArray (Array, listArray, elems)
type GraphListSize = Int type GraphListSize = Int
type InclusionSize = Int type InclusionSize = Int
toScored :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t] toScored :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
toScored = map2scored toScored = map2scored
. (reduceTo (Dimension 2)) . (pcaReduceTo (Dimension 2))
. (Map.filter (\v -> Vec.length v > 1)) . (Map.filter (\v -> Vec.length v > 1))
. (Map.unionsWith (<>)) . (Map.unionsWith (<>))
scored :: Ord t => Map (t,t) Int -> [Scored t] scored :: Ord t => Map (t,t) Int -> [Scored t]
scored = map2scored . (reduceTo (Dimension 2)) . scored2map scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double) scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
...@@ -66,20 +63,6 @@ data Scored ts = Scored ...@@ -66,20 +63,6 @@ data Scored ts = Scored
, _scored_speGen :: !SpecificityGenericity , _scored_speGen :: !SpecificityGenericity
} deriving (Show) } deriving (Show)
data Dimension = Dimension Int
reduceTo :: Ord t
=> Dimension
-> Map t (Vec.Vector Double)
-> Map t (Vec.Vector Double)
reduceTo (Dimension d) ss = Map.fromList $ zip txts $ elems $ pcaReduceN ss'' d
where
ss'' :: Array Int (Vec.Vector Double)
ss'' = listArray (1, List.length ss') ss'
(txts,ss') = List.unzip $ Map.toList ss
localMetrics :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double) localMetrics :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe])) localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
(Map.toList fi) (Map.toList fi)
...@@ -92,8 +75,6 @@ localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [i ...@@ -92,8 +75,6 @@ localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [i
$ DAA.zip (DAA.use is) (DAA.use ss) $ DAA.zip (DAA.use is) (DAA.use ss)
-- TODO Code to be remove below -- TODO Code to be remove below
-- TODO in the textflow we end up needing these indices , it might be -- TODO in the textflow we end up needing these indices , it might be
-- better to compute them earlier and pass them around. -- better to compute them earlier and pass them around.
...@@ -107,10 +88,6 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) s ...@@ -107,10 +88,6 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) s
$ DAA.zip (DAA.use is) (DAA.use ss) $ DAA.zip (DAA.use is) (DAA.use ss)
takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t] takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
takeScored listSize incSize = map _scored_terms takeScored listSize incSize = map _scored_terms
. linearTakes listSize incSize _scored_speGen . linearTakes listSize incSize _scored_speGen
...@@ -134,4 +111,3 @@ linearTakes gls incSize speGen incExc = take gls ...@@ -134,4 +111,3 @@ linearTakes gls incSize speGen incExc = take gls
. splitEvery incSize . splitEvery incSize
. sortOn speGen . sortOn speGen
...@@ -22,7 +22,7 @@ please follow the types. ...@@ -22,7 +22,7 @@ please follow the types.
{-# LANGUAGE PackageImports #-} {-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers (FileFormat(..), clean, parseFile) module Gargantext.Text.Parsers (FileFormat(..), clean, parseFile, cleanText)
where where
--import Data.ByteString (ByteString) --import Data.ByteString (ByteString)
...@@ -164,9 +164,14 @@ openZip fp = do ...@@ -164,9 +164,14 @@ openZip fp = do
bs <- mapConcurrently (\s -> withArchive fp (getEntry s)) entries bs <- mapConcurrently (\s -> withArchive fp (getEntry s)) entries
pure bs pure bs
cleanText :: Text -> Text
cleanText = cs . clean . cs
clean :: DB.ByteString -> DB.ByteString clean :: DB.ByteString -> DB.ByteString
clean txt = DBC.map clean' txt clean txt = DBC.map clean' txt
where where
clean' '’' = '\'' clean' '’' = '\''
clean' '\r' = ' ' clean' '\r' = ' '
clean' '\t' = ' '
clean' ';' = '.'
clean' c = c clean' c = c
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Parsers.Isidore where
import Data.Text (Text)
import Data.Either
import Gargantext.Prelude
import Data.Aeson
import Data.Proxy
import GHC.Generics
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.API
import Servant.Client
type IsidoreAPI = "sparql" :> Capture "query" Text :> Get '[JSON] [IsidoreDoc]
data IsidoreDoc =
IsidoreDoc {title :: Maybe Text}
deriving (Show, Generic)
instance FromJSON IsidoreDoc
instance ToJSON IsidoreDoc
isidoreDocsApi :: Proxy IsidoreAPI
isidoreDocsApi = Proxy
isidoreDocs :: ClientM [IsidoreDoc]
isidoreDocs = client isidoreDocsApi
getIsidoreDocs :: IO [IsidoreDoc]
getIsidoreDocs = do
manager' <- newManager tlsManagerSettings
res <- runClientM isidoreDocs $ mkClientEnv manager' $ BaseUrl Https "https://www.rechercheisidore.fr" 8080 ""
case res of
Left _ -> panic "err"
Right res' -> pure res'
...@@ -43,16 +43,16 @@ selectQueryRaw' uri q = getWith opts uri ...@@ -43,16 +43,16 @@ selectQueryRaw' uri q = getWith opts uri
& header "User-Agent" .~ ["gargantext-hsparql-client"] & header "User-Agent" .~ ["gargantext-hsparql-client"]
& param "query" .~ [Data.Text.pack q] & param "query" .~ [Data.Text.pack q]
isidoreGet :: Lang -> Text -> IO (Maybe [HyperdataDocument]) isidoreGet :: Lang -> Int -> Text -> IO (Maybe [HyperdataDocument])
isidoreGet l q = do isidoreGet la li q = do
bindingValues <- isidoreGet' q bindingValues <- isidoreGet' li q
case bindingValues of case bindingValues of
Nothing -> pure Nothing Nothing -> pure Nothing
Just dv -> pure $ Just $ map (bind2doc l) dv Just dv -> pure $ Just $ map (bind2doc la) dv
isidoreGet' :: Text -> IO (Maybe [[BindingValue]]) isidoreGet' :: Int -> Text -> IO (Maybe [[BindingValue]])
isidoreGet' q = do isidoreGet' l q = do
let s = createSelectQuery $ isidoreSelect q let s = createSelectQuery $ isidoreSelect l q
putStrLn s putStrLn s
r <- selectQueryRaw' route s r <- selectQueryRaw' route s
putStrLn $ show $ r ^. responseStatus putStrLn $ show $ r ^. responseStatus
...@@ -60,11 +60,11 @@ isidoreGet' q = do ...@@ -60,11 +60,11 @@ isidoreGet' q = do
-- res <- selectQuery route $ simpleSelect q -- res <- selectQuery route $ simpleSelect q
-- pure res -- pure res
isidoreSelect :: Text -> Query SelectQuery isidoreSelect :: Int -> Text -> Query SelectQuery
isidoreSelect q = do isidoreSelect lim q = do
-- See Predefined Namespace Prefixes: -- See Predefined Namespace Prefixes:
-- https://isidore.science/sparql?nsdecl -- https://isidore.science/sparql?nsdecl
isidore <- prefix "isidore" (iriRef "http://www.rechercheisidore.fr/class/") isidore <- prefix "isidore" (iriRef "http://isidore.science/class/")
rdf <- prefix "rdf" (iriRef "http://www.w3.org/1999/02/22-rdf-syntax-ns#") rdf <- prefix "rdf" (iriRef "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
dcterms <- prefix "dcterms" (iriRef "http://purl.org/dc/terms/") dcterms <- prefix "dcterms" (iriRef "http://purl.org/dc/terms/")
dc <- prefix "dc" (iriRef "http://purl.org/dc/elements/1.1/") dc <- prefix "dc" (iriRef "http://purl.org/dc/elements/1.1/")
...@@ -80,14 +80,13 @@ isidoreSelect q = do ...@@ -80,14 +80,13 @@ isidoreSelect q = do
source <- var source <- var
langDoc <- var langDoc <- var
publisher <- var publisher <- var
--langFr <- var
--agg <- var --agg <- var
triple_ link (rdf .:. "type") (isidore .:. "BibliographicalResource") triple_ link (rdf .:. "type") (isidore .:. "Document")
triple_ link (dcterms .:. "title") title triple_ link (dcterms .:. "title") title
triple_ link (dcterms .:. "date") date triple_ link (dcterms .:. "date") date
triple_ link (dcterms .:. "creator") authors triple_ link (dcterms .:. "creator") authors
triple_ link (dcterms .:. "language") langDoc --triple_ link (dcterms .:. "language") langDoc
triple_ link (dc .:. "description") abstract triple_ link (dc .:. "description") abstract
--triple_ link (ore .:. "isAggregatedBy") agg --triple_ link (ore .:. "isAggregatedBy") agg
--triple_ agg (dcterms .:. "title") title --triple_ agg (dcterms .:. "title") title
...@@ -96,16 +95,18 @@ isidoreSelect q = do ...@@ -96,16 +95,18 @@ isidoreSelect q = do
optional_ $ triple_ link (dcterms .:. "publisher") publisher optional_ $ triple_ link (dcterms .:. "publisher") publisher
-- TODO FIX BUG with (.||.) operator -- TODO FIX BUG with (.||.) operator
--filterExpr $ (.||.) (contains title q) (contains title q) --filterExpr_ $ (.||.) (contains title q) (contains abstract q)
filterExpr_ (containsWith title q) -- (contains abstract q) --filterExpr_ (containsWith authors q) -- (contains abstract q)
--filterExpr (containsWith abstract q) -- (contains abstract q) --filterExpr_ (containsWith title q) -- (contains abstract q)
--filterExpr_ $ (.||.) (containsWith title q) (contains abstract q)
filterExpr_ (containsWith title q)
-- TODO FIX filter with lang -- TODO FIX filter with lang
--filterExpr $ langMatches title (str ("fra" :: Text)) --filterExpr_ $ langMatches title (str ("fra" :: Text))
--filterExpr $ (.==.) lang (str ("http://lexvo.org/id/iso639-3/fra" :: Text)) --filterExpr_ $ (.==.) langDoc (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
orderNextDesc date orderNextDesc date
limit_ 10 limit_ lim
distinct_ distinct_
selectVars [link, date, langDoc, authors, source, publisher, title, abstract] selectVars [link, date, langDoc, authors, source, publisher, title, abstract]
......
{-|
Module : Gargantext.Text.Parsers.IsidoreApi
Description : To query French Humanities publication database from its API
Copyright : (c) CNRS, 2019-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.IsidoreApi where
import System.FilePath (FilePath())
import Data.Text (Text)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import Gargantext.Prelude
import Isidore.Client
import Servant.Client
import qualified Data.Text as Text
import qualified Gargantext.Text.Parsers.Date as Date
import qualified Isidore as Isidore
import Gargantext.Text.Parsers.CSV (writeDocs2Csv)
import Gargantext.Text.Parsers (cleanText)
-- | TODO work with the ServantErr
get :: Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO [HyperdataDocument]
get la l q a = do
let
printErr (DecodeFailure e _) = panic e
printErr e = panic (cs $ show e)
toIsidoreDocs :: Reply -> [IsidoreDoc]
toIsidoreDocs (ReplyOnly r) = [r]
toIsidoreDocs (Replies rs) = rs
iDocs <- either printErr _content <$> Isidore.get l q a
hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
pure hDocs
isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO ()
isidore2csvFile fp la li tq aq = do
hdocs <- get la li tq aq
writeDocs2Csv fp hdocs
isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc l (IsidoreDoc t a d u s as) = do
let
author :: Author -> Text
author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
author (Authors aus) = Text.intercalate ". " $ map author aus
creator2text :: Creator -> Text
creator2text (Creator au) = author au
creator2text (Creators aus') = Text.intercalate ". " $ map author aus'
langText :: LangText -> Text
langText (LangText _l t1) = t1
langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
(utcTime, (pub_year, pub_month, pub_day)) <- Date.split l (maybe (Just "2019") (Just) d)
pure $ HyperdataDocument (Just "IsidoreApi")
Nothing
u
Nothing
Nothing
Nothing
(Just $ cleanText $ langText t)
Nothing
(creator2text <$> as)
(_sourceName <$> s)
(cleanText <$> langText <$> a)
(fmap (Text.pack . show) utcTime)
(pub_year)
(pub_month)
(pub_day)
Nothing
Nothing
Nothing
(Just $ (Text.pack . show) l)
...@@ -29,6 +29,7 @@ compute graph ...@@ -29,6 +29,7 @@ compute graph
-} -}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
module Gargantext.Text.Terms module Gargantext.Text.Terms
...@@ -37,19 +38,31 @@ module Gargantext.Text.Terms ...@@ -37,19 +38,31 @@ module Gargantext.Text.Terms
import Control.Lens import Control.Lens
import Data.Text (Text) import Data.Text (Text)
import Data.Traversable import Data.Traversable
import GHC.Base (String)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Text.Terms.Multi (multiterms) import Gargantext.Text.Terms.Multi (multiterms)
import Gargantext.Text.Terms.Mono (monoTerms) import Gargantext.Text.Terms.Mono (monoTerms)
import Gargantext.Text.Terms.Mono.Stem (stem)
import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.Text as Text
import Gargantext.Text (sentences)
import Gargantext.Text.Terms.Mono.Token.En (tokenize)
import Gargantext.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken)
data TermType lang data TermType lang
= Mono { _tt_lang :: lang } = Mono { _tt_lang :: lang }
| Multi { _tt_lang :: lang } | Multi { _tt_lang :: lang }
| MonoMulti { _tt_lang :: lang } | MonoMulti { _tt_lang :: lang }
| Unsupervised { _tt_lang :: lang
, _tt_windoSize :: Int
, _tt_ngramsSize :: Int
, _tt_model :: Maybe (Tries Token ())
}
makeLenses ''TermType makeLenses ''TermType
--group :: [Text] -> [Text] --group :: [Text] -> [Text]
...@@ -61,7 +74,17 @@ makeLenses ''TermType ...@@ -61,7 +74,17 @@ makeLenses ''TermType
-- | Sugar to extract terms from text (hiddeng mapM from end user). -- | Sugar to extract terms from text (hiddeng mapM from end user).
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms]) --extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms :: TermType Lang -> [Text] -> IO [[Terms]] extractTerms :: TermType Lang -> [Text] -> IO [[Terms]]
extractTerms termTypeLang = mapM (terms termTypeLang)
extractTerms (Unsupervised l n s m) xs = mapM (terms (Unsupervised l n s (Just m'))) xs
where
m' = case m of
Just m''-> m''
Nothing -> newTries n (Text.intercalate " " xs)
extractTerms termTypeLang xs = mapM (terms termTypeLang) xs
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Terms from Text -- | Terms from Text
-- Mono : mono terms -- Mono : mono terms
...@@ -72,6 +95,45 @@ terms :: TermType Lang -> Text -> IO [Terms] ...@@ -72,6 +95,45 @@ terms :: TermType Lang -> Text -> IO [Terms]
terms (Mono lang) txt = pure $ monoTerms lang txt terms (Mono lang) txt = pure $ monoTerms lang txt
terms (Multi lang) txt = multiterms lang txt terms (Multi lang) txt = multiterms lang txt
terms (MonoMulti lang) txt = terms (Multi lang) txt terms (MonoMulti lang) txt = terms (Multi lang) txt
terms (Unsupervised lang n s m) txt = termsUnsupervised (Unsupervised lang n s (Just m')) txt
where
m' = maybe (newTries n txt) identity m
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt -- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
------------------------------------------------------------------------ ------------------------------------------------------------------------
text2term :: Lang -> [Text] -> Terms
text2term _ [] = Terms [] Set.empty
text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
isPunctuation :: Text -> Bool
isPunctuation x = List.elem x $ (Text.pack . pure)
<$> ("!?(),;." :: String)
-- | Unsupervised ngrams extraction
-- language agnostic extraction
-- TODO: remove IO
-- TODO: newtype BlockText
type WindowSize = Int
type MinNgramSize = Int
termsUnsupervised :: TermType Lang -> Text -> IO [Terms]
termsUnsupervised (Unsupervised l n s m) =
pure
. map (text2term l)
. List.nub
. (List.filter (\l' -> List.length l' > s))
. List.concat
. mainEleveWith (maybe (panic "no model") identity m) n
. uniText
termsUnsupervised _ = undefined
newTries :: Int -> Text -> Tries Token ()
newTries n t = buildTries n (fmap toToken $ uniText t)
uniText :: Text -> [[Text]]
uniText = map (List.filter (not . isPunctuation))
. map tokenize
. sentences -- | TODO get sentences according to lang
This diff is collapsed.
...@@ -47,6 +47,7 @@ monoTerms l txt = map (monoText2term l) $ monoTexts txt ...@@ -47,6 +47,7 @@ monoTerms l txt = map (monoText2term l) $ monoTexts txt
monoTexts :: Text -> [Text] monoTexts :: Text -> [Text]
monoTexts = L.concat . monoTextsBySentence monoTexts = L.concat . monoTextsBySentence
-- | TODO use text2term only
monoText2term :: Lang -> Text -> Terms monoText2term :: Lang -> Text -> Terms
monoText2term lang txt = Terms [txt] (S.singleton $ stem lang txt) monoText2term lang txt = Terms [txt] (S.singleton $ stem lang txt)
...@@ -54,6 +55,3 @@ monoTextsBySentence :: Text -> [[Text]] ...@@ -54,6 +55,3 @@ monoTextsBySentence :: Text -> [[Text]]
monoTextsBySentence = map T.words monoTextsBySentence = map T.words
. T.split isSep . T.split isSep
. T.toLower . T.toLower
...@@ -49,6 +49,8 @@ data Node = Node { node_size :: Int ...@@ -49,6 +49,8 @@ data Node = Node { node_size :: Int
, node_type :: TypeNode -- TODO NgramsType | Person , node_type :: TypeNode -- TODO NgramsType | Person
, node_id :: Text -- TODO NgramId , node_id :: Text -- TODO NgramId
, node_label :: Text , node_label :: Text
, node_x_coord :: Double
, node_y_coord :: Double
, node_attributes :: Attributes , node_attributes :: Attributes
} }
deriving (Show, Generic) deriving (Show, Generic)
...@@ -62,6 +64,7 @@ instance ToSchema Node where ...@@ -62,6 +64,7 @@ instance ToSchema Node where
data Edge = Edge { edge_source :: Text data Edge = Edge { edge_source :: Text
, edge_target :: Text , edge_target :: Text
, edge_weight :: Double , edge_weight :: Double
, edge_confluence :: Double
, edge_id :: Text , edge_id :: Text
} }
deriving (Show, Generic) deriving (Show, Generic)
...@@ -117,7 +120,7 @@ instance Arbitrary Graph where ...@@ -117,7 +120,7 @@ instance Arbitrary Graph where
arbitrary = elements $ [defaultGraph] arbitrary = elements $ [defaultGraph]
defaultGraph :: Graph defaultGraph :: Graph
defaultGraph = Graph {_graph_nodes = [Node {node_size = 4, node_type = Terms, node_id = pack "0", node_label = pack "animal", node_attributes = Attributes {clust_default = 0}},Node {node_size = 3, node_type = Terms, node_id = pack "1", node_label = pack "bird", node_attributes = Attributes {clust_default = 0}},Node {node_size = 2, node_type = Terms, node_id = pack "2", node_label = pack "boy", node_attributes = Attributes {clust_default = 1}},Node {node_size = 2, node_type = Terms, node_id = pack "3", node_label = pack "dog", node_attributes = Attributes {clust_default = 0}},Node {node_size = 2, node_type = Terms, node_id = pack "4", node_label = pack "girl", node_attributes = Attributes {clust_default = 1}},Node {node_size = 4, node_type = Terms, node_id = pack "5", node_label = pack "human body", node_attributes = Attributes {clust_default = 1}},Node {node_size = 3, node_type = Terms, node_id = pack "6", node_label = pack "object", node_attributes = Attributes {clust_default = 2}},Node {node_size = 2, node_type = Terms, node_id = pack "7", node_label = pack "pen", node_attributes = Attributes {clust_default = 2}},Node {node_size = 2, node_type = Terms, node_id = pack "8", node_label = pack "table", node_attributes = Attributes {clust_default = 2}}], _graph_edges = [Edge {edge_source = pack "0", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "0"},Edge {edge_source = pack "1", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "1"},Edge {edge_source = pack "1", edge_target = pack "1", edge_weight = 1.0, edge_id = pack "2"},Edge {edge_source = pack "2", edge_target = pack "2", edge_weight = 1.0, edge_id = pack "3"},Edge {edge_source = pack "2", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "4"},Edge {edge_source = pack "3", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "5"},Edge {edge_source = pack "3", edge_target = pack "1", edge_weight = 1.0, edge_id = pack "6"},Edge {edge_source = pack "3", edge_target = pack "3", edge_weight = 1.0, edge_id = pack "7"},Edge {edge_source = pack "4", edge_target = pack "4", edge_weight = 1.0, edge_id = pack "8"},Edge {edge_source = pack "4", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "9"},Edge {edge_source = pack "5", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "10"},Edge {edge_source = pack "6", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "11"},Edge {edge_source = pack "7", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "12"},Edge {edge_source = pack "7", edge_target = pack "7", edge_weight = 1.0, edge_id = pack "13"},Edge {edge_source = pack "8", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "14"},Edge {edge_source = pack "8", edge_target = pack "7", edge_weight = 1.0, edge_id = pack "15"},Edge {edge_source = pack "8", edge_target = pack "8", edge_weight = 1.0, edge_id = pack "16"}], _graph_metadata = Nothing} defaultGraph = Graph {_graph_nodes = [Node {node_x_coord=0, node_y_coord=0, node_size = 4, node_type = Terms, node_id = pack "0", node_label = pack "animal", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 3, node_type = Terms, node_id = pack "1", node_label = pack "bird", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "2", node_label = pack "boy", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "3", node_label = pack "dog", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "4", node_label = pack "girl", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 4, node_type = Terms, node_id = pack "5", node_label = pack "human body", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 3, node_type = Terms, node_id = pack "6", node_label = pack "object", node_attributes = Attributes {clust_default = 2}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "7", node_label = pack "pen", node_attributes = Attributes {clust_default = 2}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "8", node_label = pack "table", node_attributes = Attributes {clust_default = 2}}], _graph_edges = [Edge {edge_source = pack "0", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "0"},Edge {edge_source = pack "1", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "1"},Edge {edge_source = pack "1", edge_target = pack "1", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "2"},Edge {edge_source = pack "2", edge_target = pack "2", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "3"},Edge {edge_source = pack "2", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "4"},Edge {edge_source = pack "3", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "5"},Edge {edge_source = pack "3", edge_target = pack "1", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "6"},Edge {edge_source = pack "3", edge_target = pack "3", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "7"},Edge {edge_source = pack "4", edge_target = pack "4", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "8"},Edge {edge_source = pack "4", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "9"},Edge {edge_source = pack "5", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "10"},Edge {edge_source = pack "6", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "11"},Edge {edge_source = pack "7", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "12"},Edge {edge_source = pack "7", edge_target = pack "7", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "13"},Edge {edge_source = pack "8", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "14"},Edge {edge_source = pack "8", edge_target = pack "7", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "15"},Edge {edge_source = pack "8", edge_target = pack "8", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "16"}], _graph_metadata = Nothing}
----------------------------------------------------------- -----------------------------------------------------------
...@@ -156,10 +159,10 @@ graphV3ToGraph (GraphV3 links nodes) = Graph (map nodeV32node nodes) (zipWith li ...@@ -156,10 +159,10 @@ graphV3ToGraph (GraphV3 links nodes) = Graph (map nodeV32node nodes) (zipWith li
where where
nodeV32node :: NodeV3 -> Node nodeV32node :: NodeV3 -> Node
nodeV32node (NodeV3 no_id' (AttributesV3 cl') no_s' no_lb') nodeV32node (NodeV3 no_id' (AttributesV3 cl') no_s' no_lb')
= Node no_s' Terms (cs $ show no_id') no_lb' (Attributes cl') = Node no_s' Terms (cs $ show no_id') no_lb' 0 0 (Attributes cl')
linkV32edge :: Int -> EdgeV3 -> Edge linkV32edge :: Int -> EdgeV3 -> Edge
linkV32edge n (EdgeV3 eo_s' eo_t' eo_w') = Edge (cs $ show eo_s') (cs $ show eo_t') ((T.read $ T.unpack eo_w') :: Double) (cs $ show n) linkV32edge n (EdgeV3 eo_s' eo_t' eo_w') = Edge (cs $ show eo_s') (cs $ show eo_t') ((T.read $ T.unpack eo_w') :: Double) 0.5 (cs $ show n)
graphV3ToGraphWithFiles :: FilePath -> FilePath -> IO () graphV3ToGraphWithFiles :: FilePath -> FilePath -> IO ()
......
...@@ -22,8 +22,7 @@ Portability : POSIX ...@@ -22,8 +22,7 @@ Portability : POSIX
module Gargantext.Viz.Graph.API module Gargantext.Viz.Graph.API
where where
import Data.List (sortOn) import Control.Lens (set)
import Control.Lens (set, view)
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Gargantext.API.Ngrams.Tools import Gargantext.API.Ngrams.Tools
import Gargantext.API.Types import Gargantext.API.Types
...@@ -69,18 +68,15 @@ getGraph nId = do ...@@ -69,18 +68,15 @@ getGraph nId = do
let cId = maybe (panic "no parentId") identity $ _node_parentId nodeGraph let cId = maybe (panic "no parentId") identity $ _node_parentId nodeGraph
lIds <- selectNodesWithUsername NodeList userMaster lIds <- selectNodesWithUsername NodeList userMaster
lId <- defaultList cId lId <- defaultList cId
ngs <- filterListWithRoot GraphTerm <$> mapTermListRoot [lId] NgramsTerms ngs <- filterListWithRoot GraphTerm <$> mapTermListRoot [lId] NgramsTerms
myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal False) myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal False)
<$> groupNodesByNgrams ngs <$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) NgramsTerms (Map.keys ngs) <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) NgramsTerms (Map.keys ngs)
graph <- liftIO $ cooc2graph myCooc graph <- liftIO $ cooc2graph 1 myCooc
pure $ set graph_metadata (Just metadata) pure $ set graph_metadata (Just metadata) graph
$ set graph_nodes ( sortOn node_id
$ view graph_nodes graph
) graph
postGraph :: NodeId -> GargServer (Post '[JSON] [NodeId]) postGraph :: NodeId -> GargServer (Post '[JSON] [NodeId])
......
...@@ -10,6 +10,7 @@ Portability : POSIX ...@@ -10,6 +10,7 @@ Portability : POSIX
Let be a graph with partitions (from Louvain algo), Bridgeness uniformly Let be a graph with partitions (from Louvain algo), Bridgeness uniformly
filters inter-communities links. filters inter-communities links.
TODO rewrite Bridgeness with "equivalence structurale" metrics
-} -}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
......
{-| Module : Gargantext.Viz.Graph.FGL
Description : FGL main functions used in Garg
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main FGL funs/types to ease portability with IGraph.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ConstraintKinds #-}
module Gargantext.Viz.Graph.FGL where
import Gargantext.Prelude
import qualified Data.Graph.Inductive as FGL
import Data.List as List
------------------------------------------------------------------
-- | Main Types
type Graph_Undirected = FGL.Gr () ()
type Graph_Directed = FGL.Gr () ()
type Graph = FGL.Graph
type Node = FGL.Node
type Edge = FGL.Edge
------------------------------------------------------------------
-- | Main Functions
mkGraph :: [Node] -> [Edge] -> Graph_Undirected
mkGraph = FGL.mkUGraph
neighbors :: Graph gr => gr a b -> Node -> [Node]
neighbors = FGL.neighbors
-- | TODO bug: if graph is undirected, we need to filter
-- nub . (map (\(n1,n2) -> if n1 < n2 then (n1,n2) else (n2,n1))) . FGL.edges
edges :: Graph gr => gr a b -> [Edge]
edges = FGL.edges
nodes :: Graph gr => gr a b -> [Node]
nodes = FGL.nodes
------------------------------------------------------------------
-- | Main sugared functions
mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected
mkGraphUfromEdges es = mkGraph ns es
where
ns = List.nub (a <> b)
where
(a, b) = List.unzip es
{-| Module : Gargantext.Viz.Graph.IGraph
Description : IGraph main functions used in Garg
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main IGraph funs/types to ease portability with FGL.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE DataKinds #-}
module Gargantext.Viz.Graph.IGraph where
import Data.Serialize (Serialize)
import Data.Singletons (SingI)
import Gargantext.Prelude
import IGraph hiding (mkGraph, neighbors, edges, nodes, Node, Graph)
import qualified IGraph as IG
import qualified Data.List as List
------------------------------------------------------------------
-- | Main Types
type Graph_Undirected = IG.Graph 'U () ()
type Graph_Directed = IG.Graph 'D () ()
type Node = IG.Node
type Graph = IG.Graph
------------------------------------------------------------------
-- | Main Functions
mkGraph :: (SingI d, Ord v,
Serialize v, Serialize e) =>
[v] -> [LEdge e] -> IG.Graph d v e
mkGraph = IG.mkGraph
neighbors :: IG.Graph d v e -> IG.Node -> [Node]
neighbors = IG.neighbors
edges :: IG.Graph d v e -> [Edge]
edges = IG.edges
nodes :: IG.Graph d v e -> [Node]
nodes = IG.nodes
------------------------------------------------------------------
-- | Main sugared functions
mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected
mkGraphUfromEdges es = mkGraph (List.replicate n ()) $ zip es $ repeat ()
where
(a,b) = List.unzip es
n = List.length (List.nub $ a <> b)
mkGraphDfromEdges :: [(Int, Int)] -> Graph_Directed
mkGraphDfromEdges = undefined
This diff is collapsed.
...@@ -7,7 +7,6 @@ Maintainer : team@gargantext.org ...@@ -7,7 +7,6 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
-} -}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
...@@ -16,57 +15,136 @@ Portability : POSIX ...@@ -16,57 +15,136 @@ Portability : POSIX
module Gargantext.Viz.Graph.Tools module Gargantext.Viz.Graph.Tools
where where
--import Debug.Trace (trace)
import Data.Graph.Clustering.Louvain.CplusPlus (LouvainNode(..)) import Data.Graph.Clustering.Louvain.CplusPlus (LouvainNode(..))
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain) import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
import Data.Map (Map) import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Graph (Graph(..)) import Gargantext.Core.Statistics
import Gargantext.Viz.Graph -- (Graph(..)) import Gargantext.Viz.Graph
import Gargantext.Viz.Graph.Bridgeness (bridgeness) import Gargantext.Viz.Graph.Bridgeness (bridgeness)
import Gargantext.Viz.Graph.Distances.Matrice (measureConditional) import Gargantext.Viz.Graph.Distances.Matrice (measureConditional)
import Gargantext.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map) import Gargantext.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map)
import qualified Data.Map as Map import Gargantext.Viz.Graph.IGraph (mkGraphUfromEdges)
import Gargantext.Viz.Graph.Proxemy (confluence)
import GHC.Float (sin, cos)
import qualified IGraph as Igraph
import qualified IGraph.Algorithms.Layout as Layout
import qualified Data.Vector.Storable as Vec
import qualified Data.Map as Map
import qualified Data.List as List
type Threshold = Int
cooc2graph :: (Map (Text, Text) Int) -> IO Graph cooc2graph :: Threshold -> (Map (Text, Text) Int) -> IO Graph
cooc2graph myCooc = do cooc2graph threshold myCooc = do
let (ti, _) = createIndices myCooc let (ti, _) = createIndices myCooc
myCooc4 = toIndex ti myCooc myCooc' = toIndex ti myCooc
matCooc = map2mat (0) (Map.size ti) myCooc4 matCooc = map2mat (0) (Map.size ti) $ Map.filter (>threshold) myCooc'
distanceMat = measureConditional matCooc distanceMat = measureConditional matCooc
distanceMap = Map.map (\_ -> 1) $ Map.filter (>0) $ mat2map distanceMat distanceMap = Map.filter (>0.01) $ mat2map distanceMat
partitions <- case Map.size distanceMap > 0 of partitions <- case Map.size distanceMap > 0 of
True -> cLouvain distanceMap True -> cLouvain distanceMap
False -> panic "Text.Flow: DistanceMap is empty" False -> panic "Text.Flow: DistanceMap is empty"
let distanceMap' = bridgeness 300 partitions distanceMap let bridgeness' = bridgeness 300 partitions distanceMap
let confluence' = confluence (Map.keys bridgeness') 3 True False
pure $ data2graph (Map.toList ti) myCooc4 distanceMap' partitions data2graph (Map.toList ti) myCooc' bridgeness' confluence' partitions
---------------------------------------------------------- ----------------------------------------------------------
-- | From data to Graph -- | From data to Graph
-- FIXME: distance should not be a map since we just "toList" it (same as cLouvain)
data2graph :: [(Text, Int)] -> Map (Int, Int) Int data2graph :: [(Text, Int)] -> Map (Int, Int) Int
-> Map (Int, Int) Double
-> Map (Int, Int) Double -> Map (Int, Int) Double
-> [LouvainNode] -> [LouvainNode]
-> Graph -> IO Graph
data2graph labels coocs distance partitions = Graph nodes edges Nothing data2graph labels coocs bridge conf partitions = do
where
community_id_by_node_id = Map.fromList [ (n, c) | LouvainNode n c <- partitions ] let community_id_by_node_id = Map.fromList [ (n, c) | LouvainNode n c <- partitions ]
nodes = [ Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
nodes <- mapM (setCoord ForceAtlas labels bridge)
[ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
, node_type = Terms -- or Unknown , node_type = Terms -- or Unknown
, node_id = cs (show n) , node_id = cs (show n)
, node_label = l , node_label = l
, node_x_coord = 0
, node_y_coord = 0
, node_attributes = , node_attributes =
Attributes { clust_default = maybe 0 identity Attributes { clust_default = maybe 0 identity
(Map.lookup n community_id_by_node_id) } } (Map.lookup n community_id_by_node_id) } }
| (l, n) <- labels ] )
edges = [ Edge { edge_source = cs (show s) | (l, n) <- labels
]
let edges = [ Edge { edge_source = cs (show s)
, edge_target = cs (show t) , edge_target = cs (show t)
, edge_weight = w , edge_weight = d
, edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
, edge_id = cs (show i) } , edge_id = cs (show i) }
| (i, ((s,t), w)) <- zip ([0..]::[Integer]) (Map.toList distance) ] | (i, ((s,t), d)) <- zip ([0..]::[Integer]) (Map.toList bridge) ]
pure $ Graph nodes edges Nothing
------------------------------------------------------------------------
data Layout = KamadaKawai | ACP | ForceAtlas
setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
where
(x,y) = f i
-- | ACP
setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> IO Node
setCoord l labels m (n,node) = getCoord l labels m n
>>= \(x,y) -> pure $ node { node_x_coord = x
, node_y_coord = y
}
getCoord :: Ord a => Layout
-> [(a, Int)] -> Map (Int, Int) Double -> Int -> IO (Double, Double)
getCoord KamadaKawai _ m n = layout m n
getCoord ForceAtlas _ _ n = pure (sin d, cos d)
where
d = fromIntegral n
getCoord ACP labels m n = pure $ to2d $ maybe (panic "Graph.Tools no coordinate") identity
$ Map.lookup n
$ pcaReduceTo (Dimension 2)
$ mapArray labels m
where
to2d :: Vec.Vector Double -> (Double, Double)
to2d v = (x',y')
where
ds = take 2 $ Vec.toList v
x' = head' "to2d" ds
y' = last' "to2d" ds
mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
where
ns = map snd items
toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
------------------------------------------------------------------------
-- | KamadaKawai Layout
-- TODO TEST: check labels, nodeId and coordinates
layout :: Map (Int, Int) Double -> Int -> IO (Double, Double)
layout m n = maybe (panic "") identity <$> Map.lookup n <$> coord
where
coord :: IO (Map Int (Double,Double))
coord = Map.fromList <$> List.zip (Igraph.nodes g) <$> (Layout.getLayout g p)
--p = Layout.defaultLGL
p = Layout.defaultKamadaKawai
g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
...@@ -4,6 +4,10 @@ extra-package-dbs: [] ...@@ -4,6 +4,10 @@ extra-package-dbs: []
packages: packages:
- . - .
docker:
enable: false
repo: 'cgenie/stack-build:lts-12.26'
allow-newer: true allow-newer: true
extra-deps: extra-deps:
- git: https://github.com/delanoe/data-time-segment.git - git: https://github.com/delanoe/data-time-segment.git
...@@ -18,8 +22,10 @@ extra-deps: ...@@ -18,8 +22,10 @@ extra-deps:
commit: 3fe28b683aba5ddf05e3b5f8eced0bd05c5a29f9 commit: 3fe28b683aba5ddf05e3b5f8eced0bd05c5a29f9
- git: https://github.com/robstewart57/rdf4h.git - git: https://github.com/robstewart57/rdf4h.git
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4 commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed - git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: dcaa0f5dd53f20648f4f5a615d29163582a4219c commit: dcaa0f5dd53f20648f4f5a615d29163582a4219c
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: bf57642f6b66f554fdc0a38ac391cd8200dffcb3
- git: https://gitlab.iscpif.fr/gargantext/patches-class - git: https://gitlab.iscpif.fr/gargantext/patches-class
commit: 746b4ce0af8f9e600d555ad7e5b2973a940cdad9 commit: 746b4ce0af8f9e600d555ad7e5b2973a940cdad9
- git: https://github.com/np/servant-job.git - git: https://github.com/np/servant-job.git
...@@ -32,6 +38,8 @@ extra-deps: ...@@ -32,6 +38,8 @@ extra-deps:
commit: 53385de076be09f728a1b58c035a18e9ff9bcfd6 commit: 53385de076be09f728a1b58c035a18e9ff9bcfd6
- git: https://github.com/delanoe/hsparql.git - git: https://github.com/delanoe/hsparql.git
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: e39454101b53916e3082085ebfe922df695fc775
- KMP-0.1.0.2 - KMP-0.1.0.2
- accelerate-1.2.0.0 - accelerate-1.2.0.0
- aeson-lens-0.5.0.0 - aeson-lens-0.5.0.0
...@@ -39,6 +47,7 @@ extra-deps: ...@@ -39,6 +47,7 @@ extra-deps:
- duckling-0.1.3.0 - duckling-0.1.3.0
- full-text-search-0.2.1.4 - full-text-search-0.2.1.4
- fullstop-0.1.4 - fullstop-0.1.4
- haskell-igraph-0.7.1
- hgal-2.0.0.2 - hgal-2.0.0.2
- located-base-0.1.1.1 - located-base-0.1.1.1
- multiset-0.3.4.1 # stack test - multiset-0.3.4.1 # stack test
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment