Commit 36c913d9 authored by qlobbe

Merge branch 'dev' into dev-phylo

parents 62f57e5a c74fd659
......@@ -33,3 +33,21 @@ curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devo
stack --docker exec gargantext-cli -- CorpusFromGarg.csv ListFromGarg.csv Output.json
```sql
INSERT INTO auth_user (password, is_superuser, username, first_name, last_name, email, is_staff, is_active)
VALUES ('1resu', true, 'user1', 'user', '1', 'a@localhost', true, true);
-- nodetype NodeUser has id 1
-- inserted user_id = 3
INSERT INTO nodes (typename, user_id, name)
VALUES (1, 3, 'user1');
-- same for master user -- 'gargantua'
INSERT INTO auth_user (password, is_superuser, username, first_name, last_name, email, is_staff, is_active)
VALUES ('autnagrag', true, 'gargantua', 'gargantua', '1', 'g@localhost', true, true);
-- nodetype NodeUser has id 1
-- inserted user_id = 5
INSERT INTO nodes (typename, user_id, name)
VALUES (1, 5, 'gargantua');
```
......@@ -46,7 +46,7 @@ main = do
createUsers = insertUsersDemo
let cmd :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
cmd = flowCorpusFile (cs user) (cs name) (read limit :: Int) (Multi EN) CsvHalFormat corpusPath
cmd = flowCorpusFile (cs user) (cs name) (read limit :: Int) (Unsupervised EN 5 1 Nothing) CsvHalFormat corpusPath
{-
let debatCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
debatCorpus = do
......
......@@ -10,7 +10,7 @@ else
fi
sudo apt update
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-9.6 nginx
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-9.6 nginx libigraph-dev
#echo "Which user?"
#read USER
......
......@@ -9,5 +9,6 @@ else
fi
sudo apt update
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-9.6 nginx libpq-dev
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql nginx libpq-dev
sudo apt install postgresql-server-dev-9.6
if [ "$#" -lt 3 ]; then
echo "Usage: $0 <name> <path> <limit>"
exit 1
fi
name="$1"
path="$2"
limit="$3"
stack --docker exec gargantext-import -- true "user1" "$name" gargantext.ini "$limit" "$path"
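# Example invocation (editor sketch; hypothetical script name and paths):
#   ./import.sh MyCorpus ./data/corpus.csv 10000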
set -eu
docker stop dbgarg || :
docker rm --volumes dbgarg || :
export PGPASSWORD="$(grep DB_PASS gargantext.ini | \awk '{print $3}')"
docker run --name dbgarg -e POSTGRES_USER=gargantua -e POSTGRES_DB=gargandbV5 -e POSTGRES_PASSWORD=${PGPASSWORD} -d postgres
sleep 3
docker run -e PGPASSWORD -i --rm --link dbgarg:postgres postgres psql -h postgres -U gargantua -d gargandbV5 < src/Gargantext/Database/Schema/schema.sql
#docker run -e PGPASSWORD -i --rm --link dbgarg:postgres postgres psql -h postgres -U gargantua -d gargandbV5 < gargantext.dump
#docker run -e PGPASSWORD -i --rm --link dbgarg:postgres postgres dropdb -h postgres -U gargantua gargandbV5
stack --docker exec gargantext-server -- --run Prod --ini gargantext.ini
name: gargantext
version: '4.0.0.5'
version: '4.0.0.6'
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -47,6 +47,7 @@ library:
- Gargantext.Prelude
- Gargantext.Text
- Gargantext.Text.Context
- Gargantext.Text.Crawlers
- Gargantext.Text.Examples
- Gargantext.Text.List.CSV
- Gargantext.Text.Metrics
......@@ -102,6 +103,8 @@ library:
- containers
- contravariant
- crawlerPubMed
- crawlerIsidore
- crawlerHAL
- data-time-segment
- deepseq
- directory
......@@ -110,10 +113,12 @@ library:
- filepath
- fullstop
- fclabels
- fgl
- fast-logger
- filelock
- full-text-search
- graphviz
- haskell-igraph
- http-client
- http-client-tls
- http-conduit
......@@ -153,6 +158,9 @@ library:
- protolude
- pureMD5
- SHA
- simple-reflect
- cereal # (IGraph)
- singletons # (IGraph)
- random
- rake
- regex-compat
......
......@@ -75,7 +75,7 @@ import Gargantext.API.Ngrams (HasRepo(..), HasRepoSaver(..), saveRepo, TableNgra
import Gargantext.API.Node
import Gargantext.API.Search ( SearchAPI, search, SearchQuery)
import Gargantext.API.Types
import Gargantext.API.Upload
import qualified Gargantext.API.Corpus.New as New
import Gargantext.Core.Types (HasInvalidError(..))
import Gargantext.Database.Facet
import Gargantext.Database.Schema.Node (HasNodeError(..), NodeError)
......@@ -262,7 +262,7 @@ type GargAPI' =
:<|> "count" :> Summary "Count endpoint"
:> ReqBody '[JSON] Query :> CountAPI
-- Corpus endpoint
-- Corpus endpoint --> TODO rename s/search/filter/g
:<|> "search":> Summary "Search endpoint"
:> ReqBody '[JSON] SearchQuery
:> QueryParam "offset" Int
......@@ -279,7 +279,7 @@ type GargAPI' =
:<|> "tree" :> Summary "Tree endpoint"
:> Capture "id" NodeId :> TreeAPI
:<|> "upload" :> ApiUpload
:<|> "new" :> New.Api
-- :<|> "scraper" :> WithCallbacks ScraperAPI
......@@ -323,7 +323,7 @@ serverGargAPI -- orchestrator
:<|> search
:<|> graphAPI -- TODO: mock
:<|> treeAPI
:<|> upload
:<|> New.api
-- :<|> orchestrator
where
fakeUserId = 1 -- TODO
......@@ -421,3 +421,7 @@ startGargantextMock port = do
application <- makeMockApp . MockEnv $ FireWall False
run port application
-}
{-|
Module : Gargantext.API.Corpus.New
Description : New corpus API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
New corpus means either:
- new corpus
- new data in existing corpus
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.API.Corpus.New
where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Prelude
import Gargantext.Prelude.Utils (hash)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.Database.Flow (FlowCmdM)
data Query = Query { query_query :: Text
, query_corpus_id :: Int
, query_files_id :: [Text]
}
deriving (Eq, Show, Generic)
deriveJSON (unPrefix "query_") ''Query
instance Arbitrary Query where
arbitrary = elements [ Query q n fs
| q <- ["a","b"]
, n <- [0..10]
, fs <- map (map hash) [["a","b"], ["c","d"]]
]
instance ToSchema Query where
declareNamedSchema =
genericDeclareNamedSchema
defaultSchemaOptions {fieldLabelModifier = \fieldLabel -> drop 6 fieldLabel}
type Api = Summary "New Corpus endpoint"
:> ReqBody '[JSON] Query
:> Post '[JSON] CorpusId
api :: FlowCmdM env err m => Query -> m CorpusId
api (Query q _ _) = do
cId <- flowCorpusSearchInDatabase "user1" EN q
pure cId
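-- A request sketch for this endpoint (editor note, hypothetical values):
-- the unPrefix "query_" derivation above strips the prefix, so the JSON
-- field names are "query", "corpus_id" and "files_id", and a body like
--   {"query": "bees", "corpus_id": 3, "files_id": []}
-- decodes to: Query "bees" 3 []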
......@@ -69,6 +69,7 @@ data QueryBool = QueryBool Text
queries :: [QueryBool]
queries = [QueryBool (pack "(X OR X') AND (Y OR Y') NOT (Z OR Z')")]
--queries = [QueryBool (pack "(X + X') * (Y + Y') - (Z + Z')")]
instance Arbitrary QueryBool where
arbitrary = elements queries
......
......@@ -95,6 +95,7 @@ data TODO = TODO
deriving (Generic)
instance ToSchema TODO where
instance ToParamSchema TODO where
------------------------------------------------------------------------
--data FacetFormat = Table | Chart
......@@ -200,12 +201,12 @@ mkNgramsElement ngrams list rp children =
-- TODO review
size = 1 + count " " ngrams
newNgramsElement :: NgramsTerm -> NgramsElement
newNgramsElement ngrams = mkNgramsElement ngrams GraphTerm Nothing mempty
newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
newNgramsElement mayList ngrams = mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
instance ToSchema NgramsElement
instance Arbitrary NgramsElement where
arbitrary = elements [newNgramsElement "sport"]
arbitrary = elements [newNgramsElement Nothing "sport"]
ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
ngramsElementToRepo
......@@ -793,9 +794,9 @@ putListNgrams listId ngramsType nes = do
where
m = Map.fromList $ (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) <$> nes
tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> [NgramsTerm] -> m ()
tableNgramsPost tabType listId =
putListNgrams listId (ngramsTypeFromTabType tabType) . fmap newNgramsElement
tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> Maybe ListType -> [NgramsTerm] -> m ()
tableNgramsPost tabType listId mayList =
putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
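-- Usage sketch (editor note): passing Nothing keeps the GraphTerm default
-- from newNgramsElement, so existing callers behave as before; a Just value
-- forces every posted term into that list type, e.g. (hypothetical ids):
--   tableNgramsPost tabType listId Nothing ["sport"]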
-- Apply the given patch to the DB and returns the patch to be applied on the
-- client.
......@@ -1008,6 +1009,7 @@ type TableNgramsApiPut = Summary " Table Ngrams API Change"
type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
:> QueryParamR "ngramsType" TabType
:> QueryParamR "list" ListId
:> QueryParam "listType" ListType
:> ReqBody '[JSON] [NgramsTerm]
:> Post '[JSON] ()
......
......@@ -24,45 +24,54 @@ Node API
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node
where
import Control.Lens (prism')
import Control.Monad ((>>))
import Control.Lens (prism', (.~), (?~))
import Control.Monad ((>>), forM)
import Control.Monad.IO.Class (liftIO)
import Data.Aeson (FromJSON, ToJSON)
import Data.Maybe
import Data.Monoid (mempty)
import Data.Swagger
import Data.Text (Text())
import Data.Time (UTCTime)
import GHC.Generics (Generic)
import Gargantext.API.Metrics
import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, apiNgramsTableCorpus, QueryParamR)
import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, apiNgramsTableCorpus, QueryParamR, TODO)
import Gargantext.API.Ngrams.NTree (MyTree)
import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery)
import Gargantext.API.Types
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Types.Main (Tree, NodeTree, ListType)
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),runViewAuthorsDoc)
import Gargantext.Database.Node.Children (getChildren)
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..))
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, getNode', deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..))
import Gargantext.Database.Schema.NodeNode (nodesToFavorite, nodesToTrash)
import Gargantext.Database.Tree (treeDB, HasTreeError(..), TreeError(..))
import Gargantext.Database.Types.Node
import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude
import Gargantext.Prelude.Utils (hash)
import Gargantext.Text.Metrics (Scored(..))
import Gargantext.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Gargantext.Viz.Chart
import Gargantext.API.Ngrams.NTree (MyTree)
import Gargantext.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Servant
import Servant.Multipart
import Servant.Swagger (HasSwagger(toSwagger))
import Servant.Swagger.Internal
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Map as Map
......@@ -133,13 +142,14 @@ type NodeAPI a = Get '[JSON] (Node a)
:> QueryParam "limit" Int
:> QueryParam "order" OrderBy
:> SearchAPI
-- VIZ
:<|> "metrics" :> MetricsAPI
:<|> "chart" :> ChartApi
:<|> "pie" :> PieApi
:<|> "tree" :> TreeApi
:<|> "phylo" :> PhyloAPI
:<|> "upload" :> UploadAPI
-- TODO-ACCESS: check userId CanRenameNode nodeId
-- TODO-EVENTS: NodeRenamed RenameNode or re-use some more general NodeEdited...
......@@ -164,7 +174,7 @@ nodeAPI p uId id
:<|> rename id
:<|> postNode uId id
:<|> putNode id
:<|> deleteNode id
:<|> deleteNodeApi id
:<|> getChildren id p
-- TODO gather it
......@@ -182,10 +192,18 @@ nodeAPI p uId id
:<|> getPie id
:<|> getTree id
:<|> phyloAPI id
:<|> postUpload id
where
deleteNodeApi id' = do
node <- getNode' id'
if _node_typename node == nodeTypeId NodeUser
then panic "not allowed" -- TODO add proper Right Management Type
else deleteNode id'
-- Annuaire
-- :<|> query
------------------------------------------------------------------------
data RenameNode = RenameNode { r_name :: Text }
deriving (Generic)
......@@ -374,7 +392,67 @@ getMetrics cId maybeListId tabType maybeLimit = do
log' n x = 1 + (if x <= 0 then 0 else (log $ (10^(n::Int)) * x))
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
errorMsg = "API.Node.metrics: key absent"
pure $ Metrics metrics
-------------------------------------------------------------
type Hash = Text
data FileType = CSV | PresseRIS
deriving (Eq, Show, Generic)
instance ToSchema FileType
instance Arbitrary FileType
where
arbitrary = elements [CSV, PresseRIS]
instance ToParamSchema FileType
instance ToParamSchema (MultipartData Mem) where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance FromHttpApiData FileType
where
parseUrlPiece "CSV" = pure CSV
parseUrlPiece "PresseRis" = pure PresseRIS
parseUrlPiece _ = pure CSV -- TODO error here
instance (ToParamSchema a, HasSwagger sub) =>
HasSwagger (MultipartForm tag a :> sub) where
-- TODO
toSwagger _ = toSwagger (Proxy :: Proxy sub)
& addParam param
where
param = mempty
& required ?~ True
& schema .~ ParamOther sch
sch = mempty
& in_ .~ ParamFormData
& paramSchema .~ toParamSchema (Proxy :: Proxy a)
type UploadAPI = Summary "Upload file(s) to a corpus"
:> MultipartForm Mem (MultipartData Mem)
:> QueryParam "fileType" FileType
:> Post '[JSON] [Hash]
--postUpload :: NodeId -> Maybe FileType -> GargServer UploadAPI
--postUpload :: NodeId -> GargServer UploadAPI
postUpload :: NodeId -> MultipartData Mem -> Maybe FileType -> Cmd err [Hash]
postUpload _ _ Nothing = panic "fileType is a required parameter"
postUpload _ multipartData (Just fileType) = do
putStrLn $ "File Type: " <> (show fileType)
is <- liftIO $ do
putStrLn ("Inputs:" :: Text)
forM (inputs multipartData) $ \input -> do
putStrLn $ ("iName " :: Text) <> (iName input)
<> ("iValue " :: Text) <> (iValue input)
pure $ iName input
_ <- forM (files multipartData) $ \file -> do
let content = fdPayload file
putStrLn $ ("XXX " :: Text) <> (fdFileName file)
putStrLn $ ("YYY " :: Text) <> cs content
--pure $ cs content
-- is <- inputs multipartData
pure $ map (hash . cs) is
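-- NOTE (editor sketch): as written this handler logs each input and file,
-- then returns the hash of every input *name*, not of the file payloads;
-- hashing the contents is still commented out above.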
......@@ -72,7 +72,8 @@ instance Arbitrary SearchInQuery where
-----------------------------------------------------------------------
data SearchResults = SearchResults { srs_results :: [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]}
data SearchResults = SearchResults' { srs_resultsP :: [FacetDoc]}
| SearchResults { srs_results :: [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]}
deriving (Generic)
$(deriveJSON (unPrefix "srs_") ''SearchResults)
......@@ -96,6 +97,7 @@ search (SearchQuery q pId) o l order =
searchIn :: NodeId -> SearchInQuery -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err SearchResults
searchIn nId (SearchInQuery q ) o l order =
SearchResults <$> searchInCorpusWithContacts nId q o l order
SearchResults' <$> searchInCorpus nId q o l order
--SearchResults <$> searchInCorpusWithContacts nId q o l order
{-|
Module : Gargantext.API.Upload
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Upload
where
import qualified Data.Text as Text
import GHC.Generics (Generic)
import Gargantext.Prelude
import Data.Text (Text)
import Data.Aeson
import Servant
import Servant.Multipart
--import Servant.Mock (HasMock(mock))
import Servant.Swagger (HasSwagger(toSwagger))
-- import qualified Data.ByteString.Lazy as LBS
import Control.Monad
import Control.Monad.IO.Class
import Gargantext.API.Types
--import Servant.CSV.Cassava (CSV'(..))
--import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
--import Data.Swagger
--import Gargantext.API.Ngrams (TODO)
-- | Upload files
-- TODO Is it possible to adapt the function according to iValue input ?
--type API = MultipartForm Mem (MultipartData Mem) :> Post '[JSON] Integer
-- instance Generic Mem
--instance ToSchema Mem
--instance Arbitrary Mem
--instance ToSchema (MultipartData Mem)
--instance Arbitrary ( MultipartData Mem)
instance HasSwagger (MultipartForm tag a :> sub) where
-- TODO
toSwagger _ = undefined -- toSwagger (Proxy :: Proxy (TODO :> Post '[JSON] ()))
--declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
--instance Arbitrary (MultipartForm Mem (MultipartData Mem))
{-
instance (FromMultipart tag a, MultipartBackend tag, Servant.Multipart.LookupContext context (MultipartOptions tag))
=> HasMock (MultipartForm tag a :> sub) context where
mock _ _ = undefined
instance HasMock (MultipartForm Mem (MultipartData Mem) :> sub) context where
mock _ _ = undefined
-}
data Upload = Upload { up :: [Text] }
deriving (Generic)
instance ToJSON Upload
type ApiUpload = MultipartForm Mem (MultipartData Mem) :> Post '[JSON] Text
-- MultipartData consists of textual inputs,
-- accessible through its "inputs" field, as well
-- as files, accessible through its "files" field.
upload :: GargServer ApiUpload
upload multipartData = do
--{-
is <- liftIO $ do
putStrLn ("Inputs:" :: Text)
forM (inputs multipartData) $ \input -> do
putStrLn $ ("iName " :: Text) <> (iName input)
<> ("iValue " :: Text) <> (iValue input)
pure $ iName input
--{-
_ <- forM (files multipartData) $ \file -> do
let content = fdPayload file
putStrLn $ ("XXX " :: Text) <> (fdFileName file)
putStrLn $ ("YYY " :: Text) <> cs content
--pure $ cs content
-- is <- inputs multipartData
--}
pure $ Text.concat $ map cs is
-------------------------------------------------------------------------------
{-|
Module : Gargantext.Core.Statistics
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Core.Statistics
where
import Data.Map (Map)
import Gargantext.Prelude
import Numeric.Statistics.PCA (pcaReduceN)
import Data.Array.IArray (Array, listArray, elems)
import qualified Data.Vector.Storable as Vec
import qualified Data.List as List
import qualified Data.Map as Map
data Dimension = Dimension Int
pcaReduceTo :: Ord t
=> Dimension
-> Map t (Vec.Vector Double)
-> Map t (Vec.Vector Double)
pcaReduceTo (Dimension d) m = Map.fromList
$ zip txts
$ elems
$ pcaReduceN m'' d
where
m'' :: Array Int (Vec.Vector Double)
m'' = listArray (1, List.length m') m'
(txts,m') = List.unzip $ Map.toList m
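-- Usage sketch (editor note, hypothetical data): project three 3-dimensional
-- vectors onto their first two principal components:
-- > pcaReduceTo (Dimension 2) $ Map.fromList
-- >   [ ("a", Vec.fromList [1,2,3])
-- >   , ("b", Vec.fromList [2,4,6])
-- >   , ("c", Vec.fromList [0,1,1]) ]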
......@@ -366,6 +366,11 @@ getNode nId _ = do
fromMaybe (error $ "Node does node exist: " <> show nId) . headMay
<$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))
getNode' :: NodeId -> Cmd err (Node Value)
getNode' nId = fromMaybe (error $ "Node does not exist: " <> show nId) . headMay
<$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))
getNodesWithType :: Column PGInt4 -> Cmd err [Node HyperdataDocument]
getNodesWithType = runOpaQuery . selectNodesWithType
......@@ -578,8 +583,6 @@ instance MkCorpus HyperdataAnnuaire
mk n h p u = insertNodesR [nodeAnnuaireW n h p u]
getOrMkList :: HasNodeError err => ParentId -> UserId -> Cmd err ListId
getOrMkList pId uId =
maybe (mkList' pId uId) (pure . view node_id) . headMay =<< getListsWithParentId pId
......
......@@ -32,7 +32,7 @@ import Gargantext.Database.Facet
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
import Gargantext.Database.Schema.NodeNgram
import Gargantext.Database.Schema.NodeNodeNgrams
import Gargantext.Database.Queries.Join (leftJoin6)
import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
......@@ -101,31 +101,31 @@ searchInCorpusWithContacts' cId q o l order = runOpaQuery $ queryInCorpusWithCon
queryInCorpusWithContacts :: CorpusId -> Text -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> O.Query FacetPairedRead
queryInCorpusWithContacts cId q _ _ _ = proc () -> do
(docs, (corpusDoc, (docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
(docs, (corpusDoc, (_docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
restrict -< (nn_node1_id corpusDoc) .== (toNullable $ pgNodeId cId)
restrict -< (nng_listType docNgrams) .== (toNullable $ pgNgramsType Authors)
-- restrict -< (nng_listType docNgrams) .== (toNullable $ pgNgramsType Authors)
restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
-- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
returnA -< FacetPaired (_ns_id docs) (_ns_date docs) (_ns_hyperdata docs) (pgInt4 0) (Pair (_node_id contacts) (ngrams_terms ngrams'))
joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))))
joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNodeNgramsReadNull, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull)))))
joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNodeNgramsTable queryNgramsTable queryNodeNodeNgramsTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
where
cond12 :: (NodeNgramRead, NodeRead) -> Column PGBool
cond12 (ng3, n2) = _node_id n2 .== nng_node_id ng3
cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
cond12 (ng3, n2) = _node_id n2 .== nnng_node1_id ng3
---------
cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
cond23 (ng2, (nng2, _)) = nng_ngrams_id nng2 .== ngrams_id ng2
cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
cond23 (ng2, (nnng2, _)) = nnng_ngrams_id nnng2 .== ngrams_id ng2
cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nng_ngrams_id nng
cond34 :: (NodeNodeNgramsRead, (NgramsRead, (NodeNodeNgramsReadNull, NodeReadNull))) -> Column PGBool
cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nnng_ngrams_id nng
cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
cond45 (nn, (nng, (_,(_,_)))) = nng_node_id nng .== nn_node2_id nn
cond45 :: (NodeNodeRead, (NodeNodeNgramsRead, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull)))) -> Column PGBool
cond45 (nn, (nng, (_,(_,_)))) = nnng_node1_id nng .== nn_node2_id nn
cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNodeNgramsReadNull, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull))))) -> Column PGBool
cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn_node2_id nn
......
......@@ -98,18 +98,20 @@ eavg [] = 0
-- Simple Average
mean :: Fractional a => [a] -> a
mean xs = if L.null xs then 0.0
else sum xs / fromIntegral (length xs)
mean xs = sum xs / fromIntegral (length xs)
sumMaybe :: Num a => [Maybe a] -> Maybe a
sumMaybe = fmap sum . M.sequence
variance :: Floating a => [a] -> a
variance xs = mean $ map (\x -> (x - m) ** 2) xs where
variance xs = sum ys / (fromIntegral (length xs) - 1)
where
m = mean xs
ys = map (\x -> (x - m) ** 2) xs
deviation :: [Double] -> Double
deviation :: Floating a => [a] -> a
deviation = sqrt . variance
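-- GHCi sanity check of the sample (n-1) variance above (editor note):
-- >>> variance [1,2,3,4 :: Double]
-- 1.6666666666666667   -- squared deviations sum to 5.0, divided by (4 - 1)
-- >>> deviation [1,2,3,4 :: Double]
-- 1.2909944487358056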
movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
......@@ -242,8 +244,8 @@ scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
scaleNormalize :: [Double] -> [Double]
scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
where
v = variance xs'
m = mean xs'
v = variance xs'
m = mean xs'
xs' = map abs xs
normalize :: [Double] -> [Double]
......
{-|
Module : Gargantext.Text.Crawlers
Description : All crawlers of Gargantext in one file.
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Crawlers
where
{-
import Data.Text (Text)
--import Gargantext.Prelude
import qualified PUBMED as PubMed
data Crawler = PubMed | HAL | Isidore
type Query = Text
--{-
crawl :: Crawler -> Query -> IO [PubMed.Doc]
crawl PubMed = PubMed.crawler
--}
-}
......@@ -31,7 +31,7 @@ import Gargantext.Text.Metrics.Count (occurrencesWith)
import qualified Data.IntMap as IntMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.SVM as SVM
import qualified Data.SVM as SVM
import qualified Data.Vector as Vec
------------------------------------------------------------------------
......
......@@ -29,29 +29,26 @@ import GHC.Real (round)
import Gargantext.Prelude
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
import qualified Data.Array.Accelerate as DAA
import qualified Data.Array.Accelerate.Interpreter as DAA
import qualified Data.List as List
import qualified Data.Map as Map
import Numeric.Statistics.PCA (pcaReduceN)
import qualified Data.Vector.Storable as Vec
import Data.Array.IArray (Array, listArray, elems)
type GraphListSize = Int
type InclusionSize = Int
toScored :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
toScored = map2scored
. (reduceTo (Dimension 2))
. (pcaReduceTo (Dimension 2))
. (Map.filter (\v -> Vec.length v > 1))
. (Map.unionsWith (<>))
scored :: Ord t => Map (t,t) Int -> [Scored t]
scored = map2scored . (reduceTo (Dimension 2)) . scored2map
scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
......@@ -66,20 +63,6 @@ data Scored ts = Scored
, _scored_speGen :: !SpecificityGenericity
} deriving (Show)
data Dimension = Dimension Int
reduceTo :: Ord t
=> Dimension
-> Map t (Vec.Vector Double)
-> Map t (Vec.Vector Double)
reduceTo (Dimension d) ss = Map.fromList $ zip txts $ elems $ pcaReduceN ss'' d
where
ss'' :: Array Int (Vec.Vector Double)
ss'' = listArray (1, List.length ss') ss'
(txts,ss') = List.unzip $ Map.toList ss
localMetrics :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
(Map.toList fi)
......@@ -92,8 +75,6 @@ localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [i
$ DAA.zip (DAA.use is) (DAA.use ss)
-- TODO Code to be remove below
-- TODO in the textflow we end up needing these indices , it might be
-- better to compute them earlier and pass them around.
......@@ -107,10 +88,6 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) s
$ DAA.zip (DAA.use is) (DAA.use ss)
takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
takeScored listSize incSize = map _scored_terms
. linearTakes listSize incSize _scored_speGen
......@@ -134,4 +111,3 @@ linearTakes gls incSize speGen incExc = take gls
. splitEvery incSize
. sortOn speGen
......@@ -22,7 +22,7 @@ please follow the types.
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers (FileFormat(..), clean, parseFile)
module Gargantext.Text.Parsers (FileFormat(..), clean, parseFile, cleanText)
where
--import Data.ByteString (ByteString)
......@@ -164,9 +164,14 @@ openZip fp = do
bs <- mapConcurrently (\s -> withArchive fp (getEntry s)) entries
pure bs
cleanText :: Text -> Text
cleanText = cs . clean . cs
clean :: DB.ByteString -> DB.ByteString
clean txt = DBC.map clean' txt
where
clean' '’' = '\''
clean' '\r' = ' '
clean' '\t' = ' '
clean' ';' = '.'
clean' c = c
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Parsers.Isidore where
import Data.Text (Text)
import Data.Either
import Gargantext.Prelude
import Data.Aeson
import Data.Proxy
import GHC.Generics
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.API
import Servant.Client
type IsidoreAPI = "sparql" :> Capture "query" Text :> Get '[JSON] [IsidoreDoc]
data IsidoreDoc =
IsidoreDoc {title :: Maybe Text}
deriving (Show, Generic)
instance FromJSON IsidoreDoc
instance ToJSON IsidoreDoc
isidoreDocsApi :: Proxy IsidoreAPI
isidoreDocsApi = Proxy
isidoreDocs :: ClientM [IsidoreDoc]
isidoreDocs = client isidoreDocsApi
getIsidoreDocs :: IO [IsidoreDoc]
getIsidoreDocs = do
manager' <- newManager tlsManagerSettings
res <- runClientM isidoreDocs $ mkClientEnv manager' $ BaseUrl Https "www.rechercheisidore.fr" 443 ""
case res of
Left _ -> panic "err"
Right res' -> pure res'
......@@ -43,16 +43,16 @@ selectQueryRaw' uri q = getWith opts uri
& header "User-Agent" .~ ["gargantext-hsparql-client"]
& param "query" .~ [Data.Text.pack q]
isidoreGet :: Lang -> Text -> IO (Maybe [HyperdataDocument])
isidoreGet l q = do
bindingValues <- isidoreGet' q
isidoreGet :: Lang -> Int -> Text -> IO (Maybe [HyperdataDocument])
isidoreGet la li q = do
bindingValues <- isidoreGet' li q
case bindingValues of
Nothing -> pure Nothing
Just dv -> pure $ Just $ map (bind2doc l) dv
Just dv -> pure $ Just $ map (bind2doc la) dv
isidoreGet' :: Text -> IO (Maybe [[BindingValue]])
isidoreGet' q = do
let s = createSelectQuery $ isidoreSelect q
isidoreGet' :: Int -> Text -> IO (Maybe [[BindingValue]])
isidoreGet' l q = do
let s = createSelectQuery $ isidoreSelect l q
putStrLn s
r <- selectQueryRaw' route s
putStrLn $ show $ r ^. responseStatus
......@@ -60,11 +60,11 @@ isidoreGet' q = do
-- res <- selectQuery route $ simpleSelect q
-- pure res
isidoreSelect :: Text -> Query SelectQuery
isidoreSelect q = do
isidoreSelect :: Int -> Text -> Query SelectQuery
isidoreSelect lim q = do
-- See Predefined Namespace Prefixes:
-- https://isidore.science/sparql?nsdecl
isidore <- prefix "isidore" (iriRef "http://www.rechercheisidore.fr/class/")
isidore <- prefix "isidore" (iriRef "http://isidore.science/class/")
rdf <- prefix "rdf" (iriRef "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
dcterms <- prefix "dcterms" (iriRef "http://purl.org/dc/terms/")
dc <- prefix "dc" (iriRef "http://purl.org/dc/elements/1.1/")
......@@ -80,14 +80,13 @@ isidoreSelect q = do
source <- var
langDoc <- var
publisher <- var
--langFr <- var
--agg <- var
triple_ link (rdf .:. "type") (isidore .:. "BibliographicalResource")
triple_ link (rdf .:. "type") (isidore .:. "Document")
triple_ link (dcterms .:. "title") title
triple_ link (dcterms .:. "date") date
triple_ link (dcterms .:. "creator") authors
triple_ link (dcterms .:. "language") langDoc
--triple_ link (dcterms .:. "language") langDoc
triple_ link (dc .:. "description") abstract
--triple_ link (ore .:. "isAggregatedBy") agg
--triple_ agg (dcterms .:. "title") title
......@@ -96,16 +95,18 @@ isidoreSelect q = do
optional_ $ triple_ link (dcterms .:. "publisher") publisher
-- TODO FIX BUG with (.||.) operator
--filterExpr $ (.||.) (contains title q) (contains title q)
filterExpr_ (containsWith title q) -- (contains abstract q)
--filterExpr (containsWith abstract q) -- (contains abstract q)
--filterExpr_ $ (.||.) (contains title q) (contains abstract q)
--filterExpr_ (containsWith authors q) -- (contains abstract q)
--filterExpr_ (containsWith title q) -- (contains abstract q)
--filterExpr_ $ (.||.) (containsWith title q) (contains abstract q)
filterExpr_ (containsWith title q)
-- TODO FIX filter with lang
--filterExpr $ langMatches title (str ("fra" :: Text))
--filterExpr $ (.==.) lang (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
--filterExpr_ $ langMatches title (str ("fra" :: Text))
--filterExpr_ $ (.==.) langDoc (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
orderNextDesc date
limit_ 10
limit_ lim
distinct_
selectVars [link, date, langDoc, authors, source, publisher, title, abstract]
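-- Usage sketch (editor note): fetch up to 100 documents whose title contains
-- "climate"; the Lang argument is only passed through to bind2doc, since
-- language filtering in the query itself is still a TODO above.
-- > isidoreGet EN 100 "climate"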
......
{-|
Module : Gargantext.Text.Parsers.IsidoreApi
Description : To query French Humanities publication database from its API
Copyright : (c) CNRS, 2019-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.IsidoreApi where
import System.FilePath (FilePath())
import Data.Text (Text)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import Gargantext.Prelude
import Isidore.Client
import Servant.Client
import qualified Data.Text as Text
import qualified Gargantext.Text.Parsers.Date as Date
import qualified Isidore as Isidore
import Gargantext.Text.Parsers.CSV (writeDocs2Csv)
import Gargantext.Text.Parsers (cleanText)
-- | TODO work with the ServantErr
get :: Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO [HyperdataDocument]
get la l q a = do
let
printErr (DecodeFailure e _) = panic e
printErr e = panic (cs $ show e)
toIsidoreDocs :: Reply -> [IsidoreDoc]
toIsidoreDocs (ReplyOnly r) = [r]
toIsidoreDocs (Replies rs) = rs
iDocs <- either printErr _content <$> Isidore.get l q a
hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
pure hDocs
isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO ()
isidore2csvFile fp la li tq aq = do
hdocs <- get la li tq aq
writeDocs2Csv fp hdocs
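-- Invocation sketch (editor note; hypothetical path, limit and query):
-- > isidore2csvFile "isidore.csv" EN (Just 50) (Just "climate") Nothing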
isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc l (IsidoreDoc t a d u s as) = do
let
author :: Author -> Text
author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
author (Authors aus) = Text.intercalate ". " $ map author aus
creator2text :: Creator -> Text
creator2text (Creator au) = author au
creator2text (Creators aus') = Text.intercalate ". " $ map author aus'
langText :: LangText -> Text
langText (LangText _l t1) = t1
langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
(utcTime, (pub_year, pub_month, pub_day)) <- Date.split l (maybe (Just "2019") (Just) d)
pure $ HyperdataDocument (Just "IsidoreApi")
Nothing
u
Nothing
Nothing
Nothing
(Just $ cleanText $ langText t)
Nothing
(creator2text <$> as)
(_sourceName <$> s)
(cleanText <$> langText <$> a)
(fmap (Text.pack . show) utcTime)
(pub_year)
(pub_month)
(pub_day)
Nothing
Nothing
Nothing
(Just $ (Text.pack . show) l)
......@@ -29,6 +29,7 @@ compute graph
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Text.Terms
......@@ -37,19 +38,31 @@ module Gargantext.Text.Terms
import Control.Lens
import Data.Text (Text)
import Data.Traversable
import GHC.Base (String)
import Gargantext.Prelude
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Text.Terms.Multi (multiterms)
import Gargantext.Text.Terms.Mono (monoTerms)
import Gargantext.Text.Terms.Mono.Stem (stem)
import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.Text as Text
import Gargantext.Text (sentences)
import Gargantext.Text.Terms.Mono.Token.En (tokenize)
import Gargantext.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken)
data TermType lang
= Mono { _tt_lang :: lang }
| Multi { _tt_lang :: lang }
| MonoMulti { _tt_lang :: lang }
| Unsupervised { _tt_lang :: lang
, _tt_windoSize :: Int
, _tt_ngramsSize :: Int
, _tt_model :: Maybe (Tries Token ())
}
makeLenses ''TermType
--group :: [Text] -> [Text]
......@@ -61,7 +74,17 @@ makeLenses ''TermType
-- | Sugar to extract terms from text (hiddeng mapM from end user).
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms :: TermType Lang -> [Text] -> IO [[Terms]]
extractTerms termTypeLang = mapM (terms termTypeLang)
extractTerms (Unsupervised l n s m) xs = mapM (terms (Unsupervised l n s (Just m'))) xs
where
m' = case m of
Just m''-> m''
Nothing -> newTries n (Text.intercalate " " xs)
extractTerms termTypeLang xs = mapM (terms termTypeLang) xs
------------------------------------------------------------------------
-- | Terms from Text
-- Mono : mono terms
......@@ -72,6 +95,45 @@ terms :: TermType Lang -> Text -> IO [Terms]
terms (Mono lang) txt = pure $ monoTerms lang txt
terms (Multi lang) txt = multiterms lang txt
terms (MonoMulti lang) txt = terms (Multi lang) txt
terms (Unsupervised lang n s m) txt = termsUnsupervised (Unsupervised lang n s (Just m')) txt
where
m' = maybe (newTries n txt) identity m
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
------------------------------------------------------------------------
text2term :: Lang -> [Text] -> Terms
text2term _ [] = Terms [] Set.empty
text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
isPunctuation :: Text -> Bool
isPunctuation x = List.elem x $ (Text.pack . pure)
<$> ("!?(),;." :: String)
-- | Unsupervised ngrams extraction
-- language agnostic extraction
-- TODO: remove IO
-- TODO: newtype BlockText
type WindowSize = Int
type MinNgramSize = Int
termsUnsupervised :: TermType Lang -> Text -> IO [Terms]
termsUnsupervised (Unsupervised l n s m) =
pure
. map (text2term l)
. List.nub
. (List.filter (\l' -> List.length l' > s))
. List.concat
. mainEleveWith (maybe (panic "no model") identity m) n
. uniText
termsUnsupervised _ = undefined
newTries :: Int -> Text -> Tries Token ()
newTries n t = buildTries n (fmap toToken $ uniText t)
uniText :: Text -> [[Text]]
uniText = map (List.filter (not . isPunctuation))
. map tokenize
. sentences -- | TODO get sentences according to lang
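-- Usage sketch (editor note), mirroring the (Unsupervised EN 5 1 Nothing)
-- call the importer now uses: the Eleve model is built from the input texts
-- themselves, then ngrams longer than 1 token are extracted:
-- > extractTerms (Unsupervised EN 5 1 Nothing)
-- >              ["The dog runs in the park .", "The bird sings ."]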
......@@ -47,6 +47,7 @@ monoTerms l txt = map (monoText2term l) $ monoTexts txt
monoTexts :: Text -> [Text]
monoTexts = L.concat . monoTextsBySentence
-- | TODO use text2term only
monoText2term :: Lang -> Text -> Terms
monoText2term lang txt = Terms [txt] (S.singleton $ stem lang txt)
......@@ -54,6 +55,3 @@ monoTextsBySentence :: Text -> [[Text]]
monoTextsBySentence = map T.words
. T.split isSep
. T.toLower
......@@ -49,6 +49,8 @@ data Node = Node { node_size :: Int
, node_type :: TypeNode -- TODO NgramsType | Person
, node_id :: Text -- TODO NgramId
, node_label :: Text
, node_x_coord :: Double
, node_y_coord :: Double
, node_attributes :: Attributes
}
deriving (Show, Generic)
......@@ -62,6 +64,7 @@ instance ToSchema Node where
data Edge = Edge { edge_source :: Text
, edge_target :: Text
, edge_weight :: Double
, edge_confluence :: Double
, edge_id :: Text
}
deriving (Show, Generic)
......@@ -117,7 +120,7 @@ instance Arbitrary Graph where
arbitrary = elements $ [defaultGraph]
defaultGraph :: Graph
defaultGraph = Graph {_graph_nodes = [Node {node_size = 4, node_type = Terms, node_id = pack "0", node_label = pack "animal", node_attributes = Attributes {clust_default = 0}},Node {node_size = 3, node_type = Terms, node_id = pack "1", node_label = pack "bird", node_attributes = Attributes {clust_default = 0}},Node {node_size = 2, node_type = Terms, node_id = pack "2", node_label = pack "boy", node_attributes = Attributes {clust_default = 1}},Node {node_size = 2, node_type = Terms, node_id = pack "3", node_label = pack "dog", node_attributes = Attributes {clust_default = 0}},Node {node_size = 2, node_type = Terms, node_id = pack "4", node_label = pack "girl", node_attributes = Attributes {clust_default = 1}},Node {node_size = 4, node_type = Terms, node_id = pack "5", node_label = pack "human body", node_attributes = Attributes {clust_default = 1}},Node {node_size = 3, node_type = Terms, node_id = pack "6", node_label = pack "object", node_attributes = Attributes {clust_default = 2}},Node {node_size = 2, node_type = Terms, node_id = pack "7", node_label = pack "pen", node_attributes = Attributes {clust_default = 2}},Node {node_size = 2, node_type = Terms, node_id = pack "8", node_label = pack "table", node_attributes = Attributes {clust_default = 2}}], _graph_edges = [Edge {edge_source = pack "0", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "0"},Edge {edge_source = pack "1", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "1"},Edge {edge_source = pack "1", edge_target = pack "1", edge_weight = 1.0, edge_id = pack "2"},Edge {edge_source = pack "2", edge_target = pack "2", edge_weight = 1.0, edge_id = pack "3"},Edge {edge_source = pack "2", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "4"},Edge {edge_source = pack "3", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "5"},Edge {edge_source = pack "3", edge_target = pack "1", edge_weight = 1.0, edge_id = pack "6"},Edge {edge_source = pack "3", edge_target = pack "3", edge_weight = 1.0, edge_id = pack "7"},Edge {edge_source = pack "4", edge_target = pack "4", edge_weight = 1.0, edge_id = pack "8"},Edge {edge_source = pack "4", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "9"},Edge {edge_source = pack "5", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "10"},Edge {edge_source = pack "6", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "11"},Edge {edge_source = pack "7", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "12"},Edge {edge_source = pack "7", edge_target = pack "7", edge_weight = 1.0, edge_id = pack "13"},Edge {edge_source = pack "8", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "14"},Edge {edge_source = pack "8", edge_target = pack "7", edge_weight = 1.0, edge_id = pack "15"},Edge {edge_source = pack "8", edge_target = pack "8", edge_weight = 1.0, edge_id = pack "16"}], _graph_metadata = Nothing}
defaultGraph = Graph {_graph_nodes = [Node {node_x_coord=0, node_y_coord=0, node_size = 4, node_type = Terms, node_id = pack "0", node_label = pack "animal", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 3, node_type = Terms, node_id = pack "1", node_label = pack "bird", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "2", node_label = pack "boy", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "3", node_label = pack "dog", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "4", node_label = pack "girl", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 4, node_type = Terms, node_id = pack "5", node_label = pack "human body", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 3, node_type = Terms, node_id = pack "6", node_label = pack "object", node_attributes = Attributes {clust_default = 2}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "7", node_label = pack "pen", node_attributes = Attributes {clust_default = 2}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "8", node_label = pack "table", node_attributes = Attributes {clust_default = 2}}], _graph_edges = [Edge {edge_source = pack "0", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "0"},Edge {edge_source = pack "1", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "1"},Edge {edge_source = pack "1", edge_target = pack "1", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "2"},Edge {edge_source = pack "2", edge_target = pack "2", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "3"},Edge {edge_source = pack "2", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "4"},Edge {edge_source = pack "3", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "5"},Edge {edge_source = pack "3", edge_target = pack "1", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "6"},Edge {edge_source = pack "3", edge_target = pack "3", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "7"},Edge {edge_source = pack "4", edge_target = pack "4", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "8"},Edge {edge_source = pack "4", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "9"},Edge {edge_source = pack "5", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "10"},Edge {edge_source = pack "6", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "11"},Edge {edge_source = pack "7", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "12"},Edge {edge_source = pack "7", edge_target = pack "7", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "13"},Edge {edge_source = pack "8", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "14"},Edge {edge_source = pack "8", edge_target = pack "7", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "15"},Edge {edge_source = pack "8", edge_target = pack "8", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "16"}], _graph_metadata = Nothing}
-----------------------------------------------------------
......@@ -156,10 +159,10 @@ graphV3ToGraph (GraphV3 links nodes) = Graph (map nodeV32node nodes) (zipWith li
where
nodeV32node :: NodeV3 -> Node
nodeV32node (NodeV3 no_id' (AttributesV3 cl') no_s' no_lb')
= Node no_s' Terms (cs $ show no_id') no_lb' (Attributes cl')
= Node no_s' Terms (cs $ show no_id') no_lb' 0 0 (Attributes cl')
linkV32edge :: Int -> EdgeV3 -> Edge
linkV32edge n (EdgeV3 eo_s' eo_t' eo_w') = Edge (cs $ show eo_s') (cs $ show eo_t') ((T.read $ T.unpack eo_w') :: Double) (cs $ show n)
linkV32edge n (EdgeV3 eo_s' eo_t' eo_w') = Edge (cs $ show eo_s') (cs $ show eo_t') ((T.read $ T.unpack eo_w') :: Double) 0.5 (cs $ show n)
graphV3ToGraphWithFiles :: FilePath -> FilePath -> IO ()
......
......@@ -22,8 +22,7 @@ Portability : POSIX
module Gargantext.Viz.Graph.API
where
import Data.List (sortOn)
import Control.Lens (set, view)
import Control.Lens (set)
import Control.Monad.IO.Class (liftIO)
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Types
......@@ -69,18 +68,15 @@ getGraph nId = do
let cId = maybe (panic "no parentId") identity $ _node_parentId nodeGraph
lIds <- selectNodesWithUsername NodeList userMaster
lId <- defaultList cId
ngs <- filterListWithRoot GraphTerm <$> mapTermListRoot [lId] NgramsTerms
lId <- defaultList cId
ngs <- filterListWithRoot GraphTerm <$> mapTermListRoot [lId] NgramsTerms
myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal False)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) NgramsTerms (Map.keys ngs)
graph <- liftIO $ cooc2graph myCooc
pure $ set graph_metadata (Just metadata)
$ set graph_nodes ( sortOn node_id
$ view graph_nodes graph
) graph
graph <- liftIO $ cooc2graph 1 myCooc
pure $ set graph_metadata (Just metadata) graph
postGraph :: NodeId -> GargServer (Post '[JSON] [NodeId])
......
......@@ -10,6 +10,7 @@ Portability : POSIX
Given a graph with partitions (from the Louvain algorithm), bridgeness uniformly
filters inter-community links.
TODO rewrite Bridgeness with "equivalence structurale" metrics
-}
{-# LANGUAGE NoImplicitPrelude #-}
......
{-| Module : Gargantext.Viz.Graph.FGL
Description : FGL main functions used in Garg
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main FGL funs/types to ease portability with IGraph.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ConstraintKinds #-}
module Gargantext.Viz.Graph.FGL where
import Gargantext.Prelude
import qualified Data.Graph.Inductive as FGL
import Data.List as List
------------------------------------------------------------------
-- | Main Types
type Graph_Undirected = FGL.Gr () ()
type Graph_Directed = FGL.Gr () ()
type Graph = FGL.Graph
type Node = FGL.Node
type Edge = FGL.Edge
------------------------------------------------------------------
-- | Main Functions
mkGraph :: [Node] -> [Edge] -> Graph_Undirected
mkGraph = FGL.mkUGraph
neighbors :: Graph gr => gr a b -> Node -> [Node]
neighbors = FGL.neighbors
-- | TODO bug: if graph is undirected, we need to filter
-- nub . (map (\(n1,n2) -> if n1 < n2 then (n1,n2) else (n2,n1))) . FGL.edges
edges :: Graph gr => gr a b -> [Edge]
edges = FGL.edges
nodes :: Graph gr => gr a b -> [Node]
nodes = FGL.nodes
------------------------------------------------------------------
-- | Main sugared functions
mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected
mkGraphUfromEdges es = mkGraph ns es
where
ns = List.nub (a <> b)
where
(a, b) = List.unzip es
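-- Usage sketch: an undirected triangle.
-- > mkGraphUfromEdges [(0,1), (1,2), (0,2)]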
{-| Module : Gargantext.Viz.Graph.IGraph
Description : IGraph main functions used in Garg
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main IGraph funs/types to ease portability with FGL.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE DataKinds #-}
module Gargantext.Viz.Graph.IGraph where
import Data.Serialize (Serialize)
import Data.Singletons (SingI)
import Gargantext.Prelude
import IGraph hiding (mkGraph, neighbors, edges, nodes, Node, Graph)
import qualified IGraph as IG
import qualified Data.List as List
------------------------------------------------------------------
-- | Main Types
type Graph_Undirected = IG.Graph 'U () ()
type Graph_Directed = IG.Graph 'D () ()
type Node = IG.Node
type Graph = IG.Graph
------------------------------------------------------------------
-- | Main Functions
mkGraph :: (SingI d, Ord v,
Serialize v, Serialize e) =>
[v] -> [LEdge e] -> IG.Graph d v e
mkGraph = IG.mkGraph
neighbors :: IG.Graph d v e -> IG.Node -> [Node]
neighbors = IG.neighbors
edges :: IG.Graph d v e -> [Edge]
edges = IG.edges
nodes :: IG.Graph d v e -> [Node]
nodes = IG.nodes
------------------------------------------------------------------
-- | Main sugared functions
mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected
mkGraphUfromEdges es = mkGraph (List.replicate n ()) $ zip es $ repeat ()
where
(a,b) = List.unzip es
n = List.length (List.nub $ a <> b)
mkGraphDfromEdges :: [(Int, Int)] -> Graph_Directed
mkGraphDfromEdges = undefined
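-- Usage sketch: the same triangle as the FGL variant; node and edge labels
-- are () here, as built by mkGraphUfromEdges above.
-- > mkGraphUfromEdges [(0,1), (1,2), (0,2)]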
......@@ -7,7 +7,6 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
......@@ -16,57 +15,136 @@ Portability : POSIX
module Gargantext.Viz.Graph.Tools
where
--import Debug.Trace (trace)
import Data.Graph.Clustering.Louvain.CplusPlus (LouvainNode(..))
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Viz.Graph (Graph(..))
import Gargantext.Viz.Graph -- (Graph(..))
import Gargantext.Core.Statistics
import Gargantext.Viz.Graph
import Gargantext.Viz.Graph.Bridgeness (bridgeness)
import Gargantext.Viz.Graph.Distances.Matrice (measureConditional)
import Gargantext.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map)
import qualified Data.Map as Map
import Gargantext.Viz.Graph.IGraph (mkGraphUfromEdges)
import Gargantext.Viz.Graph.Proxemy (confluence)
import GHC.Float (sin, cos)
import qualified IGraph as Igraph
import qualified IGraph.Algorithms.Layout as Layout
import qualified Data.Vector.Storable as Vec
import qualified Data.Map as Map
import qualified Data.List as List
type Threshold = Int
cooc2graph :: (Map (Text, Text) Int) -> IO Graph
cooc2graph myCooc = do
cooc2graph :: Threshold -> (Map (Text, Text) Int) -> IO Graph
cooc2graph threshold myCooc = do
let (ti, _) = createIndices myCooc
myCooc4 = toIndex ti myCooc
matCooc = map2mat (0) (Map.size ti) myCooc4
myCooc' = toIndex ti myCooc
matCooc = map2mat (0) (Map.size ti) $ Map.filter (>threshold) myCooc'
distanceMat = measureConditional matCooc
distanceMap = Map.map (\_ -> 1) $ Map.filter (>0) $ mat2map distanceMat
distanceMap = Map.filter (>0.01) $ mat2map distanceMat
partitions <- case Map.size distanceMap > 0 of
True -> cLouvain distanceMap
False -> panic "Text.Flow: DistanceMap is empty"
let distanceMap' = bridgeness 300 partitions distanceMap
let bridgeness' = bridgeness 300 partitions distanceMap
let confluence' = confluence (Map.keys bridgeness') 3 True False
pure $ data2graph (Map.toList ti) myCooc4 distanceMap' partitions
data2graph (Map.toList ti) myCooc' bridgeness' confluence' partitions
----------------------------------------------------------
-- | From data to Graph
-- FIXME: distance should not be a map since we just "toList" it (same as cLouvain)
data2graph :: [(Text, Int)] -> Map (Int, Int) Int
-> Map (Int, Int) Double
-> Map (Int, Int) Double
-> [LouvainNode]
-> Graph
data2graph labels coocs distance partitions = Graph nodes edges Nothing
where
community_id_by_node_id = Map.fromList [ (n, c) | LouvainNode n c <- partitions ]
nodes = [ Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
-> IO Graph
data2graph labels coocs bridge conf partitions = do
let community_id_by_node_id = Map.fromList [ (n, c) | LouvainNode n c <- partitions ]
nodes <- mapM (setCoord ForceAtlas labels bridge)
[ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
, node_type = Terms -- or Unknown
, node_id = cs (show n)
, node_label = l
, node_x_coord = 0
, node_y_coord = 0
, node_attributes =
Attributes { clust_default = maybe 0 identity
(Map.lookup n community_id_by_node_id) } }
| (l, n) <- labels ]
edges = [ Edge { edge_source = cs (show s)
)
| (l, n) <- labels
]
let edges = [ Edge { edge_source = cs (show s)
, edge_target = cs (show t)
, edge_weight = w
, edge_weight = d
, edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
, edge_id = cs (show i) }
| (i, ((s,t), w)) <- zip ([0..]::[Integer]) (Map.toList distance) ]
| (i, ((s,t), d)) <- zip ([0..]::[Integer]) (Map.toList bridge) ]
pure $ Graph nodes edges Nothing
------------------------------------------------------------------------
data Layout = KamadaKawai | ACP | ForceAtlas
setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
where
(x,y) = f i
-- | ACP
setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> IO Node
setCoord l labels m (n,node) = getCoord l labels m n
>>= \(x,y) -> pure $ node { node_x_coord = x
, node_y_coord = y
}
getCoord :: Ord a => Layout
-> [(a, Int)] -> Map (Int, Int) Double -> Int -> IO (Double, Double)
getCoord KamadaKawai _ m n = layout m n
getCoord ForceAtlas _ _ n = pure (sin d, cos d)
where
d = fromIntegral n
getCoord ACP labels m n = pure $ to2d $ maybe (panic "Graph.Tools no coordinate") identity
$ Map.lookup n
$ pcaReduceTo (Dimension 2)
$ mapArray labels m
where
to2d :: Vec.Vector Double -> (Double, Double)
to2d v = (x',y')
where
ds = take 2 $ Vec.toList v
x' = head' "to2d" ds
y' = last' "to2d" ds
mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
where
ns = map snd items
toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
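-- NOTE (editor sketch): the ForceAtlas case above only seeds positions on a
-- unit circle (sin/cos of the node id); a client-side force layout is
-- presumably expected to refine them.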
------------------------------------------------------------------------
-- | KamadaKawai Layout
-- TODO TEST: check labels, nodeId and coordinates
layout :: Map (Int, Int) Double -> Int -> IO (Double, Double)
layout m n = maybe (panic "") identity <$> Map.lookup n <$> coord
where
coord :: IO (Map Int (Double,Double))
coord = Map.fromList <$> List.zip (Igraph.nodes g) <$> (Layout.getLayout g p)
--p = Layout.defaultLGL
p = Layout.defaultKamadaKawai
g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
......@@ -4,6 +4,10 @@ extra-package-dbs: []
packages:
- .
docker:
enable: false
repo: 'cgenie/stack-build:lts-12.26'
allow-newer: true
extra-deps:
- git: https://github.com/delanoe/data-time-segment.git
......@@ -18,8 +22,10 @@ extra-deps:
commit: 3fe28b683aba5ddf05e3b5f8eced0bd05c5a29f9
- git: https://github.com/robstewart57/rdf4h.git
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: dcaa0f5dd53f20648f4f5a615d29163582a4219c
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: bf57642f6b66f554fdc0a38ac391cd8200dffcb3
- git: https://gitlab.iscpif.fr/gargantext/patches-class
commit: 746b4ce0af8f9e600d555ad7e5b2973a940cdad9
- git: https://github.com/np/servant-job.git
......@@ -32,6 +38,8 @@ extra-deps:
commit: 53385de076be09f728a1b58c035a18e9ff9bcfd6
- git: https://github.com/delanoe/hsparql.git
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: e39454101b53916e3082085ebfe922df695fc775
- KMP-0.1.0.2
- accelerate-1.2.0.0
- aeson-lens-0.5.0.0
......@@ -39,6 +47,7 @@ extra-deps:
- duckling-0.1.3.0
- full-text-search-0.2.1.4
- fullstop-0.1.4
- haskell-igraph-0.7.1
- hgal-2.0.0.2
- located-base-0.1.1.1
- multiset-0.3.4.1 # stack test
......