Commit 36c913d9 authored by qlobbe's avatar qlobbe

Merge branch 'dev' into dev-phylo

parents 62f57e5a c74fd659
......@@ -33,3 +33,21 @@ curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/master/devo
stack --docker exec gargantext-cli -- CorpusFromGarg.csv ListFromGarg.csv Output.json
```sql
INSERT INTO auth_user (password, is_superuser, username, first_name, last_name, email, is_staff, is_active)
VALUES ('1resu', true, 'user1', 'user', '1', 'a@localhost', true, true);
-- nodetype NodeUser has id 1
-- inserted user_id = 3
INSERT INTO nodes (typename, user_id, name)
VALUES (1, 3, 'user1');
-- same for master user -- 'gargantua'
INSERT INTO auth_user (password, is_superuser, username, first_name, last_name, email, is_staff, is_active)
VALUES ('autnagrag', true, 'gargantua', 'gargantua', '1', 'g@localhost', true, true);
-- nodetype NodeUser has id 1
-- inserted user_id = 5
INSERT INTO nodes (typename, user_id, name)
VALUES (1, 5, 'gargantua');
```
......@@ -46,7 +46,7 @@ main = do
createUsers = insertUsersDemo
let cmd :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
cmd = flowCorpusFile (cs user) (cs name) (read limit :: Int) (Multi EN) CsvHalFormat corpusPath
cmd = flowCorpusFile (cs user) (cs name) (read limit :: Int) (Unsupervised EN 5 1 Nothing) CsvHalFormat corpusPath
{-
let debatCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
debatCorpus = do
......
......@@ -10,7 +10,7 @@ else
fi
sudo apt update
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-9.6 nginx
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-9.6 nginx libigraph-dev
#echo "Which user?"
#read USER
......
......@@ -9,5 +9,6 @@ else
fi
sudo apt update
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-9.6 nginx libpq-dev
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql nginx libpq-dev
sudo apt install postgresql-server-dev-9.6
if [ "$#" -lt 3 ]; then
echo "Usage: $0 <name> <path> <limit>"
exit 1
fi
name="$1"
path="$2"
limit="$3"
stack --docker exec gargantext-import -- true "user1" "$name" gargantext.ini "$limit" "$path"
set -eu
docker stop dbgarg || :
docker rm --volumes dbgarg || :
export PGPASSWORD="$(grep DB_PASS gargantext.ini | \awk '{print $3}')"
docker run --name dbgarg -e POSTGRES_USER=gargantua -e POSTGRES_DB=gargandbV5 -e POSTGRES_PASSWORD=${PGPASSWORD} -d postgres
sleep 3
docker run -e PGPASSWORD -i --rm --link dbgarg:postgres postgres psql -h postgres -U gargantua -d gargandbV5 < src/Gargantext/Database/Schema/schema.sql
#docker run -e PGPASSWORD -i --rm --link dbgarg:postgres postgres psql -h postgres -U gargantua -d gargandbV5 < gargantext.dump
#docker run -e PGPASSWORD -i --rm --link dbgarg:postgres postgres dropdb -h postgres -U gargantua gargandbV5
stack --docker exec gargantext-server -- --run Prod --ini gargantext.ini
name: gargantext
version: '4.0.0.5'
version: '4.0.0.6'
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -47,6 +47,7 @@ library:
- Gargantext.Prelude
- Gargantext.Text
- Gargantext.Text.Context
- Gargantext.Text.Crawlers
- Gargantext.Text.Examples
- Gargantext.Text.List.CSV
- Gargantext.Text.Metrics
......@@ -102,6 +103,8 @@ library:
- containers
- contravariant
- crawlerPubMed
- crawlerIsidore
- crawlerHAL
- data-time-segment
- deepseq
- directory
......@@ -110,10 +113,12 @@ library:
- filepath
- fullstop
- fclabels
- fgl
- fast-logger
- filelock
- full-text-search
- graphviz
- haskell-igraph
- http-client
- http-client-tls
- http-conduit
......@@ -153,6 +158,9 @@ library:
- protolude
- pureMD5
- SHA
- simple-reflect
- cereal # (IGraph)
- singletons # (IGraph)
- random
- rake
- regex-compat
......
......@@ -75,7 +75,7 @@ import Gargantext.API.Ngrams (HasRepo(..), HasRepoSaver(..), saveRepo, TableNgra
import Gargantext.API.Node
import Gargantext.API.Search ( SearchAPI, search, SearchQuery)
import Gargantext.API.Types
import Gargantext.API.Upload
import qualified Gargantext.API.Corpus.New as New
import Gargantext.Core.Types (HasInvalidError(..))
import Gargantext.Database.Facet
import Gargantext.Database.Schema.Node (HasNodeError(..), NodeError)
......@@ -262,7 +262,7 @@ type GargAPI' =
:<|> "count" :> Summary "Count endpoint"
:> ReqBody '[JSON] Query :> CountAPI
-- Corpus endpoint
-- Corpus endpoint --> TODO rename s/search/filter/g
:<|> "search":> Summary "Search endpoint"
:> ReqBody '[JSON] SearchQuery
:> QueryParam "offset" Int
......@@ -279,7 +279,7 @@ type GargAPI' =
:<|> "tree" :> Summary "Tree endpoint"
:> Capture "id" NodeId :> TreeAPI
:<|> "upload" :> ApiUpload
:<|> "new" :> New.Api
-- :<|> "scraper" :> WithCallbacks ScraperAPI
......@@ -323,7 +323,7 @@ serverGargAPI -- orchestrator
:<|> search
:<|> graphAPI -- TODO: mock
:<|> treeAPI
:<|> upload
:<|> New.api
-- :<|> orchestrator
where
fakeUserId = 1 -- TODO
......@@ -421,3 +421,7 @@ startGargantextMock port = do
application <- makeMockApp . MockEnv $ FireWall False
run port application
-}
{-|
Module : Gargantext.API.Corpus.New
Description : New corpus API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
New corpus means either:
- new corpus
- new data in existing corpus
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.API.Corpus.New
where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Prelude
import Gargantext.Prelude.Utils (hash)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.Database.Flow (FlowCmdM)
data Query = Query { query_query :: Text
, query_corpus_id :: Int
, query_files_id :: [Text]
}
deriving (Eq, Show, Generic)
deriveJSON (unPrefix "query_") ''Query
instance Arbitrary Query where
arbitrary = elements [ Query q n fs
| q <- ["a","b"]
, n <- [0..10]
, fs <- map (map hash) [["a","b"], ["c","d"]]
]
instance ToSchema Query where
declareNamedSchema =
genericDeclareNamedSchema
defaultSchemaOptions {fieldLabelModifier = \fieldLabel -> drop 6 fieldLabel}
type Api = Summary "New Corpus endpoint"
:> ReqBody '[JSON] Query
:> Post '[JSON] CorpusId
api :: FlowCmdM env err m => Query -> m CorpusId
api (Query q _ _) = do
cId <- flowCorpusSearchInDatabase "user1" EN q
pure cId
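-- A quick sanity check of the wire format (a sketch, not part of the API):
-- deriveJSON (unPrefix "query_") strips the record prefix, so clients are
-- expected to POST JSON shaped like:
--
-- Data.Aeson.encode (Query "abelian group" 0 [])
--   == "{\"query\":\"abelian group\",\"corpus_id\":0,\"files_id\":[]}"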
......@@ -69,6 +69,7 @@ data QueryBool = QueryBool Text
queries :: [QueryBool]
queries = [QueryBool (pack "(X OR X') AND (Y OR Y') NOT (Z OR Z')")]
--queries = [QueryBool (pack "(X + X') * (Y + Y') - (Z + Z')")]
instance Arbitrary QueryBool where
arbitrary = elements queries
......
......@@ -95,6 +95,7 @@ data TODO = TODO
deriving (Generic)
instance ToSchema TODO where
instance ToParamSchema TODO where
------------------------------------------------------------------------
--data FacetFormat = Table | Chart
......@@ -200,12 +201,12 @@ mkNgramsElement ngrams list rp children =
-- TODO review
size = 1 + count " " ngrams
newNgramsElement :: NgramsTerm -> NgramsElement
newNgramsElement ngrams = mkNgramsElement ngrams GraphTerm Nothing mempty
newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
newNgramsElement mayList ngrams = mkNgramsElement ngrams (fromMaybe GraphTerm mayList) Nothing mempty
instance ToSchema NgramsElement
instance Arbitrary NgramsElement where
arbitrary = elements [newNgramsElement "sport"]
arbitrary = elements [newNgramsElement Nothing "sport"]
ngramsElementToRepo :: NgramsElement -> NgramsRepoElement
ngramsElementToRepo
......@@ -793,9 +794,9 @@ putListNgrams listId ngramsType nes = do
where
m = Map.fromList $ (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) <$> nes
tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> [NgramsTerm] -> m ()
tableNgramsPost tabType listId =
putListNgrams listId (ngramsTypeFromTabType tabType) . fmap newNgramsElement
tableNgramsPost :: RepoCmdM env err m => TabType -> NodeId -> Maybe ListType -> [NgramsTerm] -> m ()
tableNgramsPost tabType listId mayList =
putListNgrams listId (ngramsTypeFromTabType tabType) . fmap (newNgramsElement mayList)
-- Apply the given patch to the DB and returns the patch to be applied on the
-- client.
......@@ -1008,6 +1009,7 @@ type TableNgramsApiPut = Summary " Table Ngrams API Change"
type TableNgramsApiPost = Summary " Table Ngrams API Adds new ngrams"
:> QueryParamR "ngramsType" TabType
:> QueryParamR "list" ListId
:> QueryParam "listType" ListType
:> ReqBody '[JSON] [NgramsTerm]
:> Post '[JSON] ()
......
......@@ -24,45 +24,54 @@ Node API
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node
where
import Control.Lens (prism')
import Control.Monad ((>>))
import Control.Lens (prism', (.~), (?~))
import Control.Monad ((>>), forM)
import Control.Monad.IO.Class (liftIO)
import Data.Aeson (FromJSON, ToJSON)
import Data.Maybe
import Data.Monoid (mempty)
import Data.Swagger
import Data.Text (Text())
import Data.Time (UTCTime)
import GHC.Generics (Generic)
import Gargantext.API.Metrics
import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, apiNgramsTableCorpus, QueryParamR)
import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, apiNgramsTableCorpus, QueryParamR, TODO)
import Gargantext.API.Ngrams.NTree (MyTree)
import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery)
import Gargantext.API.Types
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Types.Main (Tree, NodeTree, ListType)
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),runViewAuthorsDoc)
import Gargantext.Database.Node.Children (getChildren)
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..))
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, getNode', deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..))
import Gargantext.Database.Schema.NodeNode (nodesToFavorite, nodesToTrash)
import Gargantext.Database.Tree (treeDB, HasTreeError(..), TreeError(..))
import Gargantext.Database.Types.Node
import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude
import Gargantext.Prelude.Utils (hash)
import Gargantext.Text.Metrics (Scored(..))
import Gargantext.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Gargantext.Viz.Chart
import Gargantext.API.Ngrams.NTree (MyTree)
import Gargantext.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Servant
import Servant.Multipart
import Servant.Swagger (HasSwagger(toSwagger))
import Servant.Swagger.Internal
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Map as Map
......@@ -133,13 +142,14 @@ type NodeAPI a = Get '[JSON] (Node a)
:> QueryParam "limit" Int
:> QueryParam "order" OrderBy
:> SearchAPI
-- VIZ
:<|> "metrics" :> MetricsAPI
:<|> "chart" :> ChartApi
:<|> "pie" :> PieApi
:<|> "tree" :> TreeApi
:<|> "phylo" :> PhyloAPI
:<|> "upload" :> UploadAPI
-- TODO-ACCESS: check userId CanRenameNode nodeId
-- TODO-EVENTS: NodeRenamed RenameNode or re-use some more general NodeEdited...
......@@ -164,7 +174,7 @@ nodeAPI p uId id
:<|> rename id
:<|> postNode uId id
:<|> putNode id
:<|> deleteNode id
:<|> deleteNodeApi id
:<|> getChildren id p
-- TODO gather it
......@@ -182,10 +192,18 @@ nodeAPI p uId id
:<|> getPie id
:<|> getTree id
:<|> phyloAPI id
:<|> postUpload id
where
deleteNodeApi id' = do
node <- getNode' id'
if _node_typename node == nodeTypeId NodeUser
then panic "not allowed" -- TODO add proper Right Management Type
else deleteNode id'
-- Annuaire
-- :<|> query
------------------------------------------------------------------------
data RenameNode = RenameNode { r_name :: Text }
deriving (Generic)
......@@ -374,7 +392,67 @@ getMetrics cId maybeListId tabType maybeLimit = do
log' n x = 1 + (if x <= 0 then 0 else (log $ (10^(n::Int)) * x))
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
errorMsg = "API.Node.metrics: key absent"
pure $ Metrics metrics
-------------------------------------------------------------
type Hash = Text
data FileType = CSV | PresseRIS
deriving (Eq, Show, Generic)
instance ToSchema FileType
instance Arbitrary FileType
where
arbitrary = elements [CSV, PresseRIS]
instance ToParamSchema FileType
instance ToParamSchema (MultipartData Mem) where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance FromHttpApiData FileType
where
parseUrlPiece "CSV" = pure CSV
parseUrlPiece "PresseRis" = pure PresseRIS
parseUrlPiece _ = pure CSV -- TODO error here
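-- A hedged sketch of the stricter parser the TODO above calls for: reject
-- unknown values with Left instead of silently defaulting to CSV. Kept as a
-- comment since the instance already exists above.
--
-- instance FromHttpApiData FileType
--   where
--     parseUrlPiece "CSV"       = Right CSV
--     parseUrlPiece "PresseRIS" = Right PresseRIS
--     parseUrlPiece t           = Left ("unknown fileType: " <> t)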
instance (ToParamSchema a, HasSwagger sub) =>
HasSwagger (MultipartForm tag a :> sub) where
-- TODO
toSwagger _ = toSwagger (Proxy :: Proxy sub)
& addParam param
where
param = mempty
& required ?~ True
& schema .~ ParamOther sch
sch = mempty
& in_ .~ ParamFormData
& paramSchema .~ toParamSchema (Proxy :: Proxy a)
type UploadAPI = Summary "Upload file(s) to a corpus"
:> MultipartForm Mem (MultipartData Mem)
:> QueryParam "fileType" FileType
:> Post '[JSON] [Hash]
--postUpload :: NodeId -> Maybe FileType -> GargServer UploadAPI
--postUpload :: NodeId -> GargServer UploadAPI
postUpload :: NodeId -> MultipartData Mem -> Maybe FileType -> Cmd err [Hash]
postUpload _ _ Nothing = panic "fileType is a required parameter"
postUpload _ multipartData (Just fileType) = do
putStrLn $ "File Type: " <> (show fileType)
is <- liftIO $ do
putStrLn ("Inputs:" :: Text)
forM (inputs multipartData) $ \input -> do
putStrLn $ ("iName " :: Text) <> (iName input)
<> ("iValue " :: Text) <> (iValue input)
pure $ iName input
_ <- forM (files multipartData) $ \file -> do
let content = fdPayload file
putStrLn $ ("XXX " :: Text) <> (fdFileName file)
putStrLn $ ("YYY " :: Text) <> cs content
--pure $ cs content
-- is <- inputs multipartData
pure $ map (hash . cs) is
......@@ -72,7 +72,8 @@ instance Arbitrary SearchInQuery where
-----------------------------------------------------------------------
data SearchResults = SearchResults { srs_results :: [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]}
data SearchResults = SearchResults' { srs_resultsP :: [FacetDoc]}
| SearchResults { srs_results :: [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]}
deriving (Generic)
$(deriveJSON (unPrefix "srs_") ''SearchResults)
......@@ -96,6 +97,7 @@ search (SearchQuery q pId) o l order =
searchIn :: NodeId -> SearchInQuery -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err SearchResults
searchIn nId (SearchInQuery q ) o l order =
SearchResults <$> searchInCorpusWithContacts nId q o l order
SearchResults' <$> searchInCorpus nId q o l order
--SearchResults <$> searchInCorpusWithContacts nId q o l order
{-|
Module : Gargantext.API.Upload
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Upload
where
import qualified Data.Text as Text
import GHC.Generics (Generic)
import Gargantext.Prelude
import Data.Text (Text)
import Data.Aeson
import Servant
import Servant.Multipart
--import Servant.Mock (HasMock(mock))
import Servant.Swagger (HasSwagger(toSwagger))
-- import qualified Data.ByteString.Lazy as LBS
import Control.Monad
import Control.Monad.IO.Class
import Gargantext.API.Types
--import Servant.CSV.Cassava (CSV'(..))
--import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
--import Data.Swagger
--import Gargantext.API.Ngrams (TODO)
-- | Upload files
-- TODO Is it possible to adapt the function according to iValue input ?
--type API = MultipartForm Mem (MultipartData Mem) :> Post '[JSON] Integer
-- instance Generic Mem
--instance ToSchema Mem
--instance Arbitrary Mem
--instance ToSchema (MultipartData Mem)
--instance Arbitrary ( MultipartData Mem)
instance HasSwagger (MultipartForm tag a :> sub) where
-- TODO
toSwagger _ = undefined -- toSwagger (Proxy :: Proxy (TODO :> Post '[JSON] ()))
--declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
--instance Arbitrary (MultipartForm Mem (MultipartData Mem))
{-
instance (FromMultipart tag a, MultipartBackend tag, Servant.Multipart.LookupContext context (MultipartOptions tag))
=> HasMock (MultipartForm tag a :> sub) context where
mock _ _ = undefined
instance HasMock (MultipartForm Mem (MultipartData Mem) :> sub) context where
mock _ _ = undefined
-}
data Upload = Upload { up :: [Text] }
deriving (Generic)
instance ToJSON Upload
type ApiUpload = MultipartForm Mem (MultipartData Mem) :> Post '[JSON] Text
-- MultipartData consists of textual inputs,
-- accessible through its "inputs" field, as well
-- as files, accessible through its "files" field.
upload :: GargServer ApiUpload
upload multipartData = do
--{-
is <- liftIO $ do
putStrLn ("Inputs:" :: Text)
forM (inputs multipartData) $ \input -> do
putStrLn $ ("iName " :: Text) <> (iName input)
<> ("iValue " :: Text) <> (iValue input)
pure $ iName input
--{-
_ <- forM (files multipartData) $ \file -> do
let content = fdPayload file
putStrLn $ ("XXX " :: Text) <> (fdFileName file)
putStrLn $ ("YYY " :: Text) <> cs content
--pure $ cs content
-- is <- inputs multipartData
--}
pure $ Text.concat $ map cs is
-------------------------------------------------------------------------------
{-|
Module : Gargantext.Core.Statistics
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Core.Statistics
where
import Data.Map (Map)
import Gargantext.Prelude
import Numeric.Statistics.PCA (pcaReduceN)
import Data.Array.IArray (Array, listArray, elems)
import qualified Data.Vector.Storable as Vec
import qualified Data.List as List
import qualified Data.Map as Map
data Dimension = Dimension Int
pcaReduceTo :: Ord t
=> Dimension
-> Map t (Vec.Vector Double)
-> Map t (Vec.Vector Double)
pcaReduceTo (Dimension d) m = Map.fromList
$ zip txts
$ elems
$ pcaReduceN m'' d
where
m'' :: Array Int (Vec.Vector Double)
m'' = listArray (1, List.length m') m'
(txts,m') = List.unzip $ Map.toList m
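-- Usage sketch (input values are made up): reduce 3-dimensional vectors down
-- to 2 dimensions; the map keys are preserved, only the vectors shrink.
--
-- pcaReduceTo (Dimension 2) (Map.fromList [ ("a", Vec.fromList [1,2,3])
--                                         , ("b", Vec.fromList [2,4,7]) ])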
......@@ -20,15 +20,17 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ConstrainedClassMethods #-}
module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
where
--import Debug.Trace (trace)
import Prelude (String)
import Debug.Trace (trace)
import Control.Lens ((^.), view, Lens', _Just)
import Control.Monad (mapM_)
import Control.Monad.IO.Class (liftIO)
......@@ -58,14 +60,15 @@ import Gargantext.Database.Utils (Cmd, CmdM)
import Gargantext.Ext.IMT (toSchoolName)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Prelude
import Gargantext.Text.Terms.Eleve (buildTries, toToken)
import Gargantext.Text.List (buildNgramsLists,StopSize(..))
import Gargantext.Text.Parsers (parseFile, FileFormat)
import Gargantext.Text.Terms (TermType(..), tt_lang)
import Gargantext.Text.Terms (extractTerms)
import qualified Gargantext.Text.Parsers.IsidoreApi as Isidore
import Gargantext.Text.Terms (TermType(..), tt_lang, extractTerms, uniText)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
import Servant (ServantErr)
import System.FilePath (FilePath)
--import qualified Data.List as List
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Text as Text
import qualified Gargantext.Database.Node.Document.Add as Doc (add)
......@@ -82,10 +85,35 @@ type FlowCorpus a = ( AddUniqId a
, UniqId a
, InsertDb a
, ExtractNgramsT a
, HasText a
)
------------------------------------------------------------------------
data ApiQuery = ApiIsidoreQuery Text | ApiIsidoreAuth Text
-- | APIs
-- TODO instances
getDataApi :: Lang
-> Maybe Limit
-> ApiQuery
-> IO [HyperdataDocument]
getDataApi lang limit (ApiIsidoreQuery q) = Isidore.get lang limit (Just q) Nothing
getDataApi lang limit (ApiIsidoreAuth q) = Isidore.get lang limit Nothing (Just q)
flowCorpusApi :: ( FlowCmdM env ServantErr m)
=> Username -> CorpusName
-> TermType Lang
-> Maybe Limit
-> ApiQuery
-> m CorpusId
flowCorpusApi u n tt l q = do
docs <- liftIO $ splitEvery 500 <$> getDataApi (_tt_lang tt) l q
flowCorpus u n tt docs
------------------------------------------------------------------------
flowAnnuaire :: FlowCmdM env ServantErr m
=> Username -> CorpusName -> (TermType Lang) -> FilePath -> m AnnuaireId
flowAnnuaire u n l filePath = do
......@@ -105,7 +133,6 @@ flowCorpusDebat u n l fp = do
)
flowCorpus u n (Multi FR) (map (map toHyperdataDocument) docs)
flowCorpusFile :: FlowCmdM env ServantErr m
=> Username -> CorpusName
-> Limit -- Limit the number of docs (for dev purpose)
......@@ -119,13 +146,21 @@ flowCorpusFile u n l la ff fp = do
flowCorpus u n la (map (map toHyperdataDocument) docs)
-- TODO query with complex query
flowCorpusSearchInDatabase :: FlowCmdM env ServantErr m
flowCorpusSearchInDatabase :: FlowCmdM env err m
=> Username -> Lang -> Text -> m CorpusId
flowCorpusSearchInDatabase u la q = do
(_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus userMaster "" (Nothing :: Maybe HyperdataCorpus)
ids <- map fst <$> searchInDatabase cId (stemIt q)
flowCorpusUser la u q (Nothing :: Maybe HyperdataCorpus) ids
flowCorpusSearchInDatabase' :: FlowCmdM env ServantErr m
=> Username -> Lang -> Text -> m CorpusId
flowCorpusSearchInDatabase' u la q = do
(_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus userMaster "" (Nothing :: Maybe HyperdataCorpus)
ids <- map fst <$> searchInDatabase cId (stemIt q)
flowCorpusUser la u q (Nothing :: Maybe HyperdataCorpus) ids
------------------------------------------------------------------------
flow :: (FlowCmdM env ServantErr m, FlowCorpus a, MkCorpus c)
......@@ -139,7 +174,7 @@ flowCorpus :: (FlowCmdM env ServantErr m, FlowCorpus a)
flowCorpus = flow (Nothing :: Maybe HyperdataCorpus)
flowCorpusUser :: (FlowCmdM env ServantErr m, MkCorpus c)
flowCorpusUser :: (FlowCmdM env err m, MkCorpus c)
=> Lang -> Username -> CorpusName -> Maybe c -> [NodeId] -> m CorpusId
flowCorpusUser l userName corpusName ctype ids = do
-- User Flow
......@@ -178,7 +213,17 @@ insertMasterDocs c lang hs = do
ids <- insertDb masterUserId masterCorpusId hs'
let documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' hs')
maps <- mapNodeIdNgrams <$> documentIdWithNgrams (extractNgramsT lang) documentsWithId
let
fixLang (Unsupervised l n s m) = Unsupervised l n s m'
where
m' = case m of
Nothing -> trace ("buildTries here" :: String) $ Just $ buildTries n (fmap toToken $ uniText $ Text.intercalate " " $ List.concat $ map hasText documentsWithId)
m'' -> m''
fixLang l = l
lang' = fixLang lang
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
maps <- mapNodeIdNgrams <$> documentIdWithNgrams (extractNgramsT lang') documentsWithId
terms2id <- insertNgrams $ Map.keys maps
let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps
......@@ -256,6 +301,10 @@ data DocumentWithId a = DocumentWithId
, documentData :: !a
} deriving (Show)
instance HasText a => HasText (DocumentWithId a)
where
hasText (DocumentWithId _ a) = hasText a
mergeData :: Map HashId ReturnId
-> Map HashId a
-> [DocumentWithId a]
......@@ -271,12 +320,18 @@ data DocumentIdWithNgrams a = DocumentIdWithNgrams
, document_ngrams :: !(Map Ngrams (Map NgramsType Int))
} deriving (Show)
-- TODO extractNgrams according to Type of Data
class ExtractNgramsT h
where
extractNgramsT :: TermType Lang -> h -> Cmd err (Map Ngrams (Map NgramsType Int))
extractNgramsT :: HasText h => TermType Lang -> h -> Cmd err (Map Ngrams (Map NgramsType Int))
class HasText h
where
hasText :: h -> [Text]
instance HasText HyperdataContact
where
hasText = undefined
instance ExtractNgramsT HyperdataContact
where
......@@ -291,46 +346,42 @@ instance ExtractNgramsT HyperdataContact
pure $ Map.fromList $ [(a', Map.singleton Authors 1) | a' <- authors ]
instance ExtractNgramsT HyperdataDocument
instance HasText HyperdataDocument
where
extractNgramsT = extractNgramsT'
hasText h = catMaybes [ _hyperdataDocument_title h
, _hyperdataDocument_abstract h
]
extractNgramsT' :: TermType Lang -> HyperdataDocument
-> Cmd err (Map Ngrams (Map NgramsType Int))
extractNgramsT' lang hd = filterNgramsT 255 <$> extractNgramsT'' lang hd
instance ExtractNgramsT HyperdataDocument
where
extractNgramsT'' :: TermType Lang -> HyperdataDocument
-> Cmd err (Map Ngrams (Map NgramsType Int))
extractNgramsT'' lang' doc = do
let source = text2ngrams
$ maybe "Nothing" identity
$ _hyperdataDocument_source doc
institutes = map text2ngrams
$ maybe ["Nothing"] (map toSchoolName . (splitOn ", "))
$ _hyperdataDocument_institutes doc
authors = map text2ngrams
$ maybe ["Nothing"] (splitOn ", ")
$ _hyperdataDocument_authors doc
leText = catMaybes [ _hyperdataDocument_title doc
, _hyperdataDocument_abstract doc
]
terms' <- map text2ngrams
<$> map (intercalate " " . _terms_label)
<$> concat
<$> liftIO (extractTerms lang' leText)
pure $ Map.fromList $ [(source, Map.singleton Sources 1)]
<> [(i', Map.singleton Institutes 1) | i' <- institutes ]
<> [(a', Map.singleton Authors 1) | a' <- authors ]
<> [(t', Map.singleton NgramsTerms 1) | t' <- terms' ]
extractNgramsT :: TermType Lang -> HyperdataDocument -> Cmd err (Map Ngrams (Map NgramsType Int))
extractNgramsT lang hd = filterNgramsT 255 <$> extractNgramsT' lang hd
where
extractNgramsT' :: TermType Lang -> HyperdataDocument
-> Cmd err (Map Ngrams (Map NgramsType Int))
extractNgramsT' lang' doc = do
let source = text2ngrams
$ maybe "Nothing" identity
$ _hyperdataDocument_source doc
institutes = map text2ngrams
$ maybe ["Nothing"] (map toSchoolName . (splitOn ", "))
$ _hyperdataDocument_institutes doc
authors = map text2ngrams
$ maybe ["Nothing"] (splitOn ", ")
$ _hyperdataDocument_authors doc
terms' <- map text2ngrams
<$> map (intercalate " " . _terms_label)
<$> concat
<$> liftIO (extractTerms lang' $ hasText doc)
pure $ Map.fromList $ [(source, Map.singleton Sources 1)]
<> [(i', Map.singleton Institutes 1) | i' <- institutes ]
<> [(a', Map.singleton Authors 1) | a' <- authors ]
<> [(t', Map.singleton NgramsTerms 1) | t' <- terms' ]
filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int)
-> Map Ngrams (Map NgramsType Int)
......@@ -349,9 +400,8 @@ documentIdWithNgrams :: HasNodeError err
documentIdWithNgrams f = mapM toDocumentIdWithNgrams
where
toDocumentIdWithNgrams d = do
e <- f $ documentData d
pure $ DocumentIdWithNgrams d e
e <- f $ documentData d
pure $ DocumentIdWithNgrams d e
-- FLOW LIST
......
......@@ -366,6 +366,11 @@ getNode nId _ = do
fromMaybe (error $ "Node does node exist: " <> show nId) . headMay
<$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))
getNode' :: NodeId -> Cmd err (Node Value)
getNode' nId = fromMaybe (error $ "Node does not exist: " <> show nId) . headMay
<$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))
getNodesWithType :: Column PGInt4 -> Cmd err [Node HyperdataDocument]
getNodesWithType = runOpaQuery . selectNodesWithType
......@@ -578,8 +583,6 @@ instance MkCorpus HyperdataAnnuaire
mk n h p u = insertNodesR [nodeAnnuaireW n h p u]
getOrMkList :: HasNodeError err => ParentId -> UserId -> Cmd err ListId
getOrMkList pId uId =
maybe (mkList' pId uId) (pure . view node_id) . headMay =<< getListsWithParentId pId
......
......@@ -32,7 +32,7 @@ import Gargantext.Database.Facet
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
import Gargantext.Database.Schema.NodeNgram
import Gargantext.Database.Schema.NodeNodeNgrams
import Gargantext.Database.Queries.Join (leftJoin6)
import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
......@@ -101,31 +101,31 @@ searchInCorpusWithContacts' cId q o l order = runOpaQuery $ queryInCorpusWithCon
queryInCorpusWithContacts :: CorpusId -> Text -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> O.Query FacetPairedRead
queryInCorpusWithContacts cId q _ _ _ = proc () -> do
(docs, (corpusDoc, (docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
(docs, (corpusDoc, (_docNgrams, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
restrict -< (_ns_search docs) @@ (pgTSQuery $ unpack q )
restrict -< (_ns_typename docs) .== (pgInt4 $ nodeTypeId NodeDocument)
restrict -< (nn_node1_id corpusDoc) .== (toNullable $ pgNodeId cId)
restrict -< (nng_listType docNgrams) .== (toNullable $ pgNgramsType Authors)
-- restrict -< (nng_listType docNgrams) .== (toNullable $ pgNgramsType Authors)
restrict -< (_node_typename contacts) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
-- let contact_id = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
returnA -< FacetPaired (_ns_id docs) (_ns_date docs) (_ns_hyperdata docs) (pgInt4 0) (Pair (_node_id contacts) (ngrams_terms ngrams'))
joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))))
joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNgramTable queryNgramsTable queryNodeNgramTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
joinInCorpusWithContacts :: O.Query (NodeSearchRead, (NodeNodeReadNull, (NodeNodeNgramsReadNull, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull)))))
joinInCorpusWithContacts = leftJoin6 queryNodeTable queryNodeNodeNgramsTable queryNgramsTable queryNodeNodeNgramsTable queryNodeNodeTable queryNodeSearchTable cond12 cond23 cond34 cond45 cond56
where
cond12 :: (NodeNgramRead, NodeRead) -> Column PGBool
cond12 (ng3, n2) = _node_id n2 .== nng_node_id ng3
cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
cond12 (ng3, n2) = _node_id n2 .== nnng_node1_id ng3
---------
cond23 :: (NgramsRead, (NodeNgramRead, NodeReadNull)) -> Column PGBool
cond23 (ng2, (nng2, _)) = nng_ngrams_id nng2 .== ngrams_id ng2
cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
cond23 (ng2, (nnng2, _)) = nnng_ngrams_id nnng2 .== ngrams_id ng2
cond34 :: (NodeNgramRead, (NgramsRead, (NodeNgramReadNull, NodeReadNull))) -> Column PGBool
cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nng_ngrams_id nng
cond34 :: (NodeNodeNgramsRead, (NgramsRead, (NodeNodeNgramsReadNull, NodeReadNull))) -> Column PGBool
cond34 (nng, (ng, (_,_))) = ngrams_id ng .== nnng_ngrams_id nng
cond45 :: (NodeNodeRead, (NodeNgramRead, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull)))) -> Column PGBool
cond45 (nn, (nng, (_,(_,_)))) = nng_node_id nng .== nn_node2_id nn
cond45 :: (NodeNodeRead, (NodeNodeNgramsRead, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull)))) -> Column PGBool
cond45 (nn, (nng, (_,(_,_)))) = nnng_node1_id nng .== nn_node2_id nn
cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNgramReadNull, (NgramsReadNull, (NodeNgramReadNull, NodeReadNull))))) -> Column PGBool
cond56 :: (NodeSearchRead, (NodeNodeRead, (NodeNodeNgramsReadNull, (NgramsReadNull, (NodeNodeNgramsReadNull, NodeReadNull))))) -> Column PGBool
cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn_node2_id nn
......
......@@ -98,18 +98,20 @@ eavg [] = 0
-- Simple Average
mean :: Fractional a => [a] -> a
mean xs = if L.null xs then 0.0
else sum xs / fromIntegral (length xs)
mean xs = sum xs / fromIntegral (length xs)
sumMaybe :: Num a => [Maybe a] -> Maybe a
sumMaybe = fmap sum . M.sequence
variance :: Floating a => [a] -> a
variance xs = mean $ map (\x -> (x - m) ** 2) xs where
variance xs = sum ys / (fromIntegral (length xs) - 1)
where
m = mean xs
ys = map (\x -> (x - m) ** 2) xs
deviation :: [Double] -> Double
deviation :: Floating a => [a] -> a
deviation = sqrt . variance
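-- Worked example of the definitions above (note the sample variance, hence
-- the (n - 1) divisor):
--   mean      [1,2,3,4] == 2.5
--   variance  [1,2,3,4] == (2.25 + 0.25 + 0.25 + 2.25) / 3 ≈ 1.6667
--   deviation [1,2,3,4] == sqrt (5/3)                      ≈ 1.2910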
movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
......@@ -242,8 +244,8 @@ scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
scaleNormalize :: [Double] -> [Double]
scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
where
v = variance xs'
m = mean xs'
v = variance xs'
m = mean xs'
xs' = map abs xs
normalize :: [Double] -> [Double]
......
{-|
Module : Gargantext.Text.Crawlers
Description : All crawlers of Gargantext in one file.
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Crawlers
where
{-
import Data.Text (Text)
--import Gargantext.Prelude
import qualified PUBMED as PubMed
data Crawler = PubMed | HAL | Isidore
type Query = Text
--{-
crawl :: Crawler -> Query -> IO [PubMed.Doc]
crawl Pubmed = PubMed.crawler
--}
-}
......@@ -31,7 +31,7 @@ import Gargantext.Text.Metrics.Count (occurrencesWith)
import qualified Data.IntMap as IntMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.SVM as SVM
import qualified Data.SVM as SVM
import qualified Data.Vector as Vec
------------------------------------------------------------------------
......
......@@ -29,29 +29,26 @@ import GHC.Real (round)
import Gargantext.Prelude
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
import qualified Data.Array.Accelerate as DAA
import qualified Data.Array.Accelerate.Interpreter as DAA
import qualified Data.List as List
import qualified Data.Map as Map
import Numeric.Statistics.PCA (pcaReduceN)
import qualified Data.Vector.Storable as Vec
import Data.Array.IArray (Array, listArray, elems)
type GraphListSize = Int
type InclusionSize = Int
toScored :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
toScored = map2scored
. (reduceTo (Dimension 2))
. (pcaReduceTo (Dimension 2))
. (Map.filter (\v -> Vec.length v > 1))
. (Map.unionsWith (<>))
scored :: Ord t => Map (t,t) Int -> [Scored t]
scored = map2scored . (reduceTo (Dimension 2)) . scored2map
scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
......@@ -66,20 +63,6 @@ data Scored ts = Scored
, _scored_speGen :: !SpecificityGenericity
} deriving (Show)
data Dimension = Dimension Int
reduceTo :: Ord t
=> Dimension
-> Map t (Vec.Vector Double)
-> Map t (Vec.Vector Double)
reduceTo (Dimension d) ss = Map.fromList $ zip txts $ elems $ pcaReduceN ss'' d
where
ss'' :: Array Int (Vec.Vector Double)
ss'' = listArray (1, List.length ss') ss'
(txts,ss') = List.unzip $ Map.toList ss
localMetrics :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
(Map.toList fi)
......@@ -92,8 +75,6 @@ localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [i
$ DAA.zip (DAA.use is) (DAA.use ss)
-- TODO Code to be remove below
-- TODO in the textflow we end up needing these indices , it might be
-- better to compute them earlier and pass them around.
......@@ -107,10 +88,6 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) s
$ DAA.zip (DAA.use is) (DAA.use ss)
takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
takeScored listSize incSize = map _scored_terms
. linearTakes listSize incSize _scored_speGen
......@@ -134,4 +111,3 @@ linearTakes gls incSize speGen incExc = take gls
. splitEvery incSize
. sortOn speGen
......@@ -22,7 +22,7 @@ please follow the types.
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers (FileFormat(..), clean, parseFile)
module Gargantext.Text.Parsers (FileFormat(..), clean, parseFile, cleanText)
where
--import Data.ByteString (ByteString)
......@@ -164,9 +164,14 @@ openZip fp = do
bs <- mapConcurrently (\s -> withArchive fp (getEntry s)) entries
pure bs
cleanText :: Text -> Text
cleanText = cs . clean . cs
clean :: DB.ByteString -> DB.ByteString
clean txt = DBC.map clean' txt
where
clean' '’' = '\''
clean' '\r' = ' '
clean' '\t' = ' '
clean' ';' = '.'
clean' c = c
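-- Example of the substitutions above:
-- cleanText "a;b\tc" == "a.b c"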
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Parsers.Isidore where
import Data.Text (Text)
import Data.Either
import Gargantext.Prelude
import Data.Aeson
import Data.Proxy
import GHC.Generics
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.API
import Servant.Client
type IsidoreAPI = "sparql" :> Capture "query" Text :> Get '[JSON] [IsidoreDoc]
data IsidoreDoc =
IsidoreDoc {title :: Maybe Text}
deriving (Show, Generic)
instance FromJSON IsidoreDoc
instance ToJSON IsidoreDoc
isidoreDocsApi :: Proxy IsidoreAPI
isidoreDocsApi = Proxy
isidoreDocs :: ClientM [IsidoreDoc]
isidoreDocs = client isidoreDocsApi
getIsidoreDocs :: IO [IsidoreDoc]
getIsidoreDocs = do
manager' <- newManager tlsManagerSettings
res <- runClientM isidoreDocs $ mkClientEnv manager' $ BaseUrl Https "www.rechercheisidore.fr" 443 ""
case res of
Left _ -> panic "err"
Right res' -> pure res'
......@@ -43,16 +43,16 @@ selectQueryRaw' uri q = getWith opts uri
& header "User-Agent" .~ ["gargantext-hsparql-client"]
& param "query" .~ [Data.Text.pack q]
isidoreGet :: Lang -> Text -> IO (Maybe [HyperdataDocument])
isidoreGet l q = do
bindingValues <- isidoreGet' q
isidoreGet :: Lang -> Int -> Text -> IO (Maybe [HyperdataDocument])
isidoreGet la li q = do
bindingValues <- isidoreGet' li q
case bindingValues of
Nothing -> pure Nothing
Just dv -> pure $ Just $ map (bind2doc l) dv
Just dv -> pure $ Just $ map (bind2doc la) dv
isidoreGet' :: Text -> IO (Maybe [[BindingValue]])
isidoreGet' q = do
let s = createSelectQuery $ isidoreSelect q
isidoreGet' :: Int -> Text -> IO (Maybe [[BindingValue]])
isidoreGet' l q = do
let s = createSelectQuery $ isidoreSelect l q
putStrLn s
r <- selectQueryRaw' route s
putStrLn $ show $ r ^. responseStatus
......@@ -60,11 +60,11 @@ isidoreGet' q = do
-- res <- selectQuery route $ simpleSelect q
-- pure res
isidoreSelect :: Text -> Query SelectQuery
isidoreSelect q = do
isidoreSelect :: Int -> Text -> Query SelectQuery
isidoreSelect lim q = do
-- See Predefined Namespace Prefixes:
-- https://isidore.science/sparql?nsdecl
isidore <- prefix "isidore" (iriRef "http://www.rechercheisidore.fr/class/")
isidore <- prefix "isidore" (iriRef "http://isidore.science/class/")
rdf <- prefix "rdf" (iriRef "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
dcterms <- prefix "dcterms" (iriRef "http://purl.org/dc/terms/")
dc <- prefix "dc" (iriRef "http://purl.org/dc/elements/1.1/")
......@@ -80,14 +80,13 @@ isidoreSelect q = do
source <- var
langDoc <- var
publisher <- var
--langFr <- var
--agg <- var
triple_ link (rdf .:. "type") (isidore .:. "BibliographicalResource")
triple_ link (rdf .:. "type") (isidore .:. "Document")
triple_ link (dcterms .:. "title") title
triple_ link (dcterms .:. "date") date
triple_ link (dcterms .:. "creator") authors
triple_ link (dcterms .:. "language") langDoc
--triple_ link (dcterms .:. "language") langDoc
triple_ link (dc .:. "description") abstract
--triple_ link (ore .:. "isAggregatedBy") agg
--triple_ agg (dcterms .:. "title") title
......@@ -96,16 +95,18 @@ isidoreSelect q = do
optional_ $ triple_ link (dcterms .:. "publisher") publisher
-- TODO FIX BUG with (.||.) operator
--filterExpr $ (.||.) (contains title q) (contains title q)
filterExpr_ (containsWith title q) -- (contains abstract q)
--filterExpr (containsWith abstract q) -- (contains abstract q)
--filterExpr_ $ (.||.) (contains title q) (contains abstract q)
--filterExpr_ (containsWith authors q) -- (contains abstract q)
--filterExpr_ (containsWith title q) -- (contains abstract q)
--filterExpr_ $ (.||.) (containsWith title q) (contains abstract q)
filterExpr_ (containsWith title q)
-- TODO FIX filter with lang
--filterExpr $ langMatches title (str ("fra" :: Text))
--filterExpr $ (.==.) lang (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
--filterExpr_ $ langMatches title (str ("fra" :: Text))
--filterExpr_ $ (.==.) langDoc (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
orderNextDesc date
limit_ 10
limit_ lim
distinct_
selectVars [link, date, langDoc, authors, source, publisher, title, abstract]
......
{-|
Module : Gargantext.Text.Parsers.IsidoreApi
Description : To query French Humanities publication database from its API
Copyright : (c) CNRS, 2019-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.IsidoreApi where
import System.FilePath (FilePath())
import Data.Text (Text)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import Gargantext.Prelude
import Isidore.Client
import Servant.Client
import qualified Data.Text as Text
import qualified Gargantext.Text.Parsers.Date as Date
import qualified Isidore as Isidore
import Gargantext.Text.Parsers.CSV (writeDocs2Csv)
import Gargantext.Text.Parsers (cleanText)
-- | TODO work with the ServantErr
get :: Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO [HyperdataDocument]
get la l q a = do
let
printErr (DecodeFailure e _) = panic e
printErr e = panic (cs $ show e)
toIsidoreDocs :: Reply -> [IsidoreDoc]
toIsidoreDocs (ReplyOnly r) = [r]
toIsidoreDocs (Replies rs) = rs
iDocs <- either printErr _content <$> Isidore.get l q a
hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
pure hDocs
isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO ()
isidore2csvFile fp la li tq aq = do
hdocs <- get la li tq aq
writeDocs2Csv fp hdocs
isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc l (IsidoreDoc t a d u s as) = do
let
author :: Author -> Text
author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
author (Authors aus) = Text.intercalate ". " $ map author aus
creator2text :: Creator -> Text
creator2text (Creator au) = author au
creator2text (Creators aus') = Text.intercalate ". " $ map author aus'
langText :: LangText -> Text
langText (LangText _l t1) = t1
langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
(utcTime, (pub_year, pub_month, pub_day)) <- Date.split l (maybe (Just "2019") (Just) d)
pure $ HyperdataDocument (Just "IsidoreApi")
Nothing
u
Nothing
Nothing
Nothing
(Just $ cleanText $ langText t)
Nothing
(creator2text <$> as)
(_sourceName <$> s)
(cleanText <$> langText <$> a)
(fmap (Text.pack . show) utcTime)
(pub_year)
(pub_month)
(pub_day)
Nothing
Nothing
Nothing
(Just $ (Text.pack . show) l)
......@@ -29,6 +29,7 @@ compute graph
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Text.Terms
......@@ -37,19 +38,31 @@ module Gargantext.Text.Terms
import Control.Lens
import Data.Text (Text)
import Data.Traversable
import GHC.Base (String)
import Gargantext.Prelude
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Text.Terms.Multi (multiterms)
import Gargantext.Text.Terms.Mono (monoTerms)
import Gargantext.Text.Terms.Mono.Stem (stem)
import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.Text as Text
import Gargantext.Text (sentences)
import Gargantext.Text.Terms.Mono.Token.En (tokenize)
import Gargantext.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken)
data TermType lang
= Mono { _tt_lang :: lang }
| Multi { _tt_lang :: lang }
| MonoMulti { _tt_lang :: lang }
| Unsupervised { _tt_lang :: lang
, _tt_windoSize :: Int
, _tt_ngramsSize :: Int
, _tt_model :: Maybe (Tries Token ())
}
makeLenses ''TermType
--group :: [Text] -> [Text]
......@@ -61,7 +74,17 @@ makeLenses ''TermType
-- | Sugar to extract terms from text (hiddeng mapM from end user).
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms :: TermType Lang -> [Text] -> IO [[Terms]]
extractTerms termTypeLang = mapM (terms termTypeLang)
extractTerms (Unsupervised l n s m) xs = mapM (terms (Unsupervised l n s (Just m'))) xs
where
m' = case m of
Just m''-> m''
Nothing -> newTries n (Text.intercalate " " xs)
extractTerms termTypeLang xs = mapM (terms termTypeLang) xs
------------------------------------------------------------------------
-- | Terms from Text
-- Mono : mono terms
......@@ -72,6 +95,45 @@ terms :: TermType Lang -> Text -> IO [Terms]
terms (Mono lang) txt = pure $ monoTerms lang txt
terms (Multi lang) txt = multiterms lang txt
terms (MonoMulti lang) txt = terms (Multi lang) txt
terms (Unsupervised lang n s m) txt = termsUnsupervised (Unsupervised lang n s (Just m')) txt
where
m' = maybe (newTries n txt) identity m
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
------------------------------------------------------------------------
text2term :: Lang -> [Text] -> Terms
text2term _ [] = Terms [] Set.empty
text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
isPunctuation :: Text -> Bool
isPunctuation x = List.elem x $ (Text.pack . pure)
<$> ("!?(),;." :: String)
-- | Unsupervised ngrams extraction
-- language agnostic extraction
-- TODO: remove IO
-- TODO: newtype BlockText
type WindowSize = Int
type MinNgramSize = Int
termsUnsupervised :: TermType Lang -> Text -> IO [Terms]
termsUnsupervised (Unsupervised l n s m) =
pure
. map (text2term l)
. List.nub
. (List.filter (\l' -> List.length l' > s))
. List.concat
. mainEleveWith (maybe (panic "no model") identity m) n
. uniText
termsUnsupervised _ = undefined
newTries :: Int -> Text -> Tries Token ()
newTries n t = buildTries n (fmap toToken $ uniText t)
uniText :: Text -> [[Text]]
uniText = map (List.filter (not . isPunctuation))
. map tokenize
. sentences -- | TODO get sentences according to lang
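-- Sketch of the pipeline above (assuming the English tokenizer splits off
-- terminal punctuation, which isPunctuation then drops):
-- uniText "It works. It really works!"
--   == [["It","works"],["It","really","works"]]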
{-|
Module : Gargantext.Text.Terms.Eleve
Description : Unsupervised word segmentation
Copyright : (c) CNRS, 2019-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
# Implementation of Unsupervised Word Segmentation
References:
- Python implementation (Korantin August, Emmanuel Navarro):
[EleVe](https://github.com/kodexlab/eleve.git)
- Unsupervized Word Segmentation: the case for Mandarin Chinese, Pierre
Magistry, Benoît Sagot, Alpage, INRIA & Univ. Paris 7, Proceedings of
the 50th Annual Meeting of the Association for Computational Linguistics,
pages 383–387. [PDF](https://www.aclweb.org/anthology/P12-2075)
Notes for current implementation:
- TODO extract longer ngrams (see paper above, viterbi algo can be used)
- TODO AD TEST: prop (Node c _e f) = c == Map.size f
- AD: Real ngrams extraction test
from Gargantext.Text.Terms import extractTermsUnsupervised
docs <- runCmdRepl $ selectDocs 1004
extractTermsUnsupervised 3 $ DT.intercalate " "
$ catMaybes
$ Gargantext.map _hyperdataDocument_abstract docs
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Text.Terms.Eleve where
-- import Debug.Trace (trace)
-- import Debug.SimpleReflect
import Control.Lens hiding (levels, children)
import Control.Monad (forM_)
import Data.Ord (Ord)
import qualified Data.List as L
import Data.Monoid
import Data.Text (Text)
import qualified Data.Text as T
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import qualified Data.Map as Map
import Gargantext.Prelude hiding (cs)
import qualified Data.Tree as Tree
import Data.Tree (Tree)
import qualified Prelude as P (putStrLn, logBase, isNaN, RealFloat)
nan :: Floating e => e
nan = 0 / 0
noNaNs :: P.RealFloat e => [e] -> [e]
noNaNs = filter (not . P.isNaN)
updateIfDefined :: P.RealFloat e => e -> e -> e
updateIfDefined e0 e | P.isNaN e = e0
| otherwise = e
sim :: Entropy e => e -> e -> Bool
sim x y = x == y || (P.isNaN x && P.isNaN y)
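-- Unlike (==), sim treats NaN as equal to itself:
-- sim nan nan == True, whereas nan == nan is False for IEEE floats.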
subst :: Entropy e => (e, e) -> e -> e
subst (src, dst) x | sim src x = dst
| otherwise = x
------------------------------------------------------------------------
type Entropy e =
( Fractional e
, Floating e
, P.RealFloat e
, Show e
-- ^ TODO: only used for debugging
)
------------------------------------------------------------------------
-- | Example and tests for development
data I e = I
{ _info_entropy :: e
, _info_entropy_var :: e
, _info_autonomy :: e
}
instance Show e => Show (I e) where
show (I e ev a) = show (e, ev, a)
makeLenses ''I
type ModEntropy i o e = (e -> e) -> i -> o
set_autonomy :: Entropy e => ModEntropy (I e) (I e) e
set_autonomy fe i = i & info_autonomy .~ fe (i ^. info_entropy_var)
set_entropy_var :: Entropy e => Setter e (I e) e e
set_entropy_var f e = (\ev -> I e ev nan) <$> f e
data StartStop = Start | Stop
deriving (Ord, Eq, Show)
data Token = NonTerminal Text
| Terminal StartStop
deriving (Ord, Eq, Show)
isTerminal :: Token -> Bool
isTerminal (Terminal _) = True
isTerminal (NonTerminal _) = False
nonTerminals :: [Token] -> [Text]
nonTerminals ts = [nt | NonTerminal nt <- ts]
parseToken :: Text -> Token
parseToken "<start>" = Terminal Start
parseToken "<stop>" = Terminal Stop
parseToken t = NonTerminal t
toToken :: [Text] -> [Token]
toToken xs = Terminal Start : (NonTerminal <$> xs) <> [Terminal Stop]
printToken :: Token -> Text
printToken = f
where
f (NonTerminal x) = x
f (Terminal Start) = "<start>"
f (Terminal Stop) = "<stop>"
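-- Example: toToken brackets a sentence with sentinel terminals, and
-- printToken is its per-token inverse:
-- toToken ["new","york"]
--   == [Terminal Start, NonTerminal "new", NonTerminal "york", Terminal Stop]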
------------------------------------------------------------------------
data Trie k e
= Node { _node_count :: Int
, _node_entropy :: e
, _node_children :: Map k (Trie k e)
}
| Leaf { _node_count :: Int }
deriving (Show)
makeLenses ''Trie
insertTrie :: Ord k => [k] -> Trie k () -> Trie k ()
insertTrie [] n = n { _node_count = _node_count n +1}
insertTrie (x:xs) (Leaf c) = mkTrie (c+1) $ Map.singleton x $ insertTrie xs emptyTrie
insertTrie (x:xs) (Node c _e children) = mkTrie (c+1) $ Map.alter f x children
where
f = Just . insertTrie xs . fromMaybe emptyTrie
-- emptyTrie :: (Ord k, Monoid e) => Trie k e
-- emptyTrie = Node 0 mempty mempty
emptyTrie :: Trie k e
emptyTrie = Leaf 0
mkTrie :: Monoid e => Int -> Map k (Trie k e) -> Trie k e
mkTrie c children
| Map.null children = Leaf c
| otherwise = Node c mempty children
-----------------------------
-- | Trie to Tree since Tree as nice print function
toTree :: k -> Trie k e -> Tree (k,Int,Maybe e)
toTree k (Leaf c) = Tree.Node (k, c, Nothing) []
toTree k (Node c e cs) = Tree.Node (k, c, Just e) (map (uncurry toTree) $ Map.toList cs)
------------------------------------------------------------------------
------------------------------------------------------------------------
normalizeLevel :: Entropy e => e -> e -> e -> e
normalizeLevel m v e = (e - m) / v
{- Unused
nodeChildren :: Trie k e -> Map k (Trie k e)
nodeChildren (Node _ _ cs) = cs
nodeChildren (Leaf _) = Map.empty
-}
chunkAlongEleve :: Int -> [a] -> [[a]]
chunkAlongEleve n xs = L.take n <$> L.tails xs
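-- chunkAlongEleve 3 [1,2,3,4] == [[1,2,3],[2,3,4],[3,4],[4],[]]
-- (the trailing short chunks are the suffixes shorter than n)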
data Direction = Backward | Forward
buildTrie :: Direction -> Int -> [[Token]] -> Trie Token ()
buildTrie d n sentences
= L.foldr insertTrie emptyTrie
. L.concat
$ ( filter (/= [Terminal (term d)])
. chunkAlongEleve (n + 1)
. order d
)
<$> sentences
where
order Forward = identity
order Backward = reverse
term Forward = Stop
term Backward = Start
class IsTrie trie where
entropyTrie :: Entropy e => (k -> Bool) -> trie k () -> trie k e
nodeEntropy :: Entropy e => Getting e i e -> trie k i -> e
nodeChild :: Ord k => k -> trie k e -> trie k e
findTrie :: Ord k => [k] -> trie k e -> trie k e
printTrie :: (Show i, Entropy e) => Getting e i e -> trie Token i -> IO ()
evTrie :: Entropy e => Getting e i e -> Setter i o e e -> trie k i -> trie k o
normalizeEntropy :: Entropy e
=> Getting e i e -> ModEntropy i o e
-> trie k i -> trie k o
instance IsTrie Trie where
entropyTrie _ (Leaf c) = Leaf c
entropyTrie pred (Node c () children) = Node c e (map (entropyTrie pred) children)
where
children' = Map.toList children
sum_count = sum $ _node_count . snd <$> children'
e | sum_count == 0 = nan
| otherwise = sum $ f <$> children'
f (k, child) = if pred k then chc * P.logBase 2 (fromIntegral c)
else - chc * P.logBase 2 chc
where
chc = fromIntegral (_node_count child) / fromIntegral c
nodeEntropy inE (Node _ e _) = e ^. inE
nodeEntropy _ (Leaf _) = nan
nodeChild k (Node _ _ cs) = fromMaybe emptyTrie (Map.lookup k cs)
nodeChild _ (Leaf _) = emptyTrie
findTrie ks t = L.foldl (flip nodeChild) t ks
printTrie inE t = do
P.putStrLn . Tree.drawTree
. fmap show
$ toTree (NonTerminal "") t
P.putStrLn " Levels:"
forM_ (normalizationLevels inE t) $ \level ->
P.putStrLn $ " " <> show level
evTrie inE setEV = go nan
where
go _ (Leaf c) = Leaf c
go e0 (Node c i children) = Node c (i & setEV .~ ev e0 e1) $ go e1 <$> children
where e1 = i ^. inE
ev 0 0 = nan
ev i0 i1 = i1 - i0
normalizeEntropy inE modE t = go (modE identity) (normalizationLevels inE t) t
where
go _ _ (Leaf c) = Leaf c
go _ [] _ = panic "normalizeEntropy' empty levels"
go f ((m, v, _) : ess) (Node c i children)
= Node c (f i) $ go (modE $ normalizeLevel m v) ess <$> children
------------------------------------------------------------------------
levels :: Trie k e -> [[Trie k e]]
levels = L.takeWhile (not . L.null) . L.iterate (L.concatMap subForest) . pure
where
subForest :: Trie k e -> [Trie k e]
subForest (Leaf _) = []
subForest (Node _ _ children) = Map.elems children
entropyLevels :: Entropy e => Getting e i e -> Trie k i -> [[e]]
entropyLevels inE = fmap (noNaNs . map (nodeEntropy inE)) . L.tail . levels
normalizationLevels :: Entropy e => Getting e i e -> Trie k i -> [(e, e, Int)]
normalizationLevels inE = fmap f . entropyLevels inE
where
f es = (mean es, deviation es, length es)
------------------------------------------------------------------------
data Tries k e = Tries
{ _fwd :: Trie k e
, _bwd :: Trie k e
}
makeLenses ''Tries
buildTries :: Int -> [[Token]] -> Tries Token ()
buildTries n sentences = Tries
{ _fwd = buildTrie Forward n sentences
, _bwd = buildTrie Backward n sentences
}
instance IsTrie Tries where
nodeEntropy inE (Tries f b) = mean [nodeEntropy inE f, nodeEntropy inE b]
findTrie ks (Tries f b) = Tries (findTrie ks f) (findTrie (reverse ks) b)
nodeChild = onTries . nodeChild
entropyTrie = onTries . entropyTrie
evTrie inE setEV = onTries $ evTrie inE setEV
normalizeEntropy inE = onTries . normalizeEntropy inE
printTrie inE (Tries f b) = do
P.putStrLn "Forward:"
printTrie inE f
P.putStrLn ""
P.putStrLn "Backward:"
printTrie inE b
onTries :: (Trie k i -> Trie k o) -> Tries k i -> Tries k o
onTries h (Tries f b) = Tries (h f) (h b)
------------------------------------------------------------------------
mayCons :: [a] -> [[a]] -> [[a]]
mayCons [] xss = xss
mayCons xs xss = xs : xss
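-- mayCons avoids prepending empty segments:
-- mayCons []  [[1]] == [[1]]
-- mayCons [2] [[1]] == [[2],[1]]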
{-
split :: (IsTrie trie, Entropy e) => Lens' i e -> trie Token i -> [Token] -> [[Token]]
split _ _ [] = []
split inE t (Terminal Start:xs) = split inE t xs
split inE t (x0:xs0) = go [x0] xs0
where
go pref [] = [pref]
go pref (Terminal Stop:_) = [pref]
go _ (Terminal Start:_) = panic "split impossible"
go pref (x:xs) =
-- trace (show (if acc then "ACC" else "CUT", (prefx, epxt), if acc then ">" else "<=", ((pref, ept), "+", ([x], ext)))) $
if acc
then go prefx xs
else mayCons pref $ go [x] xs
where
prefx = pref <> [x]
pt = findTrie pref t
pxt = findTrie prefx t
xt = findTrie [x] t
ept = ne pt
-- ^ entropy of the current prefix
ext = ne xt
-- ^ entropy of [x]
epxt = ne pxt
-- ^ entropy of the current prefix plus x
acc = P.isNaN ept || P.isNaN ext || not (P.isNaN epxt) -- && (epxt > mean [ept, ext])
-- aut(["in","this","paper"]) > aut(["in","this"]) + aut(["paper"])
ne = nodeEntropy inE
-}
split :: Entropy e => Int -> Lens' i e -> Tries Token i -> [Token] -> [[Text]]
split _ _ _ [] = []
split _ _ _ [t] = pure <$> nonTerminals [t]
split n inE t ts = nonTerminals pref `mayCons` split n inE t (drop (length pref) ts)
where
pref = maximumWith (\ks -> nodeEntropy inE $ findTrie ks t)
(L.tail . L.inits . take n $ ts)
{-
split :: Entropy e => Lens' i e -> Tries Token i -> [Token] -> [[Token]]
split inE t0 ts =
maximumWith (sum . map $ nodeAutonomy inE t0) (all the splits of ts)
-}
------------------------------------------------------------------------
mainEleve :: Int -> [[Text]] -> [[[Text]]]
mainEleve n i = mainEleveWith m n i
where
m = buildTries n (fmap toToken i)
mainEleveWith :: Tries Token () -> Int -> [[Text]] -> [[[Text]]]
mainEleveWith m n i = fmap (split n info_autonomy t) (fmap toToken i)
where
t :: Tries Token (I Double)
t = normalizeEntropy info_entropy_var set_autonomy
$ evTrie identity set_entropy_var
$ entropyTrie isTerminal m
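As a usage sketch: `mainEleve` builds forward and backward tries over n-grams, decorates them with entropy, variation of entropy and autonomy, then splits each sentence greedily at the prefix of maximal autonomy. With a hypothetical toy corpus:

```haskell
-- Hypothetical GHCi session (toy corpus):
-- >>> mainEleve 3 (map T.words ["the cat eats", "the dog eats", "the cat sleeps"])
-- Illustrative shape of the result (one list of multi-word terms per sentence):
-- [[["the","cat"],["eats"]], ...]  -- the actual cut points depend on the statistics
```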
------------------------------------------------------------------------
type Checks e = [(Text, Int, e, e, e, e, e, e, e, e, e)]
testEleve :: e ~ Double => Bool -> Int -> [Text] -> Checks e -> IO Bool
testEleve debug n output checks = do
let
res = split n info_autonomy nt <$> inp
when debug $ do
P.putStrLn $ show input
P.putStrLn ""
printTrie info_entropy nt
P.putStrLn ""
P.putStrLn "Splitting:"
P.putStrLn $ show res
forM_ checks checker
pure $ expected == res
where
out = T.words <$> output
expected = fmap (T.splitOn "-") <$> out
input = (T.splitOn "-" =<<) <$> out
inp = toToken <$> input
nt :: Tries Token (I Double)
nt = normalizeEntropy info_entropy_var set_autonomy
. evTrie identity set_entropy_var
. entropyTrie isTerminal
$ buildTries n inp
check f msg ref my =
if f ref my
then P.putStrLn $ " \ESC[32mPASS\ESC[m " <> msg <> " " <> show ref
else P.putStrLn $ " \ESC[31mFAIL\ESC[m " <> msg <> " ref=" <> show ref <> " my=" <> show my
checker (ngram, count, entropy, ev, autonomy, fwd_entropy, fwd_ev, fwd_autonomy, bwd_entropy, bwd_ev, bwd_autonomy) = do
let ns = parseToken <$> T.words ngram
nt' = findTrie ns nt
P.putStrLn $ " " <> T.unpack ngram <> ":"
check (==) "count" count (_node_count (_fwd nt'))
check sim "entropy" entropy (nodeEntropy info_entropy nt' )
check sim "ev" ev (nodeEntropy info_entropy_var nt' )
check sim "autonomy" autonomy (nodeEntropy info_autonomy nt' )
check sim "fwd_entropy" fwd_entropy (nodeEntropy info_entropy (_fwd nt'))
check sim "fwd_ev" fwd_ev (nodeEntropy info_entropy_var (_fwd nt'))
check sim "fwd_autonomy" fwd_autonomy (nodeEntropy info_autonomy (_fwd nt'))
check sim "bwd_entropy" bwd_entropy (nodeEntropy info_entropy (_bwd nt'))
check sim "bwd_ev" bwd_ev (nodeEntropy info_entropy_var (_bwd nt'))
check sim "bwd_autonomy" bwd_autonomy (nodeEntropy info_autonomy (_bwd nt'))
-- | TODO real data is a list of tokenized sentences
example0, example1, example2, example3, example4, example5, example6 :: [Text]
example0 = ["New-York is New-York and New-York"]
example1 = ["to-be or not to-be"]
example2 = ["to-be-or not to-be-or NOT to-be and"]
example3 = example0 <> example0
-- TEST: "York New" must not appear in the trie
example4 = ["a-b-c-d e a-b-c-d f"]
example5 = ["a-b-c-d-e f a-b-c-d-e g a-b-c-d-e"]
example6 = ["le-petit chat"
,"le-petit chien"
,"le-petit rat"
,"le gros rat"
]
checks0, checks2 :: Checks Double
checks0 =
-- [(token, count, entropy, ev, autonomy, fwd_entropy, fwd_ev, fwd_autonomy, bwd_entropy, bwd_ev, bwd_autonomy)]
[ ("<start>", 1, nan, nan, nan, 0.0, -2.113283334294875, -0.5000000000000002, nan, nan, nan)
, ("New", 3, 0.792481250360578, -1.3208020839342969, 0.7499999999999999, 0.0, -2.113283334294875, -0.5000000000000002, 1.584962500721156, -0.5283208335737188, 2.0)
, ("York", 3, 0.792481250360578, -1.3208020839342969, 0.7499999999999999, 1.584962500721156, -0.5283208335737188, 2.0, 0.0, -2.113283334294875, -0.5000000000000002)
, ("is", 1, 0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002)
, ("and", 1, 0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, -2.113283334294875, -0.5000000000000002)
, ("<stop>", 0, nan, nan, nan, nan, nan, nan, 0.0, -2.113283334294875, -0.5000000000000002)
, ("<start> New", 1, nan, nan, nan, 0.0, nan, nan, nan, nan, nan)
, ("New York", 3, 1.584962500721156, 1.584962500721156, 1.414213562373095, 1.584962500721156, 1.584962500721156, 1.4142135623730947, 1.584962500721156, 1.584962500721156, 1.4142135623730951)
, ("York is", 1, 0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865476, 0.0, nan, nan)
, ("is New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865474)
, ("York and", 1, 0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865476, 0.0, nan, nan)
, ("and New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, -0.7071067811865474)
, ("York <stop>", 1, nan, nan, nan, nan, nan, nan, 0.0, nan, nan)
, ("<start> New York", 1, nan, nan, nan, 0.0, nan, nan, nan, nan, nan)
, ("New York is", 1, 0, nan, nan, 0.0, -1.584962500721156, nan, 0.0, nan, nan)
, ("York is New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, nan, nan)
, ("is New York", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, nan)
, ("New York and", 1, 0, nan, nan, 0.0, -1.584962500721156, nan, 0.0, nan, nan)
, ("York and New", 1, 0, nan, nan, 0.0, nan, nan, 0.0, nan, nan)
, ("and New York", 1, 0, nan, nan, 0.0, nan, nan, 0.0, -1.584962500721156, nan)
, ("New York <stop>", 1, nan, nan, nan, nan, nan, nan, 0.0, nan, nan)
]
checks2 = []
{-
[("to be", 3, 1.2516291673878228, 1.2516291673878228, 1.5535694744293167, nan, 0.9182958340544896)
,("be or", 2, 0.5, nan, nan, nan, 1.0)
,("or not", 1, 0.0, nan, nan, nan, 0.0)
,("not to", 1, 0.0, nan, nan, nan, 0.0)
,("or NOT", 1, 0.0, nan, nan, nan, 0.0)
,("NOT to", 1, 0.0, nan, nan, nan, 0.0)
,("be and", 1, 0.0, nan, nan, nan, 0.0)
]
-}
runTestsEleve :: IO ()
runTestsEleve =
forM_
[("example0", 3, example0, checks0)
,("example0", 2, example0, [])
,("example1", 2, example1, [])
,("example2", 3, example2, checks2)
,("example3", 2, example3, [])
,("example4", 4, example4, [])
,("example5", 5, example5, [])
,("example6", 2, example6, [])
]
(\(name, n, ex, checks) -> do
P.putStrLn $ name <> " " <> show n
b <- testEleve False n ex checks
P.putStrLn $ " splitting: " <> if b then "PASS" else "FAIL"
)
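Running the battery from GHCi prints, for each `(name, n, corpus, checks)` tuple, one PASS/FAIL line per metric check followed by a verdict on the splitting itself (abridged sketch; ANSI colours omitted):

```haskell
-- >>> runTestsEleve
-- example0 3
--   New York:
--   PASS count 3
--   ... (one line per metric and per checked n-gram)
--   splitting: PASS
```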
......@@ -47,6 +47,7 @@ monoTerms l txt = map (monoText2term l) $ monoTexts txt
monoTexts :: Text -> [Text]
monoTexts = L.concat . monoTextsBySentence
-- | TODO use text2term only
monoText2term :: Lang -> Text -> Terms
monoText2term lang txt = Terms [txt] (S.singleton $ stem lang txt)
......@@ -54,6 +55,3 @@ monoTextsBySentence :: Text -> [[Text]]
monoTextsBySentence = map T.words
. T.split isSep
. T.toLower
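To illustrate the sentence splitter above (assuming `isSep` matches end-of-sentence punctuation such as '.' and '!'):

```haskell
-- Hypothetical GHCi session:
-- >>> monoTextsBySentence "Hello World. Good Morning!"
-- [["hello","world"],["good","morning"],[]]
-- (the trailing empty sentence comes from the final separator)
```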
......@@ -49,6 +49,8 @@ data Node = Node { node_size :: Int
, node_type :: TypeNode -- TODO NgramsType | Person
, node_id :: Text -- TODO NgramId
, node_label :: Text
, node_x_coord :: Double
, node_y_coord :: Double
, node_attributes :: Attributes
}
deriving (Show, Generic)
......@@ -62,6 +64,7 @@ instance ToSchema Node where
data Edge = Edge { edge_source :: Text
, edge_target :: Text
, edge_weight :: Double
, edge_confluence :: Double
, edge_id :: Text
}
deriving (Show, Generic)
......@@ -117,7 +120,7 @@ instance Arbitrary Graph where
arbitrary = elements $ [defaultGraph]
defaultGraph :: Graph
defaultGraph = Graph {_graph_nodes = [Node {node_size = 4, node_type = Terms, node_id = pack "0", node_label = pack "animal", node_attributes = Attributes {clust_default = 0}},Node {node_size = 3, node_type = Terms, node_id = pack "1", node_label = pack "bird", node_attributes = Attributes {clust_default = 0}},Node {node_size = 2, node_type = Terms, node_id = pack "2", node_label = pack "boy", node_attributes = Attributes {clust_default = 1}},Node {node_size = 2, node_type = Terms, node_id = pack "3", node_label = pack "dog", node_attributes = Attributes {clust_default = 0}},Node {node_size = 2, node_type = Terms, node_id = pack "4", node_label = pack "girl", node_attributes = Attributes {clust_default = 1}},Node {node_size = 4, node_type = Terms, node_id = pack "5", node_label = pack "human body", node_attributes = Attributes {clust_default = 1}},Node {node_size = 3, node_type = Terms, node_id = pack "6", node_label = pack "object", node_attributes = Attributes {clust_default = 2}},Node {node_size = 2, node_type = Terms, node_id = pack "7", node_label = pack "pen", node_attributes = Attributes {clust_default = 2}},Node {node_size = 2, node_type = Terms, node_id = pack "8", node_label = pack "table", node_attributes = Attributes {clust_default = 2}}], _graph_edges = [Edge {edge_source = pack "0", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "0"},Edge {edge_source = pack "1", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "1"},Edge {edge_source = pack "1", edge_target = pack "1", edge_weight = 1.0, edge_id = pack "2"},Edge {edge_source = pack "2", edge_target = pack "2", edge_weight = 1.0, edge_id = pack "3"},Edge {edge_source = pack "2", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "4"},Edge {edge_source = pack "3", edge_target = pack "0", edge_weight = 1.0, edge_id = pack "5"},Edge {edge_source = pack "3", edge_target = pack "1", edge_weight = 1.0, edge_id = pack "6"},Edge {edge_source = pack "3", edge_target = pack "3", edge_weight = 1.0, edge_id = pack "7"},Edge {edge_source = pack "4", edge_target = pack "4", edge_weight = 1.0, edge_id = pack "8"},Edge {edge_source = pack "4", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "9"},Edge {edge_source = pack "5", edge_target = pack "5", edge_weight = 1.0, edge_id = pack "10"},Edge {edge_source = pack "6", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "11"},Edge {edge_source = pack "7", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "12"},Edge {edge_source = pack "7", edge_target = pack "7", edge_weight = 1.0, edge_id = pack "13"},Edge {edge_source = pack "8", edge_target = pack "6", edge_weight = 1.0, edge_id = pack "14"},Edge {edge_source = pack "8", edge_target = pack "7", edge_weight = 1.0, edge_id = pack "15"},Edge {edge_source = pack "8", edge_target = pack "8", edge_weight = 1.0, edge_id = pack "16"}], _graph_metadata = Nothing}
defaultGraph = Graph {_graph_nodes = [Node {node_x_coord=0, node_y_coord=0, node_size = 4, node_type = Terms, node_id = pack "0", node_label = pack "animal", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 3, node_type = Terms, node_id = pack "1", node_label = pack "bird", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "2", node_label = pack "boy", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "3", node_label = pack "dog", node_attributes = Attributes {clust_default = 0}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "4", node_label = pack "girl", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 4, node_type = Terms, node_id = pack "5", node_label = pack "human body", node_attributes = Attributes {clust_default = 1}},Node {node_x_coord=0, node_y_coord=0, node_size = 3, node_type = Terms, node_id = pack "6", node_label = pack "object", node_attributes = Attributes {clust_default = 2}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "7", node_label = pack "pen", node_attributes = Attributes {clust_default = 2}},Node {node_x_coord=0, node_y_coord=0, node_size = 2, node_type = Terms, node_id = pack "8", node_label = pack "table", node_attributes = Attributes {clust_default = 2}}], _graph_edges = [Edge {edge_source = pack "0", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "0"},Edge {edge_source = pack "1", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "1"},Edge {edge_source = pack "1", edge_target = pack "1", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "2"},Edge {edge_source = pack "2", edge_target = pack "2", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "3"},Edge {edge_source = pack "2", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "4"},Edge {edge_source = pack "3", edge_target = pack "0", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "5"},Edge {edge_source = pack "3", edge_target = pack "1", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "6"},Edge {edge_source = pack "3", edge_target = pack "3", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "7"},Edge {edge_source = pack "4", edge_target = pack "4", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "8"},Edge {edge_source = pack "4", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "9"},Edge {edge_source = pack "5", edge_target = pack "5", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "10"},Edge {edge_source = pack "6", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "11"},Edge {edge_source = pack "7", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "12"},Edge {edge_source = pack "7", edge_target = pack "7", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "13"},Edge {edge_source = pack "8", edge_target = pack "6", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "14"},Edge {edge_source = pack "8", edge_target = pack "7", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "15"},Edge {edge_source = pack "8", edge_target = pack "8", edge_weight = 1.0, edge_confluence=0.5, edge_id = pack "16"}], _graph_metadata = Nothing}
-----------------------------------------------------------
......@@ -156,10 +159,10 @@ graphV3ToGraph (GraphV3 links nodes) = Graph (map nodeV32node nodes) (zipWith li
where
nodeV32node :: NodeV3 -> Node
nodeV32node (NodeV3 no_id' (AttributesV3 cl') no_s' no_lb')
= Node no_s' Terms (cs $ show no_id') no_lb' (Attributes cl')
= Node no_s' Terms (cs $ show no_id') no_lb' 0 0 (Attributes cl')
linkV32edge :: Int -> EdgeV3 -> Edge
linkV32edge n (EdgeV3 eo_s' eo_t' eo_w') = Edge (cs $ show eo_s') (cs $ show eo_t') ((T.read $ T.unpack eo_w') :: Double) (cs $ show n)
linkV32edge n (EdgeV3 eo_s' eo_t' eo_w') = Edge (cs $ show eo_s') (cs $ show eo_t') ((T.read $ T.unpack eo_w') :: Double) 0.5 (cs $ show n)
graphV3ToGraphWithFiles :: FilePath -> FilePath -> IO ()
......
......@@ -22,8 +22,7 @@ Portability : POSIX
module Gargantext.Viz.Graph.API
where
import Data.List (sortOn)
import Control.Lens (set, view)
import Control.Lens (set)
import Control.Monad.IO.Class (liftIO)
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Types
......@@ -69,18 +68,15 @@ getGraph nId = do
let cId = maybe (panic "no parentId") identity $ _node_parentId nodeGraph
lIds <- selectNodesWithUsername NodeList userMaster
lId <- defaultList cId
ngs <- filterListWithRoot GraphTerm <$> mapTermListRoot [lId] NgramsTerms
lId <- defaultList cId
ngs <- filterListWithRoot GraphTerm <$> mapTermListRoot [lId] NgramsTerms
myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal False)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) NgramsTerms (Map.keys ngs)
graph <- liftIO $ cooc2graph myCooc
pure $ set graph_metadata (Just metadata)
$ set graph_nodes ( sortOn node_id
$ view graph_nodes graph
) graph
graph <- liftIO $ cooc2graph 1 myCooc
pure $ set graph_metadata (Just metadata) graph
postGraph :: NodeId -> GargServer (Post '[JSON] [NodeId])
......
......@@ -10,6 +10,7 @@ Portability : POSIX
Given a graph with partitions (from the Louvain algorithm), Bridgeness uniformly
filters inter-community links.
TODO: rewrite Bridgeness with "structural equivalence" metrics
-}
{-# LANGUAGE NoImplicitPrelude #-}
......
{-| Module : Gargantext.Viz.Graph.FGL
Description : FGL main functions used in Garg
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main FGL funs/types to ease portability with IGraph.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ConstraintKinds #-}
module Gargantext.Viz.Graph.FGL where
import Gargantext.Prelude
import qualified Data.Graph.Inductive as FGL
import Data.List as List
------------------------------------------------------------------
-- | Main Types
type Graph_Undirected = FGL.Gr () ()
type Graph_Directed = FGL.Gr () ()
type Graph = FGL.Graph
type Node = FGL.Node
type Edge = FGL.Edge
------------------------------------------------------------------
-- | Main Functions
mkGraph :: [Node] -> [Edge] -> Graph_Undirected
mkGraph = FGL.mkUGraph
neighbors :: Graph gr => gr a b -> Node -> [Node]
neighbors = FGL.neighbors
-- | TODO bug: if graph is undirected, we need to filter
-- nub . (map (\(n1,n2) -> if n1 < n2 then (n1,n2) else (n2,n1))) . FGL.edges
edges :: Graph gr => gr a b -> [Edge]
edges = FGL.edges
nodes :: Graph gr => gr a b -> [Node]
nodes = FGL.nodes
------------------------------------------------------------------
-- | Main sugared functions
mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected
mkGraphUfromEdges es = mkGraph ns es
where
ns = List.nub (a <> b)
where
(a, b) = List.unzip es
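A quick usage sketch of the helpers above; vertex ids are plain Ints, and the node set is recovered from the edge list with nub:

```haskell
-- A 3-cycle built from its edge list (sketch).
triangle :: Graph_Undirected
triangle = mkGraphUfromEdges [(0, 1), (1, 2), (0, 2)]
-- neighbors triangle 0 should list 1 and 2 (ordering is FGL's choice);
-- note the TODO above: edges may need deduplication in the undirected case.
```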
{-| Module : Gargantext.Viz.Graph.IGraph
Description : IGraph main functions used in Garg
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main IGraph funs/types to ease portability with FGL.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE DataKinds #-}
module Gargantext.Viz.Graph.IGraph where
import Data.Serialize (Serialize)
import Data.Singletons (SingI)
import Gargantext.Prelude
import IGraph hiding (mkGraph, neighbors, edges, nodes, Node, Graph)
import qualified IGraph as IG
import qualified Data.List as List
------------------------------------------------------------------
-- | Main Types
type Graph_Undirected = IG.Graph 'U () ()
type Graph_Directed = IG.Graph 'D () ()
type Node = IG.Node
type Graph = IG.Graph
------------------------------------------------------------------
-- | Main Functions
mkGraph :: (SingI d, Ord v,
Serialize v, Serialize e) =>
[v] -> [LEdge e] -> IG.Graph d v e
mkGraph = IG.mkGraph
neighbors :: IG.Graph d v e -> IG.Node -> [Node]
neighbors = IG.neighbors
edges :: IG.Graph d v e -> [Edge]
edges = IG.edges
nodes :: IG.Graph d v e -> [Node]
nodes = IG.nodes
------------------------------------------------------------------
-- | Main sugared functions
mkGraphUfromEdges :: [(Int, Int)] -> Graph_Undirected
mkGraphUfromEdges es = mkGraph (List.replicate n ()) $ zip es $ repeat ()
where
(a,b) = List.unzip es
n = List.length (List.nub $ a <> b)
mkGraphDfromEdges :: [(Int, Int)] -> Graph_Directed
mkGraphDfromEdges = undefined
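Note that `mkGraphUfromEdges` sizes the vertex list from the number of distinct endpoints, so it implicitly assumes ids form the dense range 0..n-1. A defensive variant (a sketch, not part of this module; it additionally assumes `import qualified Data.Map as Map`) would remap arbitrary ids first:

```haskell
mkGraphUfromEdges' :: [(Int, Int)] -> Graph_Undirected
mkGraphUfromEdges' es = mkGraph (List.replicate n ()) (zip es' (repeat ()))
  where
    ids = List.nub (List.concatMap (\(u, v) -> [u, v]) es)
    ix  = Map.fromList (zip ids [0 ..])       -- dense reindexing
    es' = [ (ix Map.! u, ix Map.! v) | (u, v) <- es ]
    n   = List.length ids
```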
{-| Module : Gargantext.Viz.Graph.Proxemy
Description : Proxemy
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
References:
- Bruno Gaume, Karine Duvignau, Emmanuel Navarro, Yann Desalle, Hintat Cheung, et al.. Skillex: a graph-based lexical score for measuring the semantic efficiency of used verbs by human subjects describing actions. Revue TAL, Association pour le Traitement Automatique des Langues, 2016, Revue TAL : numéro spécial sur Traitement Automatique des Langues et Sciences Cognitives (55-3), 55 (3), ⟨https://www.atala.org/-Cognitive-Issues-in-Natural-⟩. ⟨hal-01320416⟩
- Python implementation [Link]()
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Viz.Graph.Proxemy
where
--import Debug.SimpleReflect
import Gargantext.Prelude
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.List as List
--import Gargantext.Viz.Graph.IGraph
import Gargantext.Viz.Graph.FGL
type Length = Int
type FalseReflexive = Bool
type NeighborsFilter = Graph_Undirected -> Node -> [Node]
type We = Bool
confluence :: [(Node,Node)] -> Length -> FalseReflexive -> We -> Map (Node,Node) Double
confluence ns l fr we = similarity_conf (mkGraphUfromEdges ns) l fr we
similarity_conf :: Graph_Undirected -> Length -> FalseReflexive -> We -> Map (Node,Node) Double
similarity_conf g l fr we = Map.fromList [ ((x,y), similarity_conf_x_y g (x,y) l fr we)
| x <- nodes g, y <- nodes g, x < y]
similarity_conf_x_y :: Graph_Undirected -> (Node,Node) -> Length -> FalseReflexive -> We -> Double
similarity_conf_x_y g (x,y) l r we = similarity
where
similarity :: Double
similarity | denominator == 0 = 0
| otherwise = prox_x_y / denominator
where
denominator = prox_x_y + lim_SC
prox_x_y :: Double
prox_x_y = maybe 0 identity $ Map.lookup y xline
xline :: Map Node Double
xline = prox_markov g [x] l r filterNeighbors'
where
filterNeighbors' | we == True = filterNeighbors
| otherwise = rm_edge_neighbors (x,y)
pair_is_edge :: Bool
pair_is_edge | we == True = False
| otherwise = List.elem y (filterNeighbors g x)
lim_SC :: Double
lim_SC
| denominator == 0 = 0
| otherwise = if pair_is_edge
then (degree g y + 1-1) / denominator
else (degree g y + 1 ) / denominator
where
denominator = if pair_is_edge
then (2 * (ecount g) + (vcount g) - 2)
else (2 * (ecount g) + (vcount g) )
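Reading the code above as a formula (our gloss): with $p_{xy}$ the probability that a false-reflexive random walk of length $l$ started at $x$ ends at $y$, the confluence compares $p_{xy}$ with a degree-based stationary limit:

```latex
\mathrm{conf}(x,y) = \frac{p_{xy}}{p_{xy} + \ell_y},
\qquad
\ell_y = \frac{\deg(y) + 1 - \delta}{2\,|E| + |V| - 2\,\delta}
```

where $\delta = 1$ when $(x,y)$ is an edge the walk is forbidden to use (the `we == False` case above) and $\delta = 0$ otherwise.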
rm_edge_neighbors :: (Node, Node) -> Graph_Undirected -> Node -> [Node]
rm_edge_neighbors (x,y) g n | (n == x && List.elem y all_neighbors) = List.filter (/= y) all_neighbors
| (n == y && List.elem x all_neighbors) = List.filter (/= x) all_neighbors
| otherwise = all_neighbors
where
all_neighbors = filterNeighbors g n
-- | TODO do as a Map instead of [Node] ?
prox_markov :: Graph_Undirected -> [Node] -> Length -> FalseReflexive -> NeighborsFilter -> Map Node Double
prox_markov g ns l r nf = foldl' (\m _ -> spreading g m r nf) ms path
where
path
| l == 0 = []
| l > 0 = [0..l-1]
| otherwise = panic "Gargantext.Viz.Graph.Proxemy.prox_markov: Length < 0"
-- TODO if ns empty
ms = case List.length ns > 0 of
True -> Map.fromList $ map (\n -> (n, 1 / (fromIntegral $ List.length ns))) ns
_ -> Map.empty
spreading :: Graph_Undirected -> Map Node Double -> FalseReflexive -> NeighborsFilter -> Map Node Double
spreading g ms r nf = Map.fromListWith (+) $ List.concat $ map pvalue (Map.keys ms)
where
-- TODO if list empty ...
-- pvalue' n = [pvalue n] <> map pvalue (neighborhood n)
pvalue n = [(n, pvalue' n)] <> map (\n''->(n'', pvalue' n)) (nf g n)
where
pvalue' n' = (value n') / (fromIntegral $ List.length neighborhood)
value n' = maybe 0 identity $ Map.lookup n' ms
neighborhood = (nf g n) <> (if r then [n] else [])
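In the reflexive case used by `confluence` (FalseReflexive = True), one `spreading` step is a plain lazy random-walk transition: every node splits its mass evenly between itself and its (possibly filtered) neighbours:

```latex
m'(v) = \sum_{n \,:\, v \,\in\, N(n) \cup \{n\}} \frac{m(n)}{|N(n)| + 1}
```

with $N(n)$ the filtered neighbourhood `nf g n`.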
------------------------------------------------------------------------
-- | Graph Tools
filterNeighbors :: Graph_Undirected -> Node -> [Node]
filterNeighbors g n = List.nub $ neighbors g n
degree :: Graph_Undirected -> Node -> Double
degree g n = fromIntegral $ List.length (filterNeighbors g n)
vcount :: Graph_Undirected -> Double
vcount = fromIntegral . List.length . List.nub . nodes
-- | TODO tests, optim and use IGraph library, fix IO ?
ecount :: Graph_Undirected -> Double
ecount = fromIntegral . List.length . List.nub . edges
------------------------------------------------------------------------
-- | Behavior tests
graphTest :: Graph_Undirected
graphTest= mkGraphUfromEdges graphTest_data
graphTest_data :: [(Int,Int)]
graphTest_data = [(0,1),(0,2),(0,4),(0,5),(1,3),(1,8),(2,3),(2,4),(2,5),(2,6),(2,16),(3,4),(3,5),(3,6),(3,18),(4,6),(5,8),(7,8),(7,9),(7,10),(7,13),(8,9),(8,10),(8,11),(8,12),(8,13),(9,12),(9,13),(10,11),(10,17),(11,12),(13,20),(14,16),(14,17),(14,18),(14,20),(15,16),(15,17),(15,18),(15,20),(16,18),(16,20),(17,18),(17,20),(18,19),(18,20),(19,20)]
graphTest_data' :: [(Int,Int)]
graphTest_data' = [(0,1),(0,2),(0,4),(0,5),(1,0),(1,3),(1,8),(2,0),(2,3),(2,4),(2,5),(2,6),(2,16),(3,1),(3,2),(3,4),(3,5),(3,6),(3,18),(4,0),(4,2),(4,3),(4,6),(5,0),(5,2),(5,3),(5,8),(6,2),(6,3),(6,4),(7,8),(7,9),(7,10),(7,13),(8,1),(8,5),(8,7),(8,9),(8,10),(8,11),(8,12),(8,13),(9,7),(9,8),(9,12),(9,13),(10,7),(10,8),(10,11),(10,17),(11,8),(11,10),(11,12),(12,8),(12,9),(12,11),(13,7),(13,8),(13,9),(13,20),(14,16),(14,17),(14,18),(14,20),(15,16),(15,17),(15,18),(15,20),(16,2),(16,14),(16,15),(16,18),(16,20),(17,10),(17,14),(17,15),(17,18),(17,20),(18,3),(18,14),(18,15),(18,16),(18,17),(18,19),(18,20),(19,18),(19,20),(20,13),(20,14),(20,15),(20,16),(20,17),(20,18),(20,19)]
-- | Tests
-- >>> runTest_Confluence_Proxemy
-- (True,True)
runTest_Confluence_Proxemy :: (Bool, Bool)
runTest_Confluence_Proxemy = (runTest_conf_is_ok, runTest_prox_is_ok)
where
runTest_conf_is_ok :: Bool
runTest_conf_is_ok = List.null $ List.filter (\t -> snd t == False)
[ (((x,y)), abs ((look (y,x) test) - (look (y,x) temoin)) < 0.0001)
| y <- nodes graphTest
, x <- nodes graphTest
]
where
test = toMap [(n, [ (y, similarity_conf_x_y graphTest (n,y) 3 True False) | y <- nodes graphTest])
| n <- nodes graphTest
]
temoin = test_confluence_temoin
runTest_prox_is_ok :: Bool
runTest_prox_is_ok = List.null (List.filter (not . List.null) $ map runTest_prox' [0..3])
runTest_prox' :: Node -> [((Node, (Node, Node)), Bool)]
runTest_prox' l = List.filter (\t -> snd t == False)
[ ((l,(x,y)), abs ((look (y,x) test) - (look (y,x) temoin)) < 0.0001)
| y <- nodes graphTest
, x <- nodes graphTest
]
where
test = toMap $ test_proxs_y l
temoin = toMap $ test_prox l
test_proxs_y :: Length -> [(Node, [(Node, Double)])]
test_proxs_y l' = map (\n -> test_proxs_x l' n) (nodes graphTest)
test_proxs_x :: Length -> Node -> (Node, [(Node, Double)])
test_proxs_x l' a = (a, map (\x -> (x, maybe 0 identity $ Map.lookup x (m a))) (nodes graphTest))
where
m x' = prox_markov graphTest [x'] l' True filterNeighbors
toMap = Map.map Map.fromList . Map.fromList
look :: (Node,Node) -> Map Node (Map Node Double) -> Double
look (x,y) m = look' x $ look' y m
where
look' x' m' = maybe (panic "nokey") identity $ Map.lookup x' m'
-- prox: walk length = 0
test_prox :: Node -> [(Node, [(Node, Double)])]
test_prox 0 = [ (0,[(0,1.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (1,[(0,0.0000),(1,1.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (2,[(0,0.0000),(1,0.0000),(2,1.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (3,[(0,0.0000),(1,0.0000),(2,0.0000),(3,1.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (4,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,1.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (5,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,1.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (6,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,1.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (7,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,1.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (8,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,1.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (9,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,1.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (10,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,1.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (11,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,1.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (12,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,1.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (13,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,1.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (14,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,1.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (15,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,1.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (16,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,1.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (17,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,1.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (18,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,1.0000),(19,0.0000),(20,0.0000)])
, (19,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,1.0000),(20,0.0000)])
, (20,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,1.0000)])
]
-- | walk length 1
test_prox 1 = [(0,[(0,0.2000),(1,0.2000),(2,0.2000),(3,0.0000),(4,0.2000),(5,0.2000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (1,[(0,0.2500),(1,0.2500),(2,0.0000),(3,0.2500),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.2500),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (2,[(0,0.1429),(1,0.0000),(2,0.1429),(3,0.1429),(4,0.1429),(5,0.1429),(6,0.1429),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.1429),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (3,[(0,0.0000),(1,0.1429),(2,0.1429),(3,0.1429),(4,0.1429),(5,0.1429),(6,0.1429),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.1429),(19,0.0000),(20,0.0000)])
, (4,[(0,0.2000),(1,0.0000),(2,0.2000),(3,0.2000),(4,0.2000),(5,0.0000),(6,0.2000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (5,[(0,0.2000),(1,0.0000),(2,0.2000),(3,0.2000),(4,0.0000),(5,0.2000),(6,0.0000),(7,0.0000),(8,0.2000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (6,[(0,0.0000),(1,0.0000),(2,0.2500),(3,0.2500),(4,0.2500),(5,0.0000),(6,0.2500),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (7,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.2000),(8,0.2000),(9,0.2000),(10,0.2000),(11,0.0000),(12,0.0000),(13,0.2000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (8,[(0,0.0000),(1,0.1111),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.1111),(6,0.0000),(7,0.1111),(8,0.1111),(9,0.1111),(10,0.1111),(11,0.1111),(12,0.1111),(13,0.1111),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (9,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.2000),(8,0.2000),(9,0.2000),(10,0.0000),(11,0.0000),(12,0.2000),(13,0.2000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (10,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.2000),(8,0.2000),(9,0.0000),(10,0.2000),(11,0.2000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.2000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (11,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.2500),(9,0.0000),(10,0.2500),(11,0.2500),(12,0.2500),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (12,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.2500),(9,0.2500),(10,0.0000),(11,0.2500),(12,0.2500),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (13,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.2000),(8,0.2000),(9,0.2000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.2000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.2000)])
, (14,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.2000),(15,0.0000),(16,0.2000),(17,0.2000),(18,0.2000),(19,0.0000),(20,0.2000)])
, (15,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.2000),(16,0.2000),(17,0.2000),(18,0.2000),(19,0.0000),(20,0.2000)])
, (16,[(0,0.0000),(1,0.0000),(2,0.1667),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.1667),(15,0.1667),(16,0.1667),(17,0.0000),(18,0.1667),(19,0.0000),(20,0.1667)])
, (17,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.1667),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.1667),(15,0.1667),(16,0.0000),(17,0.1667),(18,0.1667),(19,0.0000),(20,0.1667)])
, (18,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.1250),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.1250),(15,0.1250),(16,0.1250),(17,0.1250),(18,0.1250),(19,0.1250),(20,0.1250)])
, (19,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.3333),(19,0.3333),(20,0.3333)])
, (20,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.1250),(14,0.1250),(15,0.1250),(16,0.1250),(17,0.1250),(18,0.1250),(19,0.1250),(20,0.1250)])
]
-- | walk length 2
test_prox 2 = [ (0,[(0,0.1986),(1,0.0900),(2,0.1486),(3,0.1586),(4,0.1086),(5,0.1086),(6,0.0686),(7,0.0000),(8,0.0900),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0286),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (1,[(0,0.1125),(1,0.1760),(2,0.0857),(3,0.0982),(4,0.0857),(5,0.1135),(6,0.0357),(7,0.0278),(8,0.0903),(9,0.0278),(10,0.0278),(11,0.0278),(12,0.0278),(13,0.0278),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0357),(19,0.0000),(20,0.0000)])
, (2,[(0,0.1061),(1,0.0490),(2,0.1861),(3,0.1337),(4,0.1337),(5,0.0980),(6,0.1051),(7,0.0000),(8,0.0286),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0238),(15,0.0238),(16,0.0442),(17,0.0000),(18,0.0442),(19,0.0000),(20,0.0238)])
, (3,[(0,0.1133),(1,0.0561),(2,0.1337),(3,0.1872),(4,0.1051),(5,0.0694),(6,0.1051),(7,0.0000),(8,0.0643),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0179),(15,0.0179),(16,0.0383),(17,0.0179),(18,0.0383),(19,0.0179),(20,0.0179)])
, (4,[(0,0.1086),(1,0.0686),(2,0.1871),(3,0.1471),(4,0.1871),(5,0.0971),(6,0.1471),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0286),(17,0.0000),(18,0.0286),(19,0.0000),(20,0.0000)])
, (5,[(0,0.1086),(1,0.0908),(2,0.1371),(3,0.0971),(4,0.0971),(5,0.1594),(6,0.0571),(7,0.0222),(8,0.0622),(9,0.0222),(10,0.0222),(11,0.0222),(12,0.0222),(13,0.0222),(14,0.0000),(15,0.0000),(16,0.0286),(17,0.0000),(18,0.0286),(19,0.0000),(20,0.0000)])
, (6,[(0,0.0857),(1,0.0357),(2,0.1839),(3,0.1839),(4,0.1839),(5,0.0714),(6,0.1839),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0000),(15,0.0000),(16,0.0357),(17,0.0000),(18,0.0357),(19,0.0000),(20,0.0000)])
, (7,[(0,0.0000),(1,0.0222),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0222),(6,0.0000),(7,0.1822),(8,0.1822),(9,0.1422),(10,0.1022),(11,0.0622),(12,0.0622),(13,0.1422),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0400),(18,0.0000),(19,0.0000),(20,0.0400)])
, (8,[(0,0.0500),(1,0.0401),(2,0.0222),(3,0.0500),(4,0.0000),(5,0.0346),(6,0.0000),(7,0.1012),(8,0.2068),(9,0.1068),(10,0.0846),(11,0.0901),(12,0.0901),(13,0.0790),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0222),(18,0.0000),(19,0.0000),(20,0.0222)])
, (9,[(0,0.0000),(1,0.0222),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0222),(6,0.0000),(7,0.1422),(8,0.1922),(9,0.1922),(10,0.0622),(11,0.0722),(12,0.1122),(13,0.1422),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0400)])
, (10,[(0,0.0000),(1,0.0222),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0222),(6,0.0000),(7,0.1022),(8,0.1522),(9,0.0622),(10,0.1856),(11,0.1122),(12,0.0722),(13,0.0622),(14,0.0333),(15,0.0333),(16,0.0000),(17,0.0733),(18,0.0333),(19,0.0000),(20,0.0333)])
, (11,[(0,0.0000),(1,0.0278),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0278),(6,0.0000),(7,0.0778),(8,0.2028),(9,0.0903),(10,0.1403),(11,0.2028),(12,0.1528),(13,0.0278),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0500),(18,0.0000),(19,0.0000),(20,0.0000)])
, (12,[(0,0.0000),(1,0.0278),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0278),(6,0.0000),(7,0.0778),(8,0.2028),(9,0.1403),(10,0.0903),(11,0.1528),(12,0.2028),(13,0.0778),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0000),(18,0.0000),(19,0.0000),(20,0.0000)])
, (13,[(0,0.0000),(1,0.0222),(2,0.0000),(3,0.0000),(4,0.0000),(5,0.0222),(6,0.0000),(7,0.1422),(8,0.1422),(9,0.1422),(10,0.0622),(11,0.0222),(12,0.0622),(13,0.1672),(14,0.0250),(15,0.0250),(16,0.0250),(17,0.0250),(18,0.0250),(19,0.0250),(20,0.0650)])
, (14,[(0,0.0000),(1,0.0000),(2,0.0333),(3,0.0250),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0333),(11,0.0000),(12,0.0000),(13,0.0250),(14,0.1567),(15,0.1167),(16,0.1233),(17,0.1233),(18,0.1567),(19,0.0500),(20,0.1567)])
, (15,[(0,0.0000),(1,0.0000),(2,0.0333),(3,0.0250),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0333),(11,0.0000),(12,0.0000),(13,0.0250),(14,0.1167),(15,0.1567),(16,0.1233),(17,0.1233),(18,0.1567),(19,0.0500),(20,0.1567)])
, (16,[(0,0.0238),(1,0.0000),(2,0.0516),(3,0.0446),(4,0.0238),(5,0.0238),(6,0.0238),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0208),(14,0.1028),(15,0.1028),(16,0.1599),(17,0.1083),(18,0.1361),(19,0.0417),(20,0.1361)])
, (17,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0208),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0333),(8,0.0333),(9,0.0000),(10,0.0611),(11,0.0333),(12,0.0000),(13,0.0208),(14,0.1028),(15,0.1028),(16,0.1083),(17,0.1694),(18,0.1361),(19,0.0417),(20,0.1361)])
, (18,[(0,0.0000),(1,0.0179),(2,0.0387),(3,0.0335),(4,0.0179),(5,0.0179),(6,0.0179),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0208),(11,0.0000),(12,0.0000),(13,0.0156),(14,0.0979),(15,0.0979),(16,0.1021),(17,0.1021),(18,0.1824),(19,0.0729),(20,0.1646)])
, (19,[(0,0.0000),(1,0.0000),(2,0.0000),(3,0.0417),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0000),(8,0.0000),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0417),(14,0.0833),(15,0.0833),(16,0.0833),(17,0.0833),(18,0.1944),(19,0.1944),(20,0.1944)])
, (20,[(0,0.0000),(1,0.0000),(2,0.0208),(3,0.0156),(4,0.0000),(5,0.0000),(6,0.0000),(7,0.0250),(8,0.0250),(9,0.0250),(10,0.0208),(11,0.0000),(12,0.0000),(13,0.0406),(14,0.0979),(15,0.0979),(16,0.1021),(17,0.1021),(18,0.1646),(19,0.0729),(20,0.1896)])
]
-- | walk length 3
test_prox 3 = [ (0,[(0,0.1269),(1,0.0949),(2,0.1489),(3,0.1269),(4,0.1224),(5,0.1153),(6,0.0827),(7,0.0100),(8,0.0542),(9,0.0100),(10,0.0100),(11,0.0100),(12,0.0100),(13,0.0100),(14,0.0048),(15,0.0048),(16,0.0260),(17,0.0000),(18,0.0274),(19,0.0000),(20,0.0048)])
, (1,[(0,0.1186),(1,0.0906),(2,0.0975),(3,0.1235),(4,0.0748),(5,0.0815),(6,0.0523),(7,0.0323),(8,0.1128),(9,0.0336),(10,0.0281),(11,0.0295),(12,0.0295),(13,0.0267),(14,0.0045),(15,0.0045),(16,0.0167),(17,0.0100),(18,0.0185),(19,0.0045),(20,0.0100)])
, (2,[(0,0.1064),(1,0.0557),(2,0.1469),(3,0.1360),(4,0.1199),(5,0.0897),(6,0.0987),(7,0.0032),(8,0.0350),(9,0.0032),(10,0.0032),(11,0.0032),(12,0.0032),(13,0.0062),(14,0.0206),(15,0.0206),(16,0.0520),(17,0.0180),(18,0.0445),(19,0.0085),(20,0.0254)])
, (3,[(0,0.0907),(1,0.0706),(2,0.1360),(3,0.1258),(4,0.1158),(5,0.0895),(6,0.0931),(7,0.0071),(8,0.0351),(9,0.0071),(10,0.0101),(11,0.0071),(12,0.0071),(13,0.0094),(14,0.0199),(15,0.0199),(16,0.0396),(17,0.0171),(18,0.0562),(19,0.0130),(20,0.0295)])
, (4,[(0,0.1224),(1,0.0599),(2,0.1679),(3,0.1621),(4,0.1437),(5,0.0889),(6,0.1220),(7,0.0000),(8,0.0366),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0083),(15,0.0083),(16,0.0351),(17,0.0036),(18,0.0294),(19,0.0036),(20,0.0083)])
, (5,[(0,0.1153),(1,0.0652),(2,0.1255),(3,0.1253),(4,0.0889),(5,0.0940),(6,0.0672),(7,0.0247),(8,0.0904),(9,0.0258),(10,0.0214),(11,0.0225),(12,0.0225),(13,0.0202),(14,0.0083),(15,0.0083),(16,0.0279),(17,0.0080),(18,0.0222),(19,0.0036),(20,0.0128)])
, (6,[(0,0.1034),(1,0.0523),(2,0.1727),(3,0.1630),(4,0.1525),(5,0.0840),(6,0.1353),(7,0.0000),(8,0.0232),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.0104),(15,0.0104),(16,0.0367),(17,0.0045),(18,0.0367),(19,0.0045),(20,0.0104)])
, (7,[(0,0.0100),(1,0.0258),(2,0.0044),(3,0.0100),(4,0.0000),(5,0.0247),(6,0.0000),(7,0.1340),(8,0.1751),(9,0.1291),(10,0.0994),(11,0.0718),(12,0.0798),(13,0.1186),(14,0.0117),(15,0.0117),(16,0.0050),(17,0.0321),(18,0.0117),(19,0.0050),(20,0.0401)])
, (8,[(0,0.0301),(1,0.0502),(2,0.0272),(3,0.0273),(4,0.0203),(5,0.0502),(6,0.0103),(7,0.0973),(8,0.1593),(9,0.1029),(10,0.0864),(11,0.0850),(12,0.0894),(13,0.0832),(14,0.0065),(15,0.0065),(16,0.0060),(17,0.0234),(18,0.0136),(19,0.0028),(20,0.0223)])
, (9,[(0,0.0100),(1,0.0269),(2,0.0044),(3,0.0100),(4,0.0000),(5,0.0258),(6,0.0000),(7,0.1291),(8,0.1852),(9,0.1447),(10,0.0803),(11,0.0799),(12,0.1059),(13,0.1217),(14,0.0050),(15,0.0050),(16,0.0050),(17,0.0174),(18,0.0050),(19,0.0050),(20,0.0334)])
, (10,[(0,0.0100),(1,0.0225),(2,0.0044),(3,0.0142),(4,0.0000),(5,0.0214),(6,0.0000),(7,0.0994),(8,0.1555),(9,0.0803),(10,0.1147),(11,0.1001),(12,0.0755),(13,0.0664),(14,0.0272),(15,0.0272),(16,0.0217),(17,0.0710),(18,0.0339),(19,0.0083),(20,0.0463)])
, (11,[(0,0.0125),(1,0.0295),(2,0.0056),(3,0.0125),(4,0.0000),(5,0.0281),(6,0.0000),(7,0.0898),(8,0.1911),(9,0.0999),(10,0.1252),(11,0.1395),(12,0.1295),(13,0.0617),(14,0.0083),(15,0.0083),(16,0.0000),(17,0.0364),(18,0.0083),(19,0.0000),(20,0.0139)])
, (12,[(0,0.0125),(1,0.0295),(2,0.0056),(3,0.0125),(4,0.0000),(5,0.0281),(6,0.0000),(7,0.0998),(8,0.2011),(9,0.1324),(10,0.0943),(11,0.1295),(12,0.1395),(13,0.0817),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.0181),(18,0.0000),(19,0.0000),(20,0.0156)])
, (13,[(0,0.0100),(1,0.0214),(2,0.0086),(3,0.0131),(4,0.0000),(5,0.0202),(6,0.0000),(7,0.1186),(8,0.1497),(9,0.1217),(10,0.0664),(11,0.0494),(12,0.0654),(13,0.1143),(14,0.0246),(15,0.0246),(16,0.0254),(17,0.0379),(18,0.0379),(19,0.0196),(20,0.0714)])
, (14,[(0,0.0048),(1,0.0036),(2,0.0289),(3,0.0279),(4,0.0083),(5,0.0083),(6,0.0083),(7,0.0117),(8,0.0117),(9,0.0050),(10,0.0272),(11,0.0067),(12,0.0000),(13,0.0246),(14,0.1116),(15,0.1036),(16,0.1192),(17,0.1211),(18,0.1552),(19,0.0558),(20,0.1566)])
, (15,[(0,0.0048),(1,0.0036),(2,0.0289),(3,0.0279),(4,0.0083),(5,0.0083),(6,0.0083),(7,0.0117),(8,0.0117),(9,0.0050),(10,0.0272),(11,0.0067),(12,0.0000),(13,0.0246),(14,0.1036),(15,0.1116),(16,0.1192),(17,0.1211),(18,0.1552),(19,0.0558),(20,0.1566)])
, (16,[(0,0.0217),(1,0.0111),(2,0.0606),(3,0.0462),(4,0.0292),(5,0.0233),(6,0.0245),(7,0.0042),(8,0.0089),(9,0.0042),(10,0.0181),(11,0.0000),(12,0.0000),(13,0.0212),(14,0.0993),(15,0.0993),(16,0.1092),(17,0.0932),(18,0.1401),(19,0.0479),(20,0.1379)])
, (17,[(0,0.0000),(1,0.0067),(2,0.0210),(3,0.0200),(4,0.0030),(5,0.0067),(6,0.0030),(7,0.0268),(8,0.0351),(9,0.0145),(10,0.0592),(11,0.0243),(12,0.0120),(13,0.0316),(14,0.1009),(15,0.1009),(16,0.0932),(17,0.1156),(18,0.1383),(19,0.0479),(20,0.1395)])
, (18,[(0,0.0171),(1,0.0092),(2,0.0389),(3,0.0492),(4,0.0183),(5,0.0139),(6,0.0183),(7,0.0073),(8,0.0153),(9,0.0031),(10,0.0212),(11,0.0042),(12,0.0000),(13,0.0237),(14,0.0970),(15,0.0970),(16,0.1051),(17,0.1037),(18,0.1457),(19,0.0677),(20,0.1440)])
, (19,[(0,0.0000),(1,0.0060),(2,0.0198),(3,0.0303),(4,0.0060),(5,0.0060),(6,0.0060),(7,0.0083),(8,0.0083),(9,0.0083),(10,0.0139),(11,0.0000),(12,0.0000),(13,0.0326),(14,0.0931),(15,0.0931),(16,0.0958),(17,0.0958),(18,0.1805),(19,0.1134),(20,0.1829)])
, (20,[(0,0.0030),(1,0.0050),(2,0.0222),(3,0.0258),(4,0.0052),(5,0.0080),(6,0.0052),(7,0.0251),(8,0.0251),(9,0.0209),(10,0.0290),(11,0.0069),(12,0.0078),(13,0.0446),(14,0.0979),(15,0.0979),(16,0.1034),(17,0.1046),(18,0.1440),(19,0.0686),(20,0.1499)])
]
test_prox _ = undefined
-- | confluence, walk length 3
test_confluence_temoin :: Map Node (Map Node Double)
test_confluence_temoin = Map.map Map.fromList $ Map.fromList [(0,[(0,0.7448),(1,0.4844),(2,0.6471),(3,0.6759),(4,0.6297),(5,0.6219),(6,0.7040),(7,0.1870),(8,0.4092),(9,0.1870),(10,0.1870),(11,0.2233),(12,0.2233),(13,0.1870),(14,0.0987),(15,0.0987),(16,0.3325),(17,0.0000),(18,0.2827),(19,0.0000),(20,0.0641)])
, (1,[(0,0.4844),(1,0.7225),(2,0.6158),(3,0.4509),(4,0.6326),(5,0.6521),(6,0.6008),(7,0.4259),(8,0.2441),(9,0.4362),(10,0.3925),(11,0.4587),(12,0.4587),(13,0.3804),(14,0.0931),(15,0.0931),(16,0.2426),(17,0.1611),(18,0.2100),(19,0.1461),(20,0.1259)])
, (2,[(0,0.6471),(1,0.6158),(2,0.7070),(3,0.6569),(4,0.7060),(5,0.5915),(6,0.6918),(7,0.0680),(8,0.3091),(9,0.0680),(10,0.0680),(11,0.0836),(12,0.0836),(13,0.1239),(14,0.3219),(15,0.3219),(16,0.0630),(17,0.2568),(18,0.3901),(19,0.2458),(20,0.2674)])
, (3,[(0,0.6759),(1,0.4509),(2,0.6569),(3,0.6740),(4,0.6865),(5,0.5777),(6,0.6659),(7,0.1411),(8,0.3093),(9,0.1411),(10,0.1888),(11,0.1704),(12,0.1704),(13,0.1774),(14,0.3144),(15,0.3144),(16,0.4317),(17,0.2472),(18,0.0602),(19,0.3320),(20,0.2975)])
, (4,[(0,0.6297),(1,0.6326),(2,0.7060),(3,0.6865),(4,0.7677),(5,0.6716),(6,0.7228),(7,0.0000),(8,0.3185),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.1608),(15,0.1608),(16,0.4020),(17,0.0641),(18,0.2967),(19,0.1204),(20,0.1070)])
, (5,[(0,0.6219),(1,0.6521),(2,0.5915),(3,0.5777),(4,0.6716),(5,0.6837),(6,0.6589),(7,0.3622),(8,0.2324),(9,0.3724),(10,0.3294),(11,0.3925),(12,0.3925),(13,0.3177),(14,0.1608),(15,0.1608),(16,0.3486),(17,0.1332),(18,0.2420),(19,0.1204),(20,0.1552)])
, (6,[(0,0.7040),(1,0.6008),(2,0.6918),(3,0.6659),(4,0.7228),(5,0.6589),(6,0.7955),(7,0.0000),(8,0.2288),(9,0.0000),(10,0.0000),(11,0.0000),(12,0.0000),(13,0.0000),(14,0.1933),(15,0.1933),(16,0.4129),(17,0.0788),(18,0.3453),(19,0.1461),(20,0.1302)])
, (7,[(0,0.1870),(1,0.4259),(2,0.0680),(3,0.1411),(4,0.0000),(5,0.3622),(6,0.0000),(7,0.7551),(8,0.6496),(9,0.6811),(10,0.4974),(11,0.6737),(12,0.6964),(13,0.6598),(14,0.2116),(15,0.2116),(16,0.0875),(17,0.3810),(18,0.1436),(19,0.1608),(20,0.3657)])
, (8,[(0,0.4092),(1,0.2441),(2,0.3091),(3,0.3093),(4,0.3185),(5,0.2324),(6,0.2288),(7,0.6496),(8,0.6706),(9,0.6676),(10,0.5937),(11,0.6525),(12,0.6738),(13,0.5855),(14,0.1297),(15,0.1297),(16,0.1024),(17,0.3096),(18,0.1638),(19,0.0962),(20,0.2426)])
, (9,[(0,0.1870),(1,0.4362),(2,0.0680),(3,0.1411),(4,0.0000),(5,0.3724),(6,0.0000),(7,0.6811),(8,0.6676),(9,0.7690),(10,0.6487),(11,0.6967),(12,0.5845),(13,0.6642),(14,0.1031),(15,0.1031),(16,0.0875),(17,0.2506),(18,0.0671),(19,0.1608),(20,0.3247)])
, (10,[(0,0.1870),(1,0.3925),(2,0.0680),(3,0.1888),(4,0.0000),(5,0.3294),(6,0.0000),(7,0.4974),(8,0.5937),(9,0.6487),(10,0.7252),(11,0.5449),(12,0.6845),(13,0.6044),(14,0.3850),(15,0.3850),(16,0.2934),(17,0.0000),(18,0.3276),(19,0.2421),(20,0.3998)])
, (11,[(0,0.2233),(1,0.4587),(2,0.0836),(3,0.1704),(4,0.0000),(5,0.3925),(6,0.0000),(7,0.6737),(8,0.6525),(9,0.6967),(10,0.5449),(11,0.8004),(12,0.6217),(13,0.5866),(14,0.1608),(15,0.1608),(16,0.0000),(17,0.4109),(18,0.1070),(19,0.0000),(20,0.1664)])
, (12,[(0,0.2233),(1,0.4587),(2,0.0836),(3,0.1704),(4,0.0000),(5,0.3925),(6,0.0000),(7,0.6964),(8,0.6738),(9,0.5845),(10,0.6845),(11,0.6217),(12,0.8004),(13,0.6527),(14,0.0000),(15,0.0000),(16,0.0000),(17,0.2571),(18,0.0000),(19,0.0000),(20,0.1827)])
, (13,[(0,0.1870),(1,0.3804),(2,0.1239),(3,0.1774),(4,0.0000),(5,0.3177),(6,0.0000),(7,0.6598),(8,0.5855),(9,0.6642),(10,0.6044),(11,0.5866),(12,0.6527),(13,0.7244),(14,0.3612),(15,0.3612),(16,0.3276),(17,0.4205),(18,0.3528),(19,0.4288),(20,0.0000)])
, (14,[(0,0.0987),(1,0.0931),(2,0.3219),(3,0.3144),(4,0.1608),(5,0.1608),(6,0.1933),(7,0.2116),(8,0.1297),(9,0.1031),(10,0.3850),(11,0.1608),(12,0.0000),(13,0.3612),(14,0.7197),(15,0.7044),(16,0.6289),(17,0.6289),(18,0.6538),(19,0.6816),(20,0.6538)])
, (15,[(0,0.0987),(1,0.0931),(2,0.3219),(3,0.3144),(4,0.1608),(5,0.1608),(6,0.1933),(7,0.2116),(8,0.1297),(9,0.1031),(10,0.3850),(11,0.1608),(12,0.0000),(13,0.3612),(14,0.7044),(15,0.7197),(16,0.6289),(17,0.6289),(18,0.6538),(19,0.6816),(20,0.6538)])
, (16,[(0,0.3325),(1,0.2426),(2,0.0630),(3,0.4317),(4,0.4020),(5,0.3486),(6,0.4129),(7,0.0875),(8,0.1024),(9,0.0875),(10,0.2934),(11,0.0000),(12,0.0000),(13,0.3276),(14,0.6289),(15,0.6289),(16,0.6766),(17,0.6411),(18,0.6290),(19,0.6475),(20,0.6197)])
, (17,[(0,0.0000),(1,0.1611),(2,0.2568),(3,0.2472),(4,0.0641),(5,0.1332),(6,0.0788),(7,0.3810),(8,0.3096),(9,0.2506),(10,0.0000),(11,0.4109),(12,0.2571),(13,0.4205),(14,0.6289),(15,0.6289),(16,0.6411),(17,0.6890),(18,0.6197),(19,0.6475),(20,0.6197)])
, (18,[(0,0.2827),(1,0.2100),(2,0.3901),(3,0.0602),(4,0.2967),(5,0.2420),(6,0.3453),(7,0.1436),(8,0.1638),(9,0.0671),(10,0.3276),(11,0.1070),(12,0.0000),(13,0.3528),(14,0.6538),(15,0.6538),(16,0.6290),(17,0.6197),(18,0.6768),(19,0.6023),(20,0.6536)])
, (19,[(0,0.0000),(1,0.1461),(2,0.2458),(3,0.3320),(4,0.1204),(5,0.1204),(6,0.1461),(7,0.1608),(8,0.0962),(9,0.1608),(10,0.2421),(11,0.0000),(12,0.0000),(13,0.4288),(14,0.6816),(15,0.6816),(16,0.6475),(17,0.6475),(18,0.6023),(19,0.8130),(20,0.6023)])
, (20,[(0,0.0641),(1,0.1259),(2,0.2674),(3,0.2975),(4,0.1070),(5,0.1552),(6,0.1302),(7,0.3657),(8,0.2426),(9,0.3247),(10,0.3998),(11,0.1664),(12,0.1827),(13,0.0000),(14,0.6538),(15,0.6538),(16,0.6197),(17,0.6197),(18,0.6536),(19,0.6023),(20,0.6830)])
]
......@@ -7,7 +7,6 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
......@@ -16,57 +15,136 @@ Portability : POSIX
module Gargantext.Viz.Graph.Tools
where
--import Debug.Trace (trace)
import Data.Graph.Clustering.Louvain.CplusPlus (LouvainNode(..))
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Viz.Graph (Graph(..))
import Gargantext.Viz.Graph -- (Graph(..))
import Gargantext.Core.Statistics
import Gargantext.Viz.Graph
import Gargantext.Viz.Graph.Bridgeness (bridgeness)
import Gargantext.Viz.Graph.Distances.Matrice (measureConditional)
import Gargantext.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map)
import qualified Data.Map as Map
import Gargantext.Viz.Graph.IGraph (mkGraphUfromEdges)
import Gargantext.Viz.Graph.Proxemy (confluence)
import GHC.Float (sin, cos)
import qualified IGraph as Igraph
import qualified IGraph.Algorithms.Layout as Layout
import qualified Data.Vector.Storable as Vec
import qualified Data.Map as Map
import qualified Data.List as List
type Threshold = Int
cooc2graph :: (Map (Text, Text) Int) -> IO Graph
cooc2graph myCooc = do
cooc2graph :: Threshold -> (Map (Text, Text) Int) -> IO Graph
cooc2graph threshold myCooc = do
let (ti, _) = createIndices myCooc
myCooc4 = toIndex ti myCooc
matCooc = map2mat (0) (Map.size ti) myCooc4
myCooc' = toIndex ti myCooc
matCooc = map2mat (0) (Map.size ti) $ Map.filter (>threshold) myCooc'
distanceMat = measureConditional matCooc
distanceMap = Map.map (\_ -> 1) $ Map.filter (>0) $ mat2map distanceMat
distanceMap = Map.filter (>0.01) $ mat2map distanceMat
partitions <- case Map.size distanceMap > 0 of
True -> cLouvain distanceMap
False -> panic "Text.Flow: DistanceMap is empty"
let distanceMap' = bridgeness 300 partitions distanceMap
let bridgeness' = bridgeness 300 partitions distanceMap
let confluence' = confluence (Map.keys bridgeness') 3 True False
pure $ data2graph (Map.toList ti) myCooc4 distanceMap' partitions
data2graph (Map.toList ti) myCooc' bridgeness' confluence' partitions
----------------------------------------------------------
-- | From data to Graph
-- FIXME: distance should not be a map since we just "toList" it (same as cLouvain)
data2graph :: [(Text, Int)] -> Map (Int, Int) Int
-> Map (Int, Int) Double
-> Map (Int, Int) Double
-> [LouvainNode]
-> Graph
data2graph labels coocs distance partitions = Graph nodes edges Nothing
where
community_id_by_node_id = Map.fromList [ (n, c) | LouvainNode n c <- partitions ]
nodes = [ Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
-> IO Graph
data2graph labels coocs bridge conf partitions = do
let community_id_by_node_id = Map.fromList [ (n, c) | LouvainNode n c <- partitions ]
nodes <- mapM (setCoord ForceAtlas labels bridge)
[ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
, node_type = Terms -- or Unknown
, node_id = cs (show n)
, node_label = l
, node_x_coord = 0
, node_y_coord = 0
, node_attributes =
Attributes { clust_default = maybe 0 identity
(Map.lookup n community_id_by_node_id) } }
| (l, n) <- labels ]
edges = [ Edge { edge_source = cs (show s)
)
| (l, n) <- labels
]
let edges = [ Edge { edge_source = cs (show s)
, edge_target = cs (show t)
, edge_weight = w
, edge_weight = d
, edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
, edge_id = cs (show i) }
| (i, ((s,t), w)) <- zip ([0..]::[Integer]) (Map.toList distance) ]
| (i, ((s,t), d)) <- zip ([0..]::[Integer]) (Map.toList bridge) ]
pure $ Graph nodes edges Nothing
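
Since data2graph now computes coordinates in IO, a call looks like the sketch below; the labels, counts, and single-community partition are made-up placeholders:

```haskell
-- Two nodes in one Louvain community, one edge carrying both a
-- bridgeness weight and a confluence score.
demoData2Graph :: IO Graph
demoData2Graph =
  data2graph [("a", 0), ("b", 1)]                    -- labels
             (Map.fromList [((0,0), 2), ((1,1), 3)]) -- cooc diagonal: node sizes
             (Map.fromList [((0,1), 0.9)])           -- bridgeness: edge weights
             (Map.fromList [((0,1), 0.5)])           -- confluence per edge
             [LouvainNode 0 0, LouvainNode 1 0]
```

Note that every key of the bridgeness map must also appear in the confluence map, otherwise the edge_confluence lookup panics.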
------------------------------------------------------------------------
data Layout = KamadaKawai | ACP | ForceAtlas
setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
where
(x,y) = f i
-- | ACP
setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> IO Node
setCoord l labels m (n,node) = getCoord l labels m n
>>= \(x,y) -> pure $ node { node_x_coord = x
, node_y_coord = y
}
getCoord :: Ord a => Layout
-> [(a, Int)] -> Map (Int, Int) Double -> Int -> IO (Double, Double)
getCoord KamadaKawai _ m n = layout m n
getCoord ForceAtlas _ _ n = pure (sin d, cos d)
where
d = fromIntegral n
getCoord ACP labels m n = pure $ to2d $ maybe (panic "Graph.Tools no coordinate") identity
$ Map.lookup n
$ pcaReduceTo (Dimension 2)
$ mapArray labels m
where
to2d :: Vec.Vector Double -> (Double, Double)
to2d v = (x',y')
where
ds = take 2 $ Vec.toList v
x' = head' "to2d" ds
y' = last' "to2d" ds
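
The three layouts trade cost for quality: ForceAtlas here is just a placeholder that sends node n to (sin n, cos n) on the unit circle, ACP projects each node's distance row onto the first two principal components, and KamadaKawai delegates to igraph. A quick sketch of the cheap path (node ids are illustrative):

```haskell
-- Places nodes 0..2 on the unit circle; labels and the distance map
-- are ignored by the ForceAtlas clause, so empty values suffice.
demoCoords :: IO [(Double, Double)]
demoCoords = mapM (getCoord ForceAtlas ([] :: [(Text, Int)]) Map.empty) [0, 1, 2]
-- node 0 lands at (sin 0, cos 0) == (0.0, 1.0)
```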
mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
where
ns = map snd items
toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
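
To make the PCA input concrete, a hedged example of mapArray on a two-node distance map (the names are invented):

```haskell
-- Each node gets a row vector indexed over all nodes, with 0 for
-- missing pairs; only the (0,1) distance is present here.
rows :: Map Int (Vec.Vector Double)
rows = mapArray [("a" :: Text, 0), ("b", 1)]
                (Map.fromList [((0, 1), 0.5)])
-- rows == Map.fromList [(0, Vec.fromList [0.0, 0.5]), (1, Vec.fromList [0.0, 0.0])]
```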
------------------------------------------------------------------------
-- | KamadaKawai Layout
-- TODO TEST: check labels, nodeId and coordinates
layout :: Map (Int, Int) Double -> Int -> IO (Double, Double)
layout m n = maybe (panic "") identity <$> Map.lookup n <$> coord
where
coord :: IO (Map Int (Double,Double))
coord = Map.fromList <$> List.zip (Igraph.nodes g) <$> (Layout.getLayout g p)
--p = Layout.defaultLGL
p = Layout.defaultKamadaKawai
g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
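
As a sketch, calling the Kamada-Kawai path on a toy distance map (this assumes the haskell-igraph bindings pinned below are installed; the weights are illustrative):

```haskell
-- Builds an undirected 3-node path graph from the positive-weight
-- pairs and asks igraph for the coordinates of node 1.
demoLayout :: IO (Double, Double)
demoLayout = layout (Map.fromList [((0, 1), 1.0), ((1, 2), 0.8)]) 1
```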
......@@ -4,6 +4,10 @@ extra-package-dbs: []
packages:
- .
docker:
enable: false
repo: 'cgenie/stack-build:lts-12.26'
allow-newer: true
extra-deps:
- git: https://github.com/delanoe/data-time-segment.git
......@@ -18,8 +22,10 @@ extra-deps:
commit: 3fe28b683aba5ddf05e3b5f8eced0bd05c5a29f9
- git: https://github.com/robstewart57/rdf4h.git
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: dcaa0f5dd53f20648f4f5a615d29163582a4219c
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: bf57642f6b66f554fdc0a38ac391cd8200dffcb3
- git: https://gitlab.iscpif.fr/gargantext/patches-class
commit: 746b4ce0af8f9e600d555ad7e5b2973a940cdad9
- git: https://github.com/np/servant-job.git
......@@ -32,6 +38,8 @@ extra-deps:
commit: 53385de076be09f728a1b58c035a18e9ff9bcfd6
- git: https://github.com/delanoe/hsparql.git
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: e39454101b53916e3082085ebfe922df695fc775
- KMP-0.1.0.2
- accelerate-1.2.0.0
- aeson-lens-0.5.0.0
......@@ -39,6 +47,7 @@ extra-deps:
- duckling-0.1.3.0
- full-text-search-0.2.1.4
- fullstop-0.1.4
- haskell-igraph-0.7.1
- hgal-2.0.0.2
- located-base-0.1.1.1
- multiset-0.3.4.1 # stack test
......