Commit 8013e11f authored by Nicolas Pouillard's avatar Nicolas Pouillard
parents 22ab33bb 6be5050a
......@@ -28,7 +28,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.List.CSV (csvMapTermList)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo)
......@@ -170,14 +170,25 @@ main = do
let sensibility = case (phyloProximity config) of
Hamming -> undefined
WeightedLogJaccard s -> (show s)
WeightedLogJaccard s -> (show s)
let sync = case (phyloSynchrony config) of
ByProximityThreshold t _ _ _ -> (show t)
ByProximityDistribution _ _ -> undefined
-- to be improved
-- let br_length = case (take 1 $ exportFilter config) of
-- ByBranchSize t -> (show t)
let output = (outputPath config)
<> (unpack $ phyloName config)
<> "-scale_" <> (show (_qua_granularity $ phyloQuality config))
<> "-level_" <> (show (phyloLevel config))
<> "-" <> clq
<> "-sens_" <> sensibility
<> "-level_" <> (show (phyloLevel config))
<> "-sens_" <> sensibility
-- <> "-lenght_" <> br_length
<> "-scale_" <> (show (_qua_granularity $ phyloQuality config))
<> "-sync_" <> sync
<> ".dot"
dotToFile output dot
......@@ -12,26 +12,23 @@ Main specifications to index a corpus with a term list
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module Main where
import Data.ByteString.Lazy (writeFile)
import Data.Maybe (catMaybes)
import Data.Text (pack)
import qualified Data.Text as DT
import Data.Tuple.Extra (both)
import qualified Data.Vector as DV
import qualified Data.Maybe as DMaybe
import Control.Monad (zipWithM)
import Control.Monad.IO.Class
import Data.Map (Map)
import qualified Data.IntMap as DIM
import qualified Data.Map as DM
import GHC.Generics
......@@ -44,7 +41,6 @@ import System.IO (hPutStr, hFlush, stderr)
import System.Environment
import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Concurrent (getNumCapabilities, myThreadId, threadCapability)
import Prelude ((>>))
import Gargantext.Prelude
import Gargantext.Core
......@@ -53,7 +49,7 @@ import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.List.CSV (csvMapTermList)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms (terms)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
......
......@@ -31,7 +31,7 @@ import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.List.CSV (csvMapTermList)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.LevelMaker
......
name: gargantext
version: '0.0.1.91.2'
version: '0.0.1.94.1'
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -73,7 +73,7 @@ library:
- Gargantext.Core.Text.Corpus.API
- Gargantext.Core.Text.Corpus.Parsers.CSV
- Gargantext.Core.Text.Examples
- Gargantext.Core.Text.List.CSV
- Gargantext.Core.Text.List.Formats.CSV
- Gargantext.Core.Text.Metrics
- Gargantext.Core.Text.Metrics.TFICF
- Gargantext.Core.Text.Metrics.CharByChar
......@@ -87,7 +87,6 @@ library:
- Gargantext.Core.Text.Terms.WithList
- Gargantext.Core.Text.Flow
- Gargantext.Core.Viz.Graph
- Gargantext.Core.Viz.Graph.Distances.Matrice
- Gargantext.Core.Viz.Graph.Index
- Gargantext.Core.Viz.Phylo
- Gargantext.Core.Viz.AdaptativePhylo
......
......@@ -107,31 +107,35 @@ import Formatting.Clock (timeSpecs)
import GHC.Generics (Generic)
import Servant hiding (Patch)
import System.Clock (getTime, TimeSpec, Clock(..))
import Servant.Job.Async (JobFunction(..), serveJobsAPI)
import System.IO (stderr)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Prelude (error)
import Gargantext.Prelude
import Gargantext.Prelude hiding (log)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import qualified Gargantext.API.Metrics as Metrics
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, TODO, assertValid)
import Gargantext.API.Prelude
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid)
import Gargantext.Core.Utils (something)
-- import Gargantext.Core.Viz.Graph.API (recomputeGraph)
-- import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngrams, ngramsType, ngrams_terms)
import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Database.Schema.Node (node_id, node_parentId, node_userId)
import Gargantext.Prelude.Job
{-
-- TODO sequences of modifications (Patchs)
......@@ -318,13 +322,8 @@ tableNgramsPull listId ngramsType p_version = do
-- Apply the given patch to the DB and returns the patch to be applied on the
-- client.
-- TODO-ACCESS check
tableNgramsPut :: ( HasNodeError err
, HasTreeError err
, HasInvalidError err
, HasConfig env
, HasConnectionPool env
tableNgramsPut :: ( FlowCmdM env err m
, HasSettings env
, RepoCmdM env err m
)
=> TabType
-> ListId
......@@ -346,55 +345,87 @@ tableNgramsPut tabType listId (Versioned p_version p_table)
ret <- commitStatePatch (Versioned p_version p)
<&> v_data %~ (view (_PatchMap . at ngramsType . _Just . _PatchMap . at listId . _Just))
pure ret
tableNgramsPostChartsAsync :: ( FlowCmdM env err m
, HasNodeError err
, HasSettings env
)
=> UpdateTableNgramsCharts
-> (JobLog -> m ())
-> m JobLog
tableNgramsPostChartsAsync utn logStatus = do
let tabType = utn ^. utn_tab_type
let listId = utn ^. utn_list_id
node <- getNode listId
let nId = node ^. node_id
_uId = node ^. node_userId
mCId = node ^. node_parentId
-- printDebug "[tableNgramsPut] updating graph with nId" nId
-- printDebug "[tableNgramsPut] updating graph with uId" uId
-- _ <- recomputeGraph uId nId Conditional
printDebug "[tableNgramsPut] tabType" tabType
printDebug "[tableNgramsPut] listId" listId
_ <- case mCId of
case mCId of
Nothing -> do
printDebug "[tableNgramsPut] can't update charts, no parent, nId" nId
pure ()
pure $ jobLogFail $ jobLogInit 1
Just cId -> do
case tabType of
Authors -> do
-- printDebug "[tableNgramsPut] Authors, updating Pie, cId" cId
(logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
logRef
_ <- Metrics.updatePie cId (Just listId) tabType Nothing
pure ()
logRefSuccess
getRef
Institutes -> do
-- printDebug "[tableNgramsPut] Institutes, updating Tree, cId" cId
-- printDebug "[tableNgramsPut] updating tree StopTerm, cId" cId
(logRef, logRefSuccess, getRef) <- runJobLog 3 logStatus
logRef
_ <- Metrics.updateTree cId (Just listId) tabType StopTerm
-- printDebug "[tableNgramsPut] updating tree CandidateTerm, cId" cId
logRefSuccess
_ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
-- printDebug "[tableNgramsPut] updating tree MapTerm, cId" cId
logRefSuccess
_ <- Metrics.updateTree cId (Just listId) tabType MapTerm
pure ()
logRefSuccess
getRef
Sources -> do
-- printDebug "[tableNgramsPut] Sources, updating chart, cId" cId
(logRef, logRefSuccess, getRef) <- runJobLog 1 logStatus
logRef
_ <- Metrics.updatePie cId (Just listId) tabType Nothing
pure ()
logRefSuccess
getRef
Terms -> do
-- printDebug "[tableNgramsPut] Terms, updating Metrics (Histo), cId" cId
(logRef, logRefSuccess, getRef) <- runJobLog 6 logStatus
logRef
_ <- Metrics.updateChart cId (Just listId) tabType Nothing
logRefSuccess
_ <- Metrics.updatePie cId (Just listId) tabType Nothing
logRefSuccess
_ <- Metrics.updateScatter cId (Just listId) tabType Nothing
logRefSuccess
_ <- Metrics.updateTree cId (Just listId) tabType StopTerm
logRefSuccess
_ <- Metrics.updateTree cId (Just listId) tabType CandidateTerm
logRefSuccess
_ <- Metrics.updateTree cId (Just listId) tabType MapTerm
pure ()
logRefSuccess
getRef
_ -> do
printDebug "[tableNgramsPut] no update for tabType = " tabType
pure ()
pure ()
pure ret
pure $ jobLogFail $ jobLogInit 1
{-
{ _ne_list :: ListType
If we merge the parents/children we can potentially create cycles!
......@@ -623,6 +654,13 @@ type TableNgramsApi = TableNgramsApiGet
:<|> TableNgramsApiPut
:<|> RecomputeScoresNgramsApiGet
:<|> "version" :> TableNgramsApiGetVersion
:<|> TableNgramsAsyncApi
type TableNgramsAsyncApi = Summary "Table Ngrams Async API"
:> "async"
:> "charts"
:> "update"
:> AsyncJobs JobLog '[JSON] UpdateTableNgramsCharts JobLog
getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnectionPool env, HasConfig env)
=> NodeId
......@@ -670,35 +708,35 @@ getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orde
apiNgramsTableCorpus :: ( RepoCmdM env err m
, HasNodeError err
, HasTreeError err
, HasInvalidError err
, HasConnectionPool env
, HasConfig env
, HasSettings env
apiNgramsTableCorpus :: ( GargServerC env err m
)
=> NodeId -> ServerT TableNgramsApi m
apiNgramsTableCorpus cId = getTableNgramsCorpus cId
:<|> tableNgramsPut
:<|> scoresRecomputeTableNgrams cId
:<|> getTableNgramsVersion cId
:<|> apiNgramsAsync cId
apiNgramsTableDoc :: ( RepoCmdM env err m
, HasNodeError err
, HasTreeError err
, HasInvalidError err
, HasConnectionPool env
, HasConfig env
, HasSettings env
apiNgramsTableDoc :: ( GargServerC env err m
)
=> DocId -> ServerT TableNgramsApi m
apiNgramsTableDoc dId = getTableNgramsDoc dId
:<|> tableNgramsPut
:<|> scoresRecomputeTableNgrams dId
:<|> getTableNgramsVersion dId
:<|> apiNgramsAsync dId
-- > index all the corpus accordingly (TODO AD)
apiNgramsAsync :: NodeId -> GargServer TableNgramsAsyncApi
apiNgramsAsync _dId =
serveJobsAPI $
JobFunction $ \i log ->
let
log' x = do
printDebug "tableNgramsPostChartsAsync" x
liftBase $ log x
in tableNgramsPostChartsAsync i log'
-- Did the given list of ngrams changed since the given version?
-- The returned value is versioned boolean value, meaning that one always retrieve the
-- latest version.
......
......@@ -38,6 +38,7 @@ import Data.Validity
import Database.PostgreSQL.Simple.FromField (FromField, fromField, ResultError(ConversionFailed), returnError)
import GHC.Generics (Generic)
import Servant hiding (Patch)
import Servant.Job.Utils (jsonOptions)
import System.FileLock (FileLock)
import Test.QuickCheck (elements, frequency)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
......@@ -46,7 +47,7 @@ import Protolude (maybeToEither)
import Gargantext.Prelude
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..), NodeId)
import Gargantext.Core.Types (ListType(..), ListId, NodeId)
import Gargantext.Core.Types (TODO)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Prelude (fromField', CmdM')
......@@ -735,3 +736,17 @@ ngramsTypeFromTabType tabType =
Terms -> TableNgrams.NgramsTerms
_ -> panic $ lieu <> "No Ngrams for this tab"
-- TODO: This `panic` would disapear with custom NgramsType.
----
-- Async task
data UpdateTableNgramsCharts = UpdateTableNgramsCharts
{ _utn_tab_type :: !TabType
, _utn_list_id :: !ListId
} deriving (Eq, Show, Generic)
makeLenses ''UpdateTableNgramsCharts
instance FromJSON UpdateTableNgramsCharts where
parseJSON = genericParseJSON $ jsonOptions "_utn_"
instance ToSchema UpdateTableNgramsCharts where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_utn_")
......@@ -30,7 +30,7 @@ import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Prelude (GargServer, simuLogs)
import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Core.Viz.Graph.Distances (GraphMetric(..), Distance(..))
import Gargantext.Core.Methods.Distances (GraphMetric(..), Distance(..))
import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node
......
......@@ -121,7 +121,8 @@ type GargPrivateAPI' =
-- Document endpoint
:<|> "document" :> Summary "Document endpoint"
:> Capture "doc_id" DocId
:> "ngrams" :> TableNgramsApi
:> "ngrams"
:> TableNgramsApi
-- :<|> "counts" :> Stream GET NewLineFraming '[JSON] Count :> CountAPI
-- TODO-SECURITY
......
......@@ -56,6 +56,11 @@ import Gargantext.Prelude
type TableApi = Summary "Table API"
:> QueryParam "tabType" TabType
:> QueryParam "list" ListId
:> QueryParam "limit" Int
:> QueryParam "offset" Int
:> QueryParam "orderBy" OrderBy
:> QueryParam "query" Text
:> Get '[JSON] (HashedResponse FacetTableResult)
:<|> Summary "Table API (POST)"
:> ReqBody '[JSON] TableQuery
......@@ -90,14 +95,21 @@ tableApi id' = getTableApi id'
:<|> getTableHashApi id'
getTableApi :: NodeId -> Maybe TabType -> Cmd err (HashedResponse FacetTableResult)
getTableApi cId tabType = do
t <- getTable cId tabType Nothing Nothing Nothing
getTableApi :: NodeId
-> Maybe TabType
-> Maybe ListId
-> Maybe Int
-> Maybe Int
-> Maybe OrderBy
-> Maybe Text
-> Cmd err (HashedResponse FacetTableResult)
getTableApi cId tabType _mListId mLimit mOffset mOrderBy mQuery = do
printDebug "[getTableApi] mQuery" mQuery
t <- getTable cId tabType mOffset mLimit mOrderBy mQuery
pure $ constructHashedResponse t
postTableApi :: NodeId -> TableQuery -> Cmd err FacetTableResult
postTableApi cId (TableQuery o l order ft "") = getTable cId (Just ft) (Just o) (Just l) (Just order)
postTableApi cId (TableQuery o l order ft "") = getTable cId (Just ft) (Just o) (Just l) (Just order) Nothing
postTableApi cId (TableQuery o l order ft q) = case ft of
Docs -> searchInCorpus' cId False [q] (Just o) (Just l) (Just order)
Trash -> searchInCorpus' cId True [q] (Just o) (Just l) (Just order)
......@@ -105,7 +117,7 @@ postTableApi cId (TableQuery o l order ft q) = case ft of
getTableHashApi :: NodeId -> Maybe TabType -> Cmd err Text
getTableHashApi cId tabType = do
HashedResponse { hash = h } <- getTableApi cId tabType
HashedResponse { hash = h } <- getTableApi cId tabType Nothing Nothing Nothing Nothing Nothing
pure h
searchInCorpus' :: CorpusId
......@@ -121,21 +133,29 @@ searchInCorpus' cId t q o l order = do
pure $ TableResult { tr_docs = docs, tr_count = countAllDocs }
getTable :: NodeId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
-> Maybe OrderBy -> Cmd err FacetTableResult
getTable cId ft o l order = do
docs <- getTable' cId ft o l order
docsCount <- runCountDocuments cId (ft == Just Trash)
getTable :: NodeId
-> Maybe TabType
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Maybe Text
-> Cmd err FacetTableResult
getTable cId ft o l order query = do
docs <- getTable' cId ft o l order query
docsCount <- runCountDocuments cId (ft == Just Trash) query
pure $ TableResult { tr_docs = docs, tr_count = docsCount }
getTable' :: NodeId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
-> Maybe OrderBy -> Cmd err [FacetDoc]
getTable' cId ft o l order =
getTable' :: NodeId
-> Maybe TabType
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Maybe Text
-> Cmd err [FacetDoc]
getTable' cId ft o l order query =
case ft of
(Just Docs) -> runViewDocuments cId False o l order
(Just Trash) -> runViewDocuments cId True o l order
(Just Docs) -> runViewDocuments cId False o l order query
(Just Trash) -> runViewDocuments cId True o l order query
(Just MoreFav) -> moreLike cId o l order IsFav
(Just MoreTrash) -> moreLike cId o l order IsTrash
x -> panic $ "not implemented in getTable: " <> (cs $ show x)
......
......@@ -11,7 +11,7 @@ Portability : POSIX
{-# LANGUAGE Strict #-}
module Gargantext.Core.Viz.Graph.Distances
module Gargantext.Core.Methods.Distances
where
import Data.Aeson
......@@ -20,7 +20,8 @@ import Data.Swagger
import GHC.Generics (Generic)
import Gargantext.Prelude (Ord, Eq, Int, Double)
import Gargantext.Prelude (Show)
import Gargantext.Core.Viz.Graph.Distances.Matrice (measureConditional, distributional)
import Gargantext.Core.Methods.Distances.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Methods.Distances.Accelerate.Distributional (distributional)
import Prelude (Enum, Bounded, minBound, maxBound)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
......
{-|
Module : Gargantext.Core.Methods.Distances.Accelerate.Conditional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This module aims at implementig distances of terms context by context is
the same referential of corpus.
Implementation use Accelerate library which enables GPU and CPU computation
See Gargantext.Core.Methods.Graph.Accelerate)
-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Methods.Distances.Accelerate.Conditional
where
-- import qualified Data.Foldable as P (foldl1)
-- import Debug.Trace (trace)
import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
import qualified Gargantext.Prelude as P
-- * Metrics of proximity
-----------------------------------------------------------------------
-- ** Conditional distance
-- *** Conditional distance (basic)
-- | Conditional distance (basic version)
--
-- 2 main metrics are actually implemented in order to compute the
-- proximity of two terms: conditional and distributional
--
-- Conditional metric is an absolute metric which reflects
-- interactions of 2 terms in the corpus.
measureConditional :: Matrix Int -> Matrix Double
measureConditional m = run $ zipWith (/) m' (matSumCol d m')
where
m' = map fromIntegral (use m)
d = dim m
-- *** Conditional distance (advanced)
-- | Conditional distance (advanced version)
--
-- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called
-- "confidence" , is the maximum probability between @i@ and @j@ to see
-- @i@ in the same context of @j@ knowing @j@.
--
-- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
-- in the corpus and _[n_{ij}\] the number of its occurrences we get:
--
-- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
conditional' :: Matrix Int -> (Matrix GenericityInclusion, Matrix SpecificityExclusion)
conditional' m = ( run $ ie $ map fromIntegral $ use m
, run $ sg $ map fromIntegral $ use m
)
where
ie :: Acc (Matrix Double) -> Acc (Matrix Double)
ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
sg :: Acc (Matrix Double) -> Acc (Matrix Double)
sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
n :: Exp Double
n = P.fromIntegral r
r :: Dim
r = dim m
xs :: Acc (Matrix Double) -> Acc (Matrix Double)
xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)
{-|
Module : Gargantext.Core.Methods.Distances.Accelerate.Distributional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This module aims at implementig distances of terms context by context is
the same referential of corpus.
Implementation use Accelerate library which enables GPU and CPU computation
See Gargantext.Core.Methods.Graph.Accelerate)
-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Methods.Distances.Accelerate.Distributional
where
-- import qualified Data.Foldable as P (foldl1)
-- import Debug.Trace (trace)
import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import qualified Gargantext.Prelude as P
-- * Metrics of proximity
-----------------------------------------------------------------------
-- ** Distributional Distance
-- | Distributional Distance metric
--
-- Distributional metric is a relative metric which depends on the
-- selected list, it represents structural equivalence of mutual information.
--
-- The distributional metric P(c) of @i@ and @j@ terms is: \[
-- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
-- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
--
-- Mutual information
-- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
--
-- Number of cooccurrences of @i@ and @j@ in the same context of text
-- \[C{ij}\]
--
-- The expected value of the cooccurrences @i@ and @j@ (given a map list of size @n@)
-- \[E_{ij}^{m} = \frac {S_{i} S_{j}} {N_{m}}\]
--
-- Total cooccurrences of term @i@ given a map list of size @m@
-- \[S_{i} = \sum_{j, j \neq i}^{m} S_{ij}\]
--
-- Total cooccurrences of terms given a map list of size @m@
-- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
--
distributional :: Matrix Int -> Matrix Double
distributional m = -- run {- $ matMiniMax -}
run $ diagNull n
$ rIJ n
$ filterWith 0 100
$ filter' 0
$ s_mi
$ map fromIntegral
{- from Int to Double -}
$ use m
{- push matrix in Accelerate type -}
where
_ri :: Acc (Matrix Double) -> Acc (Matrix Double)
_ri mat = mat1 -- zipWith (/) mat1 mat2
where
mat1 = matSumCol n $ zipWith min (_myMin mat) (_myMin $ filterWith 0 100 $ diagNull n $ transpose mat)
_mat2 = total mat
_myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
_myMin = replicate (constant (Z :. n :. All)) . minimum
-- TODO fix NaN
-- Quali TEST: OK
s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
s_mi m' = zipWith (\x y -> log (x / y)) (diagNull n m')
$ zipWith (/) (crossProduct n m') (total m')
-- crossProduct n m'
total :: Acc (Matrix Double) -> Acc (Matrix Double)
total = replicate (constant (Z :. n :. n)) . sum . sum
n :: Dim
n = dim m
rIJ :: (Elt a, Ord a, P.Fractional (Exp a), P.Num a)
=> Dim -> Acc (Matrix a) -> Acc (Matrix a)
rIJ n m = matMiniMax $ divide a b
where
a = sumRowMin n m
b = sumColMin n m
-- * For Tests (to be removed)
-- | Test perfermance with this matrix
-- TODO : add this in a benchmark folder
distriTest :: Int -> Matrix Double
distriTest n = distributional (theMatrix n)
{-|
Module : Gargantext.Core.Methods.Distances.Accelerate.SpeGen
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This module aims at implementig distances of terms context by context is
the same referential of corpus.
Implementation use Accelerate library which enables GPU and CPU computation
See Gargantext.Core.Methods.Graph.Accelerate)
-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Methods.Distances.Accelerate.SpeGen
where
-- import qualified Data.Foldable as P (foldl1)
-- import Debug.Trace (trace)
import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import qualified Gargantext.Prelude as P
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-- * Specificity and Genericity
{- | Metric Specificity and genericity: select terms
- let N termes and occurrences of i \[N{i}\]
- Cooccurrences of i and j \[N{ij}\]
- Probability to get i given j : \[P(i|j)=N{ij}/N{j}\]
- Genericity of i \[Gen(i) = \frac{\sum_{j \neq i,j} P(i|j)}{N-1}\]
- Specificity of j \[Spec(i) = \frac{\sum_{j \neq i,j} P(j|i)}{N-1}\]
- \[Inclusion (i) = Gen(i) + Spec(i)\)
- \[GenericityScore = Gen(i)- Spec(i)\]
- References: Science mapping with asymmetrical paradigmatic proximity
Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted
on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276,
arXiv:0803.2315 [cs.OH]
-}
type GenericityInclusion = Double
type SpecificityExclusion = Double
data SquareMatrix = SymetricMatrix | NonSymetricMatrix
type SymetricMatrix = Matrix
type NonSymetricMatrix = Matrix
incExcSpeGen :: Matrix Int
-> ( Vector GenericityInclusion
, Vector SpecificityExclusion
)
incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
where
run' fun mat = run $ fun $ map fromIntegral $ use mat
-- | Inclusion (i) = Gen(i)+Spec(i)
inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
inclusionExclusion mat = zipWith (+) (pV mat) (pV mat)
-- | Genericity score = Gen(i)- Spec(i)
specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
specificityGenericity mat = zipWith (+) (pH mat) (pH mat)
-- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
pV :: Acc (Matrix Double) -> Acc (Vector Double)
pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
-- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
pH :: Acc (Matrix Double) -> Acc (Vector Double)
pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
cardN :: Exp Double
cardN = constant (P.fromIntegral (dim m) :: Double)
-- | P(i|j) = Nij /N(jj) Probability to get i given j
--p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Matrix e) -> Acc (Matrix e)
p_ij m = zipWith (/) m (n_jj m)
where
n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
n_jj myMat' = backpermute (shape m)
(lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
-> (Z :. j :. j)
)
) myMat'
-- | P(j|i) = Nij /N(ii) Probability to get i given j
-- to test
p_ji :: (Elt e, P.Fractional (Exp e))
=> Acc (Array DIM2 e)
-> Acc (Array DIM2 e)
p_ji = transpose . p_ij
-- | Step to ckeck the result in visual/qualitative tests
incExcSpeGen_proba :: Matrix Int -> Matrix Double
incExcSpeGen_proba m = run' pro m
where
run' fun mat = run $ fun $ map fromIntegral $ use mat
pro mat = p_ji mat
{-
-- | Hypothesis to test maybe later (or not)
-- TODO ask accelerate for instances to ease such writtings:
p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
p_ m = zipWith (/) m (n_ m)
where
n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
n_ m = backpermute (shape m)
(lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
-> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
)
) m
-}
{-|
Module : Gargantext.Graph.Distances.Conditional
Module : Gargantext.Core.Methods.Distances
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -12,7 +12,7 @@ Motivation and definition of the @Conditional@ distance.
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE Strict #-}
module Gargantext.Core.Viz.Graph.Distances.Conditional
module Gargantext.Core.Methods.Distances.Conditional
where
import Data.Matrix hiding (identity)
......
{-|
Module : Gargantext.Graph.Distances.Distributional
Description :
Module : Gargantext.Core.Methods.Distances.Distributional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
......@@ -14,7 +14,7 @@ Motivation and definition of the @Distributional@ distance.
{-# LANGUAGE Strict #-}
module Gargantext.Core.Viz.Graph.Distances.Distributional
module Gargantext.Core.Methods.Distances.Distributional
where
import Data.Matrix hiding (identity)
......
......@@ -14,7 +14,7 @@ Références:
-}
module Gargantext.Core.Viz.Graph.Proxemy
module Gargantext.Core.Methods.Graph.BAC.Proxemy
where
--import Debug.SimpleReflect
......@@ -28,17 +28,17 @@ import Gargantext.Core.Viz.Graph.FGL
type Length = Int
type FalseReflexive = Bool
type NeighborsFilter = Graph_Undirected -> Node -> [Node]
type We = Bool
type RmEdge = Bool
confluence :: [(Node,Node)] -> Length -> FalseReflexive -> We -> Map (Node,Node) Double
confluence ns l fr we = similarity_conf (mkGraphUfromEdges ns) l fr we
confluence :: [(Node,Node)] -> Length -> FalseReflexive -> RmEdge -> Map (Node,Node) Double
confluence ns = similarity_conf (mkGraphUfromEdges ns)
similarity_conf :: Graph_Undirected -> Length -> FalseReflexive -> We -> Map (Node,Node) Double
similarity_conf g l fr we = Map.fromList [ ((x,y), similarity_conf_x_y g (x,y) l fr we)
similarity_conf :: Graph_Undirected -> Length -> FalseReflexive -> RmEdge -> Map (Node,Node) Double
similarity_conf g l fr rm = Map.fromList [ ((x,y), similarity_conf_x_y g (x,y) l fr rm)
| x <- nodes g, y <- nodes g, x < y]
similarity_conf_x_y :: Graph_Undirected -> (Node,Node) -> Length -> FalseReflexive -> We -> Double
similarity_conf_x_y g (x,y) l r we = similarity
similarity_conf_x_y :: Graph_Undirected -> (Node,Node) -> Length -> FalseReflexive -> RmEdge -> Double
similarity_conf_x_y g (x,y) l r rm_e = similarity
where
similarity :: Double
similarity | denominator == 0 = 0
......@@ -52,11 +52,11 @@ similarity_conf_x_y g (x,y) l r we = similarity
xline :: Map Node Double
xline = prox_markov g [x] l r filterNeighbors'
where
filterNeighbors' | we == True = filterNeighbors
filterNeighbors' | rm_e == True = filterNeighbors
| otherwise = rm_edge_neighbors (x,y)
pair_is_edge :: Bool
pair_is_edge | we == True = False
pair_is_edge | rm_e == True = False
| otherwise = List.elem y (filterNeighbors g x)
lim_SC :: Double
......
......@@ -10,7 +10,7 @@ Portability : POSIX
-}
module Gargantext.Core.Viz.Graph.Louvain
module Gargantext.Core.Methods.Graph.Louvain
where
import Gargantext.Prelude
......
......@@ -6,8 +6,9 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
- First written by Bruno Gaume in Python (see below for details)
- Then written by Alexandre Delanoë in Haskell (see below for details)
- Result of the workshop, Pyremiel 2019
- First written by Bruno Gaume in Python (see below for details)
- Then written by Alexandre Delanoë in Haskell (see below for details)
# By Bruno Gaume:
def fast_maximal_cliques(g):
......@@ -48,7 +49,7 @@ def fast_maximal_cliques(g):
module Gargantext.Core.Viz.Graph.MaxClique
module Gargantext.Core.Methods.Graph.MaxClique
where
import Data.Maybe (catMaybes)
......@@ -61,12 +62,12 @@ import Data.Set (fromList, toList, isSubsetOf)
import Data.Graph.Inductive hiding (Graph, neighbors, subgraph, (&))
import Gargantext.Core.Viz.Graph.FGL (Graph_Undirected, degree, neighbors, mkGraphUfromEdges)
import Gargantext.Core.Viz.Graph.Tools (cooc2graph', Threshold)
import Gargantext.Core.Viz.Graph.Distances (Distance)
import Gargantext.Core.Methods.Distances (Distance)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex)
type Graph = Graph_Undirected
type Neighbor = Node
-- | getMaxCliques
-- TODO chose distance order
getMaxCliques :: Ord a => Distance -> Threshold -> Map (a, a) Int -> [[a]]
......
......@@ -26,29 +26,24 @@ This document defines basic of Text definitions according to Gargantext..
module Gargantext.Core.Text.Examples
where
import Data.Ord (Down(..))
import qualified Data.List as L
import Data.Array.Accelerate (toList, Matrix)
import Data.Map (Map)
import qualified Data.Map as M
import Data.Ord (Down(..))
import Data.Text (Text)
import qualified Data.Text as T
import Data.Tuple.Extra (both)
import Data.Array.Accelerate (toList, Matrix)
import Gargantext.Prelude
import Gargantext.Core.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Core.Text.Terms (TermType(MonoMulti), terms)
import Gargantext.Core (Lang(EN))
import Gargantext.Core.Types (Terms(..), Label)
import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
import Gargantext.Core.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Core.Text.Metrics.Count (Grouped)
import Gargantext.Core.Viz.Graph.Distances.Matrice
import Gargantext.Core.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Core.Text.Terms (TermType(MonoMulti), terms)
import Gargantext.Core.Types (Terms(..), Label)
import Gargantext.Core.Viz.Graph.Index
import Gargantext.Prelude
import qualified Data.Array.Accelerate as DAA
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Text as Text
-- | Sentences
-- Let be a list of Texts: ['Data.Text.Text']. Each text in this example is a sentence.
......@@ -70,7 +65,7 @@ ex_sentences = [ "There is a table with a glass of wine and a spoon."
-- >>> T.intercalate (T.pack " ") ex_sentences
-- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
ex_paragraph :: Text
ex_paragraph = T.intercalate " " ex_sentences
ex_paragraph = Text.intercalate " " ex_sentences
-- | Let split sentences by Contexts of text.
-- More about 'Gargantext.Core.Text.Context'
......@@ -88,10 +83,10 @@ ex_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
-- | Test the Occurrences
--
-- >>> occurrences <$> L.concat <$> ex_terms
-- >>> occurrences <$> List.concat <$> ex_terms
-- fromList [(fromList ["glass"],fromList [(["glass"],4)]),(fromList ["spoon"],fromList [(["spoon"],2)]),(fromList ["tabl"],fromList [(["table"],4)]),(fromList ["wine"],fromList [(["wine"],3)])]
ex_occ :: IO (Map Grouped (Map Terms Int))
ex_occ = occurrences <$> L.concat <$> ex_terms
ex_occ = occurrences <$> List.concat <$> ex_terms
-- | Test the cooccurrences
-- Use the 'Gargantext.Core.Text.Metrics.Count.cooc' function.
......@@ -132,6 +127,6 @@ incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
where
(ti,fi) = createIndices m
ordonne x = sortWith (Down . snd)
$ zip (map snd $ M.toList fi) (toList x)
$ zip (map snd $ Map.toList fi) (toList x)
This diff is collapsed.
{-|
Module : Gargantext.Core.Text.List.CSV
Module : Gargantext.Core.Text.List.Formats.CSV
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
......@@ -12,29 +12,24 @@ CSV parser for Gargantext corpus files.
-}
module Gargantext.Core.Text.List.CSV where
import GHC.IO (FilePath)
module Gargantext.Core.Text.List.Formats.CSV where
import Control.Applicative
import Control.Monad (mzero)
import Data.Char (ord)
import Data.Csv
import Data.Either (Either(Left, Right))
import Data.List (null)
import Data.Text (Text, pack)
import qualified Data.Text as DT
import qualified Data.ByteString.Lazy as BL
import Data.Vector (Vector)
import qualified Data.Vector as V
import Gargantext.Prelude hiding (length)
import GHC.IO (FilePath)
import Gargantext.Core.Text.Context
import Gargantext.Prelude hiding (length)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as DT
import qualified Data.Vector as V
------------------------------------------------------------------------
csvMapTermList :: FilePath -> IO TermList
csvMapTermList fp = csv2list CsvMap <$> snd <$> fromCsvListFile fp
......
{-|
Module : Gargantext.Core.Text.List.Group
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
module Gargantext.Core.Text.List.Group
where
import Control.Lens (set)
import Data.Set (Set)
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Core.Types (ListType(..))
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Social.Scores (FlowListScores(..))
import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List
------------------------------------------------------------------------
toGroupedText :: GroupedTextParams a b
-> Map Text FlowListScores
-> Map Text (Set NodeId)
-> Map Stem (GroupedText Int)
toGroupedText groupParams scores =
(groupWithStem groupParams) . (groupWithScores scores)
------------------------------------------------------------------------
-- | WIP
toGroupedText_test :: Bool -- Map Stem (GroupedText Int)
toGroupedText_test =
-- fromGroupedScores $ fromListScores from
toGroupedText params from datas == result
where
params = GroupedTextParams identity (Set.size . snd) fst snd
from :: Map Text FlowListScores
from = Map.fromList [("A. Rahmani",FlowListScores {_fls_parents = Map.fromList [("T. Reposeur",1)]
,_fls_listType = Map.fromList [(MapTerm,2)]})
,("B. Tamain",FlowListScores {_fls_parents = Map.fromList [("T. Reposeur",1)]
, _fls_listType = Map.fromList [(MapTerm,2)]})
]
datas :: Map Text (Set NodeId)
datas = Map.fromList [("A. Rahmani" , Set.fromList [1,2])
,("T. Reposeur", Set.fromList [3,4])
,("B. Tamain" , Set.fromList [5,6])
]
result :: Map Stem (GroupedText Int)
result = Map.fromList [("A. Rahmani",GroupedText {_gt_listType = Nothing
,_gt_label = "A. Rahmani"
,_gt_score = 2
,_gt_children = Set.empty
,_gt_size = 2
,_gt_stem = "A. Rahmani"
,_gt_nodes = Set.fromList [1,2]
}
)
,("B. Tamain",GroupedText {_gt_listType = Nothing
, _gt_label = "B. Tamain"
, _gt_score = 2
, _gt_children = Set.empty
, _gt_size = 2
, _gt_stem = "B. Tamain"
, _gt_nodes = Set.fromList [5,6]
}
)
,("T. Reposeur",GroupedText {_gt_listType = Nothing
,_gt_label = "T. Reposeur"
,_gt_score = 2
,_gt_children = Set.fromList ["A. Rahmani","B. Tamain"]
,_gt_size = 2
,_gt_stem = "T. Reposeur"
,_gt_nodes = Set.fromList [1..6]
}
)
]
------------------------------------------------------------------------
-- | To be removed
addListType :: Map Text ListType -> GroupedText a -> GroupedText a
addListType m g = set gt_listType (hasListType m g) g
where
hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
hasListType m' (GroupedText _ label _ g' _ _ _) =
List.foldl' (<>) Nothing
$ map (\t -> Map.lookup t m')
$ Set.toList
$ Set.insert label g'
{-|
Module : Gargantext.Core.Text.List.WithScores
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.List.Group.WithScores
where
import Control.Lens (makeLenses, view, set)
import Data.Semigroup
import Data.Set (Set)
import Data.Map (Map)
import Data.Maybe (catMaybes)
import Data.Text (Text)
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
-- import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.List.Social.Scores
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
------------------------------------------------------------------------
-- | Main Types
data GroupedWithListScores =
GroupedWithListScores { _gwls_children :: !(Set Text)
, _gwls_listType :: !(Maybe ListType)
} deriving (Show)
makeLenses ''GroupedWithListScores
instance Semigroup GroupedWithListScores where
(<>) (GroupedWithListScores c1 l1)
(GroupedWithListScores c2 l2) =
GroupedWithListScores (c1 <> c2) (l1 <> l2)
------
data GroupedTextScores score =
GroupedTextScores { _gts_listType :: !(Maybe ListType)
, _gts_score :: score
, _gts_children :: !(Set Text)
} deriving (Show)
makeLenses 'GroupedTextScores
instance Semigroup a => Semigroup (GroupedTextScores a) where
(<>) (GroupedTextScores l1 s1 c1)
(GroupedTextScores l2 s2 c2)
= GroupedTextScores (l1 <> l2) (s1 <> s2) (c1 <> c2)
------
data GroupedTextScores' score =
GroupedTextScores' { _gts'_listType :: !(Maybe ListType)
, _gts'_score :: score
, _gts'_children :: !(Set (GroupedTextScores' score))
} deriving (Show, Ord, Eq)
makeLenses 'GroupedTextScores'
instance (Semigroup a, Ord a) => Semigroup (GroupedTextScores' a) where
(<>) (GroupedTextScores' l1 s1 c1)
(GroupedTextScores' l2 s2 c2)
= GroupedTextScores' (l1 <> l2) (s1 <> s2) (c1 <> c2)
------------------------------------------------------------------------
-- | Main function
groupWithScores :: Map Text FlowListScores
-> Map Text (Set NodeId)
-> Map Text (GroupedTextScores (Set NodeId))
groupWithScores scores ms = orphans <> groups
where
groups = addScore ms
$ fromGroupedScores
$ fromListScores scores
orphans = addIfNotExist scores ms
addScore :: Map Text (Set NodeId)
-> Map Text (GroupedTextScores (Set NodeId))
-> Map Text (GroupedTextScores (Set NodeId))
addScore mapNs = Map.mapWithKey scoring
where
scoring k g = set gts_score ( Set.unions
$ catMaybes
$ map (\n -> Map.lookup n mapNs)
$ [k] <> (Set.toList $ view gts_children g)
) g
addIfNotExist :: Map Text FlowListScores
-> Map Text (Set NodeId)
-> Map Text (GroupedTextScores (Set NodeId))
addIfNotExist mapSocialScores mapScores =
foldl' (addIfNotExist' mapSocialScores) Map.empty $ Map.toList mapScores
where
addIfNotExist' mss m (t,ns) =
case Map.lookup t mss of
Nothing -> Map.alter (add ns) t m
_ -> m
add ns' Nothing = Just $ GroupedTextScores Nothing ns' Set.empty
add _ _ = Nothing -- should not be present
------------------------------------------------------------------------
fromGroupedScores :: Map Parent GroupedWithListScores
-> Map Parent (GroupedTextScores (Set NodeId))
fromGroupedScores = Map.map (\(GroupedWithListScores c l) -> GroupedTextScores l Set.empty c)
------------------------------------------------------------------------
fromListScores :: Map Text FlowListScores -> Map Parent GroupedWithListScores
fromListScores = Map.fromListWith (<>) . (map fromScores') . Map.toList
where
fromScores' :: (Text, FlowListScores) -> (Text, GroupedWithListScores)
fromScores' (t, fs) = case (keyWithMaxValue $ view fls_parents fs) of
Nothing -> (t, GroupedWithListScores Set.empty (keyWithMaxValue $ view fls_listType fs))
-- Parent case: taking its listType, for now children Set is empty
Just parent -> (parent, GroupedWithListScores (Set.singleton t) Nothing)
-- We ignore the ListType of children for the parents' one
-- added after and winner of semigroup actions
{-|
Module : Gargantext.Core.Text.List.Group.WithStem
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
module Gargantext.Core.Text.List.Group.WithStem
where
import Control.Lens (makeLenses, view)
import Data.Set (Set)
import Data.Map (Map)
import Data.Text (Text)
import Data.Semigroup (Semigroup)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Text as Text
-- | Main Types
data StopSize = StopSize {unStopSize :: !Int}
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
data GroupParams = GroupParams { unGroupParams_lang :: !Lang
, unGroupParams_len :: !Int
, unGroupParams_limit :: !Int
, unGroupParams_stopSize :: !StopSize
}
| GroupIdentity
data GroupedTextParams a b =
GroupedTextParams { _gt_fun_stem :: Text -> Text
, _gt_fun_score :: a -> b
, _gt_fun_texts :: a -> Set Text
, _gt_fun_nodeIds :: a -> Set NodeId
-- , _gt_fun_size :: a -> Int
}
makeLenses 'GroupedTextParams
type Stem = Text
data GroupedText score =
GroupedText { _gt_listType :: !(Maybe ListType)
, _gt_label :: !Text
, _gt_score :: !score
, _gt_children :: !(Set Text)
, _gt_size :: !Int
, _gt_stem :: !Stem -- needed ?
, _gt_nodes :: !(Set NodeId)
} deriving (Show, Eq) --}
{-
instance Show score => Show (GroupedText score) where
show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
--}
{-
instance (Eq a) => Eq (GroupedText a) where
(==) (GroupedText _ _ score1 _ _ _ _)
(GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
-}
instance (Eq a, Ord a) => Ord (GroupedText a) where
compare (GroupedText _ _ score1 _ _ _ _)
(GroupedText _ _ score2 _ _ _ _) = compare score1 score2
instance Ord a => Semigroup (GroupedText a) where
(<>) (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where
lt = lt1 <> lt2
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
-- | Lenses Instances
makeLenses 'GroupedText
------------------------------------------------------------------------
groupWithStem :: {- ( HasNgrams a
, HasGroupWithScores a b
, Semigroup a
, Ord b
)
=> -} GroupedTextParams a b
-> Map Text (GroupedTextScores (Set NodeId))
-> Map Stem (GroupedText Int)
groupWithStem _ = Map.mapWithKey scores2groupedText
scores2groupedText :: Text -> GroupedTextScores (Set NodeId) -> GroupedText Int
scores2groupedText t g = GroupedText (view gts_listType g)
t
(Set.size $ view gts_score g)
(Set.delete t $ view gts_children g)
(size t)
t
(view gts_score g)
------------------------------------------------------------------------
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------
groupedTextWithStem :: Ord b
=> GroupedTextParams a b
-> Map Text a
-> Map Stem (GroupedText b)
groupedTextWithStem gparams from =
Map.fromListWith (<>) $ map (group gparams) $ Map.toList from
where
group gparams' (t,d) = let t' = (view gt_fun_stem gparams') t
in (t', GroupedText
Nothing
t
((view gt_fun_score gparams') d)
((view gt_fun_texts gparams') d)
(size t)
t'
((view gt_fun_nodeIds gparams') d)
)
------------------------------------------------------------------------
This diff is collapsed.
{-|
Module : Gargantext.Core.Text.List.Social.Find
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.List.Social.Find
where
-- findList imports
import Gargantext.Core.Types.Individu
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Tree
import Gargantext.Database.Query.Tree.Root (getRootId)
import Gargantext.Prelude
------------------------------------------------------------------------
findListsId :: (HasNodeError err, HasTreeError err)
=> User -> NodeMode -> Cmd err [NodeId]
findListsId u mode = do
r <- getRootId u
ns <- map _dt_nodeId <$> filter (\n -> _dt_typeId n == nodeTypeId NodeList)
<$> findNodes' mode r
pure ns
findNodes' :: HasTreeError err
=> NodeMode -> RootId
-> Cmd err [DbTreeNode]
findNodes' Private r = findNodes Private r $ [NodeFolderPrivate] <> commonNodes
findNodes' Shared r = findNodes Shared r $ [NodeFolderShared ] <> commonNodes
findNodes' Public r = findNodes Public r $ [NodeFolderPublic ] <> commonNodes
commonNodes:: [NodeType]
commonNodes = [NodeFolder, NodeCorpus, NodeList]
{-|
Module : Gargantext.Core.Text.List.Social.ListType
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.List.Social.ListType
where
import Gargantext.Database.Admin.Types.Node
import Data.Map (Map)
import Data.Set (Set)
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.API.Ngrams.Tools -- (getListNgrams)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Database.Schema.Ngrams
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
------------------------------------------------------------------------
-- | [ListId] does not merge the lists (it is for Master and User lists
-- here we need UserList only
countFilterList :: RepoCmdM env err m
=> Set Text -> NgramsType -> [ListId]
-> Map Text (Map ListType Int)
-> m (Map Text (Map ListType Int))
countFilterList st nt ls input =
foldM' (\m l -> countFilterList' st nt [l] m) input ls
where
countFilterList' :: RepoCmdM env err m
=> Set Text -> NgramsType -> [ListId]
-> Map Text (Map ListType Int)
-> m (Map Text (Map ListType Int))
countFilterList' st' nt' ls' input' = do
ml <- toMapTextListType <$> getListNgrams ls' nt'
pure $ Set.foldl' (\m t -> countList t ml m) input' st'
------------------------------------------------------------------------
-- FIXME children have to herit the ListType of the parent
toMapTextListType :: Map Text NgramsRepoElement -> Map Text ListType
toMapTextListType m = Map.fromListWith (<>)
$ List.concat
$ map (toList m)
$ Map.toList m
where
toList :: Map Text NgramsRepoElement -> (Text, NgramsRepoElement) -> [(Text, ListType)]
toList m' (t, nre@(NgramsRepoElement _ _ _ _ (MSet children))) =
List.zip terms (List.cycle [lt'])
where
terms = [t]
-- <> maybe [] (\n -> [unNgramsTerm n]) root
-- <> maybe [] (\n -> [unNgramsTerm n]) parent
<> (map unNgramsTerm $ Map.keys children)
lt' = listOf m' nre
listOf :: Map Text NgramsRepoElement -> NgramsRepoElement -> ListType
listOf m'' ng = case _nre_parent ng of
Nothing -> _nre_list ng
Just p -> case Map.lookup (unNgramsTerm p) m'' of
Just ng' -> listOf m'' ng'
Nothing -> CandidateTerm
-- panic "[G.C.T.L.Social.listOf] Nothing: Should Not happen"
------------------------------------------------------------------------
countList :: Text
-> Map Text ListType
-> Map Text (Map ListType Int)
-> Map Text (Map ListType Int)
countList t m input = case Map.lookup t m of
Nothing -> input
Just l -> Map.alter addList t input
where
addList Nothing = Just $ addCountList l Map.empty
addList (Just lm) = Just $ addCountList l lm
addCountList :: ListType -> Map ListType Int -> Map ListType Int
addCountList l' m' = Map.alter (plus l') l' m'
where
plus CandidateTerm Nothing = Just 1
plus CandidateTerm (Just x) = Just $ x + 1
plus MapTerm Nothing = Just 2
plus MapTerm (Just x) = Just $ x + 2
plus StopTerm Nothing = Just 3
plus StopTerm (Just x) = Just $ x + 3
{-|
Module : Gargantext.Core.Text.List.Social.Scores
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Core.Text.List.Social.Scores
where
import Control.Lens
import Data.Map (Map)
import Data.Semigroup (Semigroup(..))
import Data.Set (Set)
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.Set as Set
------------------------------------------------------------------------
-- | Tools to inherit groupings
------------------------------------------------------------------------
-- | Tools
parentUnionsMerge :: (Ord a, Ord b, Num c)
=> [Map a (Map b c)]
-> Map a (Map b c)
parentUnionsMerge = Map.unionsWith (Map.unionWith (+))
-- This Parent union is specific
-- [Private, Shared, Public]
-- means the following preferences:
-- Private > Shared > Public
-- if data have not been tagged privately, then use others tags
-- This unions behavior takes first key only and ignore others
parentUnionsExcl :: Ord a
=> [Map a b]
-> Map a b
parentUnionsExcl = Map.unions
------------------------------------------------------------------------
type Parent = Text
hasParent :: Text
-> Map Text (Map Parent Int)
-> Maybe Parent
hasParent t m = case Map.lookup t m of
Nothing -> Nothing
Just m' -> keyWithMaxValue m'
------------------------------------------------------------------------
keyWithMaxValue :: Map a b -> Maybe a
keyWithMaxValue m = (fst . fst) <$> Map.maxViewWithKey m
------------------------------------------------------------------------
data FlowListScores =
FlowListScores { _fls_parents :: Map Parent Int
, _fls_listType :: Map ListType Int
-- You can add any score by incrementing this type
-- , _flc_score :: Map Score Int
}
deriving (Show, Generic)
makeLenses ''FlowListScores
instance Semigroup FlowListScores where
(<>) (FlowListScores p1 l1) (FlowListScores p2 l2) =
FlowListScores (p1 <> p2) (l1 <> l2)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | toFlowListScores which generate Score from list of Map Text
-- NgramsRepoElement
toFlowListScores :: KeepAllParents
-> Set Text
-> Map Text FlowListScores
-> [Map Text NgramsRepoElement]
-> Map Text FlowListScores
toFlowListScores k ts = foldl' (toFlowListScores' k ts)
where
toFlowListScores' :: KeepAllParents
-> Set Text
-> Map Text FlowListScores
-> Map Text NgramsRepoElement
-> Map Text FlowListScores
toFlowListScores' k' ts' to' ngramsRepo =
Set.foldl' (toFlowListScores'' k' ts' ngramsRepo) to' ts'
toFlowListScores'' :: KeepAllParents
-> Set Text
-> Map Text NgramsRepoElement
-> Map Text FlowListScores
-> Text
-> Map Text FlowListScores
toFlowListScores'' k'' ss ngramsRepo to'' t =
case Map.lookup t ngramsRepo of
Nothing -> to''
Just nre -> Map.alter (addParent k'' nre ss) t
$ Map.alter (addList $ _nre_list nre) t to''
------------------------------------------------------------------------
-- | Main addFunctions to groupResolution the FlowListScores
-- Use patch-map library here
-- diff, transformWith patches simplifies functions below
addList :: ListType
-> Maybe FlowListScores
-> Maybe FlowListScores
addList l Nothing =
Just $ FlowListScores Map.empty (addList' l Map.empty)
addList l (Just (FlowListScores mapParent mapList)) =
Just $ FlowListScores mapParent mapList'
where
mapList' = addList' l mapList
-- * Unseful but nice comment:
-- "the addList function looks like an ASCII bird"
-- | Concrete function to pass to PatchMap
addList' :: ListType -> Map ListType Int -> Map ListType Int
addList' l m = Map.alter (plus l) l m
where
plus CandidateTerm Nothing = Just 1
plus CandidateTerm (Just x) = Just $ x + 1
plus MapTerm Nothing = Just 2
plus MapTerm (Just x) = Just $ x + 2
plus StopTerm Nothing = Just 3
plus StopTerm (Just x) = Just $ x + 3
------------------------------------------------------------------------
------------------------------------------------------------------------
data KeepAllParents = KeepAllParents Bool
addParent :: KeepAllParents -> NgramsRepoElement -> Set Text
-> Maybe FlowListScores
-> Maybe FlowListScores
addParent k nre ss Nothing =
Just $ FlowListScores mapParent Map.empty
where
mapParent = addParent' k (_nre_parent nre) ss Map.empty
addParent k nre ss (Just (FlowListScores mapParent mapList)) =
Just $ FlowListScores mapParent' mapList
where
mapParent' = addParent' k (_nre_parent nre) ss mapParent
addParent' :: Num a
=> KeepAllParents
-> Maybe NgramsTerm
-> Set Text
-> Map Text a
-> Map Text a
addParent' _ Nothing _ss mapParent = mapParent
addParent' (KeepAllParents k) (Just (NgramsTerm p')) ss mapParent =
case k of
True -> Map.alter addCount p' mapParent
False -> case Set.member p' ss of
False -> mapParent
True -> Map.alter addCount p' mapParent
where
addCount Nothing = Just 1
addCount (Just n) = Just $ n + 1
------------------------------------------------------------------------
......@@ -20,7 +20,7 @@ module Gargantext.Core.Text.Metrics
--import Math.KMeans (kmeans, euclidSq, elements)
import Data.Map (Map)
import Gargantext.Prelude
import Gargantext.Core.Viz.Graph.Distances.Matrice
import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
import Gargantext.Core.Viz.Graph.Index
import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
import qualified Data.Array.Accelerate as DAA
......
......@@ -17,14 +17,12 @@ module Gargantext.Core.Text.Terms.Multi (multiterms, multiterms_rake)
import Data.Text hiding (map, group, filter, concat)
import Data.List (concat)
import qualified Data.Set as S
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Core.Types
import Gargantext.Core.Text.Terms.Multi.PosTagging
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En
import qualified Gargantext.Core.Text.Terms.Multi.Lang.Fr as Fr
......@@ -32,17 +30,15 @@ import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake)
multiterms :: Lang -> Text -> IO [Terms]
multiterms lang txt = concat
<$> map (map (tokenTag2terms lang))
<$> map (map tokenTag2terms)
<$> map (filter (\t -> _my_token_pos t == Just NP))
<$> tokenTags lang txt
tokenTag2terms :: Lang -> TokenTag -> Terms
tokenTag2terms lang (TokenTag w t _ _) = Terms w t'
where
t' = S.fromList $ map (stem lang) $ S.toList t
tokenTag2terms :: TokenTag -> Terms
tokenTag2terms (TokenTag ws t _ _) = Terms ws t
tokenTags :: Lang -> Text -> IO [[TokenTag]]
tokenTags lang s = map (group lang) <$> tokenTags' lang s
tokenTags lang s = map (groupTokens lang) <$> tokenTags' lang s
tokenTags' :: Lang -> Text -> IO [[TokenTag]]
......@@ -53,7 +49,7 @@ tokenTags' lang t = map tokens2tokensTags
---- | This function analyses and groups (or not) ngrams according to
---- specific grammars of each language.
group :: Lang -> [TokenTag] -> [TokenTag]
group EN = En.group
group FR = Fr.group
group _ = panic $ pack "group :: Lang not implemeted yet"
groupTokens :: Lang -> [TokenTag] -> [TokenTag]
groupTokens EN = En.groupTokens
groupTokens FR = Fr.groupTokens
groupTokens _ = panic $ pack "groupTokens :: Lang not implemeted yet"
......@@ -13,7 +13,7 @@ the tokens into extracted terms.
-}
module Gargantext.Core.Text.Terms.Multi.Lang.En (group)
module Gargantext.Core.Text.Terms.Multi.Lang.En (groupTokens)
where
import Gargantext.Prelude
......@@ -22,17 +22,17 @@ import Gargantext.Core.Text.Terms.Multi.Group
------------------------------------------------------------------------
-- | Rule grammar to group tokens
group :: [TokenTag] -> [TokenTag]
group [] = []
group ntags = group2 NP NP
$ group2 NP VB
-- $ group2 NP IN
$ group2 IN DT
-- $ group2 VB NP
$ group2 JJ NP
$ group2 JJ JJ
$ group2 JJ CC
$ ntags
groupTokens :: [TokenTag] -> [TokenTag]
groupTokens [] = []
groupTokens ntags = group2 NP NP
$ group2 NP VB
-- $ group2 NP IN
$ group2 IN DT
-- $ group2 VB NP
$ group2 JJ NP
$ group2 JJ JJ
$ group2 JJ CC
$ ntags
------------------------------------------------------------------------
--groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
......
......@@ -14,16 +14,16 @@ is ADJectiv in french.
-}
module Gargantext.Core.Text.Terms.Multi.Lang.Fr (group)
module Gargantext.Core.Text.Terms.Multi.Lang.Fr (groupTokens)
where
import Gargantext.Prelude
import Gargantext.Core.Types
import Gargantext.Core.Text.Terms.Multi.Group (group2)
group :: [TokenTag] -> [TokenTag]
group [] = []
group ntags = group2 NP NP
groupTokens :: [TokenTag] -> [TokenTag]
groupTokens [] = []
groupTokens ntags = group2 NP NP
$ group2 NP VB
-- group2 NP IN
-- group2 IN DT
......
......@@ -64,10 +64,10 @@ tokens2tokensTags :: [Token] -> [TokenTag]
tokens2tokensTags ts = filter' $ map tokenTag ts
------------------------------------------------------------------------
tokenTag :: Token -> TokenTag
tokenTag (Token _ _ w s _ _ p n _ _) = TokenTag w' s' p n
tokenTag (Token _ _ w l _ _ p n _ _) = TokenTag w' l' p n
where
w' = split w
s' = fromList (split s)
l' = fromList (split l)
split = splitOn (pack " ") . toLower
filter' :: [TokenTag] -> [TokenTag]
......@@ -76,7 +76,7 @@ filter' xs = filter isNgrams xs
isNgrams (TokenTag _ _ p n) = isJust p || isJust n
------------------------------------------------------------------------
data Sentence = Sentence { _sentenceIndex :: Int
data Sentence = Sentence { _sentenceIndex :: Int
, _sentenceTokens :: [Token]
} deriving (Show, Generic)
......@@ -110,8 +110,9 @@ $(deriveJSON (unPrefix "_") ''PosSentences)
corenlp' :: ( FromJSON a
, ConvertibleStrings p ByteString) =>
Lang -> p -> IO (Response a)
, ConvertibleStrings p ByteString
)
=> Lang -> p -> IO (Response a)
corenlp' lang txt = do
let properties = case lang of
EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
......@@ -142,9 +143,9 @@ corenlp lang txt = do
-- parseWith _tokenNer "Hello world of Peter."
-- [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]]
tokenWith :: (Token -> t) -> Lang -> Text -> IO [[(Text, t)]]
tokenWith f lang s = map (map (\t -> (_tokenWord t, f t)))
tokenWith f lang s = map (map (\t -> (_tokenWord t, f t)))
<$> map _sentenceTokens
<$> _sentences
<$> _sentences
<$> corenlp lang s
......@@ -109,10 +109,10 @@ instance FromJSON NER where
instance ToJSON NER
data TokenTag = TokenTag { _my_token_word :: [Text]
, _my_token_stem :: Set Text
, _my_token_pos :: Maybe POS
, _my_token_ner :: Maybe NER
data TokenTag = TokenTag { _my_token_word :: [Text]
, _my_token_lemma :: Set Text
, _my_token_pos :: Maybe POS
, _my_token_ner :: Maybe NER
} deriving (Show)
instance Semigroup TokenTag where
......
......@@ -49,7 +49,8 @@ instance ToSchema NodeTree where
type TypeId = Int
-- TODO multiple ListType declaration, remove it
data ListType = StopTerm | CandidateTerm | MapTerm
-- data ListType = CandidateTerm | StopTerm | MapTerm
data ListType = CandidateTerm | StopTerm | MapTerm
deriving (Generic, Eq, Ord, Show, Read, Enum, Bounded)
instance ToJSON ListType
......@@ -61,11 +62,11 @@ instance Arbitrary ListType where
instance Semigroup ListType
where
MapTerm <> _ = MapTerm
_ <> MapTerm = MapTerm
CandidateTerm <> _ = CandidateTerm
_ <> CandidateTerm = CandidateTerm
StopTerm <> StopTerm = StopTerm
MapTerm <> _ = MapTerm
_ <> MapTerm = MapTerm
StopTerm <> _ = StopTerm
_ <> StopTerm = StopTerm
_ <> _ = CandidateTerm
instance FromHttpApiData ListType where
......@@ -73,13 +74,18 @@ instance FromHttpApiData ListType where
type ListTypeId = Int
-- FIXME Candidate: 0 and Stop : 1
listTypeId :: ListType -> ListTypeId
listTypeId StopTerm = 0
listTypeId CandidateTerm = 1
listTypeId MapTerm = 2
listTypeId MapTerm = 2
fromListTypeId :: ListTypeId -> Maybe ListType
fromListTypeId i = lookup i $ fromList [ (listTypeId l, l) | l <- [minBound..maxBound]]
fromListTypeId i = lookup i
$ fromList
[ (listTypeId l, l)
| l <- [StopTerm, CandidateTerm, MapTerm]
]
-- data Metrics = Occurrences | Cooccurrences | Specclusion | Genclusion | Cvalue
-- | TfidfCorpus | TfidfGlobal | TirankLocal | TirankGlobal
......
......@@ -27,7 +27,7 @@ import qualified Text.Read as T
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Viz.Graph.Distances (GraphMetric)
import Gargantext.Core.Methods.Distances (GraphMetric)
import Gargantext.Prelude
......
......@@ -48,7 +48,7 @@ import Gargantext.Prelude
import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.GEXF ()
import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph)
import Gargantext.Core.Viz.Graph.Distances (Distance(..), GraphMetric(..))
import Gargantext.Core.Methods.Distances (Distance(..), GraphMetric(..))
------------------------------------------------------------------------
-- | There is no Delete specific API for Graph since it can be deleted
......
......@@ -26,12 +26,11 @@ import qualified Data.Map as DM
import Data.Maybe (catMaybes)
import Data.List (concat, sortOn)
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
import Gargantext.Core.Viz.Graph.Louvain (LouvainNodeId, CommunityId, nodeId2comId)
import Gargantext.Core.Methods.Graph.Louvain (LouvainNodeId, CommunityId, nodeId2comId)
type Bridgeness = Double
bridgeness :: Bridgeness
-> [LouvainNode]
-> Map (LouvainNodeId, LouvainNodeId) Double
......
......@@ -24,10 +24,10 @@ import Gargantext.Prelude
import Gargantext.Core.Statistics
import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness)
import Gargantext.Core.Viz.Graph.Distances (Distance(..), measure)
import Gargantext.Core.Methods.Distances (Distance(..), measure)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index)
import Gargantext.Core.Viz.Graph.IGraph (mkGraphUfromEdges)
import Gargantext.Core.Viz.Graph.Proxemy (confluence)
import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
import GHC.Float (sin, cos)
import qualified IGraph as Igraph
import IGraph.Random -- (Gen(..))
......@@ -47,7 +47,7 @@ cooc2graph' distance threshold myCooc = distanceMap
where
(ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 0) myCooc'
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat
......
This diff is collapsed.
......@@ -17,6 +17,7 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.API
where
import Data.Maybe (fromMaybe)
import Control.Lens ((^.))
import Data.String.Conversions
--import Control.Monad.Reader (ask)
......@@ -98,13 +99,13 @@ getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo phId _lId l msb = do
phNode <- getNodeWith phId (Proxy :: Proxy HyperdataPhylo)
let
level = maybe 2 identity l
branc = maybe 2 identity msb
level = fromMaybe 2 l
branc = fromMaybe 2 msb
maybePhylo = phNode ^. (node_hyperdata . hp_data)
p <- liftBase $ viewPhylo2Svg
$ viewPhylo level branc
$ maybe phyloFromQuery identity maybePhylo
$ fromMaybe phyloFromQuery maybePhylo
pure (SVG p)
------------------------------------------------------------------------
type PostPhylo = QueryParam "listId" ListId
......@@ -112,16 +113,16 @@ type PostPhylo = QueryParam "listId" ListId
:> (Post '[JSON] NodeId)
postPhylo :: CorpusId -> UserId -> GargServer PostPhylo
postPhylo n userId _lId = do
postPhylo corpusId userId _lId = do
-- TODO get Reader settings
-- s <- ask
let
-- let
-- _vrs = Just ("1" :: Text)
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
phy <- flowPhylo n
pId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just n) userId]
pure $ NodeId (fromIntegral pId)
phy <- flowPhylo corpusId -- params
phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
pure $ NodeId (fromIntegral phyloId)
------------------------------------------------------------------------
-- | DELETE Phylo == delete a node
......@@ -136,64 +137,25 @@ putPhylo = undefined
-- | Instances
instance Arbitrary PhyloView
where
arbitrary = elements [phyloView]
-- | TODO add phyloGroup ex
instance Arbitrary PhyloGroup
where
arbitrary = elements []
instance Arbitrary Phylo
where
arbitrary = elements [phylo]
instance ToSchema Order
instance ToParamSchema Order
instance FromHttpApiData Order
where
parseUrlPiece = readTextData
instance ToParamSchema Metric
instance FromHttpApiData [Metric]
where
parseUrlPiece = readTextData
instance FromHttpApiData Metric
where
parseUrlPiece = readTextData
instance Arbitrary Phylo where arbitrary = elements [phylo]
instance Arbitrary PhyloGroup where arbitrary = elements []
instance Arbitrary PhyloView where arbitrary = elements [phyloView]
instance FromHttpApiData DisplayMode where parseUrlPiece = readTextData
instance FromHttpApiData ExportMode where parseUrlPiece = readTextData
instance FromHttpApiData Filiation where parseUrlPiece = readTextData
instance FromHttpApiData Metric where parseUrlPiece = readTextData
instance FromHttpApiData Order where parseUrlPiece = readTextData
instance FromHttpApiData Sort where parseUrlPiece = readTextData
instance FromHttpApiData Tagger where parseUrlPiece = readTextData
instance FromHttpApiData [Metric] where parseUrlPiece = readTextData
instance FromHttpApiData [Tagger] where parseUrlPiece = readTextData
instance ToParamSchema DisplayMode
instance FromHttpApiData DisplayMode
where
parseUrlPiece = readTextData
instance ToParamSchema ExportMode
instance FromHttpApiData ExportMode
where
parseUrlPiece = readTextData
instance FromHttpApiData Sort
where
parseUrlPiece = readTextData
instance ToParamSchema Sort
instance FromHttpApiData [Tagger]
where
parseUrlPiece = readTextData
instance FromHttpApiData Tagger
where
parseUrlPiece = readTextData
instance ToParamSchema Tagger
instance FromHttpApiData Filiation
where
parseUrlPiece = readTextData
instance ToParamSchema Filiation
instance ToParamSchema Tagger
instance ToParamSchema Metric
instance ToParamSchema Order
instance ToParamSchema Sort
instance ToSchema Order
......@@ -44,7 +44,6 @@ import Numeric.Statistics (percentile)
-- | PhyloLevelMaker | --
-------------------------
-- | A typeClass for polymorphic PhyloLevel functions
class PhyloLevelMaker aggregate
where
......@@ -105,11 +104,15 @@ instance PhyloLevelMaker Document
addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
addPhyloLevel' lvl m p = alterPhyloPeriods
(\period -> let pId = _phylo_periodId period
in over (phylo_periodLevels)
in over phylo_periodLevels
(\phyloLevels ->
let groups = toPhyloGroups lvl pId (m ! pId) m p
in trace (show (length groups) <> " groups for " <> show (pId) ) $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
) period) p
in trace (show (length groups)
<> " groups for "
<> show (pId) )
$ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
) period
) p
----------------------
......@@ -118,7 +121,14 @@ addPhyloLevel' lvl m p = alterPhyloPeriods
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map Date (Map (Int,Int) Double) -> Vector Ngrams -> PhyloGroup
cliqueToGroup :: PhyloPeriodId
-> Level
-> Int
-> Text
-> PhyloFis
-> Map Date (Map (Int,Int) Double)
-> Vector Ngrams
-> PhyloGroup
cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
......@@ -142,10 +152,17 @@ cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
clusterToGroup :: PhyloPeriodId
-> Level
-> Int
-> Text
-> PhyloCluster
-> Map (Date,Date) [PhyloCluster]
-> Phylo
-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
empty
Nothing
......@@ -154,12 +171,14 @@ clusterToGroup prd lvl idx lbl groups _m p =
where
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
cooc = getMiniCooc (listToFullCombi ngrams)
(periodsToYears [prd] )
(getPhyloCooc p )
--------------------------------------
childs :: [Pointer]
childs = map (\g -> (getGroupId g, 1)) groups
ascLink = concat $ map getGroupPeriodParents groups
desLink = concat $ map getGroupPeriodChilds groups
ascLink = concat $ map getGroupPeriodParents groups
desLink = concat $ map getGroupPeriodChilds groups
--------------------------------------
ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups
......@@ -195,9 +214,13 @@ toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching
phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
--------------------------------------
phyloBase :: Phylo
phyloBase = tracePhyloBase
$ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c termList fis
--------------------------------------
phyloBase = tracePhyloBase
$ toPhyloBase q init c termList fis
where
init = initPhyloParam (Just defaultPhyloVersion)
(Just defaultSoftware )
(Just q )
---------------------------------------
-- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
......@@ -205,17 +228,16 @@ toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
toNthLevel lvlMax prox clus p
| lvl >= lvlMax = p
| otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1)
$ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1)
-- \$ transposePeriodLinks (lvl + 1)
$ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant
$ transposeLinks (lvl + 1) Ascendant
$ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1)
(clusters) p
$ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant
$ transposeLinks (lvl + 1) Ascendant
$ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1) (clusters) p
where
--------------------------------------
clusters :: Map (Date,Date) [PhyloCluster]
......
......@@ -35,12 +35,14 @@ import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Viz.Phylo hiding (Svg, Dot)
import Gargantext.Core.Viz.Phylo.LevelMaker
import Gargantext.Core.Viz.Phylo.LevelMaker (toPhylo)
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.View.Export
import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
type MinSizeBranch = Int
flowPhylo :: FlowCmdM env err m
......@@ -48,14 +50,14 @@ flowPhylo :: FlowCmdM env err m
-> m Phylo
flowPhylo cId = do
list <- defaultList cId
list <- defaultList cId
termList <- Map.toList <$> getTermsWith Text.words [list] NgramsTerms MapTerm
docs' <- catMaybes
<$> map (\h -> (,) <$> _hd_publication_year h
<*> _hd_abstract h
)
<$> selectDocs cId
<$> map (\h -> (,) <$> _hd_publication_year h
<*> _hd_abstract h
)
<$> selectDocs cId
let
patterns = buildPatterns termList
......@@ -65,10 +67,13 @@ flowPhylo cId = do
where
--------------------------------------
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = List.nub $ List.concat $ map (map Text.unwords) $ extractTermsWithList pats txt
termsInText pats txt = List.nub
$ List.concat
$ map (map Text.unwords)
$ extractTermsWithList pats txt
--------------------------------------
docs = map ( (\(y,t) -> Document y t) . filterTerms patterns) docs'
docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs'
--liftBase $ flowPhylo' (List.sortOn date docs) termList l m fp
pure $ buildPhylo (List.sortOn date docs) termList
......@@ -76,9 +81,9 @@ flowPhylo cId = do
-- TODO SortedList Document
flowPhylo' :: [Document] -> TermList -- ^Build
-> Level -> MinSizeBranch -- ^View
-> FilePath
-> IO FilePath
-> Level -> MinSizeBranch -- ^View
-> FilePath
-> IO FilePath
flowPhylo' corpus terms l m fp = do
let
phylo = buildPhylo corpus terms
......
......@@ -142,6 +142,7 @@ groupToDotNode fdt g bId =
, toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
, toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
, toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
, toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
])
......@@ -192,7 +193,7 @@ exportToDot phylo export =
,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
])
{-
......@@ -201,7 +202,7 @@ exportToDot phylo export =
-- 2) create a layer for the branches labels -}
subgraph (Str "Branches peaks") $ do
graphAttrs [Rank SameRank]
-- graphAttrs [Rank SameRank]
{-
-- 3) group the branches by hierarchy
-- mapM (\branches ->
......@@ -368,8 +369,8 @@ inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
ngramsMetrics :: PhyloExport -> PhyloExport
ngramsMetrics export =
ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
ngramsMetrics phylo export =
over ( export_groups
. traverse )
(\g -> g & phylo_groupMeta %~ insert "genericity"
......@@ -378,6 +379,8 @@ ngramsMetrics export =
(map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "inclusion"
(map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "frequence"
(map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
) export
......@@ -397,9 +400,9 @@ branchDating export =
& branch_meta %~ insert "age" [fromIntegral age]
& branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
processMetrics :: PhyloExport -> PhyloExport
processMetrics export = ngramsMetrics
$ branchDating export
processMetrics :: Phylo -> PhyloExport -> PhyloExport
processMetrics phylo export = ngramsMetrics phylo
$ branchDating export
-----------------
......@@ -598,8 +601,10 @@ toHorizon phylo =
mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd ->
let groups = getGroupsFromLevelPeriods level [prd] phylo
childs = getPreviousChildIds level frame prd periods phylo
heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
childs = getPreviousChildIds level frame prd periods phylo
-- maybe add a better filter for non isolated ancestors
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
$ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
......@@ -630,7 +635,7 @@ toPhyloExport phylo = exportToDot phylo
$ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
$ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
$ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
$ processMetrics export
$ processMetrics phylo export
where
export :: PhyloExport
export = PhyloExport groups branches
......
......@@ -22,8 +22,8 @@ import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, c
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Core.Viz.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Viz.Graph.Distances (Distance(Conditional))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Control.DeepSeq (NFData)
import Control.Parallel.Strategies (parList, rdeepseq, using)
......@@ -37,6 +37,17 @@ import qualified Data.Set as Set
-- | To Phylo | --
------------------
{-
-- TODO AD
data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
| PhyloN { _phylo'_phylo1 :: Phylo}
toPhylo' :: Phylo' -> [Document] -> TermList -> Config -> Phylo
toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo
-}
toPhylo :: [Document] -> TermList -> Config -> Phylo
toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1))
......@@ -48,9 +59,11 @@ toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFrom
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 docs phyloBase
-- > AD to db here
--------------------------------------
phyloBase :: Phylo
phyloBase :: Phylo
phyloBase = toPhyloBase docs lst conf
-- > AD to db here
--------------------------------------
......@@ -202,7 +215,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0.01 cooc))
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0.001 cooc))
$ toList phyloDocs
mcl' = mcl `using` parList rdeepseq
in fromList mcl'
......@@ -232,9 +245,9 @@ docsToTimeScaleCooc docs fdt =
-----------------------
-- | to Phylo Base | --
-----------------------
-- TODO anoe
groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
groupDocsByPeriodRec f prds docs acc =
groupDocsByPeriodRec f prds docs acc =
if ((null prds) || (null docs))
then acc
else
......@@ -245,7 +258,7 @@ groupDocsByPeriodRec f prds docs acc =
-- To group a list of Documents by fixed periods
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs =
groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = map (inPeriode f docs') pds
periods' = periods `using` parList rdeepseq
......@@ -262,7 +275,7 @@ groupDocsByPeriod' f pds docs =
-- To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es =
groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
......
......@@ -19,6 +19,8 @@ import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithK
import Data.String (String)
import Data.Text (Text)
import Prelude (floor)
import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
import Text.Printf
......@@ -56,6 +58,22 @@ printIOComment cmt =
-- | Misc | --
--------------
-- truncate' :: Double -> Int -> Double
-- truncate' x n = (fromIntegral (floor (x * t))) / t
-- where t = 10^n
truncate' :: Double -> Int -> Double
truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
where
--------------
t :: Double
t = 10 ^n
getInMap :: Int -> Map Int Double -> Double
getInMap k m =
if (member k m)
then m ! k
else 0
roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
roundToStr = printf "%0.*f"
......
......@@ -64,8 +64,9 @@ import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Core.Flow.Types
import Gargantext.Core.Text
import Gargantext.Core.Text.List.Group.WithStem (StopSize(..), GroupParams(..))
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists,StopSize(..))
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (Terms(..))
......@@ -210,17 +211,21 @@ flowCorpusUser :: ( FlowCmdM env err m
flowCorpusUser l user corpusName ctype ids = do
-- User Flow
(userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus user corpusName ctype
-- NodeTexts is first
_tId <- insertDefaultNode NodeTexts userCorpusId userId
-- printDebug "NodeTexts: " tId
-- NodeList is second
listId <- getOrMkList userCorpusId userId
-- _cooc <- insertDefaultNode NodeListCooc listId userId
-- TODO: check if present already, ignore
_ <- Doc.add userCorpusId ids
_tId <- insertDefaultNode NodeTexts userCorpusId userId
-- printDebug "Node Text Ids:" tId
-- User List Flow
(masterUserId, _masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype
ngs <- buildNgramsLists user l 2 3 (StopSize 3) userCorpusId masterCorpusId
ngs <- buildNgramsLists user (GroupParams l 2 3 (StopSize 3)) userCorpusId masterCorpusId
_userListId <- flowList_DbRepo listId ngs
_mastListId <- getOrMkList masterCorpusId masterUserId
-- _ <- insertOccsUpdates userCorpusId mastListId
......
......@@ -42,10 +42,10 @@ getPriors :: FavOrTrash -> CorpusId -> Cmd err (Events Bool)
getPriors ft cId = do
docs_fav <- filter (\(FacetDoc _ _ _ _ f _) -> f == Just 2)
<$> runViewDocuments cId False Nothing Nothing Nothing
<$> runViewDocuments cId False Nothing Nothing Nothing Nothing
docs_trash <- List.take (List.length docs_fav)
<$> runViewDocuments cId True Nothing Nothing Nothing
<$> runViewDocuments cId True Nothing Nothing Nothing Nothing
let priors = priorEventsWith text (fav2bool ft) ( List.zip (repeat False) docs_fav
......@@ -59,7 +59,7 @@ moreLikeWith :: CorpusId -> Maybe Offset -> Maybe Limit -> Maybe OrderBy
moreLikeWith cId o l order ft priors = do
docs_test <- filter (\(FacetDoc _ _ _ _ f _) -> f == Just 1)
<$> runViewDocuments cId False o Nothing order
<$> runViewDocuments cId False o Nothing order Nothing
let results = map fst
$ filter ((==) (Just $ not $ fav2bool ft) . snd)
......
......@@ -16,6 +16,7 @@ Ngrams by node enable contextual metrics.
module Gargantext.Database.Action.Metrics.NgramsByNode
where
import Data.Map.Strict (Map, fromListWith, elems, toList)
import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.Set (Set)
......@@ -24,36 +25,14 @@ import Data.Tuple.Extra (second, swap)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Debug.Trace (trace)
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Database.PostgreSQL.Simple as DPS
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Config (nodeTypeId)
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
ngramsGroup :: Lang
-> Int
-> Int
-> Text
-> Text
ngramsGroup l _m _n = Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Database.PostgreSQL.Simple as DPS
......
......@@ -22,6 +22,7 @@ module Gargantext.Database.Action.Node
import Gargantext.Core.Types (Name)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Hyperdata.Default
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error
......@@ -55,6 +56,11 @@ mkNodeWithParent NodeFrameWrite i u n =
mkNodeWithParent NodeFrameCalc i u n =
mkNodeWithParent_ConfigureHyperdata NodeFrameCalc i u n
mkNodeWithParent NodeFrameNotebook i u n =
mkNodeWithParent_ConfigureHyperdata NodeFrameNotebook i u n
mkNodeWithParent nt (Just pId) uId name = insertNode nt (Just name) Nothing pId uId
-- mkNodeWithParent _ _ _ _ = errorWith "[G.D.A.Node.mkNodeWithParent] nees parent"
......@@ -72,6 +78,9 @@ mkNodeWithParent_ConfigureHyperdata NodeFrameWrite (Just i) uId name =
mkNodeWithParent_ConfigureHyperdata NodeFrameCalc (Just i) uId name =
mkNodeWithParent_ConfigureHyperdata' NodeFrameCalc (Just i) uId name
mkNodeWithParent_ConfigureHyperdata NodeFrameNotebook (Just i) uId name =
insertNode NodeFrameNotebook (Just "Notebook") (Just $ DefaultFrameCode $ HyperdataFrame "Notebook" name) i uId
mkNodeWithParent_ConfigureHyperdata _ _ _ _ = nodeError NotImplYet
......@@ -84,8 +93,8 @@ mkNodeWithParent_ConfigureHyperdata' :: (HasNodeError err)
-> Cmd err [NodeId]
mkNodeWithParent_ConfigureHyperdata' nt (Just i) uId name = do
maybeNodeId <- case nt of
NodeFrameWrite -> insertNode NodeFrameWrite (Just name) Nothing i uId
NodeFrameCalc -> insertNode NodeFrameCalc (Just name) Nothing i uId
NodeFrameWrite -> insertNode NodeFrameWrite (Just name) Nothing i uId
NodeFrameCalc -> insertNode NodeFrameCalc (Just name) Nothing i uId
_ -> nodeError NeedsConfiguration
case maybeNodeId of
......
......@@ -70,6 +70,7 @@ nodeTypeId n =
NodeFrameWrite -> 991
NodeFrameCalc -> 992
NodeFrameNotebook -> 993
-- Cooccurrences -> 9
--
......
......@@ -52,6 +52,7 @@ data DefaultHyperdata =
| DefaultFrameWrite HyperdataFrame
| DefaultFrameCalc HyperdataFrame
| DefaultFrameCode HyperdataFrame
| DefaultFile HyperdataFile
......@@ -83,6 +84,7 @@ instance ToJSON DefaultHyperdata where
toJSON (DefaultFrameWrite x) = toJSON x
toJSON (DefaultFrameCalc x) = toJSON x
toJSON (DefaultFrameCode x) = toJSON x
toJSON (DefaultFile x) = toJSON x
......@@ -113,5 +115,6 @@ defaultHyperdata NodeDashboard = DefaultDashboard defaultHyperdataDashboard
defaultHyperdata NodeFrameWrite = DefaultFrameWrite defaultHyperdataFrame
defaultHyperdata NodeFrameCalc = DefaultFrameCalc defaultHyperdataFrame
defaultHyperdata NodeFrameNotebook = DefaultFrameCalc defaultHyperdataFrame
defaultHyperdata NodeFile = DefaultFile defaultHyperdataFile
......@@ -50,7 +50,7 @@ import Gargantext.Database.Prelude (fromField')
import Gargantext.Prelude
import Opaleye (QueryRunnerColumnDefault, queryRunnerColumnDefault, PGJsonb, fieldQueryRunnerColumn, Nullable)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Test.QuickCheck.Arbitrary hiding (vector)
------------------------------------------------------------------------
-- Only Hyperdata types should be member of this type class.
......
......@@ -265,7 +265,7 @@ data NodeType = NodeUser
-}
-- Optional Nodes
| NodeFrameWrite | NodeFrameCalc
| NodeFrameWrite | NodeFrameCalc | NodeFrameNotebook
| NodeFile
deriving (Show, Read, Eq, Generic, Bounded, Enum)
......@@ -283,7 +283,7 @@ defaultName NodeCorpusV3 = "Corpus"
defaultName NodeAnnuaire = "Annuaire"
defaultName NodeDocument = "Doc"
defaultName NodeTexts = "Texts"
defaultName NodeTexts = "Docs"
defaultName NodeList = "List"
defaultName NodeListCooc = "List"
defaultName NodeModel = "Model"
......@@ -294,12 +294,13 @@ defaultName NodeFolderShared = "Shared Folder"
defaultName NodeTeam = "Folder"
defaultName NodeFolderPublic = "Public Folder"
defaultName NodeDashboard = "Board"
defaultName NodeGraph = "Graph"
defaultName NodePhylo = "Phylo"
defaultName NodeDashboard = "Dashboard"
defaultName NodeFrameWrite = "Frame Write"
defaultName NodeFrameCalc = "Frame Calc"
defaultName NodeFrameNotebook = "Frame Code"
defaultName NodeFile = "File"
......
......@@ -42,13 +42,11 @@ import Data.Aeson (FromJSON, ToJSON)
import Data.Aeson.TH (deriveJSON)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Swagger
import Data.Text (Text)
import qualified Data.Text as T
import Data.Time (UTCTime)
import Data.Time.Segment (jour)
import Data.Typeable (Typeable)
import GHC.Generics (Generic)
import Opaleye
import Prelude hiding (null, id, map, sum, not, read)
import Protolude hiding (null, map, sum, not)
import Servant.API
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
......@@ -276,19 +274,32 @@ queryAuthorsDoc = leftJoin5 queryNodeTable queryNodeNodeNgramsTable queryNgramsT
------------------------------------------------------------------------
-- TODO-SECURITY check
runViewDocuments :: CorpusId -> IsTrash -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err [FacetDoc]
runViewDocuments cId t o l order =
runOpaQuery $ filterWith o l order $ viewDocuments cId t ntId
runViewDocuments :: CorpusId
-> IsTrash
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Maybe Text
-> Cmd err [FacetDoc]
runViewDocuments cId t o l order query = do
runOpaQuery $ filterWith o l order sqlQuery
where
ntId = nodeTypeId NodeDocument
sqlQuery = viewDocuments cId t ntId query
runCountDocuments :: CorpusId -> IsTrash -> Cmd err Int
runCountDocuments cId t =
runCountOpaQuery $ viewDocuments cId t $ nodeTypeId NodeDocument
runCountDocuments :: CorpusId -> IsTrash -> Maybe Text -> Cmd err Int
runCountDocuments cId t mQuery = do
runCountOpaQuery sqlQuery
where
sqlQuery = viewDocuments cId t (nodeTypeId NodeDocument) mQuery
viewDocuments :: CorpusId -> IsTrash -> NodeTypeId -> Query FacetDocRead
viewDocuments cId t ntId = proc () -> do
viewDocuments :: CorpusId
-> IsTrash
-> NodeTypeId
-> Maybe Text
-> Query FacetDocRead
viewDocuments cId t ntId mQuery = proc () -> do
n <- queryNodeTable -< ()
nn <- queryNodeNodeTable -< ()
restrict -< n^.node_id .== nn^.nn_node2_id
......@@ -296,6 +307,11 @@ viewDocuments cId t ntId = proc () -> do
restrict -< n^.node_typename .== (pgInt4 ntId)
restrict -< if t then nn^.nn_category .== (pgInt4 0)
else nn^.nn_category .>= (pgInt4 1)
let query = (fromMaybe "" mQuery)
iLikeQuery = T.intercalate "" ["%", query, "%"]
restrict -< (n^.node_name) `ilike` (pgStrictText iLikeQuery)
returnA -< FacetDoc (_node_id n)
(_node_date n)
(_node_name n)
......@@ -305,7 +321,7 @@ viewDocuments cId t ntId = proc () -> do
------------------------------------------------------------------------
filterWith :: (PGOrd date, PGOrd title, PGOrd score, hyperdata ~ Column SqlJsonb) =>
Maybe Gargantext.Core.Types.Offset
Maybe Gargantext.Core.Types.Offset
-> Maybe Gargantext.Core.Types.Limit
-> Maybe OrderBy
-> Select (Facet id (Column date) (Column title) hyperdata (Column score) ngramCount)
......
......@@ -39,7 +39,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Opaleye hiding (FromField, readOnly)
import Opaleye.Internal.QueryArr (Query)
import Test.QuickCheck.Arbitrary
import Test.QuickCheck.Arbitrary hiding (vector)
import Database.PostgreSQL.Simple.FromField (FromField, fromField)
import Database.PostgreSQL.Simple.FromRow (FromRow, fromRow, field)
......
module Gargantext.Prelude.Job where
import Data.IORef
import Data.Maybe
import Gargantext.Prelude
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..))
jobLogInit :: Int -> JobLog
jobLogInit rem =
JobLog { _scst_succeeded = Just 0
, _scst_remaining = Just rem
, _scst_failed = Just 0
, _scst_events = Just [] }
jobLogSuccess :: JobLog -> JobLog
jobLogSuccess (JobLog { _scst_succeeded = mSucc
, _scst_remaining = mRem
, _scst_failed = mFail
, _scst_events = evt }) =
JobLog { _scst_succeeded = (+ 1) <$> mSucc
, _scst_remaining = (\x -> x - 1) <$> mRem
, _scst_failed = mFail
, _scst_events = evt }
jobLogFail :: JobLog -> JobLog
jobLogFail (JobLog { _scst_succeeded = mSucc
, _scst_remaining = mRem
, _scst_failed = mFail
, _scst_events = evt }) =
JobLog { _scst_succeeded = mSucc
, _scst_remaining = (\x -> x - 1) <$> mRem
, _scst_failed = (+ 1) <$> mFail
, _scst_events = evt }
runJobLog :: MonadBase IO m => Int -> (JobLog -> m ()) -> m (m (), m (), m JobLog)
runJobLog num logStatus = do
jlRef <- liftBase $ newIORef $ jobLogInit num
return (logRefF jlRef, logRefSuccessF jlRef, getRefF jlRef)
where
logRefF ref = do
jl <- liftBase $ readIORef ref
logStatus jl
logRefSuccessF ref = do
jl <- liftBase $ readIORef ref
liftBase $ writeIORef ref $ jobLogSuccess jl
getRefF ref = do
liftBase $ readIORef ref
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment