[refactoring] add StrictData option, refactor Data.Map to Strict

Based on the suggestions in
https://kodimensional.dev/space-leak
parent 10fe2334
Pipeline #3612 failed in 54 minutes and 3 seconds
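For context before the diff itself: a minimal, self-contained sketch (not code from this repository; module and bindings are hypothetical) of the space leak described in the referenced article, and how switching from Data.Map to Data.Map.Strict avoids it. The lazy API stores the combining function as a thunk, the strict API forces the value before it is stored.

module Main where

import qualified Data.Map as Lazy          -- lazy values: insertWith stores (+) thunks
import qualified Data.Map.Strict as Strict -- strict values: insertWith forces the result

-- Counting occurrences with the lazy API leaves a chain of unevaluated
-- (+) applications behind each key until something demands the value.
lazyCount :: [Int] -> Lazy.Map Int Int
lazyCount = foldl (\m k -> Lazy.insertWith (+) k 1 m) Lazy.empty

-- The strict API evaluates the combined value before storing it, so memory
-- stays proportional to the number of distinct keys.
-- (A production version would also use Data.List.foldl' for the accumulator.)
strictCount :: [Int] -> Strict.Map Int Int
strictCount = foldl (\m k -> Strict.insertWith (+) k 1 m) Strict.empty

main :: IO ()
main = do
  print (Lazy.toList   (lazyCount   [1, 2, 1, 3, 2, 1]))
  print (Strict.toList (strictCount [1, 2, 1, 3, 2, 1]))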
......@@ -26,8 +26,8 @@ import Data.ByteString.Lazy (writeFile)
import Data.Either (Either(..))
import Data.List (cycle, concat, unwords)
import Data.List.Split (chunksOf)
import Data.Map (Map)
import qualified Data.Map as DM
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as DM
import Data.Text (pack, Text)
import qualified Data.Text as DT
import Data.Tuple.Extra (both)
......@@ -142,8 +142,8 @@ terms' pats txt = pure $ concat $ extractTermsWithList pats txt
testCorpus :: [(Int, [Text])]
testCorpus = [ (1998, [pack "The beees"])
, (1999, [ pack "The bees and the flowers"
--, pack "The bees and the flowers"
, (1999, [ pack "The bees and the flowers"
--, pack "The bees and the flowers"
])
]
......@@ -151,4 +151,3 @@ testTermList :: TermList
testTermList = [ ([pack "bee"], [[pack "bees"]])
, ([pack "flower"], [[pack "flowers"]])
]
......@@ -344,6 +344,7 @@ library
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -Wincomplete-uni-patterns -Wincomplete-record-updates -Wmissing-signatures -Wunused-binds -Wunused-imports -Werror -freduction-depth=300
build-depends:
HSvm
......@@ -542,6 +543,7 @@ executable gargantext-admin
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
......@@ -569,6 +571,7 @@ executable gargantext-cbor2json
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
aeson
......@@ -600,6 +603,7 @@ executable gargantext-cli
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
aeson
......@@ -638,6 +642,7 @@ executable gargantext-import
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
......@@ -666,6 +671,7 @@ executable gargantext-init
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
......@@ -693,6 +699,7 @@ executable gargantext-invitations
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
......@@ -720,6 +727,7 @@ executable gargantext-phylo
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
aeson
......@@ -761,6 +769,7 @@ executable gargantext-server
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -O2 -Wcompat -Wmissing-signatures -rtsopts -threaded -with-rtsopts=-N -with-rtsopts=-T -fprof-auto
build-depends:
base
......@@ -795,6 +804,7 @@ executable gargantext-upgrade
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
......@@ -839,6 +849,7 @@ test-suite garg-test
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
DataKinds
DeriveGeneric
FlexibleContexts
......@@ -883,6 +894,7 @@ test-suite jobqueue-test
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
async
......
......@@ -32,6 +32,7 @@ default-extensions:
- OverloadedStrings
- RankNTypes
- RecordWildCards
- StrictData
data-files:
- ekg-assets/index.html
- ekg-assets/monitor.js
......
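As a reminder of what the StrictData default extension added above changes (a hypothetical sketch, not code from this repository): every field of a data declaration compiled with StrictData behaves as if it carried a bang annotation, so constructing a value forces its fields to weak head normal form instead of capturing thunks; a field can still be opted back into laziness with a ~ annotation.

{-# LANGUAGE StrictData #-}
module StrictDataSketch where

-- With StrictData this is equivalent to
--   data Score = Score { scoreCount :: !Int, scoreLabel :: !String }
-- i.e. both fields are evaluated when a Score is constructed.
data Score = Score
  { scoreCount :: Int
  , scoreLabel :: String
  }

-- A single field can still be kept lazy explicitly when that is intended:
data Cached = Cached Int ~String   -- the String field stays lazy despite StrictData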
......@@ -28,7 +28,7 @@ module Core.Text.Examples
{-
import Data.Array.Accelerate (toList, Matrix)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Ord (Down(..))
import Data.Text (Text)
import Data.Tuple.Extra (both)
......@@ -43,7 +43,7 @@ import Gargantext.Core.Viz.Graph.Index
import Gargantext.Prelude
import qualified Data.Array.Accelerate as DAA
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Text as Text
-- | Sentences
......@@ -100,11 +100,11 @@ ex_cooc = cooc <$> ex_terms
-- | Tests the specificity and genericity
--
-- >>> ex_cooc_mat
-- (fromList [(["glass"],0),(["spoon"],1),(["table"],2),(["wine"],3)],Matrix (Z :. 4 :. 4)
-- (fromList [(["glass"],0),(["spoon"],1),(["table"],2),(["wine"],3)],Matrix (Z :. 4 :. 4)
-- [ 4, 0, 0, 0,
-- 1, 2, 0, 0,
-- 3, 2, 4, 0,
-- 3, 1, 2, 3],Matrix (Z :. 4 :. 4)
-- 3, 1, 2, 3],Matrix (Z :. 4 :. 4)
-- [ 1.0, 0.25, 0.75, 0.75,
-- 0.0, 1.0, 1.0, 0.5,
-- 0.0, 0.0, 1.0, 0.5,
......
......@@ -6,8 +6,8 @@ module Gargantext.API.GraphQL.AsyncTask where
import Control.Concurrent.Async (poll)
import Control.Concurrent.MVar (readMVar)
import Control.Lens
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Control.Monad.Reader (ask, liftIO)
import Data.Either (Either(..))
import qualified Data.IntMap.Strict as IntMap
......
......@@ -18,7 +18,7 @@ module Gargantext.API.Ngrams.List
import Control.Lens hiding (elements, Indexed)
import Data.Either (Either(..))
import Data.HashMap.Strict (HashMap)
import Data.Map (Map, toList)
import Data.Map.Strict (Map, toList)
import Data.Maybe (catMaybes, fromMaybe)
import Data.Set (Set)
import Data.Text (Text, concat, pack, splitOn)
......@@ -54,7 +54,7 @@ import qualified Data.ByteString.Lazy as BSL
import qualified Data.Csv as Csv
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Data.Vector as Vec
......
......@@ -16,7 +16,7 @@ module Gargantext.API.Ngrams.Prelude
import Data.Maybe (catMaybes)
import Control.Lens (view)
import Data.Map (fromList)
import Data.Map.Strict (fromList)
import Data.Hashable (Hashable)
import Data.Validity
import Gargantext.API.Ngrams.Types
......
......@@ -16,12 +16,12 @@ Main exports of Gargantext:
module Gargantext.API.Node.Corpus.Export
where
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import Data.Set (Set)
import Data.Text (Text, pack)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.HashMap.Strict as HashMap
import Servant (Headers, Header, addHeader)
......
......@@ -28,7 +28,7 @@ import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import Gargantext.API.Prelude
......
......@@ -14,13 +14,13 @@ Portability : POSIX
module Gargantext.Core.Ext.IMT where
import Data.Either (Either(..))
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Text (Text, splitOn)
import qualified Data.Set as S
import qualified Data.List as DL
import qualified Data.Vector as DV
import qualified Data.Map as M
import qualified Data.Map.Strict as M
import qualified Prelude
import Data.Morpheus.Types (GQLType)
......
......@@ -19,8 +19,8 @@ module Gargantext.Core.Methods.Graph.BAC.Proxemy
--import Debug.SimpleReflect
import Gargantext.Prelude
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import qualified Data.List as List
--import Gargantext.Core.Viz.Graph.IGraph
import Gargantext.Core.Viz.Graph.FGL
......
......@@ -54,8 +54,8 @@ module Gargantext.Core.Methods.Graph.MaxClique
import Data.Maybe (catMaybes)
import Gargantext.Prelude
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.List (sortOn, nub, concat)
import Data.Set (Set)
import Data.Set (fromList, toList, isSubsetOf)
......
......@@ -14,13 +14,13 @@ module Gargantext.Core.Statistics
where
import Data.Map (Map)
import Data.Map.Strict (Map)
import Gargantext.Prelude
import Numeric.Statistics.PCA (pcaReduceN)
import Data.Array.IArray (Array, listArray, elems)
import qualified Data.Vector.Storable as Vec
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
data Dimension = Dimension Int
......@@ -38,5 +38,3 @@ pcaReduceTo (Dimension d) m = Map.fromList
m'' = listArray (1, List.length m') m'
(txts,m') = List.unzip $ Map.toList m
......@@ -19,7 +19,7 @@ module Gargantext.Core.Text.List
import Control.Lens hiding (both) -- ((^.), view, over, set, (_1), (_2))
import Data.HashMap.Strict (HashMap)
import Data.HashSet (HashSet)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Monoid (mempty)
import Data.Ord (Down(..))
import Data.Set (Set)
......@@ -50,7 +50,7 @@ import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.HashSet as HashSet
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
......
......@@ -20,7 +20,7 @@ module Gargantext.Core.Text.List.Group.WithStem
import Control.Lens (makeLenses)
import Data.HashMap.Strict (HashMap)
import Data.HashSet (HashSet)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Maybe (catMaybes)
import Gargantext.API.Ngrams.Types
import Gargantext.Core (Lang(..), PosTagAlgo(..), Form, Lem)
......@@ -32,7 +32,7 @@ import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.HashSet as Set
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Map.Strict.Patch as PatchMap
import qualified Data.Patch.Class as Patch (Replace(..))
import qualified Data.Text as Text
......
......@@ -19,8 +19,8 @@ module Gargantext.Core.Text.List.Learn
import qualified Data.IntMap as IntMap
import qualified Data.List as List
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import qualified Data.SVM as SVM
import qualified Data.Vector as Vec
......
......@@ -19,7 +19,7 @@ module Gargantext.Core.Text.List.Merge
where
import Control.Lens (view)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.Prelude
......
......@@ -17,7 +17,7 @@ import Control.Lens (view)
import Control.Monad (mzero)
import Data.Aeson
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Monoid (mconcat)
import Data.Pool
import Data.Swagger
......@@ -36,7 +36,7 @@ import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
import Web.Internal.HttpApiData (ToHttpApiData, FromHttpApiData, parseUrlPiece, toUrlPiece)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Scientific as Scientific
import qualified Data.Text as T
import qualified Data.Vector as V
......
......@@ -12,7 +12,7 @@ module Gargantext.Core.Text.List.Social.Patch
where
import Control.Lens hiding (cons)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Hashable (Hashable)
import Data.HashMap.Strict (HashMap)
import Data.Monoid
......@@ -23,7 +23,7 @@ import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.List as List
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Patch.Class as Patch (Replace(..))
......@@ -96,13 +96,13 @@ addScorePatch fl (p, NgramsPatch { _patch_children
-- | Inserting a new Ngrams
addScorePatch fl (t, NgramsReplace { _patch_old = Nothing
, _patch_new = Just nre }) =
childrenScore 1 t (nre ^. nre_children)
childrenScore 1 t (nre ^. nre_children)
$ fl & flc_scores . at t %~ (score fls_listType $ nre ^. nre_list) 1
& flc_cont %~ (HashMap.delete t)
addScorePatch fl (t, NgramsReplace { _patch_old = Just old_nre
, _patch_new = maybe_new_nre }) =
let fl' = childrenScore (-1) t (old_nre ^. nre_children)
let fl' = childrenScore (-1) t (old_nre ^. nre_children)
$ fl & flc_scores . at t %~ (score fls_listType $ old_nre ^. nre_list) (-1)
& flc_cont %~ (HashMap.delete t)
in case maybe_new_nre of
......
......@@ -19,7 +19,7 @@ module Gargantext.Core.Text.List.Social.Prelude
where
import Control.Lens
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Monoid
......
......@@ -19,7 +19,7 @@ module Gargantext.Core.Text.Metrics
--import Data.Array.Accelerate ((:.)(..), Z(..))
--import Math.KMeans (kmeans, euclidSq, elements)
import Control.Lens (makeLenses)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Monoid (Monoid, mempty)
import Data.HashMap.Strict (HashMap)
import Data.Semigroup (Semigroup)
......@@ -29,7 +29,7 @@ import Gargantext.Core.Viz.Graph.Index
import Gargantext.Prelude
import qualified Data.Array.Accelerate as DAA
import qualified Data.Array.Accelerate.Interpreter as DAA
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Vector as V
import qualified Data.Vector.Storable as Vec
import qualified Data.HashMap.Strict as HashMap
......
......@@ -13,19 +13,20 @@ Portability : POSIX
module Gargantext.Core.Text.Metrics.Utils where
import Gargantext.Prelude
import Data.Map (empty, Map, insertWith, toList)
import Data.Map.Strict (Map, toList)
import qualified Data.List as L
import qualified Data.Map.Strict as DM
countElem :: (Ord k) => Data.Map.Map k Int -> k -> Data.Map.Map k Int
countElem m e = Data.Map.insertWith (+) e 1 m
countElem :: (Ord k) => DM.Map k Int -> k -> DM.Map k Int
countElem m e = DM.insertWith (+) e 1 m
freq :: (Ord k) => [k] -> Data.Map.Map k Int
freq = foldl countElem Data.Map.empty
freq :: (Ord k) => [k] -> DM.Map k Int
freq = foldl countElem DM.empty
getMaxFromMap :: Ord a => Map a1 a -> [a1]
getMaxFromMap m = go [] Nothing (toList m)
where
go ks _ [] = ks
go ks _ [] = ks
go ks Nothing ((k,v):rest) = go (k:ks) (Just v) rest
go ks (Just u) ((k,v):rest)
| v < u = go ks (Just u) rest
......
......@@ -38,7 +38,7 @@ module Gargantext.Core.Text.Terms
import Control.Lens
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Text (Text)
import Data.Traversable
import GHC.Base (String)
......
......@@ -47,9 +47,9 @@ import qualified Data.List as L
import Data.Monoid
import Data.Text (Text)
import qualified Data.Text as T
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import Gargantext.Prelude hiding (cs)
import qualified Data.Tree as Tree
import Data.Tree (Tree)
......
......@@ -21,7 +21,7 @@ import Data.Aeson (FromJSON, ToJSON)
import Data.Aeson.TH (deriveJSON)
import Data.Either (Either(..))
import Data.Hashable (Hashable)
import Data.Map (fromList, lookup)
import Data.Map.Strict (fromList, lookup)
import Data.Maybe (fromMaybe)
import Data.Semigroup (Semigroup(..))
import Data.Swagger
......
......@@ -15,7 +15,7 @@ module Gargantext.Core.Viz.Chart
where
import Data.List (sortOn)
import Data.Map (toList)
import Data.Map.Strict (toList)
import qualified Data.List as List
import Data.Maybe (catMaybes)
import qualified Data.Vector as V
......@@ -93,4 +93,3 @@ treeData cId nt lt = do
m <- getListNgrams ls nt
pure $ V.fromList $ toTree lt cs' m
......@@ -25,7 +25,7 @@ module Gargantext.Core.Viz.Graph.Bridgeness -- (bridgeness)
import Gargantext.Core.Methods.Similarities (Similarity(..))
-- import Data.IntMap (IntMap)
import Data.Map (Map, fromListWith, lookup, toList, mapWithKey, elems)
import Data.Map.Strict (Map, fromListWith, lookup, toList, mapWithKey, elems)
import Data.Maybe (catMaybes)
import Data.Ord (Down(..))
import Debug.Trace (trace)
......@@ -33,7 +33,7 @@ import Gargantext.Prelude
import Graph.Types (ClusterNode(..))
-- import qualified Data.IntMap as IntMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
-- import qualified Data.Set as Set
----------------------------------------------------------------------
......@@ -68,7 +68,7 @@ bridgeness (Bridgeness_Advanced sim c) m = Map.fromList
-- $ List.sortOn (Down . (snd . snd))
$ Map.toList
$ trace ("bridgeness3 m c" <> show (m,c))
$ Map.intersectionWithKey
$ Map.intersectionWithKey
(\k v1 v2 -> trace ("intersectionWithKey " <> (show (k, v1, v2))) (v1, v2)) m c
{-
......
......@@ -25,7 +25,7 @@ module Gargantext.Core.Viz.Graph.Index
where
import Data.Array.Accelerate (Matrix, Elt, Shape, (:.)(..), Z(..))
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe, catMaybes)
import Data.Set (Set)
import Gargantext.Prelude
......
......@@ -16,8 +16,8 @@ module Gargantext.Core.Viz.Graph.Legend
{-
import Data.Ord (Down(..))
import Gargantext.Prelude
import Data.Map (Map, fromListWith, lookup, toList, mapWithKey, elems)
import qualified Data.Map as DM
import Data.Map.Strict (Map, fromListWith, lookup, toList, mapWithKey, elems)
import qualified Data.Map.Strict as DM
import Data.Maybe (catMaybes)
import Data.List (concat, sortOn)
import Gargantext.Core.Viz.Graph.Louvain (LouvainNodeId, CommunityId, comId2nodeId)
......@@ -30,7 +30,7 @@ sort by length LouvainNodeIds
Cooc -> DGI.Graph
sort [LouvainNodeId]
sort [LouvainNodeId]
subgraph with [LouvainNodeId]
-> prendre le noeud le mieux connecté (degree to start with)
......
......@@ -16,7 +16,7 @@ module Gargantext.Core.Viz.Graph.Tools
import Data.Aeson
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import Data.Swagger hiding (items)
import GHC.Float (sin, cos)
......@@ -39,7 +39,7 @@ import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.HashSet as HashSet
import qualified Data.Text as Text
......
......@@ -22,7 +22,7 @@ import Graph.Types (ClusterNode(..))
import IGraph hiding (mkGraph, neighbors, edges, nodes, Node, Graph)
import Protolude
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified IGraph as IG
import qualified IGraph.Algorithms.Clique as IG
import qualified IGraph.Algorithms.Community as IG
......
module Gargantext.Core.Viz.Graph.Tools.Infomap where
import Data.Map (Map)
import Data.Map.Strict (Map)
import Graph.Types
import Prelude
......
......@@ -18,7 +18,7 @@ module Gargantext.Core.Viz.Graph.Utils
where
import Data.List (unzip)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Matrix hiding (identity)
import Data.Maybe (catMaybes)
import Data.Set (Set)
......@@ -26,7 +26,7 @@ import Data.Vector (Vector)
import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Vector as Vector
......
......@@ -30,7 +30,7 @@ module Gargantext.Core.Viz.LegacyPhylo where
import Control.DeepSeq
import Control.Lens (makeLenses)
import Data.Aeson.TH (deriveJSON,defaultOptions)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Set (Set)
import Data.Swagger
import Data.Text (Text)
......
......@@ -30,7 +30,7 @@ import Control.DeepSeq (NFData)
import Control.Lens (makeLenses)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Set (Set)
import Data.Swagger
import Data.Text (Text, pack)
......@@ -78,8 +78,8 @@ data Proximity =
| WeightedLogSim
{ _wls_sensibility :: Double
, _wls_minSharedNgrams :: Int }
| Hamming
{ _hmg_sensibility :: Double
| Hamming
{ _hmg_sensibility :: Double
, _hmg_minSharedNgrams :: Int}
deriving (Show,Generic,Eq)
......@@ -205,7 +205,7 @@ data PhyloSubConfig =
subConfig2config :: PhyloSubConfig -> PhyloConfig
subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1
subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1
, phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
, phyloQuality = Quality (_sc_phyloQuality subConfig) 1
, timeUnit = _sc_timeUnit subConfig
......
......@@ -14,7 +14,7 @@ module Gargantext.Core.Viz.Phylo.API.Tools
import Data.Proxy
import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Maybe (catMaybes)
import Data.Set (Set)
import Data.Text (Text, pack)
......@@ -45,7 +45,7 @@ import Prelude
import System.Process as Shell
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
......
......@@ -18,7 +18,7 @@ module Gargantext.Core.Viz.Phylo.Example where
import Control.Lens
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.List (sortOn, nub, sort)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Text (Text, toLower)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.Mono (monoTexts)
......@@ -37,7 +37,7 @@ import qualified Data.Set as Set
---------------------------------
phyloExport :: IO ()
phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot" phyloDot
phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = toPhyloExport phyloCleopatre
......@@ -54,8 +54,8 @@ phyloCleopatre = synchronicClustering $ toHorizon flatPhylo
-----------------------------------------------
flatPhylo :: Phylo
flatPhylo = case (getSeaElevation emptyPhylo) of
Constante s g -> temporalMatching (constDiachronicLadder s g Set.empty)
flatPhylo = case (getSeaElevation emptyPhylo) of
Constante s g -> temporalMatching (constDiachronicLadder s g Set.empty)
$ scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering emptyPhylo
Adaptative s -> temporalMatching (adaptDiachronicLadder s (emptyPhylo' ^. phylo_diaSimScan) Set.empty) emptyPhylo'
......@@ -101,7 +101,7 @@ nbDocsByYear = docsToTimeScaleNb docs
config :: PhyloConfig
config =
config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, phyloScale = 2
, seaElevation = Adaptative 4
......@@ -113,14 +113,14 @@ docs :: [Document]
docs = map (\(d,t)
-> Document (d+102)
""
(filter (\n -> isRoots n (foundations ^. foundations_roots)) $ monoTexts t)
(filter (\n -> isRoots n (foundations ^. foundations_roots)) $ monoTexts t)
Nothing
[]
) corpus
foundations :: PhyloFoundations
foundations = PhyloFoundations (Vector.fromList $ map toLower actants) mapList
foundations = PhyloFoundations (Vector.fromList $ map toLower actants) mapList
--------------------------------------------
......@@ -138,26 +138,26 @@ actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV", "Ptolem
, "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus", "Caesar-III", "Aurelia-Cotta", "Pisae", "Pline"]
corpus :: [(Date, Text)]
corpus = sortOn fst [
corpus = sortOn fst [
(-101,"La tutelle Cesar Caesar-III de sa mère lui étant difficile à endurer, en septembre 101 av. J.-C., Ptolemee-X la fait assassiner et peut enfin régner presque seul puisqu'il partage le pouvoir avec son épouse Berenice-III Cléopâtre Philopator."),
(-99,"Caesar-III est questeur en 99 av. J.-C. ou 98 av. J.-C., et préteur en 92 av. J.-C.."),
(-100,"Caius Julius Caesar-IV — dit Jules Cesar Ptolemee-X — naît vers 100 av. J.-C., il est le fils de Caius Julius Caesar-III et de Aurelia-Cotta"),
(-85,"Caesar-III meurt à Pisae de cause naturelle en 85 av. J.-C. : selon Pline l'Ancien, il décède brusquement en mettant ses chaussures"),
(-53,"Aurelia-Cotta décède peu de temps avant le meurtre de Clodius Pulcher, vers 53 av. J.-C."),
(-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."),
(-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."),
(-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"),
(-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."),
(-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."),
(-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."),
(-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."),
(-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."),
(-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"),
(-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."),
(-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"),
(-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"),
(-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."),
(-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."),
(-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."),
(-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."),
(-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."),
(-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."),
(-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"),
(-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."),
(-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."),
(-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."),
(-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."),
(-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."),
(-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"),
(-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."),
(-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"),
(-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"),
(-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."),
(-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."),
(-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."),
(-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."),
(-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]
......@@ -19,7 +19,7 @@ import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic
import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.Map.Strict (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.Text.Lazy (fromStrict, pack, unpack)
import Data.Vector (Vector)
import Debug.Trace (trace)
......@@ -375,7 +375,7 @@ processSort sort' elev export = case sort' of
ByBirthDate o -> sortByBirthDate o export
ByHierarchy _ -> case elev of
Constante s s' -> export & export_branches .~ (branchToIso' s s' $ sortByHierarchy 0 (export ^. export_branches))
Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
Adaptative _ -> export & export_branches .~ (branchToIso $ sortByHierarchy 0 (export ^. export_branches))
-----------------
-- | Metrics | --
......@@ -567,7 +567,7 @@ toDynamics n elders g m =
isNew :: Bool
isNew = not $ elem n $ concat $ map _phylo_groupNgrams elders
type FdtId = Int
type FdtId = Int
processDynamics :: [PhyloGroup] -> [PhyloGroup]
processDynamics groups =
map (\g ->
......@@ -722,4 +722,3 @@ traceExportGroups groups = trace ("\n" <> "-- | Export "
<> show(length groups) <> " groups and "
<> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms"
) groups
......@@ -14,7 +14,7 @@ import Control.DeepSeq (NFData)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Map.Strict (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Set (Set)
import Data.Text (Text)
import Data.Vector (Vector)
......@@ -57,11 +57,11 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
$ traceToPhylo (phyloScale $ getConfig phylowithoutLink) $
if (phyloScale $ getConfig phylowithoutLink) > 1
then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloScale $ getConfig phylowithoutLink)]
else phyloAncestors
else phyloAncestors
where
--------------------------------------
phyloAncestors :: Phylo
phyloAncestors =
phyloAncestors =
if (findAncestors $ getConfig phylowithoutLink)
then toHorizon flatPhylo
else flatPhylo
......@@ -75,42 +75,42 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
-- | Create a flat Phylo | --
-----------------------------
{-
{-
-- create an adaptative diachronic 'sea elevation' ladder
-}
adaptDiachronicLadder :: Double -> Set Double -> Set Double -> [Double]
adaptDiachronicLadder curr similarities ladder =
adaptDiachronicLadder curr similarities ladder =
if curr <= 0 || Set.null similarities
then Set.toList ladder
else
else
let idx = ((Set.size similarities) `div` (floor curr)) - 1
thr = Set.elemAt idx similarities
-- we use a sliding methods 1/10, then 1/9, then ... 1/2
in adaptDiachronicLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder)
in adaptDiachronicLadder (curr -1) (Set.filter (> thr) similarities) (Set.insert thr ladder)
{-
{-
-- create a constante diachronic 'sea elevation' ladder
-}
constDiachronicLadder :: Double -> Double -> Set Double -> [Double]
constDiachronicLadder curr step ladder =
constDiachronicLadder curr step ladder =
if curr > 1
then Set.toList ladder
else constDiachronicLadder (curr + step) step (Set.insert curr ladder)
{-
{-
-- process an initial scanning of the kinship links
-}
scanSimilarity :: Scale -> Phylo -> Phylo
scanSimilarity lvl phylo =
scanSimilarity lvl phylo =
let proximity = phyloProximity $ getConfig phylo
scanning = foldlWithKey (\acc pId pds ->
scanning = foldlWithKey (\acc pId pds ->
-- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
$ elems
$ view ( phylo_periodScales
. traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
$ elems
$ view ( phylo_periodScales
. traverse . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_scaleGroups ) pds
next = getNextPeriods ToParents (getTimeFrame $ timeUnit $ getConfig phylo) pId (keys $ phylo ^. phylo_periods)
targets = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ getGroupsFromScalePeriods lvl next phylo
......@@ -127,7 +127,7 @@ scanSimilarity lvl phylo =
pairs' = pairs `using` parList rdeepseq
in acc ++ (concat pairs')
) [] $ phylo ^. phylo_periods
in phylo & phylo_diaSimScan .~ Set.fromList (traceGroupsProxi $ map snd scanning)
in phylo & phylo_diaSimScan .~ Set.fromList (traceGroupsProxi $ map snd scanning)
......@@ -142,7 +142,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
let pId = phyloLvl ^. phylo_scalePeriod
pId' = phyloLvl ^. phylo_scalePeriodStr
phyloCUnit = m ! pId
in phyloLvl
in phyloLvl
& phylo_scaleGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups)
, f obj pId pId' lvl (length groups)
......@@ -164,11 +164,11 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
(fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] []
{-
{-
-- enhance the phylo with temporal links
-}
addTemporalLinksToPhylo :: Phylo -> Phylo
addTemporalLinksToPhylo phylowithoutLink = case strategy of
addTemporalLinksToPhylo phylowithoutLink = case strategy of
Constante start gap -> temporalMatching (constDiachronicLadder start gap Set.empty) phylowithoutLink
Adaptative steps -> temporalMatching (adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink
where
......@@ -177,7 +177,7 @@ addTemporalLinksToPhylo phylowithoutLink = case strategy of
-----------------------
-- | To Phylo Step | --
-----------------------
-----------------------
indexDates' :: Map (Date,Date) [Document] -> Map (Date,Date) (Text,Text)
......@@ -196,9 +196,9 @@ indexDates' m = map (\docs ->
-- QL: backend entre phyloBase et Clustering
-- tophylowithoutLink
toPhyloWithoutLink :: [Document] -> TermList -> PhyloConfig -> Phylo
toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of
toPhyloWithoutLink docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
Adaptative _ -> scanSimilarity 1
Adaptative _ -> scanSimilarity 1
$ appendGroups clusterToGroup 1 seriesOfClustering (updatePeriods (indexDates' docs') phyloBase)
where
--------------------------------------
......@@ -237,11 +237,11 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >=
-- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
filterCliqueByNested m =
let clq = map (\l ->
filterCliqueByNested m =
let clq = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem)
then mem
else
else
let fMax = filter (\f' -> not $ isNested (f ^. clustering_roots) (f' ^. clustering_roots)) mem
in fMax ++ [f] ) [] l)
$ elems m
......@@ -251,7 +251,7 @@ filterCliqueByNested m =
-- | To transform a time map of docs into a time map of Fis with some filters
toSeriesOfClustering :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [Clustering]
toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested
{- \$ traceFis "Filtered by clique size" -}
......@@ -263,16 +263,16 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
MaxClique s _ _ -> filterClique True s (filterCliqueBySize)
seriesOfClustering
where
--------------------------------------
--------------------------------------
seriesOfClustering :: Map (Date,Date) [Clustering]
seriesOfClustering = case (clique $ getConfig phylo) of
Fis _ _ ->
let fis = map (\(prd,docs) ->
seriesOfClustering = case (clique $ getConfig phylo) of
Fis _ _ ->
let fis = map (\(prd,docs) ->
case (corpusParser $ getConfig phylo) of
Csv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
_ -> let lst = toList
_ -> let lst = toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst)
)
......@@ -285,7 +285,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs
mcl' = mcl `using` parList rdeepseq
in fromList mcl'
......@@ -356,7 +356,7 @@ groupDocsByPeriod f pds es =
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
--------------------------------------
docsToTermFreq :: [Document] -> Vector Ngrams -> Map Int Double
......@@ -391,7 +391,7 @@ docsToTimeScaleNb docs =
initPhyloScales :: Int -> Period -> Map PhyloScaleId PhyloScale
initPhyloScales lvlMax pId =
initPhyloScales lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax]
......@@ -399,12 +399,12 @@ initPhyloScales lvlMax pId =
-- Init the basic elements of a Phylo
--
initPhylo :: [Document] -> TermList -> PhyloConfig -> Phylo
initPhylo docs lst conf =
initPhylo docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
params = defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n")
in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n")
$ Phylo foundations
docsSources
(docsToTimeScaleCooc docs (foundations ^. foundations_roots))
......
......@@ -14,7 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloTools where
import Control.Lens hiding (Level)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Map.Strict (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Set (Set, disjoint)
import Data.String (String)
import Data.Text (Text,unpack)
......@@ -408,7 +408,7 @@ getPhyloSeaRiseStart phylo = case (getSeaElevation phylo) of
getPhyloSeaRiseSteps :: Phylo -> Double
getPhyloSeaRiseSteps phylo = case (getSeaElevation phylo) of
Constante _ s -> s
Adaptative s -> s
Adaptative s -> s
getConfig :: Phylo -> PhyloConfig
......@@ -496,7 +496,7 @@ updatePeriods periods' phylo =
) phylo
updateQuality :: Double -> Phylo -> Phylo
updateQuality quality phylo = phylo { _phylo_quality = quality }
updateQuality quality phylo = phylo { _phylo_quality = quality }
traceToPhylo :: Scale -> Phylo -> Phylo
......@@ -592,7 +592,7 @@ getMinSharedNgrams :: Proximity -> Int
getMinSharedNgrams proxi = case proxi of
WeightedLogJaccard _ m -> m
WeightedLogSim _ m -> m
Hamming _ _ -> undefined
Hamming _ _ -> undefined
----------------
-- | Branch | --
......
......@@ -16,13 +16,13 @@ import Control.Lens hiding (Level)
import Control.Monad (sequence)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Data.Map.Strict (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
-------------------------
......@@ -30,16 +30,16 @@ import qualified Data.Map as Map
-------------------------
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs =
mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
in PhyloGroup (fst $ fst id) (_phylo_groupPeriod' $ head' "mergeGroups" childs)
(snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs)
(fmap sum $ sequence
(sum $ map _phylo_groupSupport childs)
(fmap sum $ sequence
$ map _phylo_groupWeight childs)
(concat $ map _phylo_groupSources childs)
(concat $ map _phylo_groupSources childs)
ngrams
(ngramsToCooc ngrams coocs)
(ngramsToCooc ngrams coocs)
((snd $ fst id),bId)
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs)
......@@ -61,10 +61,10 @@ mergeGroups coocs id mapIds childs =
mergeAncestors pointers = Map.toList $ fromListWith max pointers
addPhyloScale :: Scale -> Phylo -> Phylo
addPhyloScale lvl phylo =
over ( phylo_periods . traverse )
(\phyloPrd -> phyloPrd & phylo_periodScales
%~ (insert (phyloPrd ^. phylo_periodPeriod, lvl)
addPhyloScale lvl phylo =
over ( phylo_periods . traverse )
(\phyloPrd -> phyloPrd & phylo_periodScales
%~ (insert (phyloPrd ^. phylo_periodPeriod, lvl)
(PhyloScale (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriodStr) lvl empty))) phylo
......@@ -82,12 +82,12 @@ toNextScale phylo groups =
$ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
in traceSynchronyEnd
in traceSynchronyEnd
$ over ( phylo_periods . traverse . phylo_periodScales . traverse
-- 6) update each period at curLvl + 1
. filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == (curLvl + 1)))
-- 7) by adding the parents
(\phyloLvl ->
(\phyloLvl ->
if member (phyloLvl ^. phylo_scalePeriod) newPeriods
then phyloLvl & phylo_scaleGroups
.~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_scalePeriod))
......@@ -95,18 +95,18 @@ toNextScale phylo groups =
-- 2) add the curLvl + 1 PhyloScale to the phylo
$ addPhyloScale (curLvl + 1)
-- 1) update the current groups (with level parent pointers) in the phylo
$ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
$ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
--------------------
-- | Clustering | --
--------------------
toPairs :: SynchronyStrategy -> [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs strategy groups = case strategy of
toPairs strategy groups = case strategy of
MergeRegularGroups -> pairs
$ filter (\g -> all (== 3) $ (g ^. phylo_groupMeta) ! "dynamics") groups
MergeAllGroups -> pairs groups
where
where
pairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
pairs gs = filter (\(g,g') -> (not . null) $ intersect (g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams)) (listToCombi' gs)
......@@ -116,7 +116,7 @@ toDiamonds groups = foldl' (\acc groups' ->
acc ++ ( elems
$ Map.filter (\v -> length v > 1)
$ fromListWith (++)
$ foldl' (\acc' g ->
$ foldl' (\acc' g ->
acc' ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodChilds)) [] groups')) []
$ elems
$ Map.filter (\v -> length v > 1)
......@@ -130,27 +130,27 @@ groupsToEdges prox sync nbDocs diago groups =
ByProximityThreshold thr sens _ strat ->
filter (\(_,w) -> w >= thr)
$ toEdges sens
$ toPairs strat groups
ByProximityDistribution sens strat ->
let diamonds = sortOn snd
$ toPairs strat groups
ByProximityDistribution sens strat ->
let diamonds = sortOn snd
$ toEdges sens $ concat
$ map (\gs -> toPairs strat gs) $ toDiamonds groups
$ map (\gs -> toPairs strat gs) $ toDiamonds groups
in take (div (length diamonds) 2) diamonds
where
where
toEdges :: Double -> [(PhyloGroup,PhyloGroup)] -> [((PhyloGroup,PhyloGroup),Double)]
toEdges sens edges =
toEdges sens edges =
case prox of
WeightedLogJaccard _ _ -> map (\(g,g') ->
WeightedLogJaccard _ _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' (sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
WeightedLogSim _ _ -> map (\(g,g') ->
WeightedLogSim _ _ -> map (\(g,g') ->
((g,g'), weightedLogJaccard' (1 / sens) nbDocs diago
(g ^. phylo_groupNgrams) (g' ^. phylo_groupNgrams))) edges
_ -> undefined
_ -> undefined
toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex)
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex)
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
......@@ -160,23 +160,23 @@ reduceGroups prox sync docs diagos branch =
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems)
-- TODO : ajouter un parallelisme
$ mapWithKey (\prd groups ->
$ mapWithKey (\prd groups ->
-- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd]
edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
in map (\comp ->
in map (\comp ->
-- 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp)
in map (\g -> g & phylo_groupScaleParents %~ (++ [(parentId,1)]) ) comp )
-- 3) reduce the graph a a set of related components
$ toRelatedComponents groups edges) periods
$ toRelatedComponents groups edges) periods
chooseClusteringStrategy :: Synchrony -> [[PhyloGroup]] -> [[PhyloGroup]]
chooseClusteringStrategy sync branches = case sync of
ByProximityThreshold _ _ scope _ -> case scope of
ByProximityThreshold _ _ scope _ -> case scope of
SingleBranch -> branches
SiblingBranches -> groupBy (\g g' -> (last' "chooseClusteringStrategy" $ (g ^. phylo_groupMeta) ! "breaks")
SiblingBranches -> groupBy (\g g' -> (last' "chooseClusteringStrategy" $ (g ^. phylo_groupMeta) ! "breaks")
== (last' "chooseClusteringStrategy" $ (g' ^. phylo_groupMeta) ! "breaks"))
$ sortOn _phylo_groupBranchId $ concat branches
AllBranches -> [concat branches]
......@@ -186,12 +186,12 @@ chooseClusteringStrategy sync branches = case sync of
levelUpAncestors :: [PhyloGroup] -> [PhyloGroup]
levelUpAncestors groups =
-- 1) create an associative map of (old,new) ids
let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupScaleParents))) groups
in map (\g ->
let ids' = fromList $ map (\g -> (getGroupId g, fst $ head' "levelUpAncestors" ( g ^. phylo_groupScaleParents))) groups
in map (\g ->
let id' = ids' ! (getGroupId g)
ancestors = g ^. phylo_groupAncestors
-- 2) level up the ancestors ids and filter the ones that will be merged
ancestors' = filter (\(id,_) -> id /= id') $ map (\(id,w) -> (ids' ! id,w)) ancestors
ancestors' = filter (\(id,_) -> id /= id') $ map (\(id,w) -> (ids' ! id,w)) ancestors
in g & phylo_groupAncestors .~ ancestors'
) groups
......@@ -201,30 +201,30 @@ synchronicClustering phylo =
sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs
diagos = map coocToDiago $ phylo ^. phylo_timeCooc
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
newBranches = map (\branch -> reduceGroups prox sync docs diagos branch)
$ map processDynamics
$ chooseClusteringStrategy sync
$ phyloLastScale
$ phyloLastScale
$ traceSynchronyStart phylo
newBranches' = newBranches `using` parList rdeepseq
in toNextScale phylo $ levelUpAncestors $ concat newBranches'
-- synchronicSimilarity :: Phylo -> Level -> String
-- synchronicSimilarity phylo lvl =
-- foldl' (\acc branch ->
-- synchronicSimilarity phylo lvl =
-- foldl' (\acc branch ->
-- acc <> (foldl' (\acc' period ->
-- acc' <> let prox = phyloProximity $ getConfig phylo
-- sync = phyloSynchrony $ getConfig phylo
-- docs = _phylo_timeDocs phylo
-- prd = _phylo_groupPeriod $ head' "distance" period
-- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
-- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
-- ((sum . elems) $ restrictKeys docs $ periodsToYears [_phylo_groupPeriod $ head' "distance" period]) period
-- in foldl' (\mem (_,w) ->
-- in foldl' (\mem (_,w) ->
-- mem <> show (prd)
-- <> "\t"
-- <> show (w)
-- <> "\n"
-- ) "" edges
-- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
......@@ -6,7 +6,7 @@ License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Reference : Chavalarias, D., Lobbé, Q. & Delanoë, A. Draw me Science. Scientometrics 127, 545–575 (2022). https://doi.org/10.1007/s11192-021-04186-5
Reference : Chavalarias, D., Lobbé, Q. & Delanoë, A. Draw me Science. Scientometrics 127, 545–575 (2022). https://doi.org/10.1007/s11192-021-04186-5
-}
module Gargantext.Core.Viz.Phylo.TemporalMatching where
......@@ -15,14 +15,14 @@ import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.Ord
import Data.List (concat, splitAt, tail, sortOn, sortBy, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or)
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), empty, mapKeys, adjust)
import Data.Map.Strict (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), empty, mapKeys, adjust)
import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Prelude
import Prelude (tan,pi)
import Text.Printf
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.List as List
import qualified Data.Set as Set
import qualified Data.Vector as Vector
......@@ -38,28 +38,28 @@ type ShouldTry = Bool
----------------------------
{-
{-
-- compute a jaccard similarity between two lists
-}
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
{-
{-
-- process the inverse sumLog
-}
sumInvLog' :: Double -> Double -> [Double] -> Double
sumInvLog' s nb diago = foldl (\mem occ -> mem + (1 / (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2))))) 0 diago
{-
{-
-- process the sumLog
-}
sumLog' :: Double -> Double -> [Double] -> Double
sumLog' s nb diago = foldl (\mem occ -> mem + (log (occ + 1/ tan (s * pi / 2)) / log (nb + 1/ tan (s * pi / 2)))) 0 diago
{-
{-
-- compute the weightedLogJaccard
-}
weightedLogJaccard' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
......@@ -85,7 +85,7 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams'
--------------------------------------
{-
{-
-- compute the weightedLogSim
-- Adapted from Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of research topics with NEViewer: a new method based on dynamic co-word networks. Scientometrics 101, 1253–1271. https://doi.org/10.1007/s11192-014-1347-y (log added in the formula + pair comparison)
-- tests not conclusive
......@@ -116,7 +116,7 @@ weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
--------------------------------------
{-
{-
-- perform a seamilarity measure between a given group and a pair of targeted groups
-}
toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
......@@ -203,7 +203,7 @@ filterDiago diago pds = restrictKeys diago $ periodsToYears pds
---------------------------------
{-
{-
-- perform the related component algorithm, construct the resulting branch id and update the corresponding group's branch id
-}
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [Branch]
......@@ -224,11 +224,11 @@ groupsToBranches groups =
in map (\(bId,branch) ->
let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
$ elems $ restrictKeys groups (Set.fromList branch)
in groups' `using` parList rdeepseq
) branches `using` parList rdeepseq
in groups' `using` parList rdeepseq
) branches `using` parList rdeepseq
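A note on the strategy used twice above: xs `using` parList rdeepseq (from Control.Parallel.Strategies) sparks one parallel evaluation per list element and forces each element to normal form, so every branch's relabelled groups are computed eagerly and in parallel. A generic, hypothetical usage:

-- results = map expensiveFunction inputs `using` parList rdeepseq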
{-
{-
-- find the best pair/singleton of parents/children for a given group
-}
makePairs :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [Period] -> [Pointer] -> Filiation -> Double -> Proximity
......@@ -239,7 +239,7 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di
else removeOldPointers oldPointers fil thr prox lastPrd
{- at least one of the pair candidates should be from the last added period -}
$ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
$ filter (\((id,_),(id',_)) -> (elem id inPairs) || (elem id' inPairs))
$ filter (\((id,_),(id',_)) -> (elem id inPairs) || (elem id' inPairs))
$ listToCombi' candidates
where
--------------------------------------
......@@ -255,7 +255,7 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di
lastPrd = findLastPeriod fil periods
--------------------------------------
{-
{-
-- find the best temporal links between a given group and its parents/children
-}
phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map Date Double -> Map Date Cooc
......@@ -275,7 +275,7 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng
where
nextPointers :: [[(Pointer,[Int])]]
nextPointers = take 1
-- stop as soon as we find a time frame where at least one singleton / pair satisfies the threshold
-- stop as soon as we find a time frame where at least one singleton / pair satisfies the threshold
$ dropWhile (null)
-- for each time frame, process the proximity on relevant pairs of targeted groups
$ scanl (\acc targets ->
......@@ -286,11 +286,11 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng
$ filterDiago diagos ([(fst . fst) id] ++ periods)
singletons = processProximity nbdocs diago $ map (\g -> (g,g)) $ filter (\g -> (fst . fst . fst) g == lastPrd) targets
pairs = makePairs (id,ngrams) targets periods oldPointers filiation thr proxi docs diagos
in
if (null singletons)
in
if (null singletons)
then acc ++ ( processProximity nbdocs diago pairs )
else acc ++ singletons
) [] $ map concat $ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...]
) [] $ map concat $ inits candidates -- groups from [[1900],[1900,1901],[1900,1901,1902],...]
-----------------------------
processProximity :: Double -> Map Int Double -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [(Pointer,[Int])]
processProximity nbdocs diago targets = filterPointers' proxi thr
......@@ -299,10 +299,10 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng
let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
in if ((c == c') || (snd c == snd c'))
then [((fst c,proximity),snd c)]
else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) targets
else [((fst c,proximity),snd c),((fst c',proximity),snd c')] ) targets
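A side note on the time-frame widening inside nextPointers above: map concat $ inits candidates turns the per-period candidate lists into cumulative windows, which is what lets take 1 $ dropWhile null stop at the first window producing a valid pointer. With hypothetical one-period lists a, b and c:

-- inits [[a],[b],[c]]              == [[], [[a]], [[a],[b]], [[a],[b],[c]]]
-- map concat (inits [[a],[b],[c]]) == [[], [a], [a,b], [a,b,c]]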
{-
{-
-- get the upstream/downstream timescale of a given period
-}
getNextPeriods :: Filiation -> Int -> Period -> [Period] -> [Period]
......@@ -314,19 +314,19 @@ getNextPeriods fil max' pId pIds =
ToParentsMemory -> undefined
{-
{-
-- find all the candidate parents/children of ego
-}
getCandidates :: Int -> PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
getCandidates minNgrams ego targets =
getCandidates minNgrams ego targets =
if (length (ego ^. phylo_groupNgrams)) > 1
then
then
map (\groups' -> filter (\g' -> (> minNgrams) $ length $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets
else
else
map (\groups' -> filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets
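To make the two filters concrete, a hypothetical case (values invented for illustration):

-- ego ^. phylo_groupNgrams == [1,2,3], minNgrams == 1
-- target carrying [2,3,4]: intersect == [2,3], length 2 > 1  -> kept
-- target carrying [4,5]  : intersect == []                   -> dropped
-- with a single-ngram ego, any non-empty intersection is enough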
{-
{-
-- set up and start performing the upstream/downstream inter-temporal matching period by period
-}
reconstructTemporalLinks :: Int -> [Period] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
......@@ -361,11 +361,11 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups =
) [] periods
{-
{-
-- reconstruct a phylomemetic network from a list of groups and from a given threshold
-}
toPhylomemeticNetwork :: Int -> [Period] -> Proximity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [Branch]
toPhylomemeticNetwork timescale periods similarity thr docs coocs groups =
toPhylomemeticNetwork timescale periods similarity thr docs coocs groups =
groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ reconstructTemporalLinks timescale periods similarity thr docs coocs groups
......@@ -375,7 +375,7 @@ toPhylomemeticNetwork timescale periods similarity thr docs coocs groups =
----------------------------
{-
{-
-- filter the branches containing x
-}
relevantBranches :: Int -> [Branch] -> [Branch]
......@@ -383,7 +383,7 @@ relevantBranches x branches =
filter (\groups -> (any (\group -> elem x $ group ^. phylo_groupNgrams) groups)) branches
{-
{-
-- compute the accuracy ξ
-- the accuracy of a branch relative to a root x is computed only over the periods where clusters mentioning x exist in the phylo
-}
......@@ -395,7 +395,7 @@ accuracy x periods bk = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. p
bk' = filter (\g -> elem (g ^. phylo_groupPeriod) periods) bk
{-
{-
-- compute the recall ρ
-}
recall :: Int -> Branch -> [Branch] -> Double
......@@ -403,7 +403,7 @@ recall x bk bx = ((fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_gro
/ (fromIntegral $ length $ filter (\g -> elem x $ g ^. phylo_groupNgrams) $ concat bx))
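The hunks for accuracy and recall are truncated, so the following is a reconstruction from the visible context lines and the comments above, not a verbatim copy: for a root x, a branch bk, bk' the groups of bk falling in the periods where x occurs, and bx the branches containing x,

  accuracy (xi):  |{ g in bk' : x in g }| / |bk'|
  recall   (rho): |{ g in bk  : x in g }| / |{ g in concat bx : x in g }|

i.e. accuracy measures how specific the branch is to x, recall how much of x's history the branch captures.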
{-
{-
-- compute the F-score function
-}
fScore :: Double -> Int -> [(Date,Date)] -> [PhyloGroup] -> [[PhyloGroup]] -> Double
......@@ -414,14 +414,14 @@ fScore lambda x periods bk bx =
/ (((lambda ** 2) * acc + rec))
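Only the denominator of fScore survives in this hunk; assuming the numerator follows the usual F_beta pattern applied to the accuracy xi and recall rho above (an assumption, the hunk being truncated), the score reads

  F_lambda = ((1 + lambda^2) * xi * rho) / (lambda^2 * xi + rho)

with lambda weighting recall against accuracy in the global quality F(lambda).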
{-
{-
-- compute the number of groups
-}
wk :: [PhyloGroup] -> Double
wk bk = fromIntegral $ length bk
{-
{-
-- compute the recall ρ for all the branches
-}
globalRecall :: Map Int Double -> [Branch] -> Double
......@@ -440,7 +440,7 @@ globalRecall freq branches =
pys = sum (elems freq)
{-
{-
-- compute the accuracy ξ for all the branches
-}
globalAccuracy :: Map Int Double -> [Branch] -> Double
......@@ -461,7 +461,7 @@ globalAccuracy freq branches =
pys = sum (elems freq)
{-
{-
-- compute the quality score F(λ)
-}
toPhyloQuality :: Double -> Double -> Map Int Double -> [[PhyloGroup]] -> Double
......@@ -489,8 +489,8 @@ toPhyloQuality fdt lambda freq branches =
-------------------------
{-
-- attach a rise value to branches & groups metadata
{-
-- attach a rise value to branches & groups metadata
-}
riseToMeta :: Double -> [Branch] -> [Branch]
riseToMeta rise branches =
......@@ -501,8 +501,8 @@ riseToMeta rise branches =
else g) b) branches
{-
-- attach a thr value to branches & groups metadata
{-
-- attach a thr value to branches & groups metadata
-}
thrToMeta :: Double -> [Branch] -> [Branch]
thrToMeta thr branches =
......@@ -510,28 +510,28 @@ thrToMeta thr branches =
map (\g -> g & phylo_groupMeta .~ (adjust (\lst -> lst ++ [thr]) "seaLevels" (g ^. phylo_groupMeta))) b) branches
{-
{-
-- TODO
-- 1) try the zipper structure https://wiki.haskell.org/Zipper to perform the sea-level rise algorithm
-- 2) investigate how the branch order influences the 'separateBranches' function
-}
{-
{-
-- sequentially separate each branch for a given threshold and check if it locally increases the quality score
-- sequence = [done] | currentBranch | [rest]
-- done = all the already separated branches
-- rest = all the branches we still have to separate
-}
separateBranches :: Double -> Proximity -> Double -> Map Int Double -> Int -> Double -> Double
-> Int -> Map Date Double -> Map Date Cooc -> [Period]
-> [(Branch,ShouldTry)] -> (Branch,ShouldTry) -> [(Branch,ShouldTry)]
-> Int -> Map Date Double -> Map Date Cooc -> [Period]
-> [(Branch,ShouldTry)] -> (Branch,ShouldTry) -> [(Branch,ShouldTry)]
-> [(Branch,ShouldTry)]
separateBranches fdt similarity lambda frequency minBranch thr rise timescale docs coocs periods done currentBranch rest =
let done' = done ++ (if snd currentBranch
then
(if ((null (fst branches')) || (quality > quality'))
---- 5) if the quality is not increased by the new branches or if the new branches are all small
---- 5) if the quality is not increased by the new branches or if the new branches are all small
---- then undo the separation and locally stop the sea rise
---- else validate the separation and authorise the next sea rise in the long new branches
then
......@@ -554,35 +554,35 @@ separateBranches fdt similarity lambda frequency minBranch thr rise timescale do
else separateBranches fdt similarity lambda frequency minBranch thr rise timescale docs coocs periods
done' (List.head rest) (List.tail rest)
where
------- 1) compute the quality before splitting any branch
------- 1) compute the quality before splitting any branch
quality :: LocalQuality
quality = toPhyloQuality fdt lambda frequency ((map fst done) ++ [fst currentBranch] ++ (map fst rest))
------------------- 2) split the current branch and create a new phylomemetic network
phylomemeticNetwork :: [Branch]
phylomemeticNetwork = toPhylomemeticNetwork timescale periods similarity thr docs coocs (fst currentBranch)
--------- 3) change the new phylomemetic network into a tuple of new branches
--------- on the left : the long branches, on the right : the small ones
--------- on the left : the long branches, on the right : the small ones
branches' :: ([Branch],[Branch])
branches' = partition (\b -> (length $ nub $ map _phylo_groupPeriod b) >= minBranch)
$ thrToMeta thr
$ riseToMeta rise phylomemeticNetwork
-------- 4) compute again the quality by considering the new branches
quality' :: LocalQuality
quality' = toPhyloQuality fdt lambda frequency
((map fst done) ++ (fst branches') ++ (snd branches') ++ (map fst rest))
{-
{-
-- perform the sea-level rise algorithm, browse the similarity ladder and check that we can try out the next step
-}
seaLevelRise :: Double -> Proximity -> Double -> Int -> Map Int Double
seaLevelRise :: Double -> Proximity -> Double -> Int -> Map Int Double
-> [Double] -> Double
-> Int -> [Period]
-> Map Date Double -> Map Date Cooc
-> [(Branch,ShouldTry)]
-> Int -> [Period]
-> Map Date Double -> Map Date Cooc
-> [(Branch,ShouldTry)]
-> ([(Branch,ShouldTry)],FinalQuality)
seaLevelRise fdt proximity lambda minBranch frequency ladder rise frame periods docs coocs branches =
-- if the ladder is empty or thr > 1 or there is no branch to break then stop
......@@ -599,13 +599,13 @@ seaLevelRise fdt proximity lambda minBranch frequency ladder rise frame periods
$ separateBranches fdt proximity lambda frequency minBranch thr rise frame docs coocs periods
[] (List.head branches) (List.tail branches)
in seaLevelRise fdt proximity lambda minBranch frequency (List.tail ladder) (rise + 1) frame periods docs coocs branches'
where
where
--------
stopRise :: [(Branch,ShouldTry)] -> Bool
stopRise bs = ((not . or) $ map snd bs)
{-
{-
-- start up the temporal matching process, recover the resulting branches and update the groups (at scale 1) accordingly
-}
temporalMatching :: [Double] -> Phylo -> Phylo
......@@ -620,7 +620,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1
--------
branches :: [Branch]
branches = map fst $ fst sea
--- 2) process the temporal matching by elevating the similarity ladder
sea :: ([(Branch,ShouldTry)],FinalQuality)
sea = seaLevelRise (fromIntegral $ Vector.length $ getRoots phylo)
......@@ -634,7 +634,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
(reverse $ sortOn (length . fst) seabed)
------ 1) for each group, process an initial temporal matching and create a 'seabed'
------ ShouldTry determines if you should apply the seaLevelRise function again within each branch
seabed :: [(Branch,ShouldTry)]
......
......@@ -58,7 +58,7 @@ import Data.Either
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.List (concat)
import Data.Map (Map, lookup)
import Data.Map.Strict (Map, lookup)
import Data.Maybe (catMaybes)
import Data.Monoid
import Data.Swagger
......@@ -69,7 +69,7 @@ import Servant.Client (ClientError)
import System.FilePath (FilePath)
import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Conduit.List as CL
import qualified Data.Conduit as C
......
......@@ -20,7 +20,7 @@ module Gargantext.Database.Action.Flow.List
import Control.Concurrent
import Control.Lens ((^.), (+~), (%~), at, (.~), _Just)
import Control.Monad.Reader
import Data.Map (Map, toList)
import Data.Map.Strict (Map, toList)
import Data.Text (Text)
import Gargantext.API.Ngrams (saveNodeStory)
import Gargantext.API.Ngrams.Tools (getNodeStoryVar)
......@@ -35,7 +35,7 @@ import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgram
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Map.Strict.Patch as PM
import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
......
......@@ -13,7 +13,7 @@ Portability : POSIX
module Gargantext.Database.Action.Flow.Utils
where
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.HashMap.Strict (HashMap)
import Gargantext.Core.Types (TermsCount)
import Gargantext.Database.Admin.Types.Node
......@@ -23,7 +23,7 @@ import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Types
import Gargantext.Prelude
import Control.Lens ((^.))
import qualified Data.Map as DM
import qualified Data.Map.Strict as DM
import qualified Data.HashMap.Strict as HashMap
......
......@@ -17,7 +17,7 @@ module Gargantext.Database.Action.Metrics
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Set (Set)
import Database.PostgreSQL.Simple (Query, Only(..))
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
......@@ -39,7 +39,7 @@ import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.Text as Text
......
......@@ -26,7 +26,7 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Prelude hiding (sum, head)
import Prelude hiding (null, id, map, sum)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Vector as Vec
import qualified Gargantext.Database.Action.Metrics as Metrics
{-
......@@ -36,7 +36,7 @@ trainModel u = do
rootId <- _node_id <$> getRoot u
(id:ids) <- getCorporaWithParentId rootId
(s,_model) <- case length ids >0 of
True -> grid 100 150 (getMetrics
True -> grid 100 150 (getMetrics
False -> panic "Gargantext.Database.Lists.trainModel : not enough corpora"
--}
......@@ -51,9 +51,8 @@ getMetrics' cId maybeListId tabType maybeLimit = do
metrics = map (\(Scored t s1 s2) -> (listType t ngs', [Vec.fromList [s1,s2]])) scores
listType t m = maybe (panic errorMsg) fst $ HashMap.lookup t m
errorMsg = "API.Node.metrics: key absent"
{-
_ <- Learn.grid 100 110 metrics' metrics'
--}
pure $ Map.fromListWith (<>) $ Vec.toList metrics
......@@ -19,7 +19,7 @@ module Gargantext.Database.Action.Metrics.NgramsByContext
-- import Debug.Trace (trace)
--import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Set (Set)
import Data.Text (Text)
import Data.Tuple.Extra (first, second, swap)
......@@ -34,7 +34,7 @@ import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Database.PostgreSQL.Simple as DPS
import qualified Database.PostgreSQL.Simple.Types as DPST
......
......@@ -15,7 +15,7 @@ module Gargantext.Database.Action.Search where
import Control.Arrow (returnA)
import Control.Lens ((^.))
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import Data.Maybe
import qualified Data.Set as Set
import Data.Text (Text, unpack, intercalate)
......
......@@ -25,7 +25,7 @@ module Gargantext.Database.Query.Table.Ngrams
import Control.Lens ((^.))
import Data.ByteString.Internal (ByteString)
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Text (Text)
import Gargantext.Core.Types
import Gargantext.Database.Prelude (runOpaQuery, Cmd, formatPGSQuery, runPGSQuery)
......@@ -39,7 +39,7 @@ import Gargantext.Database.Types
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Database.PostgreSQL.Simple as PGS
queryNgramsTable :: Select NgramsRead
......
{-|
Module : Gargantext.Database.Query.Table.NodeNgrams
Description :
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
......@@ -18,7 +18,7 @@ NodeNgrams register Context of Ngrams (named Cgrams then)
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Table.NodeNgrams
module Gargantext.Database.Query.Table.NodeNgrams
( getCgramsId
, listInsertDb
, module Gargantext.Database.Schema.NodeNgrams
......@@ -27,7 +27,7 @@ module Gargantext.Database.Query.Table.NodeNgrams
where
import Data.List.Extra (nubOrd)
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Gargantext.Core
......@@ -38,7 +38,7 @@ import Gargantext.Database.Schema.NodeNgrams
import Gargantext.Database.Schema.Prelude (Select, FromRow, sql, fromRow, toField, field, Values(..), QualifiedIdentifier(..), selectTable)
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Database.PostgreSQL.Simple as PGS (Query, Only(..))
......
......@@ -43,7 +43,7 @@ import Control.Lens (view, toListOf, at, each, _Just, to, set, makeLenses)
import Control.Monad.Error.Class (MonadError())
import Data.List (tail, concat, nub)
import qualified Data.List as List
import Data.Map (Map, fromListWith, lookup)
import Data.Map.Strict (Map, fromListWith, lookup)
-- import Data.Monoid (mconcat)
import Data.Proxy
-- import qualified Data.Set as Set
......
......@@ -27,7 +27,7 @@ import Data.Aeson
import Data.Aeson.Types (toJSONKeyText)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Map (fromList, lookup)
import Data.Map.Strict (fromList, lookup)
import Data.Maybe (fromMaybe)
import Data.Text (Text, splitOn, pack, strip)
import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..))
......
......@@ -6,11 +6,11 @@ import Control.Concurrent.Async
import Control.Concurrent.STM
import Control.Exception
import Control.Monad
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Time.Clock
import Prelude
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import Gargantext.Utils.Jobs.Settings
......
......@@ -9,7 +9,7 @@ import Gargantext.Utils.Jobs.State
import Control.Concurrent.STM
import Control.Exception
import Control.Monad.Except
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Time.Clock
import Network.HTTP.Client (Manager)
import Prelude
......
......@@ -8,14 +8,14 @@ import Control.Concurrent.Async
import Control.Concurrent.STM
import Control.Monad
import Data.List
import Data.Map (Map)
import Data.Map.Strict (Map)
import Data.Maybe
import Data.Ord
import Data.Proxy
import Data.Time.Clock
import Prelude
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Servant.Job.Core as SJ
import qualified Servant.Job.Types as SJ
......
......@@ -19,8 +19,8 @@ import Data.Aeson (encode, ToJSON, toJSON, FromJSON, parseJSON, Value(..), (.:),
import Data.Aeson.Types (prependFailure, typeMismatch)
import Data.Aeson.TH (deriveJSON)
import qualified Data.List.Safe as LS
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Maybe (fromMaybe)
import Data.Text hiding (map, group, filter, concat, zip)
import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
......
......@@ -2,7 +2,7 @@ module Gargantext.Utils.Servant where
import qualified Data.ByteString.Lazy.Char8 as BSC
import Data.Csv (defaultEncodeOptions, encodeByNameWith, encodeDefaultOrderedByName, header, namedRecord, (.=), DefaultOrdered, EncodeOptions(..), NamedRecord, Quoting(QuoteNone), ToNamedRecord)
import qualified Data.Map as Map
import qualified Data.Map.Strict as Map
import qualified Data.Text as T
import Gargantext.API.Ngrams.Types (mSetToList, NgramsRepoElement(..), NgramsTableMap, NgramsTerm(..), unNgramsTerm)
import Gargantext.Core.Types.Main (ListType(..))
......