Commit 87a8bd2c authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-phylo' into dev-merge

parents ed3661a1 36c913d9
...@@ -22,8 +22,11 @@ Phylo binaries ...@@ -22,8 +22,11 @@ Phylo binaries
module Main where module Main where
import System.Directory (doesFileExist)
import Data.Aeson import Data.Aeson
import Data.Text (Text, unwords) import Data.Text (Text, unwords)
import Data.List ((++))
import GHC.Generics import GHC.Generics
import GHC.IO (FilePath) import GHC.IO (FilePath)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -42,12 +45,9 @@ import Gargantext.Viz.Phylo.LevelMaker ...@@ -42,12 +45,9 @@ import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.View.Export import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.View.ViewMaker import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Database.Types.Node import Gargantext.Database.Types.Node
import Data.Maybe import Data.Maybe
import qualified Data.Map as DM import qualified Data.Map as DM
import qualified Data.Vector as DV import qualified Data.Vector as DV
import qualified Data.List as DL import qualified Data.List as DL
...@@ -62,6 +62,7 @@ import qualified Data.ByteString.Lazy as L ...@@ -62,6 +62,7 @@ import qualified Data.ByteString.Lazy as L
type ListPath = FilePath type ListPath = FilePath
type FisPath = FilePath
type CorpusPath = FilePath type CorpusPath = FilePath
data CorpusType = Wos | Csv deriving (Show,Generic) data CorpusType = Wos | Csv deriving (Show,Generic)
type Limit = Int type Limit = Int
...@@ -70,13 +71,18 @@ data Conf = ...@@ -70,13 +71,18 @@ data Conf =
Conf { corpusPath :: CorpusPath Conf { corpusPath :: CorpusPath
, corpusType :: CorpusType , corpusType :: CorpusType
, listPath :: ListPath , listPath :: ListPath
, fisPath :: FilePath
, outputPath :: FilePath , outputPath :: FilePath
, phyloName :: Text , phyloName :: Text
, limit :: Limit , limit :: Limit
, timeGrain :: Int , timeGrain :: Int
, timeStep :: Int , timeStep :: Int
, timeFrame :: Int
, timeFrameTh :: Double
, timeTh :: Double , timeTh :: Double
, timeSens :: Double , timeSens :: Double
, reBranchThr :: Double
, reBranchNth :: Int
, clusterTh :: Double , clusterTh :: Double
, clusterSens :: Double , clusterSens :: Double
, phyloLevel :: Int , phyloLevel :: Int
...@@ -92,6 +98,11 @@ instance ToJSON Conf ...@@ -92,6 +98,11 @@ instance ToJSON Conf
instance FromJSON CorpusType instance FromJSON CorpusType
instance ToJSON CorpusType instance ToJSON CorpusType
decoder :: P.Either a b -> b
decoder (P.Left _) = P.error "Error"
decoder (P.Right x) = x
-- | Get the conf from a Json file -- | Get the conf from a Json file
getJson :: FilePath -> IO L.ByteString getJson :: FilePath -> IO L.ByteString
getJson path = L.readFile path getJson path = L.readFile path
...@@ -115,7 +126,9 @@ filterTerms patterns (y,d) = (y,termsInText patterns d) ...@@ -115,7 +126,9 @@ filterTerms patterns (y,d) = (y,termsInText patterns d)
-- | To transform a Csv nfile into a readable corpus -- | To transform a Csv nfile into a readable corpus
csvToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)]) csvToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList csvToCorpus limit csv = DV.toList
-- . DV.reverse
. DV.take limit . DV.take limit
-- . DV.reverse
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n))) . DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> CSV.readFile csv . snd <$> CSV.readFile csv
...@@ -146,6 +159,25 @@ parse format limit path l = do ...@@ -146,6 +159,25 @@ parse format limit path l = do
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
-- | To parse an existing Fis file
parseFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> IO [PhyloFis]
parseFis path name grain step support clique = do
fisExists <- doesFileExist (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")
if fisExists
then do
fisJson <- (eitherDecode <$> getJson (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")) :: IO (P.Either P.String [PhyloFis])
case fisJson of
P.Left err -> do
putStrLn err
pure []
P.Right fis -> pure fis
else pure []
writeFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> DM.Map (Date,Date) [PhyloFis] -> IO ()
writeFis path name grain step support clique fis = do
let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json"
L.writeFile fisPath $ encode (DL.concat $ DM.elems fis)
-------------- --------------
-- | Main | -- -- | Main | --
-------------- --------------
...@@ -166,17 +198,28 @@ main = do ...@@ -166,17 +198,28 @@ main = do
corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
let roots = DL.nub $ DL.concat $ map text corpus let roots = DL.nub $ DL.concat $ map text corpus
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs") putStrLn $ ("\n" <> show (length roots) <> " parsed foundation roots")
fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf)
putStrLn $ ("\n" <> show (length fis) <> " parsed fis")
let mFis = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf) let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (phyloLevel conf) (Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (timeFrame conf) (timeFrameTh conf)
(reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf)) (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let phylo = toPhylo query corpus roots termList let phylo = toPhylo query corpus roots termList mFis
writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo)
let view = toPhyloView queryView phylo let view = toPhyloView queryView phylo
......
...@@ -106,6 +106,7 @@ library: ...@@ -106,6 +106,7 @@ library:
- crawlerIsidore - crawlerIsidore
- crawlerHAL - crawlerHAL
- data-time-segment - data-time-segment
- deepseq
- directory - directory
- duckling - duckling
- exceptions - exceptions
...@@ -143,6 +144,7 @@ library: ...@@ -143,6 +144,7 @@ library:
- natural-transformation - natural-transformation
- opaleye - opaleye
- pandoc - pandoc
- parallel
- parsec - parsec
- patches-class - patches-class
- patches-map - patches-map
...@@ -268,6 +270,7 @@ executables: ...@@ -268,6 +270,7 @@ executables:
- base - base
- bytestring - bytestring
- containers - containers
- directory
- gargantext - gargantext
- vector - vector
- parallel - parallel
......
...@@ -22,7 +22,7 @@ one 8, e54847. ...@@ -22,7 +22,7 @@ one 8, e54847.
-} -}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE MultiParamTypeClasses #-}
...@@ -44,6 +44,8 @@ import Gargantext.Core.Utils.Prefix (unPrefix) ...@@ -44,6 +44,8 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
import Gargantext.Prelude import Gargantext.Prelude
import Control.DeepSeq
-------------------- --------------------
-- | PhyloParam | -- -- | PhyloParam | --
-------------------- --------------------
...@@ -77,6 +79,9 @@ data Phylo = ...@@ -77,6 +79,9 @@ data Phylo =
Phylo { _phylo_duration :: (Start, End) Phylo { _phylo_duration :: (Start, End)
, _phylo_foundations :: PhyloFoundations , _phylo_foundations :: PhyloFoundations
, _phylo_periods :: [PhyloPeriod] , _phylo_periods :: [PhyloPeriod]
, _phylo_docsByYears :: Map Date Double
, _phylo_cooc :: Map Date (Map (Int,Int) Double)
, _phylo_fis :: Map (Date,Date) [PhyloFis]
, _phylo_param :: PhyloParam , _phylo_param :: PhyloParam
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
...@@ -150,6 +155,7 @@ data PhyloGroup = ...@@ -150,6 +155,7 @@ data PhyloGroup =
, _phylo_groupNgrams :: [Int] , _phylo_groupNgrams :: [Int]
, _phylo_groupMeta :: Map Text Double , _phylo_groupMeta :: Map Text Double
, _phylo_groupBranchId :: Maybe PhyloBranchId , _phylo_groupBranchId :: Maybe PhyloBranchId
, _phylo_groupCooc :: Map (Int,Int) Double
, _phylo_groupPeriodParents :: [Pointer] , _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer] , _phylo_groupPeriodChilds :: [Pointer]
...@@ -157,7 +163,9 @@ data PhyloGroup = ...@@ -157,7 +163,9 @@ data PhyloGroup =
, _phylo_groupLevelParents :: [Pointer] , _phylo_groupLevelParents :: [Pointer]
, _phylo_groupLevelChilds :: [Pointer] , _phylo_groupLevelChilds :: [Pointer]
} }
deriving (Generic, Show, Eq, Ord) deriving (Generic, NFData, Show, Eq, Ord)
-- instance NFData PhyloGroup
-- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster) -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
...@@ -199,8 +207,8 @@ type Support = Int ...@@ -199,8 +207,8 @@ type Support = Int
data PhyloFis = PhyloFis data PhyloFis = PhyloFis
{ _phyloFis_clique :: Clique { _phyloFis_clique :: Clique
, _phyloFis_support :: Support , _phyloFis_support :: Support
, _phyloFis_metrics :: Map (Int,Int) (Map Text [Double]) , _phyloFis_period :: (Date,Date)
} deriving (Show) } deriving (Generic,Show,Eq)
-- | A list of clustered PhyloGroup -- | A list of clustered PhyloGroup
type PhyloCluster = [PhyloGroup] type PhyloCluster = [PhyloGroup]
...@@ -343,6 +351,11 @@ data PhyloQueryBuild = PhyloQueryBuild ...@@ -343,6 +351,11 @@ data PhyloQueryBuild = PhyloQueryBuild
-- Inter-temporal matching method of the Phylo -- Inter-temporal matching method of the Phylo
, _q_interTemporalMatching :: Proximity , _q_interTemporalMatching :: Proximity
, _q_interTemporalMatchingFrame :: Int
, _q_interTemporalMatchingFrameTh :: Double
, _q_reBranchThr :: Double
, _q_reBranchNth :: Int
-- Last level of reconstruction -- Last level of reconstruction
, _q_nthLevel :: Level , _q_nthLevel :: Level
......
...@@ -26,6 +26,7 @@ module Gargantext.Viz.Phylo.API ...@@ -26,6 +26,7 @@ module Gargantext.Viz.Phylo.API
--import Control.Monad.Reader (ask) --import Control.Monad.Reader (ask)
import Data.Text (Text) import Data.Text (Text)
import Data.Map (empty)
import Data.Swagger import Data.Swagger
import Gargantext.API.Types import Gargantext.API.Types
import Gargantext.Database.Types.Node (PhyloId, ListId, CorpusId) import Gargantext.Database.Types.Node (PhyloId, ListId, CorpusId)
...@@ -104,7 +105,7 @@ postPhylo _n _lId q = do ...@@ -104,7 +105,7 @@ postPhylo _n _lId q = do
vrs = Just ("1" :: Text) vrs = Just ("1" :: Text)
sft = Just (Software "Gargantext" "4") sft = Just (Software "Gargantext" "4")
prm = initPhyloParam vrs sft (Just q) prm = initPhyloParam vrs sft (Just q)
pure (toPhyloBase q prm corpus actants termList) pure (toPhyloBase q prm corpus actants termList empty)
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -139,6 +140,7 @@ instance ToSchema LouvainParams ...@@ -139,6 +140,7 @@ instance ToSchema LouvainParams
instance ToSchema Metric instance ToSchema Metric
instance ToSchema Order instance ToSchema Order
instance ToSchema Phylo instance ToSchema Phylo
instance ToSchema PhyloFis
instance ToSchema PhyloBranch instance ToSchema PhyloBranch
instance ToSchema PhyloEdge instance ToSchema PhyloEdge
instance ToSchema PhyloGroup instance ToSchema PhyloGroup
......
...@@ -13,11 +13,13 @@ Portability : POSIX ...@@ -13,11 +13,13 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Viz.Phylo.Aggregates.Cluster module Gargantext.Viz.Phylo.Aggregates.Cluster
where where
import Data.List (null,tail,concat,sort,intersect) import Control.Parallel.Strategies
import Data.List (null,concat,sort,intersect,(++))
import Data.Map (Map) import Data.Map (Map)
import Data.Tuple (fst) import Data.Tuple (fst)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -25,7 +27,7 @@ import Gargantext.Viz.Phylo ...@@ -25,7 +27,7 @@ import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Metrics.Clustering import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc import Gargantext.Viz.Phylo.LinkMaker
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS import qualified Data.Vector.Storable as VS
...@@ -44,19 +46,19 @@ getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g ...@@ -44,19 +46,19 @@ getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g
graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster] graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
graphToClusters clust (nodes,edges) = case clust of graphToClusters clust (nodes,edges) = case clust of
Louvain (LouvainParams _) -> undefined Louvain (LouvainParams _) -> undefined
RelatedComponents (RCParams _) -> relatedComp 0 (head' "graphToClusters" nodes) (tail nodes,edges) [] [] RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
_ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented" _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a list of PhyloGroups into a Graph of Proximity -- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> Phylo -> ([GroupNode],[GroupEdge]) groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs cooc p = case prox of groupsToGraph nbDocs prox gs = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
$ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc))) $ getCandidates gs
$ getCandidates gs) candidates' = candidates `using` parList rdeepseq
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc))) in candidates' )
$ getCandidates gs) Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
_ -> undefined _ -> undefined
-- | To filter a Graph of Proximity using a given threshold -- | To filter a Graph of Proximity using a given threshold
...@@ -80,9 +82,11 @@ phyloToClusters lvl clus p = Map.fromList ...@@ -80,9 +82,11 @@ phyloToClusters lvl clus p = Map.fromList
graphs' = traceGraphFiltered lvl graphs' = traceGraphFiltered lvl
$ map (\g -> filterGraph prox g) graphs $ map (\g -> filterGraph prox g) graphs
-------------------------------------- --------------------------------------
graphs :: [([GroupNode],[GroupEdge])] graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox) graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p) p) periods $ let gs = map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
gs' = gs `using` parList rdeepseq
in gs'
-------------------------------------- --------------------------------------
prox :: Proximity prox :: Proximity
prox = getProximity clus prox = getProximity clus
...@@ -100,7 +104,6 @@ phyloToClusters lvl clus p = Map.fromList ...@@ -100,7 +104,6 @@ phyloToClusters lvl clus p = Map.fromList
traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])] traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n" traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n" <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
<> show (lst) <> "\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) " <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) " <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) " <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
...@@ -118,9 +121,3 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> ...@@ -118,9 +121,3 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where where
lst = sort $ map snd $ concat $ map snd g lst = sort $ map snd $ concat $ map snd g
traceSim :: PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> Map (Int, Int) Double -> Phylo -> Double -> Double
traceSim g g' _ _ p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n"
-- <> show (c) <> " [vs] " <> show (c') <> " = " <> show (sim)
) sim
...@@ -17,14 +17,17 @@ Portability : POSIX ...@@ -17,14 +17,17 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Cooc module Gargantext.Viz.Phylo.Aggregates.Cooc
where where
import Data.List (union,concat,nub) import Data.List (union,concat,nub,sort, sortOn)
import Data.Map (Map,elems,adjust,filterWithKey) import Data.Map (Map,elems,adjust,filterWithKey,fromListWith,fromList,restrictKeys)
import Data.Set (Set)
import Data.Vector (Vector)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
-- import Debug.Trace (trace)
-- | To transform the Fis into a full coocurency Matrix in a Phylo -- | To transform the Fis into a full coocurency Matrix in a Phylo
fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
...@@ -83,5 +86,49 @@ getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g) ...@@ -83,5 +86,49 @@ getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)
-------------------------------------- --------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc :: [Int] -> Map (Int,Int) Double
listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns v = sort $ map (\n -> getIdxInVector n v) ns
-- | To build the cooc matrix by years out of the corpus
docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
docsToCooc docs fdt = fromListWith sumCooc
$ map (\(d,l) -> (d, listToCooc l))
$ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
-- | To sum all the docs produced during a list of years
sumDocsByYears :: Set Date -> Map Date Double -> Double
sumDocsByYears years m = sum $ elems $ restrictKeys m years
-- | To get the cooc matrix of a group
groupToCooc :: PhyloGroup -> Phylo -> Map (Int,Int) Double
groupToCooc g p = getMiniCooc (listToFullCombi $ getGroupNgrams g) (periodsToYears [getGroupPeriod g]) (getPhyloCooc p)
-- | To get the union of the cooc matrix of two groups
unionOfCooc :: PhyloGroup -> PhyloGroup -> Phylo -> Map (Int,Int) Double
unionOfCooc g g' p = sumCooc (groupToCooc g p) (groupToCooc g' p)
-- | To get the nth most occurent elems in a coocurency matrix
getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
getNthMostOcc nth cooc = (nub . concat)
$ map (\((idx,idx'),_) -> [idx,idx'])
$ take nth
$ reverse
$ sortOn snd $ Map.toList cooc
-- phyloCooc :: Map (Int, Int) Double -- phyloCooc :: Map (Int, Int) Double
-- phyloCooc = fisToCooc phyloFis phylo1_0_1 -- phyloCooc = fisToCooc phyloFis phylo1_0_1
...@@ -17,8 +17,7 @@ Portability : POSIX ...@@ -17,8 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Document module Gargantext.Viz.Phylo.Aggregates.Document
where where
import Data.List (last) import Data.Map (Map,fromListWith)
import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple (fst) import Data.Tuple (fst)
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -29,23 +28,25 @@ import qualified Data.List as List ...@@ -29,23 +28,25 @@ import qualified Data.List as List
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import Debug.Trace (trace)
-- | To init a list of Periods framed by a starting Date and an ending Date -- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)] initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last l)) initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
$ chunkAlong g s [start .. end] $ chunkAlong g s [start .. end]
-- | To group a list of Documents by fixed periods -- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods" groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds groupDocsByPeriod f pds es = trace ("----\nGroup docs by periods\n") $ Map.fromList $ zip pds $ map (inPeriode f es) pds
where where
-------------------------------------- --------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t] inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) = inPeriode f' h (start,end) =
fst $ List.partition (\d -> f' d >= start && f' d <= end) h fst $ List.partition (\d -> f' d >= start && f' d <= end) h
-------------------------------------- --------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams -- | To parse a list of Documents by filtering on a Vector of Ngrams
...@@ -54,4 +55,10 @@ parseDocs roots c = map (\(d,t) ...@@ -54,4 +55,10 @@ parseDocs roots c = map (\(d,t)
-> Document d ( filter (\x -> Vector.elem x roots) -> Document d ( filter (\x -> Vector.elem x roots)
$ monoTexts t)) c $ monoTexts t)) c
-- | To count the number of documents by year
countDocs :: [(Date,a)] -> Map Date Double
countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
...@@ -17,14 +17,16 @@ Portability : POSIX ...@@ -17,14 +17,16 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Fis module Gargantext.Viz.Phylo.Aggregates.Fis
where where
import Data.List (null,concat,sort) import Control.Lens hiding (makeLenses, both, Level)
import Data.Map (Map, empty,elems) import Data.List (null,concat,sort,(++))
import Data.Map (Map,elems,mapWithKey,unionWith,fromList,keys)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Data.Set (size) import Data.Set (size)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Vector.Storable as Vector import qualified Data.Vector.Storable as Vector
...@@ -43,12 +45,12 @@ filterFis keep thr f m = case keep of ...@@ -43,12 +45,12 @@ filterFis keep thr f m = case keep of
-- | To filter Fis with small Support -- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis] filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> getSupport fis > thr) l filterFisBySupport thr l = filter (\fis -> getSupport fis >= thr) l
-- | To filter Fis with small Clique size -- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis] filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) > thr) l filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
-- | To filter nested Fis -- | To filter nested Fis
...@@ -57,38 +59,23 @@ filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ m ...@@ -57,38 +59,23 @@ filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ m
in filter (\fis -> elem (getClique fis) cliqueMax) l) in filter (\fis -> elem (getClique fis) cliqueMax) l)
-- | To transform a list of Documents into a Frequent Items Set docsToFis' :: Map (Date,Date) [Document] -> Phylo -> Phylo
docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis] docsToFis' m p = if (null $ getPhyloFis p)
docsToFis docs = map (\d -> let fs = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text d) then trace("----\nRebuild the Fis from scratch\n")
in map (\f -> PhyloFis (fst f) (snd f) empty) fs) docs $ p & phylo_fis .~ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
else trace("----\nUse Fis from an existing file\n")
-- | To process a list of Filters on top of the PhyloFis $ p & phylo_fis %~ (unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m))
processFilters :: [Filter] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processFilters filters phyloFis
| null filters = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processFilters] please add some filters for the Fis"
-- | To process a list of Metrics on top of the PhyloFis
processMetrics :: [Metric] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processMetrics metrics phyloFis
| null metrics = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processMetrics] please add some metrics for the Fis"
-- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters toPhyloFis' :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis] toPhyloFis' fis k s t = traceFis "----\nFiltered Fis by clique size :\n"
toPhyloFis ds k s t ms fs = processFilters fs $ filterFis k t (filterFisByClique)
$ processMetrics ms $ traceFis "----\nFiltered Fis by nested :\n"
$ traceFis "----\nFiltered Fis by clique size :\n" $ filterFisByNested
$ filterFis k t (filterFisByClique) $ traceFis "----\nFiltered Fis by support :\n"
$ traceFis "----\nFiltered Fis by nested :\n" $ filterFis k s (filterFisBySupport)
$ filterFisByNested $ traceFis "----\nUnfiltered Fis :\n" fis
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n"
$ docsToFis ds
----------------- -----------------
......
...@@ -17,45 +17,135 @@ Portability : POSIX ...@@ -17,45 +17,135 @@ Portability : POSIX
module Gargantext.Viz.Phylo.BranchMaker module Gargantext.Viz.Phylo.BranchMaker
where where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List (concat,nub,(++),tail) import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union,delete)
import Data.Map (Map,(!), fromListWith, elems)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Metrics.Clustering import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
---------------------------
-- | Readability links | --
---------------------------
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
graphToBranches :: Level -> GroupGraph -> Phylo -> [(Int,PhyloGroupId)] getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
graphToBranches _lvl (nodes,edges) _p = concat
$ map (\(idx,gs) -> map (\g -> (idx,getGroupId g)) gs) getFramedPeriod :: [PhyloGroup] -> (Date,Date)
$ zip [1..] getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
$ relatedComp 0 (head' "branchMaker" nodes) (tail nodes,edges) [] []
getGroupsNgrams :: [PhyloGroup] -> [Int]
getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
-- | Process a Jaccard on top of two set of Branch Peaks
areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
/ ((fromIntegral . length) $ union ns ns')) >= thr
-- | Get the framing period of a branch ([[PhyloGroup]])
getBranchPeriod :: [PhyloGroup] -> (Date,Date)
getBranchPeriod gs =
let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
-- | Get the Nth most coocurent Ngrams in a list of Groups
getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
getGroupsPeaks gs nth p = getNthMostOcc nth
$ getSubCooc (getGroupsNgrams gs)
$ getCooc (getGroupsPeriods gs) p
-- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
(getGroupsPeaks gs (getPhyloReBranchNth p) p)
(getGroupsPeaks gs' (getPhyloReBranchNth p) p))
&& ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
&& (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
) branches
-- | To build a graph using the parents and childs pointers
makeGraph :: [PhyloGroup] -> Phylo -> GroupGraph -- | Try to connect a focused branch to other candidate branches by finding the best pointers
makeGraph gs p = (gs,edges) reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
reBranch p branch candidates =
let newLinks = map (\branch' ->
let pointers = map (\g ->
-- define pairs of candidates groups
let pairs = listToPairs
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
-- process the matching between the pairs and the current group
in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
in if (g2 == g3)
then mem ++ [(getGroupId g,(getGroupId g2,s))]
else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
) branch
pointers' = pointers `using` parList rdeepseq
-- keep the best pointer between the focused branch and the current candidates
in head' "reBranch" $ reverse $ sortOn (snd . snd)
$ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
) candidates
newLinks' = newLinks `using` parList rdeepseq
in newLinks'
reLinkPhyloBranches :: Level -> Phylo -> Phylo
reLinkPhyloBranches lvl p =
let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
$ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
([],branches) branches
in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
where where
edges :: [GroupEdge] branches :: [[PhyloGroup]]
edges = (nub . concat) branches = elems
$ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p) $ fromListWith (++)
++ $ foldl' (\mem g -> case getGroupBranchId g of
(map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) gs Nothing -> mem
Just i -> mem ++ [(i,[g])] )
[] $ getGroupsWithLevel lvl p
------------------
-- | Branches | --
------------------
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
graphToBranches groups = Map.fromList
$ concat
$ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
$ zip [1..]
$ relatedComp
$ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
-- | To set all the PhyloBranches for a given Level in a Phylo -- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches :: Level -> Phylo -> Phylo setPhyloBranches :: Level -> Phylo -> Phylo
setPhyloBranches lvl p = alterGroupWithLevel (\g -> let bIdx = (fst $ head' "branchMaker" $ filter (\b -> snd b == getGroupId g) bs) setPhyloBranches lvl p = alterGroupWithLevel (\g ->
in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p let bIdx = branches ! (getGroupId g)
where in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
-------------------------------------- where
bs :: [(Int,PhyloGroupId)]
bs = graphToBranches lvl graph p
-------------------------------------- --------------------------------------
graph :: GroupGraph branches :: Map PhyloGroupId Int
graph = makeGraph (getGroupsWithLevel lvl p) p branches = graphToBranches (getGroupsWithLevel lvl p)
-------------------------------------- --------------------------------------
-- trace' bs = trace bs
\ No newline at end of file
...@@ -31,7 +31,7 @@ module Gargantext.Viz.Phylo.Example where ...@@ -31,7 +31,7 @@ module Gargantext.Viz.Phylo.Example where
import Data.GraphViz.Types.Generalised (DotGraph) import Data.GraphViz.Types.Generalised (DotGraph)
import Data.Text (Text) import Data.Text (Text)
import Data.List ((++), last) import Data.List ((++), last)
import Data.Map (Map) import Data.Map (Map,empty)
import Data.Tuple (fst) import Data.Tuple (fst)
import Data.Tuple.Extra import Data.Tuple.Extra
import Data.Vector (Vector) import Data.Vector (Vector)
...@@ -40,6 +40,7 @@ import Gargantext.Text.Context (TermList) ...@@ -40,6 +40,7 @@ import Gargantext.Text.Context (TermList)
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Aggregates.Cluster import Gargantext.Viz.Phylo.Aggregates.Cluster
import Gargantext.Viz.Phylo.Aggregates.Document import Gargantext.Viz.Phylo.Aggregates.Document
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Aggregates.Fis import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LevelMaker import Gargantext.Viz.Phylo.LevelMaker
...@@ -55,7 +56,7 @@ import qualified Data.List as List ...@@ -55,7 +56,7 @@ import qualified Data.List as List
------------------------------------------------------ ------------------------------------------------------
export :: IO () export :: IO ()
export = dotToFile "/home/qlobbe/data/epique/output/cesar_cleopatre.dot" phyloDot export = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView phyloDot = viewToDot phyloView
...@@ -77,7 +78,7 @@ queryViewEx = "level=3" ...@@ -77,7 +78,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 1 Merge False 1 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True phyloQueryView = PhyloQueryView 2 Merge False 1 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
-------------------------------------------------- --------------------------------------------------
...@@ -86,7 +87,7 @@ phyloQueryView = PhyloQueryView 1 Merge False 1 [BranchAge] [] [BranchPeakFreq,G ...@@ -86,7 +87,7 @@ phyloQueryView = PhyloQueryView 1 Merge False 1 [BranchAge] [] [BranchPeakFreq,G
phyloFromQuery :: Phylo phyloFromQuery :: Phylo
phyloFromQuery = toPhylo (queryParser queryEx) corpus actants termList phyloFromQuery = toPhylo (queryParser queryEx) corpus actants termList empty
-- | To do : create a request handler and a query parser -- | To do : create a request handler and a query parser
queryParser :: [Char] -> PhyloQueryBuild queryParser :: [Char] -> PhyloQueryBuild
...@@ -104,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre" ...@@ -104,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.13 0) 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.6 20) 5 0.8 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.4 0)
...@@ -154,7 +155,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1 ...@@ -154,7 +155,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster :: Map (Date,Date) [PhyloCluster] phyloCluster :: Map (Date,Date) [PhyloCluster]
phyloCluster = phyloToClusters 1 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1 phyloCluster = phyloToClusters 3 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
---------------------------------- ----------------------------------
...@@ -204,9 +205,12 @@ phylo1 = addPhyloLevel (1) phyloFis phylo ...@@ -204,9 +205,12 @@ phylo1 = addPhyloLevel (1) phyloFis phylo
phyloFis :: Map (Date, Date) [PhyloFis] phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = filterFis True 1 (filterFisByClique) phyloFis = filterFis True 1 (filterFisByClique)
$ filterFisByNested $ filterFisByNested
$ filterFis True 1 (filterFisBySupport) (docsToFis phyloDocs) $ filterFis True 1 (filterFisBySupport) (getPhyloFis phylo')
phylo' :: Phylo
phylo' = docsToFis' phyloDocs phylo
---------------------------------------- ----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0 -- | STEP 2 | -- Init a Phylo of level 0
---------------------------------------- ----------------------------------------
...@@ -226,7 +230,13 @@ phyloDocs = corpusToDocs corpus phyloBase ...@@ -226,7 +230,13 @@ phyloDocs = corpusToDocs corpus phyloBase
phyloBase :: Phylo phyloBase :: Phylo
phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) defaultPhyloParam phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) nbDocs cooc empty defaultPhyloParam
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc (parseDocs foundationsRoots corpus) foundationsRoots
nbDocs :: Map Date Double
nbDocs = countDocs corpus
periods :: [(Date,Date)] periods :: [(Date,Date)]
periods = initPeriods 5 3 periods = initPeriods 5 3
......
This diff is collapsed.
This diff is collapsed.
...@@ -18,36 +18,23 @@ module Gargantext.Viz.Phylo.Metrics.Clustering ...@@ -18,36 +18,23 @@ module Gargantext.Viz.Phylo.Metrics.Clustering
where where
import Data.Graph.Clustering.Louvain.CplusPlus import Data.Graph.Clustering.Louvain.CplusPlus
import Data.List (last,concat,null,nub,(++),init,tail,elemIndex,groupBy,(!!)) import Data.List (concat,null,nub,(++),elemIndex,groupBy,(!!), (\\), union, intersect)
import Data.Map (fromList,mapKeys) import Data.Map (fromList,mapKeys)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools -- import Gargantext.Viz.Phylo.Tools
-- | To apply the related components method to a PhyloGraph -- import Debug.Trace (trace)
-- curr = the current PhyloGroup
-- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup relatedComp :: Eq a => [[a]] -> [[a]]
-- next = the next PhyloGroups to be added in the cluster relatedComp graphs = foldl' (\mem groups ->
-- memo = the memory of the allready created clusters if (null mem)
relatedComp :: Int -> PhyloGroup -> GroupGraph -> [PhyloGroup] -> [[PhyloGroup]] -> [[PhyloGroup]] then mem ++ [groups]
relatedComp idx curr (nodes,edges) next memo else
| null nodes' && null next' = memo' let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
| (not . null) next' = relatedComp idx (head' "relatedComp1" next' ) (nodes' ,edges) (tail next') memo' in if (null related)
| otherwise = relatedComp (idx + 1) (head' "relatedComp2" nodes') (tail nodes',edges) [] memo' then mem ++ [groups]
where else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
--------------------------------------
memo' :: [[PhyloGroup]]
memo'
| null memo = [[curr]]
| idx == ((length memo) - 1) = (init memo) ++ [(last memo) ++ [curr]]
| otherwise = memo ++ [[curr]]
--------------------------------------
next' :: [PhyloGroup]
next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
--------------------------------------
nodes' :: [PhyloGroup]
nodes' = filter (\x -> not $ elem x next') nodes
--------------------------------------
louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]] louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
......
...@@ -17,33 +17,81 @@ Portability : POSIX ...@@ -17,33 +17,81 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Metrics.Proximity module Gargantext.Viz.Phylo.Metrics.Proximity
where where
import Data.List (null) import Data.List (null,union,intersect)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size) import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size,filterWithKey)
import Gargantext.Prelude import Gargantext.Prelude
import Debug.Trace (trace) -- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields sumInvLog :: Double -> [Double] -> Double
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
weightedLogJaccard s f1 f2
| null wUnion = 0 sumLog :: Double -> [Double] -> Double
| wUnion == wInter = 1 sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
| s == 0 = trace ("==0") $ (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
| s > 0 = trace (">0") $ (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion) -- -- | To process WeighedLogJaccard distance between to coocurency matrix
where -- weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
-- weightedLogJaccard sens cooc cooc' nbDocs
-- | null union' = 0
-- | union' == inter' = 1
-- | sens == 0 = (fromIntegral $ length $ keysInter) / (fromIntegral $ length $ keysUnion)
-- | sens > 0 = (sumInvLog sens $ elems wInter) / (sumInvLog sens $ elems wUnion)
-- | otherwise = (sumLog sens $ elems wInter) / (sumLog sens $ elems wUnion)
-- where
-- --------------------------------------
-- keysInter :: [Int]
-- keysInter = nub $ concat $ map (\(x,x') -> [x,x']) $ keys inter'
-- --------------------------------------
-- keysUnion :: [Int]
-- keysUnion = nub $ concat $ map (\(x,x') -> [x,x']) $ keys union'
-- --------------------------------------
-- wInter :: Map (Int,Int) Double
-- wInter = map (/nbDocs) inter'
-- --------------------------------------
-- wUnion :: Map (Int,Int) Double
-- wUnion = map (/nbDocs) union'
-- --------------------------------------
-- inter' :: Map (Int, Int) Double
-- inter' = intersectionWith (+) cooc cooc'
-- --------------------------------------
-- union' :: Map (Int, Int) Double
-- union' = unionWith (+) cooc cooc'
-- --------------------------------------
-- | To compute a jaccard similarity between two lists
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | To get the diagonal of a matrix
toDiago :: Map (Int, Int) Double -> [Double]
toDiago cooc = elems $ filterWithKey (\(x,x') _ -> x == x') cooc
-- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard :: Double -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
| null gInter = 0
| gInter == gUnion = 1
| sens == 0 = jaccard gInter gUnion
| sens > 0 = (sumInvLog sens wInter) / (sumInvLog sens wUnion)
| otherwise = (sumLog sens wInter) / (sumLog sens wUnion)
where
--------------------------------------
gInter :: [Int]
gInter = intersect ngrams ngrams'
--------------------------------------
gUnion :: [Int]
gUnion = union ngrams ngrams'
-------------------------------------- --------------------------------------
wInter :: [Double] wInter :: [Double]
wInter = elems $ intersectionWith (+) f1 f2 wInter = toDiago $ map (/nbDocs) $ intersectionWith (+) cooc cooc'
-------------------------------------- --------------------------------------
wUnion :: [Double] wUnion :: [Double]
wUnion = elems $ unionWith (+) f1 f2 wUnion = toDiago $ map (/nbDocs) $ unionWith (+) cooc cooc'
--------------------------------------
sumInvLog :: [Double] -> Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
-------------------------------------- --------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields -- | To process the Hamming distance between two PhyloGroup fields
......
...@@ -20,9 +20,9 @@ module Gargantext.Viz.Phylo.Tools ...@@ -20,9 +20,9 @@ module Gargantext.Viz.Phylo.Tools
where where
import Control.Lens hiding (both, Level, Empty) import Control.Lens hiding (both, Level, Empty)
import Data.List (filter, intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy) import Data.List (filter, intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy, concat)
import Data.Maybe (mapMaybe,fromMaybe) import Data.Maybe (mapMaybe,fromMaybe)
import Data.Map (Map, mapKeys, member, (!)) import Data.Map (Map, mapKeys, member, (!), restrictKeys, elems, empty, filterWithKey, unionWith)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text,toLower,unwords) import Data.Text (Text,toLower,unwords)
import Data.Tuple.Extra import Data.Tuple.Extra
...@@ -110,6 +110,13 @@ listToDirectedCombi :: Eq a => [a] -> [(a,a)] ...@@ -110,6 +110,13 @@ listToDirectedCombi :: Eq a => [a] -> [(a,a)]
listToDirectedCombi l = [(x,y) | x <- l, y <- l, x /= y] listToDirectedCombi l = [(x,y) | x <- l, y <- l, x /= y]
listToEqualCombi :: Eq a => [a] -> [(a,a)]
listToEqualCombi l = [(x,y) | x <- l, y <- l, x == y]
listToPairs :: Eq a => [a] -> [(a,a)]
listToPairs l = (listToEqualCombi l) ++ (listToUnDirectedCombi l)
-- | To get all combinations of a list and apply a function to the resulting list of pairs -- | To get all combinations of a list and apply a function to the resulting list of pairs
listToDirectedCombiWith :: Eq a => forall b. (a -> b) -> [a] -> [(b,b)] listToDirectedCombiWith :: Eq a => forall b. (a -> b) -> [a] -> [(b,b)]
listToDirectedCombiWith f l = [(f x,f y) | x <- l, y <- l, x /= y] listToDirectedCombiWith f l = [(f x,f y) | x <- l, y <- l, x /= y]
...@@ -160,8 +167,8 @@ initFoundationsRoots :: [Ngrams] -> Vector Ngrams ...@@ -160,8 +167,8 @@ initFoundationsRoots :: [Ngrams] -> Vector Ngrams
initFoundationsRoots l = Vector.fromList $ map phyloAnalyzer l initFoundationsRoots l = Vector.fromList $ map phyloAnalyzer l
-- | To init the base of a Phylo from a List of Periods and Foundations -- | To init the base of a Phylo from a List of Periods and Foundations
initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> PhyloParam -> Phylo initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> Map Date Double -> Map Date (Map (Int,Int) Double) -> Map (Date,Date) [PhyloFis] -> PhyloParam -> Phylo
initPhyloBase pds fds prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) prm initPhyloBase pds fds nbDocs cooc fis prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) nbDocs cooc fis prm
-- | To init the param of a Phylo -- | To init the param of a Phylo
initPhyloParam :: Maybe Text -> Maybe Software -> Maybe PhyloQueryBuild -> PhyloParam initPhyloParam :: Maybe Text -> Maybe Software -> Maybe PhyloQueryBuild -> PhyloParam
...@@ -175,6 +182,41 @@ getLastLevel p = (last . sort) ...@@ -175,6 +182,41 @@ getLastLevel p = (last . sort)
. traverse . traverse
. phylo_periodLevels ) p . phylo_periodLevels ) p
-- | To get all the coocurency matrix of a phylo
getPhyloCooc :: Phylo -> Map Date (Map (Int,Int) Double)
getPhyloCooc p = p ^. phylo_cooc
-- | To get the PhyloParam of a Phylo
getPhyloParams :: Phylo -> PhyloParam
getPhyloParams = _phylo_param
-- | To get the title of a Phylo
getPhyloTitle :: Phylo -> Text
getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-- | To get the desc of a Phylo
getPhyloDescription :: Phylo -> Text
getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrame :: Phylo -> Int
getPhyloMatchingFrame p = _q_interTemporalMatchingFrame $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrameTh :: Phylo -> Double
getPhyloMatchingFrameTh p = _q_interTemporalMatchingFrameTh $ _phyloParam_query $ getPhyloParams p
getPhyloProximity :: Phylo -> Proximity
getPhyloProximity p = _q_interTemporalMatching $ _phyloParam_query $ getPhyloParams p
getPhyloReBranchThr :: Phylo -> Double
getPhyloReBranchThr p = _q_reBranchThr $ _phyloParam_query $ getPhyloParams p
getPhyloReBranchNth :: Phylo -> Int
getPhyloReBranchNth p = _q_reBranchNth $ _phyloParam_query $ getPhyloParams p
getPhyloFis :: Phylo -> Map (Date,Date) [PhyloFis]
getPhyloFis = _phylo_fis
-------------------- --------------------
-- | PhyloRoots | -- -- | PhyloRoots | --
...@@ -194,6 +236,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of ...@@ -194,6 +236,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots" Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx Just idx -> idx
getIdxInVector :: Ngrams -> Vector Ngrams -> Int
getIdxInVector n ns = case (elemIndex n ns) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx
-------------------- --------------------
-- | PhyloGroup | -- -- | PhyloGroup | --
-------------------- --------------------
...@@ -209,7 +256,7 @@ alterGroupWithLevel f lvl p = over ( phylo_periods ...@@ -209,7 +256,7 @@ alterGroupWithLevel f lvl p = over ( phylo_periods
. traverse . traverse
) (\g -> if getGroupLevel g == lvl ) (\g -> if getGroupLevel g == lvl
then f g then f g
else g ) p else g ) p
-- | To alter each list of PhyloGroups following a given function -- | To alter each list of PhyloGroups following a given function
...@@ -242,6 +289,10 @@ getGroupId :: PhyloGroup -> PhyloGroupId ...@@ -242,6 +289,10 @@ getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId = _phylo_groupId getGroupId = _phylo_groupId
getGroupCooc :: PhyloGroup -> Map (Int,Int) Double
getGroupCooc = _phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup -- | To get the level out of the id of a PhyloGroup
getGroupLevel :: PhyloGroup -> Int getGroupLevel :: PhyloGroup -> Int
getGroupLevel = snd . fst . getGroupId getGroupLevel = snd . fst . getGroupId
...@@ -344,13 +395,19 @@ getGroups = view ( phylo_periods ...@@ -344,13 +395,19 @@ getGroups = view ( phylo_periods
) )
-- | To get all PhyloGroups matching a list of PhyloGroupIds in a Phylo -- | To get all PhyloGroups matching a list of PhyloGoupIds in a Phylo
getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup] -- getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
getGroupsFromIds ids p = filter (\g -> elem (getGroupId g) ids) $ getGroups p -- getGroupsFromIds ids p = filter (\g -> elem (getGroupId g) ids) $ getGroups p
-- | To get a PhyloGroup matching a PhyloGroupId in a Phylo
getGroupFromId :: PhyloGroupId -> Phylo -> PhyloGroup getGroupFromId :: PhyloGroupId -> Phylo -> PhyloGroup
getGroupFromId id p = (head' "getGroupFromId") $ getGroupsFromIds [id] p getGroupFromId id p =
let groups = Map.fromList $ map (\g -> (getGroupId g, g)) $ getGroups p
in groups ! id
getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
getGroupsFromIds ids p =
let groups = Map.fromList $ map (\g -> (getGroupId g, g)) $ getGroups p
in elems $ restrictKeys groups (Set.fromList ids)
-- | To get the corresponding list of PhyloGroups from a list of PhyloNodes -- | To get the corresponding list of PhyloGroups from a list of PhyloNodes
...@@ -380,10 +437,29 @@ initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup ...@@ -380,10 +437,29 @@ initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup
initGroup ngrams lbl idx lvl from' to' p = PhyloGroup initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
(((from', to'), lvl), idx) (((from', to'), lvl), idx)
lbl lbl
(sort $ map (\x -> getIdxInRoots x p) ngrams) idxs
(Map.empty) (Map.empty)
Nothing Nothing
(getMiniCooc (listToFullCombi idxs) (periodsToYears [(from', to')]) (getPhyloCooc p))
[] [] [] [] [] [] [] []
where
idxs = sort $ map (\x -> getIdxInRoots x p) ngrams
-- | To sum two coocurency Matrix
sumCooc :: Map (Int, Int) Double -> Map (Int, Int) Double -> Map (Int, Int) Double
sumCooc m m' = unionWith (+) m m'
-- | To build the mini cooc matrix of each group
getMiniCooc :: [(Int,Int)] -> Set Date -> Map Date (Map (Int,Int) Double) -> Map (Int,Int) Double
getMiniCooc pairs years cooc = filterWithKey (\(n,n') _ -> elem (n,n') pairs) cooc'
where
--------------------------------------
cooc' :: Map (Int,Int) Double
cooc' = foldl (\m m' -> sumCooc m m') empty
$ elems
$ restrictKeys cooc years
--------------------------------------
--------------------- ---------------------
...@@ -418,6 +494,12 @@ initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod ...@@ -418,6 +494,12 @@ initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod
initPhyloPeriod id l = PhyloPeriod id l initPhyloPeriod id l = PhyloPeriod id l
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat)
$ map (\(d,d') -> [d..d']) periods
-------------------- --------------------
-- | PhyloLevel | -- -- | PhyloLevel | --
-------------------- --------------------
...@@ -464,14 +546,14 @@ setPhyloLevelId lvl' (PhyloLevel (id, _lvl) groups) ...@@ -464,14 +546,14 @@ setPhyloLevelId lvl' (PhyloLevel (id, _lvl) groups)
getClique :: PhyloFis -> Clique getClique :: PhyloFis -> Clique
getClique = _phyloFis_clique getClique = _phyloFis_clique
-- | To get the metrics of a PhyloFis
getFisMetrics :: PhyloFis -> Map (Int,Int) (Map Text [Double])
getFisMetrics = _phyloFis_metrics
-- | To get the support of a PhyloFis -- | To get the support of a PhyloFis
getSupport :: PhyloFis -> Support getSupport :: PhyloFis -> Support
getSupport = _phyloFis_support getSupport = _phyloFis_support
-- | To get the period of a PhyloFis
getFisPeriod :: PhyloFis -> (Date,Date)
getFisPeriod = _phyloFis_period
---------------------------- ----------------------------
-- | PhyloNodes & Edges | -- -- | PhyloNodes & Edges | --
...@@ -737,11 +819,10 @@ initWeightedLogJaccard (def 0 -> thr) (def 0.01 -> sens) = WLJParams thr sens ...@@ -737,11 +819,10 @@ initWeightedLogJaccard (def 0 -> thr) (def 0.01 -> sens) = WLJParams thr sens
-- | To initialize a PhyloQueryBuild from given and default parameters -- | To initialize a PhyloQueryBuild from given and default parameters
initPhyloQueryBuild :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild initPhyloQueryBuild :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Int -> Maybe Double -> Maybe Double -> Maybe Int -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics) (def [] -> filters) initPhyloQueryBuild name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics) (def [] -> filters)
(def defaultWeightedLogJaccard -> matching') (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) = (def defaultWeightedLogJaccard -> matching') (def 5 -> frame) (def 0.8 -> frameThr) (def 0.5 -> reBranchThr) (def 4 -> reBranchNth) (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain steps cluster metrics filters matching' nthLevel nthCluster PhyloQueryBuild name desc grain steps cluster metrics filters matching' frame frameThr reBranchThr reBranchNth nthLevel nthCluster
-- | To initialize a PhyloQueryView default parameters -- | To initialize a PhyloQueryView default parameters
...@@ -794,7 +875,7 @@ defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing N ...@@ -794,7 +875,7 @@ defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing N
defaultQueryBuild :: PhyloQueryBuild defaultQueryBuild :: PhyloQueryBuild
defaultQueryBuild = initPhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" defaultQueryBuild = initPhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
defaultQueryView :: PhyloQueryView defaultQueryView :: PhyloQueryView
defaultQueryView = initPhyloQueryView Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing defaultQueryView = initPhyloQueryView Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
......
...@@ -157,7 +157,9 @@ setDotNode pn = node (toNodeDotId $ pn ^. pn_id) ...@@ -157,7 +157,9 @@ setDotNode pn = node (toNodeDotId $ pn ^. pn_id)
-- | To set an Edge -- | To set an Edge
setDotEdge :: PhyloEdge -> Dot DotId setDotEdge :: PhyloEdge -> Dot DotId
setDotEdge pe = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Black]] setDotEdge pe
| pe ^. pe_weight == 100 = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Red]]
| otherwise = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Black]]
-- | To set a Period Edge -- | To set a Period Edge
......
...@@ -71,7 +71,7 @@ filterSizeBranch min' v = cleanNodesEdges v v' ...@@ -71,7 +71,7 @@ filterSizeBranch min' v = cleanNodesEdges v v'
where where
-------------------------------------- --------------------------------------
v' :: PhyloView v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) > min')) v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) >= min'))
-------------------------------------- --------------------------------------
......
...@@ -26,8 +26,9 @@ import Data.Map (Map) ...@@ -26,8 +26,9 @@ import Data.Map (Map)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Aggregates.Cooc import Gargantext.Viz.Phylo.BranchMaker
import qualified Data.Map as Map import qualified Data.Map as Map
-- import Debug.Trace (trace)
-- | To get the nth most frequent Ngrams in a list of PhyloGroups -- | To get the nth most frequent Ngrams in a list of PhyloGroups
...@@ -48,14 +49,15 @@ freqToLabel thr ngs l = ngramsToLabel ngs $ mostFreqNgrams thr l ...@@ -48,14 +49,15 @@ freqToLabel thr ngs l = ngramsToLabel ngs $ mostFreqNgrams thr l
-- | To get the (nth `div` 2) most cooccuring Ngrams in a PhyloGroup -- | To get the (nth `div` 2) most cooccuring Ngrams in a PhyloGroup
mostOccNgrams :: Int -> Phylo -> PhyloGroup -> [Int] mostOccNgrams :: Int -> PhyloGroup -> [Int]
mostOccNgrams thr p g = (nub . concat ) mostOccNgrams nth g = (nub . concat)
$ map (\((f,s),_d) -> [f,s]) $ map (\((f,s),_d) -> [f,s])
$ take (thr `div` 2) $ take nth
$ reverse $ sortOn snd $ Map.toList cooc $ reverse $ sortOn snd
$ Map.toList cooc
where where
cooc :: Map (Int, Int) Double cooc :: Map (Int, Int) Double
cooc = getSubCooc (getGroupNgrams g) $ getCooc [getGroupPeriod g] p cooc = getGroupCooc g
-- | To alter the peak of a PhyloBranch -- | To alter the peak of a PhyloBranch
...@@ -74,13 +76,18 @@ branchPeakFreq v thr p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v ...@@ -74,13 +76,18 @@ branchPeakFreq v thr p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ getGroupsFromNodes ns p)) $ getGroupsFromNodes ns p))
$ getNodesByBranches v $ getNodesByBranches v
branchPeakCooc :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ map (\(id,ns) -> (id, ngramsToLabel (getFoundationsRoots p) (getGroupsPeaks (getGroupsFromNodes ns p) nth p) ) )
$ getNodesByBranches v
-- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes -- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes
nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelCooc v thr p = over (pv_nodes nodeLabelCooc v thr p = over (pv_nodes
. traverse) . traverse)
(\n -> let lbl = ngramsToLabel (getFoundationsRoots p) (\n -> let lbl = ngramsToLabel (getFoundationsRoots p)
$ mostOccNgrams thr p $ mostOccNgrams thr
$ head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p $ head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p
in n & pn_label .~ lbl) v in n & pn_label .~ lbl) v
...@@ -89,6 +96,7 @@ nodeLabelCooc v thr p = over (pv_nodes ...@@ -89,6 +96,7 @@ nodeLabelCooc v thr p = over (pv_nodes
processTaggers :: [Tagger] -> Phylo -> PhyloView -> PhyloView processTaggers :: [Tagger] -> Phylo -> PhyloView -> PhyloView
processTaggers ts p v = foldl (\v' t -> case t of processTaggers ts p v = foldl (\v' t -> case t of
BranchPeakFreq -> branchPeakFreq v' 2 p BranchPeakFreq -> branchPeakFreq v' 2 p
-- BranchPeakFreq -> branchPeakCooc v' 3 p
GroupLabelCooc -> nodeLabelCooc v' 2 p GroupLabelCooc -> nodeLabelCooc v' 2 p
_ -> panic "[ERR][Viz.Phylo.View.Taggers.processTaggers] tagger not found") v ts _ -> panic "[ERR][Viz.Phylo.View.Taggers.processTaggers] tagger not found") v ts
...@@ -153,19 +153,6 @@ toPhyloView q p = traceView ...@@ -153,19 +153,6 @@ toPhyloView q p = traceView
-- | To get the PhyloParam of a Phylo
getPhyloParams :: Phylo -> PhyloParam
getPhyloParams = _phylo_param
-- | To get the title of a Phylo
getPhyloTitle :: Phylo -> Text
getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-- | To get the desc of a Phylo
getPhyloDescription :: Phylo -> Text
getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
----------------- -----------------
-- | Taggers | -- -- | Taggers | --
----------------- -----------------
......
...@@ -43,6 +43,7 @@ extra-deps: ...@@ -43,6 +43,7 @@ extra-deps:
- KMP-0.1.0.2 - KMP-0.1.0.2
- accelerate-1.2.0.0 - accelerate-1.2.0.0
- aeson-lens-0.5.0.0 - aeson-lens-0.5.0.0
- deepseq-th-0.1.0.4
- duckling-0.1.3.0 - duckling-0.1.3.0
- full-text-search-0.2.1.4 - full-text-search-0.2.1.4
- fullstop-0.1.4 - fullstop-0.1.4
...@@ -59,3 +60,4 @@ extra-deps: ...@@ -59,3 +60,4 @@ extra-deps:
- stemmer-0.5.2 - stemmer-0.5.2
- time-units-1.0.0 - time-units-1.0.0
- validity-0.9.0.0 # patches-{map,class} - validity-0.9.0.0 # patches-{map,class}
- directory-1.3.1.5
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment