Commit 87a8bd2c authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-phylo' into dev-merge

parents ed3661a1 36c913d9
......@@ -22,8 +22,11 @@ Phylo binaries
module Main where
import System.Directory (doesFileExist)
import Data.Aeson
import Data.Text (Text, unwords)
import Data.List ((++))
import GHC.Generics
import GHC.IO (FilePath)
import Gargantext.Prelude
......@@ -42,12 +45,9 @@ import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Database.Types.Node
import Data.Maybe
import qualified Data.Map as DM
import qualified Data.Vector as DV
import qualified Data.List as DL
......@@ -62,6 +62,7 @@ import qualified Data.ByteString.Lazy as L
type ListPath = FilePath
type FisPath = FilePath
type CorpusPath = FilePath
data CorpusType = Wos | Csv deriving (Show,Generic)
type Limit = Int
......@@ -70,13 +71,18 @@ data Conf =
Conf { corpusPath :: CorpusPath
, corpusType :: CorpusType
, listPath :: ListPath
, fisPath :: FilePath
, outputPath :: FilePath
, phyloName :: Text
, limit :: Limit
, timeGrain :: Int
, timeStep :: Int
, timeFrame :: Int
, timeFrameTh :: Double
, timeTh :: Double
, timeSens :: Double
, reBranchThr :: Double
, reBranchNth :: Int
, clusterTh :: Double
, clusterSens :: Double
, phyloLevel :: Int
......@@ -92,6 +98,11 @@ instance ToJSON Conf
instance FromJSON CorpusType
instance ToJSON CorpusType
decoder :: P.Either a b -> b
decoder (P.Left _) = P.error "Error"
decoder (P.Right x) = x
-- | Get the conf from a Json file
getJson :: FilePath -> IO L.ByteString
getJson path = L.readFile path
......@@ -115,7 +126,9 @@ filterTerms patterns (y,d) = (y,termsInText patterns d)
-- | To transform a Csv nfile into a readable corpus
csvToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList
-- . DV.reverse
. DV.take limit
-- . DV.reverse
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> CSV.readFile csv
......@@ -146,6 +159,25 @@ parse format limit path l = do
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
-- | To parse an existing Fis file
parseFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> IO [PhyloFis]
parseFis path name grain step support clique = do
fisExists <- doesFileExist (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")
if fisExists
then do
fisJson <- (eitherDecode <$> getJson (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")) :: IO (P.Either P.String [PhyloFis])
case fisJson of
P.Left err -> do
putStrLn err
pure []
P.Right fis -> pure fis
else pure []
writeFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> DM.Map (Date,Date) [PhyloFis] -> IO ()
writeFis path name grain step support clique fis = do
let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json"
L.writeFile fisPath $ encode (DL.concat $ DM.elems fis)
--------------
-- | Main | --
--------------
......@@ -166,17 +198,28 @@ main = do
corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
let roots = DL.nub $ DL.concat $ map text corpus
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
putStrLn $ ("\n" <> show (length roots) <> " parsed foundation roots")
fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf)
putStrLn $ ("\n" <> show (length fis) <> " parsed fis")
let mFis = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (phyloLevel conf)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (timeFrame conf) (timeFrameTh conf)
(reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let phylo = toPhylo query corpus roots termList
let phylo = toPhylo query corpus roots termList mFis
writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo)
let view = toPhyloView queryView phylo
......
......@@ -106,6 +106,7 @@ library:
- crawlerIsidore
- crawlerHAL
- data-time-segment
- deepseq
- directory
- duckling
- exceptions
......@@ -143,6 +144,7 @@ library:
- natural-transformation
- opaleye
- pandoc
- parallel
- parsec
- patches-class
- patches-map
......@@ -268,6 +270,7 @@ executables:
- base
- bytestring
- containers
- directory
- gargantext
- vector
- parallel
......
......@@ -22,7 +22,7 @@ one 8, e54847.
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MultiParamTypeClasses #-}
......@@ -44,6 +44,8 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Text.Context (TermList)
import Gargantext.Prelude
import Control.DeepSeq
--------------------
-- | PhyloParam | --
--------------------
......@@ -77,6 +79,9 @@ data Phylo =
Phylo { _phylo_duration :: (Start, End)
, _phylo_foundations :: PhyloFoundations
, _phylo_periods :: [PhyloPeriod]
, _phylo_docsByYears :: Map Date Double
, _phylo_cooc :: Map Date (Map (Int,Int) Double)
, _phylo_fis :: Map (Date,Date) [PhyloFis]
, _phylo_param :: PhyloParam
}
deriving (Generic, Show, Eq)
......@@ -150,6 +155,7 @@ data PhyloGroup =
, _phylo_groupNgrams :: [Int]
, _phylo_groupMeta :: Map Text Double
, _phylo_groupBranchId :: Maybe PhyloBranchId
, _phylo_groupCooc :: Map (Int,Int) Double
, _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer]
......@@ -157,7 +163,9 @@ data PhyloGroup =
, _phylo_groupLevelParents :: [Pointer]
, _phylo_groupLevelChilds :: [Pointer]
}
deriving (Generic, Show, Eq, Ord)
deriving (Generic, NFData, Show, Eq, Ord)
-- instance NFData PhyloGroup
-- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
......@@ -199,8 +207,8 @@ type Support = Int
data PhyloFis = PhyloFis
{ _phyloFis_clique :: Clique
, _phyloFis_support :: Support
, _phyloFis_metrics :: Map (Int,Int) (Map Text [Double])
} deriving (Show)
, _phyloFis_period :: (Date,Date)
} deriving (Generic,Show,Eq)
-- | A list of clustered PhyloGroup
type PhyloCluster = [PhyloGroup]
......@@ -343,6 +351,11 @@ data PhyloQueryBuild = PhyloQueryBuild
-- Inter-temporal matching method of the Phylo
, _q_interTemporalMatching :: Proximity
, _q_interTemporalMatchingFrame :: Int
, _q_interTemporalMatchingFrameTh :: Double
, _q_reBranchThr :: Double
, _q_reBranchNth :: Int
-- Last level of reconstruction
, _q_nthLevel :: Level
......
......@@ -26,6 +26,7 @@ module Gargantext.Viz.Phylo.API
--import Control.Monad.Reader (ask)
import Data.Text (Text)
import Data.Map (empty)
import Data.Swagger
import Gargantext.API.Types
import Gargantext.Database.Types.Node (PhyloId, ListId, CorpusId)
......@@ -104,7 +105,7 @@ postPhylo _n _lId q = do
vrs = Just ("1" :: Text)
sft = Just (Software "Gargantext" "4")
prm = initPhyloParam vrs sft (Just q)
pure (toPhyloBase q prm corpus actants termList)
pure (toPhyloBase q prm corpus actants termList empty)
------------------------------------------------------------------------
......@@ -139,6 +140,7 @@ instance ToSchema LouvainParams
instance ToSchema Metric
instance ToSchema Order
instance ToSchema Phylo
instance ToSchema PhyloFis
instance ToSchema PhyloBranch
instance ToSchema PhyloEdge
instance ToSchema PhyloGroup
......
......@@ -13,11 +13,13 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Viz.Phylo.Aggregates.Cluster
where
import Data.List (null,tail,concat,sort,intersect)
import Control.Parallel.Strategies
import Data.List (null,concat,sort,intersect,(++))
import Data.Map (Map)
import Data.Tuple (fst)
import Gargantext.Prelude
......@@ -25,7 +27,7 @@ import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS
......@@ -44,19 +46,19 @@ getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g
graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
graphToClusters clust (nodes,edges) = case clust of
Louvain (LouvainParams _) -> undefined
RelatedComponents (RCParams _) -> relatedComp 0 (head' "graphToClusters" nodes) (tail nodes,edges) [] []
RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
_ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> Phylo -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs cooc p = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p
$ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ getCandidates gs)
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ getCandidates gs)
_ -> undefined
groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
groupsToGraph nbDocs prox gs = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
$ getCandidates gs
candidates' = candidates `using` parList rdeepseq
in candidates' )
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
_ -> undefined
-- | To filter a Graph of Proximity using a given threshold
......@@ -80,9 +82,11 @@ phyloToClusters lvl clus p = Map.fromList
graphs' = traceGraphFiltered lvl
$ map (\g -> filterGraph prox g) graphs
--------------------------------------
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p) p) periods
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ let gs = map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
gs' = gs `using` parList rdeepseq
in gs'
--------------------------------------
prox :: Proximity
prox = getProximity clus
......@@ -100,7 +104,6 @@ phyloToClusters lvl clus p = Map.fromList
traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
<> show (lst) <> "\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
......@@ -118,9 +121,3 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where
lst = sort $ map snd $ concat $ map snd g
traceSim :: PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> Map (Int, Int) Double -> Phylo -> Double -> Double
traceSim g g' _ _ p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n"
-- <> show (c) <> " [vs] " <> show (c') <> " = " <> show (sim)
) sim
......@@ -17,14 +17,17 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Cooc
where
import Data.List (union,concat,nub)
import Data.Map (Map,elems,adjust,filterWithKey)
import Data.List (union,concat,nub,sort, sortOn)
import Data.Map (Map,elems,adjust,filterWithKey,fromListWith,fromList,restrictKeys)
import Data.Set (Set)
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.Map as Map
import qualified Data.Set as Set
-- import Debug.Trace (trace)
-- | To transform the Fis into a full coocurency Matrix in a Phylo
fisToCooc :: Map (Date, Date) [PhyloFis] -> Phylo -> Map (Int, Int) Double
......@@ -83,5 +86,49 @@ getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc :: [Int] -> Map (Int,Int) Double
listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns v = sort $ map (\n -> getIdxInVector n v) ns
-- | To build the cooc matrix by years out of the corpus
docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
docsToCooc docs fdt = fromListWith sumCooc
$ map (\(d,l) -> (d, listToCooc l))
$ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
-- | To sum all the docs produced during a list of years
sumDocsByYears :: Set Date -> Map Date Double -> Double
sumDocsByYears years m = sum $ elems $ restrictKeys m years
-- | To get the cooc matrix of a group
groupToCooc :: PhyloGroup -> Phylo -> Map (Int,Int) Double
groupToCooc g p = getMiniCooc (listToFullCombi $ getGroupNgrams g) (periodsToYears [getGroupPeriod g]) (getPhyloCooc p)
-- | To get the union of the cooc matrix of two groups
unionOfCooc :: PhyloGroup -> PhyloGroup -> Phylo -> Map (Int,Int) Double
unionOfCooc g g' p = sumCooc (groupToCooc g p) (groupToCooc g' p)
-- | To get the nth most occurent elems in a coocurency matrix
getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
getNthMostOcc nth cooc = (nub . concat)
$ map (\((idx,idx'),_) -> [idx,idx'])
$ take nth
$ reverse
$ sortOn snd $ Map.toList cooc
-- phyloCooc :: Map (Int, Int) Double
-- phyloCooc = fisToCooc phyloFis phylo1_0_1
......@@ -17,8 +17,7 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Document
where
import Data.List (last)
import Data.Map (Map)
import Data.Map (Map,fromListWith)
import Data.Text (Text)
import Data.Tuple (fst)
import Data.Vector (Vector)
......@@ -29,23 +28,25 @@ import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Vector as Vector
import Debug.Trace (trace)
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last l))
initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
$ chunkAlong g s [start .. end]
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
groupDocsByPeriod f pds es = trace ("----\nGroup docs by periods\n") $ Map.fromList $ zip pds $ map (inPeriode f es) pds
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ List.partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
......@@ -54,4 +55,10 @@ parseDocs roots c = map (\(d,t)
-> Document d ( filter (\x -> Vector.elem x roots)
$ monoTexts t)) c
-- | To count the number of documents by year
countDocs :: [(Date,a)] -> Map Date Double
countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
......@@ -17,14 +17,16 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Fis
where
import Data.List (null,concat,sort)
import Data.Map (Map, empty,elems)
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (null,concat,sort,(++))
import Data.Map (Map,elems,mapWithKey,unionWith,fromList,keys)
import Data.Tuple (fst, snd)
import Data.Set (size)
import Gargantext.Prelude
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector.Storable as Vector
......@@ -43,12 +45,12 @@ filterFis keep thr f m = case keep of
-- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> getSupport fis > thr) l
filterFisBySupport thr l = filter (\fis -> getSupport fis >= thr) l
-- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) > thr) l
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
-- | To filter nested Fis
......@@ -57,38 +59,23 @@ filterFisByNested = map (\l -> let cliqueMax = filterNestedSets (head' "Fis" $ m
in filter (\fis -> elem (getClique fis) cliqueMax) l)
-- | To transform a list of Documents into a Frequent Items Set
docsToFis :: Map (Date, Date) [Document] -> Map (Date, Date) [PhyloFis]
docsToFis docs = map (\d -> let fs = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text d)
in map (\f -> PhyloFis (fst f) (snd f) empty) fs) docs
-- | To process a list of Filters on top of the PhyloFis
processFilters :: [Filter] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processFilters filters phyloFis
| null filters = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processFilters] please add some filters for the Fis"
-- | To process a list of Metrics on top of the PhyloFis
processMetrics :: [Metric] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
processMetrics metrics phyloFis
| null metrics = phyloFis
| otherwise = panic "[ERR][Viz.Phylo.LevelMaker.processMetrics] please add some metrics for the Fis"
docsToFis' :: Map (Date,Date) [Document] -> Phylo -> Phylo
docsToFis' m p = if (null $ getPhyloFis p)
then trace("----\nRebuild the Fis from scratch\n")
$ p & phylo_fis .~ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
else trace("----\nUse Fis from an existing file\n")
$ p & phylo_fis %~ (unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m))
-- | To transform some Documents into PhyloFis and apply a List of Metrics and Filters
toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis ds k s t ms fs = processFilters fs
$ processMetrics ms
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n"
$ docsToFis ds
toPhyloFis' :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
toPhyloFis' fis k s t = traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n" fis
-----------------
......
......@@ -17,45 +17,135 @@ Portability : POSIX
module Gargantext.Viz.Phylo.BranchMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List (concat,nub,(++),tail)
import Data.List (concat,nub,(++),tail,sortOn,take,reverse,sort,null,intersect,union,delete)
import Data.Map (Map,(!), fromListWith, elems)
import Data.Tuple (fst, snd)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
-- import Debug.Trace (trace)
---------------------------
-- | Readability links | --
---------------------------
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: Level -> GroupGraph -> Phylo -> [(Int,PhyloGroupId)]
graphToBranches _lvl (nodes,edges) _p = concat
$ map (\(idx,gs) -> map (\g -> (idx,getGroupId g)) gs)
$ zip [1..]
$ relatedComp 0 (head' "branchMaker" nodes) (tail nodes,edges) [] []
getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
getFramedPeriod :: [PhyloGroup] -> (Date,Date)
getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
getGroupsNgrams :: [PhyloGroup] -> [Int]
getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
-- | Process a Jaccard on top of two set of Branch Peaks
areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
/ ((fromIntegral . length) $ union ns ns')) >= thr
-- | Get the framing period of a branch ([[PhyloGroup]])
getBranchPeriod :: [PhyloGroup] -> (Date,Date)
getBranchPeriod gs =
let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
-- | Get the Nth most coocurent Ngrams in a list of Groups
getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
getGroupsPeaks gs nth p = getNthMostOcc nth
$ getSubCooc (getGroupsNgrams gs)
$ getCooc (getGroupsPeriods gs) p
-- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
(getGroupsPeaks gs (getPhyloReBranchNth p) p)
(getGroupsPeaks gs' (getPhyloReBranchNth p) p))
&& ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
&& (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
) branches
-- | To build a graph using the parents and childs pointers
makeGraph :: [PhyloGroup] -> Phylo -> GroupGraph
makeGraph gs p = (gs,edges)
-- | Try to connect a focused branch to other candidate branches by finding the best pointers
reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
reBranch p branch candidates =
let newLinks = map (\branch' ->
let pointers = map (\g ->
-- define pairs of candidates groups
let pairs = listToPairs
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
-- process the matching between the pairs and the current group
in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
in if (g2 == g3)
then mem ++ [(getGroupId g,(getGroupId g2,s))]
else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
) branch
pointers' = pointers `using` parList rdeepseq
-- keep the best pointer between the focused branch and the current candidates
in head' "reBranch" $ reverse $ sortOn (snd . snd)
$ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
) candidates
newLinks' = newLinks `using` parList rdeepseq
in newLinks'
reLinkPhyloBranches :: Level -> Phylo -> Phylo
reLinkPhyloBranches lvl p =
let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
$ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
([],branches) branches
in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
where
edges :: [GroupEdge]
edges = (nub . concat)
$ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
++
(map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) gs
branches :: [[PhyloGroup]]
branches = elems
$ fromListWith (++)
$ foldl' (\mem g -> case getGroupBranchId g of
Nothing -> mem
Just i -> mem ++ [(i,[g])] )
[] $ getGroupsWithLevel lvl p
------------------
-- | Branches | --
------------------
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
graphToBranches groups = Map.fromList
$ concat
$ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
$ zip [1..]
$ relatedComp
$ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
-- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches :: Level -> Phylo -> Phylo
setPhyloBranches lvl p = alterGroupWithLevel (\g -> let bIdx = (fst $ head' "branchMaker" $ filter (\b -> snd b == getGroupId g) bs)
in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
where
--------------------------------------
bs :: [(Int,PhyloGroupId)]
bs = graphToBranches lvl graph p
setPhyloBranches lvl p = alterGroupWithLevel (\g ->
let bIdx = branches ! (getGroupId g)
in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
where
--------------------------------------
graph :: GroupGraph
graph = makeGraph (getGroupsWithLevel lvl p) p
branches :: Map PhyloGroupId Int
branches = graphToBranches (getGroupsWithLevel lvl p)
--------------------------------------
-- trace' bs = trace bs
\ No newline at end of file
......@@ -31,7 +31,7 @@ module Gargantext.Viz.Phylo.Example where
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.Text (Text)
import Data.List ((++), last)
import Data.Map (Map)
import Data.Map (Map,empty)
import Data.Tuple (fst)
import Data.Tuple.Extra
import Data.Vector (Vector)
......@@ -40,6 +40,7 @@ import Gargantext.Text.Context (TermList)
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Aggregates.Cluster
import Gargantext.Viz.Phylo.Aggregates.Document
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.Aggregates.Fis
import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LevelMaker
......@@ -55,7 +56,7 @@ import qualified Data.List as List
------------------------------------------------------
export :: IO ()
export = dotToFile "/home/qlobbe/data/epique/output/cesar_cleopatre.dot" phyloDot
export = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView
......@@ -77,7 +78,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 1 Merge False 1 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
phyloQueryView = PhyloQueryView 2 Merge False 1 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
--------------------------------------------------
......@@ -86,7 +87,7 @@ phyloQueryView = PhyloQueryView 1 Merge False 1 [BranchAge] [] [BranchPeakFreq,G
phyloFromQuery :: Phylo
phyloFromQuery = toPhylo (queryParser queryEx) corpus actants termList
phyloFromQuery = toPhylo (queryParser queryEx) corpus actants termList empty
-- | To do : create a request handler and a query parser
queryParser :: [Char] -> PhyloQueryBuild
......@@ -104,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.13 0)
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.6 20) 5 0.8 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.4 0)
......@@ -154,7 +155,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster :: Map (Date,Date) [PhyloCluster]
phyloCluster = phyloToClusters 1 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
phyloCluster = phyloToClusters 3 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
----------------------------------
......@@ -204,9 +205,12 @@ phylo1 = addPhyloLevel (1) phyloFis phylo
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = filterFis True 1 (filterFisByClique)
$ filterFisByNested
$ filterFis True 1 (filterFisBySupport) (docsToFis phyloDocs)
$ filterFis True 1 (filterFisBySupport) (getPhyloFis phylo')
phylo' :: Phylo
phylo' = docsToFis' phyloDocs phylo
----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0
----------------------------------------
......@@ -226,7 +230,13 @@ phyloDocs = corpusToDocs corpus phyloBase
phyloBase :: Phylo
phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) defaultPhyloParam
phyloBase = initPhyloBase periods (PhyloFoundations foundationsRoots termList) nbDocs cooc empty defaultPhyloParam
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc (parseDocs foundationsRoots corpus) foundationsRoots
nbDocs :: Map Date Double
nbDocs = countDocs corpus
periods :: [(Date,Date)]
periods = initPeriods 5 3
......
This diff is collapsed.
This diff is collapsed.
......@@ -18,36 +18,23 @@ module Gargantext.Viz.Phylo.Metrics.Clustering
where
import Data.Graph.Clustering.Louvain.CplusPlus
import Data.List (last,concat,null,nub,(++),init,tail,elemIndex,groupBy,(!!))
import Data.List (concat,null,nub,(++),elemIndex,groupBy,(!!), (\\), union, intersect)
import Data.Map (fromList,mapKeys)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
-- | To apply the related components method to a PhyloGraph
-- curr = the current PhyloGroup
-- (nodes,edges) = the initial PhyloGraph minus the current PhyloGroup
-- next = the next PhyloGroups to be added in the cluster
-- memo = the memory of the allready created clusters
relatedComp :: Int -> PhyloGroup -> GroupGraph -> [PhyloGroup] -> [[PhyloGroup]] -> [[PhyloGroup]]
relatedComp idx curr (nodes,edges) next memo
| null nodes' && null next' = memo'
| (not . null) next' = relatedComp idx (head' "relatedComp1" next' ) (nodes' ,edges) (tail next') memo'
| otherwise = relatedComp (idx + 1) (head' "relatedComp2" nodes') (tail nodes',edges) [] memo'
where
--------------------------------------
memo' :: [[PhyloGroup]]
memo'
| null memo = [[curr]]
| idx == ((length memo) - 1) = (init memo) ++ [(last memo) ++ [curr]]
| otherwise = memo ++ [[curr]]
--------------------------------------
next' :: [PhyloGroup]
next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
--------------------------------------
nodes' :: [PhyloGroup]
nodes' = filter (\x -> not $ elem x next') nodes
--------------------------------------
-- import Gargantext.Viz.Phylo.Tools
-- import Debug.Trace (trace)
relatedComp :: Eq a => [[a]] -> [[a]]
relatedComp graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
......
......@@ -17,33 +17,81 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Metrics.Proximity
where
import Data.List (null)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size)
import Data.List (null,union,intersect)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size,filterWithKey)
import Gargantext.Prelude
import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
weightedLogJaccard s f1 f2
| null wUnion = 0
| wUnion == wInter = 1
| s == 0 = trace ("==0") $ (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
| s > 0 = trace (">0") $ (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion)
where
-- import Debug.Trace (trace)
sumInvLog :: Double -> [Double] -> Double
sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
sumLog :: Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
-- -- | To process WeighedLogJaccard distance between to coocurency matrix
-- weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
-- weightedLogJaccard sens cooc cooc' nbDocs
-- | null union' = 0
-- | union' == inter' = 1
-- | sens == 0 = (fromIntegral $ length $ keysInter) / (fromIntegral $ length $ keysUnion)
-- | sens > 0 = (sumInvLog sens $ elems wInter) / (sumInvLog sens $ elems wUnion)
-- | otherwise = (sumLog sens $ elems wInter) / (sumLog sens $ elems wUnion)
-- where
-- --------------------------------------
-- keysInter :: [Int]
-- keysInter = nub $ concat $ map (\(x,x') -> [x,x']) $ keys inter'
-- --------------------------------------
-- keysUnion :: [Int]
-- keysUnion = nub $ concat $ map (\(x,x') -> [x,x']) $ keys union'
-- --------------------------------------
-- wInter :: Map (Int,Int) Double
-- wInter = map (/nbDocs) inter'
-- --------------------------------------
-- wUnion :: Map (Int,Int) Double
-- wUnion = map (/nbDocs) union'
-- --------------------------------------
-- inter' :: Map (Int, Int) Double
-- inter' = intersectionWith (+) cooc cooc'
-- --------------------------------------
-- union' :: Map (Int, Int) Double
-- union' = unionWith (+) cooc cooc'
-- --------------------------------------
-- | To compute a jaccard similarity between two lists
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | To get the diagonal of a matrix
toDiago :: Map (Int, Int) Double -> [Double]
toDiago cooc = elems $ filterWithKey (\(x,x') _ -> x == x') cooc
-- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard :: Double -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
| null gInter = 0
| gInter == gUnion = 1
| sens == 0 = jaccard gInter gUnion
| sens > 0 = (sumInvLog sens wInter) / (sumInvLog sens wUnion)
| otherwise = (sumLog sens wInter) / (sumLog sens wUnion)
where
--------------------------------------
gInter :: [Int]
gInter = intersect ngrams ngrams'
--------------------------------------
gUnion :: [Int]
gUnion = union ngrams ngrams'
--------------------------------------
wInter :: [Double]
wInter = elems $ intersectionWith (+) f1 f2
wInter = toDiago $ map (/nbDocs) $ intersectionWith (+) cooc cooc'
--------------------------------------
wUnion :: [Double]
wUnion = elems $ unionWith (+) f1 f2
--------------------------------------
sumInvLog :: [Double] -> Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
wUnion = toDiago $ map (/nbDocs) $ unionWith (+) cooc cooc'
--------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
......
......@@ -20,9 +20,9 @@ module Gargantext.Viz.Phylo.Tools
where
import Control.Lens hiding (both, Level, Empty)
import Data.List (filter, intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy)
import Data.List (filter, intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy, concat)
import Data.Maybe (mapMaybe,fromMaybe)
import Data.Map (Map, mapKeys, member, (!))
import Data.Map (Map, mapKeys, member, (!), restrictKeys, elems, empty, filterWithKey, unionWith)
import Data.Set (Set)
import Data.Text (Text,toLower,unwords)
import Data.Tuple.Extra
......@@ -110,6 +110,13 @@ listToDirectedCombi :: Eq a => [a] -> [(a,a)]
listToDirectedCombi l = [(x,y) | x <- l, y <- l, x /= y]
listToEqualCombi :: Eq a => [a] -> [(a,a)]
listToEqualCombi l = [(x,y) | x <- l, y <- l, x == y]
listToPairs :: Eq a => [a] -> [(a,a)]
listToPairs l = (listToEqualCombi l) ++ (listToUnDirectedCombi l)
-- | To get all combinations of a list and apply a function to the resulting list of pairs
listToDirectedCombiWith :: Eq a => forall b. (a -> b) -> [a] -> [(b,b)]
listToDirectedCombiWith f l = [(f x,f y) | x <- l, y <- l, x /= y]
......@@ -160,8 +167,8 @@ initFoundationsRoots :: [Ngrams] -> Vector Ngrams
initFoundationsRoots l = Vector.fromList $ map phyloAnalyzer l
-- | To init the base of a Phylo from a List of Periods and Foundations
initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> PhyloParam -> Phylo
initPhyloBase pds fds prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) prm
initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> Map Date Double -> Map Date (Map (Int,Int) Double) -> Map (Date,Date) [PhyloFis] -> PhyloParam -> Phylo
initPhyloBase pds fds nbDocs cooc fis prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) nbDocs cooc fis prm
-- | To init the param of a Phylo
initPhyloParam :: Maybe Text -> Maybe Software -> Maybe PhyloQueryBuild -> PhyloParam
......@@ -175,6 +182,41 @@ getLastLevel p = (last . sort)
. traverse
. phylo_periodLevels ) p
-- | To get all the coocurency matrix of a phylo
getPhyloCooc :: Phylo -> Map Date (Map (Int,Int) Double)
getPhyloCooc p = p ^. phylo_cooc
-- | To get the PhyloParam of a Phylo
getPhyloParams :: Phylo -> PhyloParam
getPhyloParams = _phylo_param
-- | To get the title of a Phylo
getPhyloTitle :: Phylo -> Text
getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-- | To get the desc of a Phylo
getPhyloDescription :: Phylo -> Text
getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrame :: Phylo -> Int
getPhyloMatchingFrame p = _q_interTemporalMatchingFrame $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrameTh :: Phylo -> Double
getPhyloMatchingFrameTh p = _q_interTemporalMatchingFrameTh $ _phyloParam_query $ getPhyloParams p
getPhyloProximity :: Phylo -> Proximity
getPhyloProximity p = _q_interTemporalMatching $ _phyloParam_query $ getPhyloParams p
getPhyloReBranchThr :: Phylo -> Double
getPhyloReBranchThr p = _q_reBranchThr $ _phyloParam_query $ getPhyloParams p
getPhyloReBranchNth :: Phylo -> Int
getPhyloReBranchNth p = _q_reBranchNth $ _phyloParam_query $ getPhyloParams p
getPhyloFis :: Phylo -> Map (Date,Date) [PhyloFis]
getPhyloFis = _phylo_fis
--------------------
-- | PhyloRoots | --
......@@ -194,6 +236,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx
getIdxInVector :: Ngrams -> Vector Ngrams -> Int
getIdxInVector n ns = case (elemIndex n ns) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx
--------------------
-- | PhyloGroup | --
--------------------
......@@ -209,7 +256,7 @@ alterGroupWithLevel f lvl p = over ( phylo_periods
. traverse
) (\g -> if getGroupLevel g == lvl
then f g
else g ) p
else g ) p
-- | To alter each list of PhyloGroups following a given function
......@@ -242,6 +289,10 @@ getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId = _phylo_groupId
getGroupCooc :: PhyloGroup -> Map (Int,Int) Double
getGroupCooc = _phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup
getGroupLevel :: PhyloGroup -> Int
getGroupLevel = snd . fst . getGroupId
......@@ -344,13 +395,19 @@ getGroups = view ( phylo_periods
)
-- | To get all PhyloGroups matching a list of PhyloGroupIds in a Phylo
getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
getGroupsFromIds ids p = filter (\g -> elem (getGroupId g) ids) $ getGroups p
-- | To get all PhyloGroups matching a list of PhyloGoupIds in a Phylo
-- getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
-- getGroupsFromIds ids p = filter (\g -> elem (getGroupId g) ids) $ getGroups p
-- | To get a PhyloGroup matching a PhyloGroupId in a Phylo
getGroupFromId :: PhyloGroupId -> Phylo -> PhyloGroup
getGroupFromId id p = (head' "getGroupFromId") $ getGroupsFromIds [id] p
getGroupFromId id p =
let groups = Map.fromList $ map (\g -> (getGroupId g, g)) $ getGroups p
in groups ! id
getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
getGroupsFromIds ids p =
let groups = Map.fromList $ map (\g -> (getGroupId g, g)) $ getGroups p
in elems $ restrictKeys groups (Set.fromList ids)
-- | To get the corresponding list of PhyloGroups from a list of PhyloNodes
......@@ -380,10 +437,29 @@ initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup
initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
(((from', to'), lvl), idx)
lbl
(sort $ map (\x -> getIdxInRoots x p) ngrams)
idxs
(Map.empty)
Nothing
(getMiniCooc (listToFullCombi idxs) (periodsToYears [(from', to')]) (getPhyloCooc p))
[] [] [] []
where
idxs = sort $ map (\x -> getIdxInRoots x p) ngrams
-- | To sum two coocurency Matrix
sumCooc :: Map (Int, Int) Double -> Map (Int, Int) Double -> Map (Int, Int) Double
sumCooc m m' = unionWith (+) m m'
-- | To build the mini cooc matrix of each group
getMiniCooc :: [(Int,Int)] -> Set Date -> Map Date (Map (Int,Int) Double) -> Map (Int,Int) Double
getMiniCooc pairs years cooc = filterWithKey (\(n,n') _ -> elem (n,n') pairs) cooc'
where
--------------------------------------
cooc' :: Map (Int,Int) Double
cooc' = foldl (\m m' -> sumCooc m m') empty
$ elems
$ restrictKeys cooc years
--------------------------------------
---------------------
......@@ -418,6 +494,12 @@ initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod
initPhyloPeriod id l = PhyloPeriod id l
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat)
$ map (\(d,d') -> [d..d']) periods
--------------------
-- | PhyloLevel | --
--------------------
......@@ -464,14 +546,14 @@ setPhyloLevelId lvl' (PhyloLevel (id, _lvl) groups)
getClique :: PhyloFis -> Clique
getClique = _phyloFis_clique
-- | To get the metrics of a PhyloFis
getFisMetrics :: PhyloFis -> Map (Int,Int) (Map Text [Double])
getFisMetrics = _phyloFis_metrics
-- | To get the support of a PhyloFis
getSupport :: PhyloFis -> Support
getSupport = _phyloFis_support
-- | To get the period of a PhyloFis
getFisPeriod :: PhyloFis -> (Date,Date)
getFisPeriod = _phyloFis_period
----------------------------
-- | PhyloNodes & Edges | --
......@@ -737,11 +819,10 @@ initWeightedLogJaccard (def 0 -> thr) (def 0.01 -> sens) = WLJParams thr sens
-- | To initialize a PhyloQueryBuild from given and default parameters
initPhyloQueryBuild :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Int -> Maybe Double -> Maybe Double -> Maybe Int -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics) (def [] -> filters)
(def defaultWeightedLogJaccard -> matching') (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain steps cluster metrics filters matching' nthLevel nthCluster
(def defaultWeightedLogJaccard -> matching') (def 5 -> frame) (def 0.8 -> frameThr) (def 0.5 -> reBranchThr) (def 4 -> reBranchNth) (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain steps cluster metrics filters matching' frame frameThr reBranchThr reBranchNth nthLevel nthCluster
-- | To initialize a PhyloQueryView default parameters
......@@ -794,7 +875,7 @@ defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing N
defaultQueryBuild :: PhyloQueryBuild
defaultQueryBuild = initPhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
defaultQueryView :: PhyloQueryView
defaultQueryView = initPhyloQueryView Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
......
......@@ -157,7 +157,9 @@ setDotNode pn = node (toNodeDotId $ pn ^. pn_id)
-- | To set an Edge
setDotEdge :: PhyloEdge -> Dot DotId
setDotEdge pe = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Black]]
setDotEdge pe
| pe ^. pe_weight == 100 = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Red]]
| otherwise = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Black]]
-- | To set a Period Edge
......
......@@ -71,7 +71,7 @@ filterSizeBranch min' v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) > min'))
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) >= min'))
--------------------------------------
......
......@@ -26,8 +26,9 @@ import Data.Map (Map)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Aggregates.Cooc
import Gargantext.Viz.Phylo.BranchMaker
import qualified Data.Map as Map
-- import Debug.Trace (trace)
-- | To get the nth most frequent Ngrams in a list of PhyloGroups
......@@ -48,14 +49,15 @@ freqToLabel thr ngs l = ngramsToLabel ngs $ mostFreqNgrams thr l
-- | To get the (nth `div` 2) most cooccuring Ngrams in a PhyloGroup
mostOccNgrams :: Int -> Phylo -> PhyloGroup -> [Int]
mostOccNgrams thr p g = (nub . concat )
$ map (\((f,s),_d) -> [f,s])
$ take (thr `div` 2)
$ reverse $ sortOn snd $ Map.toList cooc
mostOccNgrams :: Int -> PhyloGroup -> [Int]
mostOccNgrams nth g = (nub . concat)
$ map (\((f,s),_d) -> [f,s])
$ take nth
$ reverse $ sortOn snd
$ Map.toList cooc
where
cooc :: Map (Int, Int) Double
cooc = getSubCooc (getGroupNgrams g) $ getCooc [getGroupPeriod g] p
cooc = getGroupCooc g
-- | To alter the peak of a PhyloBranch
......@@ -74,13 +76,18 @@ branchPeakFreq v thr p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ getGroupsFromNodes ns p))
$ getNodesByBranches v
branchPeakCooc :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ map (\(id,ns) -> (id, ngramsToLabel (getFoundationsRoots p) (getGroupsPeaks (getGroupsFromNodes ns p) nth p) ) )
$ getNodesByBranches v
-- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes
nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelCooc v thr p = over (pv_nodes
. traverse)
(\n -> let lbl = ngramsToLabel (getFoundationsRoots p)
$ mostOccNgrams thr p
$ mostOccNgrams thr
$ head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p
in n & pn_label .~ lbl) v
......@@ -89,6 +96,7 @@ nodeLabelCooc v thr p = over (pv_nodes
processTaggers :: [Tagger] -> Phylo -> PhyloView -> PhyloView
processTaggers ts p v = foldl (\v' t -> case t of
BranchPeakFreq -> branchPeakFreq v' 2 p
-- BranchPeakFreq -> branchPeakCooc v' 3 p
GroupLabelCooc -> nodeLabelCooc v' 2 p
_ -> panic "[ERR][Viz.Phylo.View.Taggers.processTaggers] tagger not found") v ts
......@@ -153,19 +153,6 @@ toPhyloView q p = traceView
-- | To get the PhyloParam of a Phylo
getPhyloParams :: Phylo -> PhyloParam
getPhyloParams = _phylo_param
-- | To get the title of a Phylo
getPhyloTitle :: Phylo -> Text
getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-- | To get the desc of a Phylo
getPhyloDescription :: Phylo -> Text
getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-----------------
-- | Taggers | --
-----------------
......
......@@ -43,6 +43,7 @@ extra-deps:
- KMP-0.1.0.2
- accelerate-1.2.0.0
- aeson-lens-0.5.0.0
- deepseq-th-0.1.0.4
- duckling-0.1.3.0
- full-text-search-0.2.1.4
- fullstop-0.1.4
......@@ -59,3 +60,4 @@ extra-deps:
- stemmer-0.5.2
- time-units-1.0.0
- validity-0.9.0.0 # patches-{map,class}
- directory-1.3.1.5
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment