Commit a893c2f4 authored by Quentin Lobbé's avatar Quentin Lobbé

add some tracers and fix the temporal matching

parent 762b3416
...@@ -41,6 +41,10 @@ import Gargantext.Viz.Phylo.LevelMaker ...@@ -41,6 +41,10 @@ import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.View.Export import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.View.ViewMaker import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Database.Types.Node
import Data.Maybe
import qualified Data.Map as DM import qualified Data.Map as DM
import qualified Data.Vector as DV import qualified Data.Vector as DV
import qualified Data.List as DL import qualified Data.List as DL
...@@ -60,12 +64,18 @@ data CorpusType = Wos | Csv deriving (Show,Generic) ...@@ -60,12 +64,18 @@ data CorpusType = Wos | Csv deriving (Show,Generic)
type Limit = Int type Limit = Int
data Conf = data Conf =
Conf { corpusPath :: CorpusPath Conf { corpusPath :: CorpusPath
, corpusType :: CorpusType , corpusType :: CorpusType
, listPath :: ListPath , listPath :: ListPath
, outputPath :: FilePath , outputPath :: FilePath
, phyloName :: Text , phyloName :: Text
, limit :: Limit , limit :: Limit
, timeGrain :: Int
, timeStep :: Int
, timeTh :: Double
, timeSens :: Double
, clusterTh :: Double
, clusterSens :: Double
} deriving (Show,Generic) } deriving (Show,Generic)
instance FromJSON Conf instance FromJSON Conf
...@@ -84,35 +94,48 @@ getJson path = L.readFile path ...@@ -84,35 +94,48 @@ getJson path = L.readFile path
--------------- ---------------
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (a, Text) -> (a, [Text]) filterTerms :: Patterns -> (a, Text) -> (a, [Text])
filterTerms patterns (year', doc) = (year',termsInText patterns doc) filterTerms patterns (y,d) = (y,termsInText patterns d)
where where
--------------------------------------
termsInText :: Patterns -> Text -> [Text] termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = DL.nub $ DL.concat $ map (map unwords) $ extractTermsWithList pats txt termsInText pats txt = DL.nub $ DL.concat $ map (map unwords) $ extractTermsWithList pats txt
--------------------------------------
csvToCorpus :: Int -> CorpusPath -> IO ([(Int,Text)]) -- | To transform a Csv nfile into a readable corpus
csvToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList csvToCorpus limit csv = DV.toList
. DV.take limit . DV.take limit
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n))) . DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> readCsv csv . snd <$> readCsv csv
wosToCorpus :: Int -> CorpusPath -> IO ([(Int,Text)]) -- | To transform a Wos nfile into a readable corpus
wosToCorpus limit path = undefined wosToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)])
wosToCorpus limit path = DL.take limit
. map (\d -> ((fromJust $_hyperdataDocument_publication_year d)
,(fromJust $_hyperdataDocument_title d) <> " " <> (fromJust $_hyperdataDocument_abstract d)))
. filter (\d -> (isJust $_hyperdataDocument_publication_year d)
&& (isJust $_hyperdataDocument_title d)
&& (isJust $_hyperdataDocument_abstract d))
<$> parseDocs WOS path
fileToCorpus :: CorpusType -> Int -> CorpusPath -> IO ([(Int,Text)]) -- | To use the correct parser given a CorpusType
fileToCorpus :: CorpusType -> Limit -> CorpusPath -> IO ([(Int,Text)])
fileToCorpus format limit path = case format of fileToCorpus format limit path = case format of
Wos -> wosToCorpus limit path Wos -> wosToCorpus limit path
Csv -> csvToCorpus limit path Csv -> csvToCorpus limit path
parse :: Limit -> CorpusPath -> TermList -> IO [Document] -- | To parse a file into a list of Document
parse limit corpus lst = do parse :: CorpusType -> Limit -> CorpusPath -> TermList -> IO [Document]
corpus' <- csvToCorpus limit corpus parse format limit path l = do
let patterns = buildPatterns lst corpus <- fileToCorpus format limit path
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus' let patterns = buildPatterns l
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
-------------- --------------
...@@ -123,7 +146,7 @@ parse limit corpus lst = do ...@@ -123,7 +146,7 @@ parse limit corpus lst = do
main :: IO () main :: IO ()
main = do main = do
putStrLn $ show "--| Read the conf |--" putStrLn $ show ("--| Read the conf |--")
[jsonPath] <- getArgs [jsonPath] <- getArgs
...@@ -133,17 +156,21 @@ main = do ...@@ -133,17 +156,21 @@ main = do
P.Left err -> putStrLn err P.Left err -> putStrLn err
P.Right conf -> do P.Right conf -> do
putStrLn $ show "--| Parse the corpus |--" putStrLn $ show ("--| Parse the corpus |--")
termList <- csvGraphTermList (listPath conf) termList <- csvGraphTermList (listPath conf)
corpus <- parse (limit conf) (corpusPath conf) termList corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList
let roots = DL.nub $ DL.concat $ map text corpus let roots = DL.nub $ DL.concat $ map text corpus
putStrLn $ show "--| Build the phylo |--" putStrLn $ ("-- | parsed docs : " <> show (length corpus) <> " |--")
putStrLn $ show ("--| Build the phylo |--")
let query = PhyloQueryBuild (phyloName conf) "" 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.00001 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.5 10) let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf)
defaultFis [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) 2
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
...@@ -151,8 +178,14 @@ main = do ...@@ -151,8 +178,14 @@ main = do
let view = toPhyloView queryView phylo let view = toPhyloView queryView phylo
putStrLn $ show "--| Export the phylo as a dot graph |--" putStrLn $ show ("--| Export the phylo as a dot graph |--")
let outputFile = (outputPath conf) P.++ (DT.unpack $ phyloName conf) P.++ ".dot" let outputFile = (outputPath conf) <> (DT.unpack $ phyloName conf)
<> "_" <> show (limit conf) <> "_"
<> "_" <> show (timeTh conf) <> "_"
<> "_" <> show (timeSens conf) <> "_"
<> "_" <> show (clusterTh conf) <> "_"
<> "_" <> show (clusterSens conf)
<> ".dot"
P.writeFile outputFile $ dotToString $ viewToDot view P.writeFile outputFile $ dotToString $ viewToDot view
...@@ -17,15 +17,22 @@ Portability : POSIX ...@@ -17,15 +17,22 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Aggregates.Fis module Gargantext.Viz.Phylo.Aggregates.Fis
where where
import Data.List (null) import Data.List (null,concat,sort)
import Data.Map (Map, empty) import Data.Map (Map, empty,elems)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Data.Set (size) import Data.Set (size)
import Data.Vector.Storable (Vector)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector.Storable as Vector
import Numeric.Statistics (percentile)
import Debug.Trace (trace)
-- | To Filter Fis by support -- | To Filter Fis by support
...@@ -74,7 +81,33 @@ processMetrics metrics phyloFis ...@@ -74,7 +81,33 @@ processMetrics metrics phyloFis
toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis] toPhyloFis :: Map (Date, Date) [Document] -> Bool -> Support -> Int -> [Metric] -> [Filter] -> Map (Date, Date) [PhyloFis]
toPhyloFis ds k s t ms fs = processFilters fs toPhyloFis ds k s t ms fs = processFilters fs
$ processMetrics ms $ processMetrics ms
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFisByNgrams t $ filterFisByNgrams t
$ traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested $ filterFisByNested
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFisBySupport k s $ filterFisBySupport k s
$ traceFis "----\nUnfiltered Fis :\n"
$ docsToFis ds $ docsToFis ds
-----------------
-- | Tracers | --
-----------------
traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <> " Fis\n"
<> "support : " <> show (percentile 25 supps) <> " (25%) "
<> show (percentile 50 supps) <> " (50%) "
<> show (percentile 75 supps) <> " (75%) "
<> show (percentile 90 supps) <> " (90%)\n"
<> "clique size : " <> show (percentile 25 ngrms) <> " (25%) "
<> show (percentile 50 ngrms) <> " (50%) "
<> show (percentile 75 ngrms) <> " (75%) "
<> show (percentile 90 ngrms) <> " (90%)\n"
) m
where
supps :: Vector Double
supps = Vector.fromList $ sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems m
ngrms :: Vector Double
ngrms = Vector.fromList $ sort $ map (\f -> fromIntegral $ Set.size $ _phyloFis_clique f) $ concat $ elems m
\ No newline at end of file
...@@ -104,7 +104,7 @@ queryEx = "title=Cesar et Cleôpatre" ...@@ -104,7 +104,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.0001 10) 3 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 10) 3 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10)
......
...@@ -34,7 +34,14 @@ import Gargantext.Viz.Phylo.BranchMaker ...@@ -34,7 +34,14 @@ import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.LinkMaker import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
import qualified Data.Vector.Storable as VS
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Vector as Vector
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
-- | A typeClass for polymorphic PhyloLevel functions -- | A typeClass for polymorphic PhyloLevel functions
...@@ -144,8 +151,11 @@ toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo ...@@ -144,8 +151,11 @@ toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
toNthLevel lvlMax prox clus p toNthLevel lvlMax prox clus p
| lvl >= lvlMax = p | lvl >= lvlMax = p
| otherwise = toNthLevel lvlMax prox clus | otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1) $ setPhyloBranches (lvl + 1)
$ traceTempoMatching Descendant (lvl + 1)
$ interTempoMatching Descendant (lvl + 1) prox $ interTempoMatching Descendant (lvl + 1) prox
$ traceTempoMatching Ascendant (lvl + 1)
$ interTempoMatching Ascendant (lvl + 1) prox $ interTempoMatching Ascendant (lvl + 1) prox
$ setLevelLinks (lvl, lvl + 1) $ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1) $ addPhyloLevel (lvl + 1)
...@@ -160,9 +170,12 @@ toNthLevel lvlMax prox clus p ...@@ -160,9 +170,12 @@ toNthLevel lvlMax prox clus p
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method -- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo toPhylo1 :: Cluster -> Proximity -> [Metric] -> [Filter] -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox metrics filters d p = case clus of toPhylo1 clus prox metrics filters d p = case clus of
Fis (FisParams k s t) -> setPhyloBranches 1 Fis (FisParams k s t) -> traceBranches 1
$ setPhyloBranches 1
$ traceTempoMatching Descendant 1
$ interTempoMatching Descendant 1 prox $ interTempoMatching Descendant 1 prox
$ interTempoMatching Ascendant 1 prox $ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox
$ setLevelLinks (0,1) $ setLevelLinks (0,1)
$ setLevelLinks (1,0) $ setLevelLinks (1,0)
$ addPhyloLevel 1 phyloFis p $ addPhyloLevel 1 phyloFis p
...@@ -180,14 +193,6 @@ toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo ...@@ -180,14 +193,6 @@ toPhylo0 :: Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo0 d p = addPhyloLevel 0 d p toPhylo0 d p = addPhyloLevel 0 d p
-- | To reconstruct the Base of a Phylo
-- | To reconstruct a Phylomemy from a PhyloQueryBuild, a Corpus and a list of actants
class PhyloMaker corpus class PhyloMaker corpus
where where
toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Phylo toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> TermList -> Phylo
...@@ -210,7 +215,7 @@ instance PhyloMaker [(Date, Text)] ...@@ -210,7 +215,7 @@ instance PhyloMaker [(Date, Text)]
phyloDocs = corpusToDocs c phyloBase phyloDocs = corpusToDocs c phyloBase
-------------------------------------- --------------------------------------
phyloBase :: Phylo phyloBase :: Phylo
phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
-------------------------------------- --------------------------------------
-------------------------------------- --------------------------------------
toPhyloBase q p c roots termList = initPhyloBase periods foundations p toPhyloBase q p c roots termList = initPhyloBase periods foundations p
...@@ -243,7 +248,7 @@ instance PhyloMaker [Document] ...@@ -243,7 +248,7 @@ instance PhyloMaker [Document]
phyloDocs = corpusToDocs c phyloBase phyloDocs = corpusToDocs c phyloBase
-------------------------------------- --------------------------------------
phyloBase :: Phylo phyloBase :: Phylo
phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList phyloBase = tracePhyloBase $ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c roots termList
-------------------------------------- --------------------------------------
-------------------------------------- --------------------------------------
toPhyloBase q p c roots termList = initPhyloBase periods foundations p toPhyloBase q p c roots termList = initPhyloBase periods foundations p
...@@ -257,4 +262,54 @@ instance PhyloMaker [Document] ...@@ -257,4 +262,54 @@ instance PhyloMaker [Document]
$ both date (head' "LevelMaker" c,last c) $ both date (head' "LevelMaker" c,last c)
-------------------------------------- --------------------------------------
-------------------------------------- --------------------------------------
corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
\ No newline at end of file
-----------------
-- | Tracers | --
-----------------
tracePhyloBase :: Phylo -> Phylo
tracePhyloBase p = trace ( "----\nPhyloBase : \n"
<> show (length $ _phylo_periods p) <> " periods from "
<> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
<> " to "
<> show (getPhyloPeriodId $ last $ _phylo_periods p)
<> "\n"
<> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length pts) <> " pointers\n"
<> "similarity : " <> show (percentile 25 (VS.fromList sim)) <> " (25%) "
<> show (percentile 50 (VS.fromList sim)) <> " (50%) "
<> show (percentile 75 (VS.fromList sim)) <> " (75%) "
<> show (percentile 90 (VS.fromList sim)) <> " (90%)\n") p
where
--------------------------------------
sim :: [Double]
sim = sort $ map snd pts
--------------------------------------
pts :: [Pointer]
pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
--------------------------------------
traceBranches :: Level -> Phylo -> Phylo
traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
<> "count : " <> show (length $ getBranchIds p) <> " branches\n"
<> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
<> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
<> show (percentile 50 (VS.fromList brs)) <> " (50%) "
<> show (percentile 75 (VS.fromList brs)) <> " (75%) "
<> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
where
--------------------------------------
brs :: [Double]
brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
$ filter (\(id,_) -> (fst id) == lvl)
$ getGroupsByBranches p
--------------------------------------
\ No newline at end of file
...@@ -18,15 +18,20 @@ module Gargantext.Viz.Phylo.LinkMaker ...@@ -18,15 +18,20 @@ module Gargantext.Viz.Phylo.LinkMaker
where where
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List ((++), nub, sortOn, null, tail, splitAt, elem, concat) import Data.List ((++), sortOn, null, tail, splitAt, elem, concat, sort, delete)
import Data.Tuple.Extra import Data.Tuple.Extra
import Data.Map (Map,(!))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.Metrics.Proximity import Gargantext.Viz.Phylo.Metrics.Proximity
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Maybe as Maybe import qualified Data.Maybe as Maybe
-- import Debug.Trace (trace) import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -106,60 +111,84 @@ getNextPeriods to' id l = case to' of ...@@ -106,60 +111,84 @@ getNextPeriods to' id l = case to' of
-------------------------------------- --------------------------------------
-- | To find the best set (max = 2) of Childs/Parents candidates based on a given Proximity mesure until a maximum depth (max = Period + 5 units ) -- | To find the best candidates regarding a given proximity
findBestCandidates :: Filiation -> Int -> Int -> Proximity -> PhyloGroup -> Phylo -> [(PhyloGroupId, Double)] findBestCandidates' :: Filiation -> Int -> Int -> Proximity -> [PhyloPeriodId] -> [PhyloGroup] -> PhyloGroup -> ([Pointer],[Double])
findBestCandidates to' depth max' prox group p findBestCandidates' fil depth limit prox prds gs g
| depth > max' || null next = [] | depth > limit || null next = ([],[])
| (not . null) best = take 2 best | (not . null) bestScores = (take 2 bestScores, map snd scores)
| otherwise = findBestCandidates to' (depth + 1) max' prox group p | otherwise = findBestCandidates' fil (depth + 1) limit prox prds gs g
where where
-------------------------------------- --------------------------------------
next :: [PhyloPeriodId] next :: [PhyloPeriodId]
next = getNextPeriods to' (getGroupPeriod group) (getPhyloPeriods p) next = take depth prds
-------------------------------------- --------------------------------------
candidates :: [PhyloGroup] candidates :: [PhyloGroup]
candidates = concat $ map (\prd -> getGroupsWithFilters (getGroupLevel group) prd p) $ (take depth next) candidates = filter (\g' -> elem (getGroupPeriod g') next) gs
-------------------------------------- --------------------------------------
scores :: [(PhyloGroupId, Double)] scores :: [(PhyloGroupId, Double)]
scores = map (\group' -> applyProximity prox group group') candidates scores = map (\g' -> applyProximity prox g g') candidates
--------------------------------------
bestScores :: [(PhyloGroupId, Double)]
bestScores = reverse
$ sortOn snd
$ filter (\(_id,score) -> case prox of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
Filiation -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Filiation"
) scores
--------------------------------------
-- | To add some Pointer to a PhyloGroup
addPointers' :: Filiation -> [Pointer] -> PhyloGroup -> PhyloGroup
addPointers' fil pts g = g & case fil of
Descendant -> phylo_groupPeriodChilds %~ (++ pts)
Ascendant -> phylo_groupPeriodParents %~ (++ pts)
_ -> panic ("[ERR][Viz.Phylo.LinkMaker.addPointers] Wrong type of filiation")
-- | To update a list of pkyloGroups with some Pointers
updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo
updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if (getGroupLevel g) == lvl
then addPointers' fil (m ! (getGroupId g)) g
else g ) gs) p
-- | To apply the intertemporal matching to Phylo at a given level
interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
interTempoMatching fil lvl prox p = traceMatching fil lvl scores
$ updateGroups fil lvl pointers p
where
--------------------------------------
pointers :: Map PhyloGroupId [Pointer]
pointers = Map.fromList $ map (\(id,x) -> (id,fst x)) candidates
--------------------------------------
scores :: [Double]
scores = sort $ concat $ map (snd . snd) candidates
--------------------------------------
candidates :: [(PhyloGroupId,([Pointer],[Double]))]
candidates = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (delete g gs) g)) gs
-------------------------------------- --------------------------------------
best :: [(PhyloGroupId, Double)] gs :: [PhyloGroup]
best = reverse gs = getGroupsWithLevel lvl p
$ sortOn snd --------------------------------------
$ filter (\(_id,score) -> case prox of prds :: [PhyloPeriodId]
WeightedLogJaccard (WLJParams thr _) -> score >= thr prds = getPhyloPeriods p
Hamming (HammingParams thr) -> score <= thr
Filiation -> panic "[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Filiation"
) scores
-------------------------------------- --------------------------------------
-- | To add a new list of Pointers into an existing Childs/Parents list of Pointers ----------------
makePair :: Filiation -> PhyloGroup -> [(PhyloGroupId, Double)] -> PhyloGroup -- | Tracer | --
makePair to' group ids = case to' of ----------------
Descendant -> over (phylo_groupPeriodChilds) addPointers group
Ascendant -> over (phylo_groupPeriodParents) addPointers group
_ -> panic ("[ERR][Viz.Phylo.Example.makePair] Filiation type not defined")
where
--------------------------------------
addPointers :: [Pointer] -> [Pointer]
addPointers l = nub $ (l ++ ids)
--------------------------------------
-- | To pair all the Phylogroups of given PhyloLevel to their best Parents or Childs traceMatching :: Filiation -> Level -> [Double] -> Phylo -> Phylo
interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo traceMatching fil lvl lst p = trace ( "----\n" <> show (fil) <> " unfiltered temporal Matching in Phylo" <> show (lvl) <> " :\n"
interTempoMatching to' lvl prox p = alterPhyloGroups <> "count : " <> show (length lst) <> " potential pointers\n"
(\groups -> <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
map (\group -> <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
if (getGroupLevel group) == lvl <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
then <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
let
--------------------------------------
candidates :: [(PhyloGroupId, Double)]
candidates = findBestCandidates to' 1 5 prox group p
--------------------------------------
in
makePair to' group candidates
else
group ) groups) p
...@@ -266,6 +266,10 @@ getGroupLevelParents = _phylo_groupLevelParents ...@@ -266,6 +266,10 @@ getGroupLevelParents = _phylo_groupLevelParents
getGroupLevelParentsId :: PhyloGroup -> [PhyloGroupId] getGroupLevelParentsId :: PhyloGroup -> [PhyloGroupId]
getGroupLevelParentsId g = map fst $ getGroupLevelParents g getGroupLevelParentsId g = map fst $ getGroupLevelParents g
-- | To get the Meta value of a PhyloGroup
getGroupMeta :: Text -> PhyloGroup -> Double
getGroupMeta k g = (g ^. phylo_groupMeta) ! k
-- | To get the Ngrams of a PhyloGroup -- | To get the Ngrams of a PhyloGroup
getGroupNgrams :: PhyloGroup -> [Int] getGroupNgrams :: PhyloGroup -> [Int]
...@@ -306,6 +310,20 @@ getGroupPeriodParents = _phylo_groupPeriodParents ...@@ -306,6 +310,20 @@ getGroupPeriodParents = _phylo_groupPeriodParents
getGroupPeriodParentsId :: PhyloGroup -> [PhyloGroupId] getGroupPeriodParentsId :: PhyloGroup -> [PhyloGroupId]
getGroupPeriodParentsId g = map fst $ getGroupPeriodParents g getGroupPeriodParentsId g = map fst $ getGroupPeriodParents g
-- | To get the pointers of a given Phylogroup
getGroupPointers :: EdgeType -> Filiation -> PhyloGroup -> [Pointer]
getGroupPointers t f g = case t of
PeriodEdge -> case f of
Ascendant -> getGroupPeriodParents g
Descendant -> getGroupPeriodChilds g
_ -> panic "[ERR][Viz.Phylo.Tools.getGroupPointers] wrong filiation"
LevelEdge -> case f of
Ascendant -> getGroupLevelParents g
Descendant -> getGroupLevelChilds g
_ -> panic "[ERR][Viz.Phylo.Tools.getGroupPointers] wrong filiation"
-- | To get the roots labels of a list of group ngrams -- | To get the roots labels of a list of group ngrams
getGroupText :: PhyloGroup -> Phylo -> [Text] getGroupText :: PhyloGroup -> Phylo -> [Text]
getGroupText g p = ngramsToText (getFoundationsRoots p) (getGroupNgrams g) getGroupText g p = ngramsToText (getFoundationsRoots p) (getGroupNgrams g)
...@@ -532,6 +550,13 @@ getTargetId e = e ^. pe_target ...@@ -532,6 +550,13 @@ getTargetId e = e ^. pe_target
getBranchId :: PhyloBranch -> PhyloBranchId getBranchId :: PhyloBranch -> PhyloBranchId
getBranchId b = b ^. pb_id getBranchId b = b ^. pb_id
-- | To get a list of PhyloBranchIds
getBranchIds :: Phylo -> [PhyloBranchId]
getBranchIds p = sortOn snd
$ nub
$ mapMaybe getGroupBranchId
$ getGroups p
-- | To get a list of PhyloBranchIds given a Level in a Phylo -- | To get a list of PhyloBranchIds given a Level in a Phylo
getBranchIdsWith :: Level -> Phylo -> [PhyloBranchId] getBranchIdsWith :: Level -> Phylo -> [PhyloBranchId]
...@@ -550,6 +575,20 @@ getViewBranchIds :: PhyloView -> [PhyloBranchId] ...@@ -550,6 +575,20 @@ getViewBranchIds :: PhyloView -> [PhyloBranchId]
getViewBranchIds v = map getBranchId $ v ^. pv_branches getViewBranchIds v = map getBranchId $ v ^. pv_branches
-- | To get a list of PhyloGroup sharing the same PhyloBranchId
getGroupsByBranches :: Phylo -> [(PhyloBranchId,[PhyloGroup])]
getGroupsByBranches p = zip (getBranchIds p)
$ map (\id -> filter (\g -> (fromJust $ getGroupBranchId g) == id)
$ getGroupsInBranches p)
$ getBranchIds p
-- | To get the sublist of all the PhyloGroups linked to a branch
getGroupsInBranches :: Phylo -> [PhyloGroup]
getGroupsInBranches p = filter (\g -> isJust $ g ^. phylo_groupBranchId)
$ getGroups p
-------------------------------- --------------------------------
-- | PhyloQuery & QueryView | -- -- | PhyloQuery & QueryView | --
-------------------------------- --------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment