Commit 2120f449 authored by qlobbe's avatar qlobbe Committed by Alexandre Delanoë

refactoring

parent c6d78653
...@@ -152,7 +152,7 @@ seaToLabel config = case (seaElevation config) of ...@@ -152,7 +152,7 @@ seaToLabel config = case (seaElevation config) of
sensToLabel :: PhyloConfig -> [Char] sensToLabel :: PhyloConfig -> [Char]
sensToLabel config = case (phyloProximity config) of sensToLabel config = case (similarity config) of
Hamming _ _ -> undefined Hamming _ _ -> undefined
WeightedLogJaccard s _ -> ("WeightedLogJaccard_" <> show s) WeightedLogJaccard s _ -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s _ -> ( "WeightedLogSim-sens_" <> show s) WeightedLogSim s _ -> ( "WeightedLogSim-sens_" <> show s)
......
...@@ -71,7 +71,7 @@ data SeaElevation = ...@@ -71,7 +71,7 @@ data SeaElevation =
instance ToSchema SeaElevation instance ToSchema SeaElevation
data Proximity = data Similarity =
WeightedLogJaccard WeightedLogJaccard
{ _wlj_sensibility :: Double { _wlj_sensibility :: Double
, _wlj_minSharedNgrams :: Int } , _wlj_minSharedNgrams :: Int }
...@@ -84,7 +84,7 @@ data Proximity = ...@@ -84,7 +84,7 @@ data Proximity =
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema Proximity where instance ToSchema Similarity where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
...@@ -179,7 +179,7 @@ data PhyloConfig = ...@@ -179,7 +179,7 @@ data PhyloConfig =
, listParser :: ListParser , listParser :: ListParser
, phyloName :: Text , phyloName :: Text
, phyloScale :: Int , phyloScale :: Int
, phyloProximity :: Proximity , similarity :: Similarity
, seaElevation :: SeaElevation , seaElevation :: SeaElevation
, findAncestors :: Bool , findAncestors :: Bool
, phyloSynchrony :: Synchrony , phyloSynchrony :: Synchrony
...@@ -205,7 +205,7 @@ data PhyloSubConfig = ...@@ -205,7 +205,7 @@ data PhyloSubConfig =
subConfig2config :: PhyloSubConfig -> PhyloConfig subConfig2config :: PhyloSubConfig -> PhyloConfig
subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1 subConfig2config subConfig = defaultConfig { similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1
, phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
, phyloQuality = Quality (_sc_phyloQuality subConfig) 1 , phyloQuality = Quality (_sc_phyloQuality subConfig) 1
, timeUnit = _sc_timeUnit subConfig , timeUnit = _sc_timeUnit subConfig
...@@ -223,7 +223,7 @@ defaultConfig = ...@@ -223,7 +223,7 @@ defaultConfig =
, listParser = V4 , listParser = V4
, phyloName = pack "Phylo Name" , phyloName = pack "Phylo Name"
, phyloScale = 2 , phyloScale = 2
, phyloProximity = WeightedLogJaccard 0.5 1 , similarity = WeightedLogJaccard 0.5 1
, seaElevation = Constante 0.1 0.1 , seaElevation = Constante 0.1 0.1
, findAncestors = False , findAncestors = False
, phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups
...@@ -251,8 +251,8 @@ instance ToJSON CorpusParser ...@@ -251,8 +251,8 @@ instance ToJSON CorpusParser
instance FromJSON ListParser instance FromJSON ListParser
instance ToJSON ListParser instance ToJSON ListParser
instance FromJSON Proximity instance FromJSON Similarity
instance ToJSON Proximity instance ToJSON Similarity
instance FromJSON SeaElevation instance FromJSON SeaElevation
instance ToJSON SeaElevation instance ToJSON SeaElevation
...@@ -592,7 +592,7 @@ instance ToSchema PhyloExport where ...@@ -592,7 +592,7 @@ instance ToSchema PhyloExport where
makeLenses ''PhyloConfig makeLenses ''PhyloConfig
makeLenses ''PhyloSubConfig makeLenses ''PhyloSubConfig
makeLenses ''Proximity makeLenses ''Similarity
makeLenses ''SeaElevation makeLenses ''SeaElevation
makeLenses ''Quality makeLenses ''Quality
makeLenses ''Cluster makeLenses ''Cluster
......
...@@ -25,7 +25,7 @@ import Data.Vector (Vector) ...@@ -25,7 +25,7 @@ import Data.Vector (Vector)
import Debug.Trace (trace) import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods) import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
import Gargantext.Prelude hiding (scale) import Gargantext.Prelude hiding (scale)
import Prelude (writeFile) import Prelude (writeFile)
import System.FilePath import System.FilePath
...@@ -288,9 +288,9 @@ exportToDot phylo export = ...@@ -288,9 +288,9 @@ exportToDot phylo export =
{- 8) create the edges between the branches {- 8) create the edges between the branches
-- _ <- mapM (\(bId,bId') -> -- _ <- mapM (\(bId,bId') ->
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId') -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToProximity bId bId' -- (Text.pack $ show(branchIdsToSimilarity bId bId'
-- (getThresholdInit $ phyloProximity $ getConfig phylo) -- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
-- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch -- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
-- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
-} -}
...@@ -595,23 +595,23 @@ getGroupThr step g = ...@@ -595,23 +595,23 @@ getGroupThr step g =
breaks = (g ^. phylo_groupMeta) ! "breaks" breaks = (g ^. phylo_groupMeta) ! "breaks"
in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup toAncestor :: Double -> Map Int Double -> Similarity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
toAncestor nbDocs diago proximity step candidates ego = toAncestor nbDocs diago similarity step candidates ego =
let curr = ego ^. phylo_groupAncestors let curr = ego ^. phylo_groupAncestors
in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w)) in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
$ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g)))) $ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
$ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams))) $ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
$ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates)) $ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup] headsToAncestors :: Double -> Map Int Double -> Similarity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
headsToAncestors nbDocs diago proximity step heads acc = headsToAncestors nbDocs diago similarity step heads acc =
if (null heads) if (null heads)
then acc then acc
else else
let ego = head' "headsToAncestors" heads let ego = head' "headsToAncestors" heads
heads' = tail' "headsToAncestors" heads heads' = tail' "headsToAncestors" heads
in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego]) in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
toHorizon :: Phylo -> Phylo toHorizon :: Phylo -> Phylo
...@@ -645,13 +645,13 @@ toHorizon phylo = ...@@ -645,13 +645,13 @@ toHorizon phylo =
noHeads = groups \\ heads noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd] nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd] diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
proximity = (phyloProximity $ getConfig phylo) sim = (similarity $ getConfig phylo)
step = case getSeaElevation phylo of step = case getSeaElevation phylo of
Constante _ s -> s Constante _ s -> s
Adaptative _ -> 0 Adaptative _ -> 0
-- in headsToAncestors nbDocs diago proximity heads groups [] -- in headsToAncestors nbDocs diago Similarity heads groups []
in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego) in map (\ego -> toAncestor nbDocs diago sim step noHeads ego)
$ headsToAncestors nbDocs diago proximity step heads [] $ headsToAncestors nbDocs diago sim step heads []
) periods ) periods
-- | 3) process this task concurrently -- | 3) process this task concurrently
newGroups :: [[PhyloGroup]] newGroups :: [[PhyloGroup]]
......
...@@ -31,10 +31,11 @@ import Gargantext.Core.Viz.Phylo ...@@ -31,10 +31,11 @@ import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon) import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity) import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toSimilarity)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
------------------ ------------------
...@@ -78,6 +79,13 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou ...@@ -78,6 +79,13 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
----------------------------- -----------------------------
{-
-- create a square ladder
-}
squareLadder :: [Double] -> [Double]
squareLadder ladder = List.map (\x -> x * x) ladder
{- {-
-- create an adaptative diachronic 'sea elevation' ladder -- create an adaptative diachronic 'sea elevation' ladder
-} -}
...@@ -107,7 +115,7 @@ constDiachronicLadder curr step ladder = ...@@ -107,7 +115,7 @@ constDiachronicLadder curr step ladder =
-} -}
scanSimilarity :: Scale -> Phylo -> Phylo scanSimilarity :: Scale -> Phylo -> Phylo
scanSimilarity lvl phylo = scanSimilarity lvl phylo =
let proximity = phyloProximity $ getConfig phylo let proximity = similarity $ getConfig phylo
scanning = foldlWithKey (\acc pId pds -> scanning = foldlWithKey (\acc pId pds ->
-- 1) process period by period -- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
...@@ -124,7 +132,7 @@ scanSimilarity lvl phylo = ...@@ -124,7 +132,7 @@ scanSimilarity lvl phylo =
map (\(id',ngrams') -> map (\(id',ngrams') ->
let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id']) let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id'])
diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id']) diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id'])
in ((id,id'),toProximity nbDocs diago proximity ngrams ngrams' ngrams') in ((id,id'),toSimilarity nbDocs diago proximity ngrams ngrams' ngrams')
) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets ) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets
) egos ) egos
pairs' = pairs `using` parList rdeepseq pairs' = pairs `using` parList rdeepseq
...@@ -173,7 +181,7 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx "" ...@@ -173,7 +181,7 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
addTemporalLinksToPhylo :: Phylo -> Phylo addTemporalLinksToPhylo :: Phylo -> Phylo
addTemporalLinksToPhylo phylowithoutLink = case strategy of addTemporalLinksToPhylo phylowithoutLink = case strategy of
Constante start gap -> temporalMatching (constDiachronicLadder start gap Set.empty) phylowithoutLink Constante start gap -> temporalMatching (constDiachronicLadder start gap Set.empty) phylowithoutLink
Adaptative steps -> temporalMatching (adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink Adaptative steps -> temporalMatching (squareLadder $ adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink
where where
strategy :: SeaElevation strategy :: SeaElevation
strategy = getSeaElevation phylowithoutLink strategy = getSeaElevation phylowithoutLink
......
...@@ -330,16 +330,16 @@ getPeriodPointers fil g = ...@@ -330,16 +330,16 @@ getPeriodPointers fil g =
ToChildsMemory -> undefined ToChildsMemory -> undefined
ToParentsMemory -> undefined ToParentsMemory -> undefined
filterProximity :: Proximity -> Double -> Double -> Bool filterSimilarity :: Similarity -> Double -> Double -> Bool
filterProximity proximity thr local = filterSimilarity similarity thr local =
case proximity of case similarity of
WeightedLogJaccard _ _ -> local >= thr WeightedLogJaccard _ _ -> local >= thr
WeightedLogSim _ _ -> local >= thr WeightedLogSim _ _ -> local >= thr
Hamming _ _ -> undefined Hamming _ _ -> undefined
getProximityName :: Proximity -> String getSimilarityName :: Similarity -> String
getProximityName proximity = getSimilarityName similarity =
case proximity of case similarity of
WeightedLogJaccard _ _ -> "WLJaccard" WeightedLogJaccard _ _ -> "WLJaccard"
WeightedLogSim _ _ -> "WeightedLogSim" WeightedLogSim _ _ -> "WeightedLogSim"
Hamming _ _ -> "Hamming" Hamming _ _ -> "Hamming"
...@@ -578,16 +578,16 @@ traceSynchronyStart phylo = ...@@ -578,16 +578,16 @@ traceSynchronyStart phylo =
------------------- -------------------
-- | Proximity | -- -- | Similarity | --
------------------- -------------------
getSensibility :: Proximity -> Double getSensibility :: Similarity -> Double
getSensibility proxi = case proxi of getSensibility proxi = case proxi of
WeightedLogJaccard s _ -> s WeightedLogJaccard s _ -> s
WeightedLogSim s _ -> s WeightedLogSim s _ -> s
Hamming _ _ -> undefined Hamming _ _ -> undefined
getMinSharedNgrams :: Proximity -> Int getMinSharedNgrams :: Similarity -> Int
getMinSharedNgrams proxi = case proxi of getMinSharedNgrams proxi = case proxi of
WeightedLogJaccard _ m -> m WeightedLogJaccard _ m -> m
WeightedLogSim _ m -> m WeightedLogSim _ m -> m
...@@ -605,8 +605,8 @@ intersectInit acc lst lst' = ...@@ -605,8 +605,8 @@ intersectInit acc lst lst' =
then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst') then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
else acc else acc
branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double branchIdsToSimilarity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id')) branchIdsToSimilarity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
ngramsInBranches :: [[PhyloGroup]] -> [Int] ngramsInBranches :: [[PhyloGroup]] -> [Int]
ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
...@@ -662,4 +662,4 @@ traceTemporalMatching groups = ...@@ -662,4 +662,4 @@ traceTemporalMatching groups =
traceGroupsProxi :: [Double] -> [Double] traceGroupsProxi :: [Double] -> [Double]
traceGroupsProxi l = traceGroupsProxi l =
trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups proximity" <> "\n") l trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups Similarity" <> "\n") l
...@@ -124,7 +124,7 @@ toDiamonds groups = foldl' (\acc groups' -> ...@@ -124,7 +124,7 @@ toDiamonds groups = foldl' (\acc groups' ->
$ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups $ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups
groupsToEdges :: Proximity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] groupsToEdges :: Similarity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox sync nbDocs diago groups = groupsToEdges prox sync nbDocs diago groups =
case sync of case sync of
ByProximityThreshold thr sens _ strat -> ByProximityThreshold thr sens _ strat ->
...@@ -153,7 +153,7 @@ toParentId :: PhyloGroup -> PhyloGroupId ...@@ -153,7 +153,7 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex) toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex)
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup] reduceGroups :: Similarity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs diagos branch = reduceGroups prox sync docs diagos branch =
-- 1) reduce a branch as a set of periods & groups -- 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++) let periods = fromListWith (++)
...@@ -197,7 +197,7 @@ levelUpAncestors groups = ...@@ -197,7 +197,7 @@ levelUpAncestors groups =
synchronicClustering :: Phylo -> Phylo synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo = synchronicClustering phylo =
let prox = phyloProximity $ getConfig phylo let prox = similarity $ getConfig phylo
sync = phyloSynchrony $ getConfig phylo sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs docs = phylo ^. phylo_timeDocs
diagos = map coocToDiago $ phylo ^. phylo_timeCooc diagos = map coocToDiago $ phylo ^. phylo_timeCooc
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment