Commit 258f6aec authored by qlobbe's avatar qlobbe

refactoring

parent 8faf7d6d
Pipeline #3432 failed with stage
in 73 minutes and 22 seconds
......@@ -152,7 +152,7 @@ seaToLabel config = case (seaElevation config) of
sensToLabel :: PhyloConfig -> [Char]
sensToLabel config = case (phyloProximity config) of
sensToLabel config = case (similarity config) of
Hamming _ _ -> undefined
WeightedLogJaccard s _ -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s _ -> ( "WeightedLogSim-sens_" <> show s)
......
......@@ -71,7 +71,7 @@ data SeaElevation =
instance ToSchema SeaElevation
data Proximity =
data Similarity =
WeightedLogJaccard
{ _wlj_sensibility :: Double
, _wlj_minSharedNgrams :: Int }
......@@ -84,7 +84,7 @@ data Proximity =
deriving (Show,Generic,Eq)
instance ToSchema Proximity where
instance ToSchema Similarity where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
......@@ -179,7 +179,7 @@ data PhyloConfig =
, listParser :: ListParser
, phyloName :: Text
, phyloScale :: Int
, phyloProximity :: Proximity
, similarity :: Similarity
, seaElevation :: SeaElevation
, findAncestors :: Bool
, phyloSynchrony :: Synchrony
......@@ -205,7 +205,7 @@ data PhyloSubConfig =
subConfig2config :: PhyloSubConfig -> PhyloConfig
subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1
subConfig2config subConfig = defaultConfig { similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1
, phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
, phyloQuality = Quality (_sc_phyloQuality subConfig) 1
, timeUnit = _sc_timeUnit subConfig
......@@ -223,7 +223,7 @@ defaultConfig =
, listParser = V4
, phyloName = pack "Phylo Name"
, phyloScale = 2
, phyloProximity = WeightedLogJaccard 0.5 1
, similarity = WeightedLogJaccard 0.5 1
, seaElevation = Constante 0.1 0.1
, findAncestors = False
, phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups
......@@ -251,8 +251,8 @@ instance ToJSON CorpusParser
instance FromJSON ListParser
instance ToJSON ListParser
instance FromJSON Proximity
instance ToJSON Proximity
instance FromJSON Similarity
instance ToJSON Similarity
instance FromJSON SeaElevation
instance ToJSON SeaElevation
......@@ -592,7 +592,7 @@ instance ToSchema PhyloExport where
makeLenses ''PhyloConfig
makeLenses ''PhyloSubConfig
makeLenses ''Proximity
makeLenses ''Similarity
makeLenses ''SeaElevation
makeLenses ''Quality
makeLenses ''Cluster
......
......@@ -25,7 +25,7 @@ import Data.Vector (Vector)
import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toSimilarity, getNextPeriods)
import Gargantext.Prelude hiding (scale)
import Prelude (writeFile)
import System.FilePath
......@@ -288,9 +288,9 @@ exportToDot phylo export =
{- 8) create the edges between the branches
-- _ <- mapM (\(bId,bId') ->
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToProximity bId bId'
-- (getThresholdInit $ phyloProximity $ getConfig phylo)
-- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
-- (Text.pack $ show(branchIdsToSimilarity bId bId'
-- (getThresholdInit $ phyloSimilarity $ getConfig phylo)
-- (getThresholdStep $ phyloSimilarity $ getConfig phylo))) BranchToBranch
-- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
-}
......@@ -595,23 +595,23 @@ getGroupThr step g =
breaks = (g ^. phylo_groupMeta) ! "breaks"
in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
toAncestor nbDocs diago proximity step candidates ego =
toAncestor :: Double -> Map Int Double -> Similarity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
toAncestor nbDocs diago similarity step candidates ego =
let curr = ego ^. phylo_groupAncestors
in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
$ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
$ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
$ map (\g -> (g, toSimilarity nbDocs diago similarity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
$ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
headsToAncestors nbDocs diago proximity step heads acc =
headsToAncestors :: Double -> Map Int Double -> Similarity -> Double -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
headsToAncestors nbDocs diago similarity step heads acc =
if (null heads)
then acc
else
let ego = head' "headsToAncestors" heads
heads' = tail' "headsToAncestors" heads
in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
in headsToAncestors nbDocs diago similarity step heads' (acc ++ [toAncestor nbDocs diago similarity step heads' ego])
toHorizon :: Phylo -> Phylo
......@@ -645,13 +645,13 @@ toHorizon phylo =
noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
proximity = (phyloProximity $ getConfig phylo)
sim = (similarity $ getConfig phylo)
step = case getSeaElevation phylo of
Constante _ s -> s
Adaptative _ -> 0
-- in headsToAncestors nbDocs diago proximity heads groups []
in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
$ headsToAncestors nbDocs diago proximity step heads []
-- in headsToAncestors nbDocs diago Similarity heads groups []
in map (\ego -> toAncestor nbDocs diago sim step noHeads ego)
$ headsToAncestors nbDocs diago sim step heads []
) periods
-- | 3) process this task concurrently
newGroups :: [[PhyloGroup]]
......
......@@ -31,10 +31,11 @@ import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Core.Viz.Phylo.TemporalMatching (temporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toSimilarity)
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.Vector as Vector
------------------
......@@ -78,6 +79,13 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
-----------------------------
{-
-- create a square ladder
-}
squareLadder :: [Double] -> [Double]
squareLadder ladder = List.map (\x -> x * x) ladder
{-
-- create an adaptative diachronic 'sea elevation' ladder
-}
......@@ -107,7 +115,7 @@ constDiachronicLadder curr step ladder =
-}
scanSimilarity :: Scale -> Phylo -> Phylo
scanSimilarity lvl phylo =
let proximity = phyloProximity $ getConfig phylo
let proximity = similarity $ getConfig phylo
scanning = foldlWithKey (\acc pId pds ->
-- 1) process period by period
let egos = map (\g -> (getGroupId g, g ^. phylo_groupNgrams))
......@@ -124,7 +132,7 @@ scanSimilarity lvl phylo =
map (\(id',ngrams') ->
let nbDocs = (sum . elems) $ filterDocs docs ([idToPrd id, idToPrd id'])
diago = reduceDiagos $ filterDiago diagos ([idToPrd id, idToPrd id'])
in ((id,id'),toProximity nbDocs diago proximity ngrams ngrams' ngrams')
in ((id,id'),toSimilarity nbDocs diago proximity ngrams ngrams' ngrams')
) $ filter (\(_,ngrams') -> (not . null) $ intersect ngrams ngrams') targets
) egos
pairs' = pairs `using` parList rdeepseq
......@@ -173,7 +181,7 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
addTemporalLinksToPhylo :: Phylo -> Phylo
addTemporalLinksToPhylo phylowithoutLink = case strategy of
Constante start gap -> temporalMatching (constDiachronicLadder start gap Set.empty) phylowithoutLink
Adaptative steps -> temporalMatching (adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink
Adaptative steps -> temporalMatching (squareLadder $ adaptDiachronicLadder steps (phylowithoutLink ^. phylo_diaSimScan) Set.empty) phylowithoutLink
where
strategy :: SeaElevation
strategy = getSeaElevation phylowithoutLink
......
......@@ -330,16 +330,16 @@ getPeriodPointers fil g =
ToChildsMemory -> undefined
ToParentsMemory -> undefined
filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local =
case proximity of
filterSimilarity :: Similarity -> Double -> Double -> Bool
filterSimilarity similarity thr local =
case similarity of
WeightedLogJaccard _ _ -> local >= thr
WeightedLogSim _ _ -> local >= thr
Hamming _ _ -> undefined
getProximityName :: Proximity -> String
getProximityName proximity =
case proximity of
getSimilarityName :: Similarity -> String
getSimilarityName similarity =
case similarity of
WeightedLogJaccard _ _ -> "WLJaccard"
WeightedLogSim _ _ -> "WeightedLogSim"
Hamming _ _ -> "Hamming"
......@@ -578,16 +578,16 @@ traceSynchronyStart phylo =
-------------------
-- | Proximity | --
-- | Similarity | --
-------------------
getSensibility :: Proximity -> Double
getSensibility :: Similarity -> Double
getSensibility proxi = case proxi of
WeightedLogJaccard s _ -> s
WeightedLogSim s _ -> s
Hamming _ _ -> undefined
getMinSharedNgrams :: Proximity -> Int
getMinSharedNgrams :: Similarity -> Int
getMinSharedNgrams proxi = case proxi of
WeightedLogJaccard _ m -> m
WeightedLogSim _ m -> m
......@@ -605,8 +605,8 @@ intersectInit acc lst lst' =
then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
else acc
branchIdsToProximity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
branchIdsToProximity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
branchIdsToSimilarity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
branchIdsToSimilarity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
ngramsInBranches :: [[PhyloGroup]] -> [Int]
ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
......@@ -662,4 +662,4 @@ traceTemporalMatching groups =
traceGroupsProxi :: [Double] -> [Double]
traceGroupsProxi l =
trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups proximity" <> "\n") l
trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups Similarity" <> "\n") l
......@@ -124,7 +124,7 @@ toDiamonds groups = foldl' (\acc groups' ->
$ foldl' (\acc g -> acc ++ (map (\(id,_) -> (id,[g]) ) $ g ^. phylo_groupPeriodParents) ) [] groups
groupsToEdges :: Proximity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges :: Similarity -> Synchrony -> Double -> Map Int Double -> [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)]
groupsToEdges prox sync nbDocs diago groups =
case sync of
ByProximityThreshold thr sens _ strat ->
......@@ -153,7 +153,7 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex)
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceGroups :: Similarity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs diagos branch =
-- 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++)
......@@ -197,7 +197,7 @@ levelUpAncestors groups =
synchronicClustering :: Phylo -> Phylo
synchronicClustering phylo =
let prox = phyloProximity $ getConfig phylo
let prox = similarity $ getConfig phylo
sync = phyloSynchrony $ getConfig phylo
docs = phylo ^. phylo_timeDocs
diagos = map coocToDiago $ phylo ^. phylo_timeCooc
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment