Commit ec1b4dd1 authored by Grégoire Locqueville's avatar Grégoire Locqueville

Removed dead phylo-related code

parent 3d9b4e21
Pipeline #7136 passed with stages
in 68 minutes and 11 seconds
......@@ -401,7 +401,6 @@ library
Gargantext.Core.Viz.Graph.Tools.Infomap
Gargantext.Core.Viz.Graph.Utils
Gargantext.Core.Viz.LegacyPhylo
Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
Gargantext.Core.Viz.Phylo.TemporalMatching
Gargantext.Data.HashMap.Strict.Utils
Gargantext.Database
......
......@@ -24,7 +24,7 @@ import Gargantext.API.Worker (WorkerAPI)
import Gargantext.Core.Types (ListId, NodeId)
import Gargantext.Core.Viz.Graph.Types (Graph, GraphLegendAPI, GraphVersions(..), HyperdataGraphAPI)
import Gargantext.Core.Viz.LegacyPhylo (Level)
import Gargantext.Core.Viz.Phylo.Legacy.LegacyMain (MinSizeBranch)
import Gargantext.Prelude (Int)
import Servant
import Servant.XML.Conduit (XML)
......@@ -38,7 +38,7 @@ data PhyloAPI mode = PhyloAPI
newtype GetPhylo mode = GetPhylo
{ getPhyloDataEp :: mode :- QueryParam "listId" ListId
:> QueryParam "level" Level
:> QueryParam "minSizeBranch" MinSizeBranch
:> QueryParam "minSizeBranch" Int
:> Get '[JSON] PhyloData
} deriving Generic
......
......@@ -21,13 +21,10 @@ module Gargantext.Core.Viz.Phylo.API.Tools
import Control.Lens (to, view)
import Data.Aeson (Value, decodeFileStrict, encode, eitherDecodeFileStrict')
import Data.ByteString.Lazy qualified as Lazy
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text (pack)
import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
import Gargantext.API.Ngrams.Prelude (getTermList)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core (withDefaultLanguage, Lang)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
......@@ -40,12 +37,12 @@ import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} se
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus(..) )
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(..) )
import Gargantext.Database.Admin.Types.Node (Context, CorpusId, ContextId, PhyloId, nodeId2ContextId)
import Gargantext.Database.Admin.Types.Node (CorpusId, PhyloId)
import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata, _context_id) )
import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata) )
import Gargantext.Database.Schema.Node ( NodePoly(_node_hyperdata), node_hyperdata )
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( MonadLogger, LogLevel(DEBUG), logLocM )
......@@ -62,12 +59,6 @@ getPhyloData phyloId = do
nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo)
pure $ _hp_data $ _node_hyperdata nodePhylo
putPhylo :: PhyloId -> DBCmd err Phylo
putPhylo = undefined
savePhylo :: PhyloId -> DBCmd err ()
savePhylo = undefined
--------------------------------------------------------------------
maybePhylo2dot2json :: Maybe Phylo -> IO (Maybe Value)
maybePhylo2dot2json Nothing = pure Nothing
......@@ -162,35 +153,6 @@ toPhyloDocs lang patterns time d =
(termsInText' lang patterns $ title <> " " <> abstr) Nothing [] time
context2phyloDocument :: TimeUnit
-> Context HyperdataDocument
-> (Map ContextId (Set NgramsTerm), Map ContextId (Set NgramsTerm))
-> Maybe Document
context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
let contextId = _context_id context
(date, date') <- context2date context timeUnit
let
toText x = Set.toList $ Set.map unNgramsTerm x
text' = maybe [] toText $ Map.lookup (nodeId2ContextId contextId) ngs_terms
sources' = maybe [] toText $ Map.lookup (nodeId2ContextId contextId) ngs_sources
pure $ Document date date' text' Nothing sources' (Year 3 1 5)
-- TODO better default date and log the errors to improve data quality
context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
context2date context timeUnit = do
let hyperdata = _context_hyperdata context
let
year = fromMaybe 1 $ _hd_publication_year hyperdata
month = fromMaybe 1 $ _hd_publication_month hyperdata
day = fromMaybe 1 $ _hd_publication_day hyperdata
pure (toPhyloDate year month day timeUnit, toPhyloDate' year month day timeUnit)
---------------
-- | Dates | --
---------------
......
{-|
Module : Gargantext.Core.Viz.Phylo.Main
Description : Phylomemy Main
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TypeApplications #-}
module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
where
import Control.Lens (to, view)
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Set qualified as Set
import Data.Text qualified as Text
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core (HasDBid, withDefaultLanguage)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList ( buildPatterns, termsInText, Patterns )
import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Admin.Types.Node ( NodeType, CorpusId )
import Gargantext.Core.Viz.LegacyPhylo hiding (Svg, Dot)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus(_hc_lang) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(_hd_abstract, _hd_publication_year) )
import Gargantext.Database.Query.Table.Node(defaultList, getNodeWith)
import Gargantext.Database.Query.Table.NodeContext (selectDocs)
import Gargantext.Database.Schema.Node ( node_hyperdata )
import Gargantext.Prelude hiding (to)
type MinSizeBranch = Int
flowPhylo :: (HasNodeStory env err m, HasDBid NodeType)
=> CorpusId
-> m Phylo
flowPhylo cId = do
corpus_node <- getNodeWith cId (Proxy @HyperdataCorpus)
let lang = withDefaultLanguage $ view (node_hyperdata . to _hc_lang) corpus_node
list' <- defaultList cId
termList <- HashMap.toList <$> getTermsWith (Text.words . unNgramsTerm) [list'] NgramsTerms (Set.singleton MapTerm)
docs' <- catMaybes
<$> map (\h -> (,) <$> _hd_publication_year h
<*> _hd_abstract h
)
<$> selectDocs cId
let
patterns = buildPatterns termList
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
filterTerms patterns' (y,d) = (y, fst <$> termsInText lang patterns' d)
docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs'
--liftBase $ flowPhylo' (List.sortOn date docs) termList l m fp
pure $ buildPhylo (List.sortOn date docs) termList
-- TODO SortedList Document
flowPhylo' :: [Document] -> TermList -- ^Build
-> Level -> MinSizeBranch -- ^View
-> FilePath
-> IO FilePath
flowPhylo' corpus terms l m fp = do
let
phylo = buildPhylo corpus terms
phVie = viewPhylo l m phylo
writePhylo fp phVie
defaultQuery :: PhyloQueryBuild
defaultQuery = undefined
-- defaultQuery = defaultQueryBuild'
-- "Default Title"
-- "Default Description"
buildPhylo :: [Document] -> TermList -> Phylo
buildPhylo = trace (show defaultQuery :: Text) $ buildPhylo' defaultQuery
buildPhylo' :: PhyloQueryBuild -> [Document] -> TermList -> Phylo
buildPhylo' _ _ _ = undefined
-- buildPhylo' q corpus termList = toPhylo q corpus termList Map.empty
-- refactor 2021
-- queryView :: Level -> MinSizeBranch -> PhyloQueryView
-- queryView level _minSizeBranch = PhyloQueryView level Merge False 2
-- [BranchAge]
-- []
-- -- [SizeBranch $ SBParams minSizeBranch]
-- [BranchPeakFreq,GroupLabelCooc]
-- (Just (ByBranchAge,Asc))
-- Json Flat True
queryView :: Level -> MinSizeBranch -> PhyloQueryView
queryView _level _minSizeBranch = undefined
viewPhylo :: Level -> MinSizeBranch -> Phylo -> PhyloView
viewPhylo _l _b _phylo = undefined
-- viewPhylo l b phylo = toPhyloView (queryView l b) phylo
writePhylo :: FilePath -> PhyloView -> IO FilePath
writePhylo _fp _phview = undefined
-- writePhylo fp phview = runGraphviz (viewToDot phview) Svg fp
-- refactor 2021
-- viewPhylo2Svg :: PhyloView -> IO DB.ByteString
-- viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
......@@ -18,7 +18,6 @@ import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.GraphViz hiding (DotGraph, Order)
import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
import Data.GraphViz.Attributes.HTML qualified as H
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic
import Data.List (nub, union, (\\), (!!), init, partition, nubBy, elemIndex)
......@@ -44,20 +43,6 @@ dotToFile filePath dotG = writeFile filePath $ Text.pack $ dotToString dotG
dotToString :: DotGraph DotId -> [Char]
dotToString dotG = unpack (printDotGraph dotG)
dynamicToColor :: Int -> H.Attribute
dynamicToColor d
| d == 0 = H.BGColor (toColor LightCoral)
| d == 1 = H.BGColor (toColor Khaki)
| d == 2 = H.BGColor (toColor SkyBlue)
| otherwise = H.Color (toColor Black)
pickLabelColor :: [Int] -> H.Attribute
pickLabelColor lst
| elem 0 lst = dynamicToColor 0
| elem 1 lst = dynamicToColor 1
| elem 2 lst = dynamicToColor 2
| otherwise = dynamicToColor 3
toDotLabel :: Text.Text -> Label
toDotLabel lbl = StrLabel $ fromStrict lbl
......@@ -132,17 +117,6 @@ groupToDotNode fdt g bId =
, toAttr "seaLvl" (show ((g ^. phylo_groupMeta) ! "seaLevels"))
])
toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
toDotEdge' source target thr w edgeType = edge source target
(case edgeType of
GroupToGroup -> undefined
GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
BranchToGroup -> undefined
BranchToBranch -> undefined
GroupToAncestor -> undefined
PeriodToPeriod -> undefined)
toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
toDotEdge source target lbl edgeType = edge source target
(case edgeType of
......@@ -160,12 +134,6 @@ mergePointers groups =
toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
in unionWith (\w w' -> max w w') toChilds toParents
mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
mergePointersMemory groups =
let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
in concat [toChilds,toParents]
mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
mergeAncestors groups = concat
$ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
......@@ -699,12 +667,6 @@ toPhyloExport phylo = exportToDot phylo
$ getGroupsFromScale (phyloScale $ getConfig phylo)
$ tracePhyloInfo phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
traceExportBranches branches =
tracePhylo ("\n"
<> "-- | Export " <> show(length branches) <> " branches" :: Text) branches
tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePhyloAncestors groups =
tracePhylo ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups)
......
......@@ -401,25 +401,6 @@ groupDocsByPeriodRec f prds docs acc =
docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs
in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc)
-- To group a list of Documents by fixed periods
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = parMap rpar (inPeriode f docs') pds
in tracePhylo ("\n" <> "-- | Group "
<> show(length docs)
<> " docs by "
<> show(length pds) <> " periods" <> "\n" :: Text)
$ fromList $ zip pds periods
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [[t]] -> (b, b) -> [t]
inPeriode f' h (start,end) =
concat $ fst $ partition (\d -> f' (head' "inPeriode" d) >= start && f' (head' "inPeriode" d) <= end) h
-- To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
......
......@@ -11,7 +11,6 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.PhyloTools where
......@@ -30,7 +29,6 @@ import Data.Vector (Vector, elemIndex)
import Data.Vector qualified as Vector
import Gargantext.Core.Viz.Phylo
import Gargantext.Prelude hiding (empty)
import Text.Printf
------------
-- | Io | --
......@@ -55,35 +53,15 @@ printIOComment cmt =
-- | Misc | --
--------------
-- truncate' :: Double -> Int -> Double
-- truncate' x n = (fromIntegral (floor (x * t))) / t
-- where t = 10^n
truncate' :: Double -> Int -> Double
truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
where
--------------
t :: Double
t = 10 ^n
getInMap :: Int -> Map Int Double -> Double
getInMap k m =
if (member k m)
then m ! k
else 0
roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
roundToStr = printf "%0.*f"
countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l
dropByIdx :: Int -> [a] -> [a]
dropByIdx k l = take k l ++ drop (k+1) l
elemIndex' :: Eq a => a -> [a] -> Int
elemIndex' e l = case (List.elemIndex e l) of
Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
......@@ -265,23 +243,6 @@ traceFis msg mFis = tracePhylo ( "\n" <> "-- | " <> msg <> " : " <> show (sum $
<> "Nb Ngrams : " <> traceClique mFis <> "\n"
) mFis
----------------
-- | Cluster| --
----------------
getCliqueSupport :: Cluster -> Int
getCliqueSupport unit = case unit of
Fis s _ -> s
MaxClique _ _ _ -> 0
getCliqueSize :: Cluster -> Int
getCliqueSize unit = case unit of
Fis _ s -> s
MaxClique s _ _ -> s
--------------
-- | Cooc | --
--------------
......@@ -298,9 +259,6 @@ listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
listToMatrix :: [Int] -> Map (Int,Int) Double
listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
listToMatrix' lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
listToSeq :: Eq a => [a] -> [(a,a)]
listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ]
......@@ -313,9 +271,6 @@ getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
coocToDiago :: Cooc -> Cooc
coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
coocToAdjacency :: Cooc -> Cooc
coocToAdjacency cooc = Map.map (\_ -> 1) cooc
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc ngrams coocs =
......@@ -400,10 +355,6 @@ coocToConfidence phylo =
confidence a b inter card = maximum [(inter / card ! a),(inter / card ! b)]
sumtest :: [Int] -> [Int] -> Int
sumtest l1 l2 = (head' "test" l1) + (head' "test" $ reverse l2)
findDefaultLevel :: Phylo -> Phylo
findDefaultLevel phylo =
let confidence = Map.filterWithKey (\(a,b) _ -> a /= b)
......@@ -445,13 +396,6 @@ filterSimilarity similarity thr local' =
WeightedLogSim _ _ -> local' >= thr
Hamming _ _ -> undefined
getSimilarityName :: PhyloSimilarity -> String
getSimilarityName similarity =
case similarity of
WeightedLogJaccard _ _ -> "WLJaccard"
WeightedLogSim _ _ -> "WeightedLogSim"
Hamming _ _ -> "Hamming"
---------------
-- | Phylo | --
---------------
......@@ -490,9 +434,6 @@ getPeriodIds phylo = sortOn fst
$ keys
$ phylo ^. phylo_periods
getLastDate :: Phylo -> Date
getLastDate phylo = snd $ last' "lastDate" $ getPeriodIds phylo
getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
......@@ -566,9 +507,6 @@ setConfig config phylo = phylo
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getRootsInGroups :: Phylo -> Map Int [PhyloGroupId]
getRootsInGroups phylo = (phylo ^. phylo_foundations) ^. foundations_rootsInGroups
getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources)
......@@ -600,16 +538,6 @@ getGroupsFromScalePeriods lvl periods phylo =
. filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_scaleGroups ) phylo
getGroupsFromPeriods :: Scale -> Map Period PhyloPeriod -> [PhyloGroup]
getGroupsFromPeriods lvl periods =
elems $ view ( traverse
. phylo_periodScales
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_scaleGroups ) periods
updatePhyloGroups :: Scale -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
updatePhyloGroups lvl m phylo =
over ( phylo_periods
......@@ -724,17 +652,6 @@ traceSynchronyStart phylo =
<> " branches" <> "\n" :: Text
) phylo
-------------------
-- | Similarity | --
-------------------
getSensibility :: PhyloSimilarity -> Double
getSensibility proxi = case proxi of
WeightedLogJaccard s _ -> s
WeightedLogSim s _ -> s
Hamming _ _ -> undefined
getMinSharedNgrams :: PhyloSimilarity -> Int
getMinSharedNgrams proxi = case proxi of
WeightedLogJaccard _ m -> m
......@@ -745,21 +662,6 @@ getMinSharedNgrams proxi = case proxi of
-- | Branch | --
----------------
intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
intersectInit acc lst lst' =
if (null lst) || (null lst')
then acc
else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
else acc
branchIdsToSimilarity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
branchIdsToSimilarity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
ngramsInBranches :: [[PhyloGroup]] -> [Int]
ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
traceMatchSuccess thr qua qua' nextBranches =
tracePhylo ( "\n" <> "-- local branches : "
......
......@@ -17,7 +17,7 @@ import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (tail, intersect, nub, nubBy, union, partition)
import Data.List qualified as List
import Data.Map (fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), empty, mapKeys, adjust, filterWithKey)
import Data.Map (fromList, elems, restrictKeys, unionWith, keys, (!), empty, mapKeys, adjust, filterWithKey)
import Data.Map qualified as Map
import Data.Ord
import Data.Set qualified as Set
......@@ -315,53 +315,6 @@ getNextPeriods fil max' pId pIds =
ToParentsMemory -> undefined
{-
-- find all the candidates parents/childs of ego
-}
getCandidates :: Int -> PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
getCandidates minNgrams ego targets =
if (length (ego ^. phylo_groupNgrams)) > 1
then
map (\groups' -> filter (\g' -> (> minNgrams) $ length $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets
else
map (\groups' -> filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets
{-
-- set up and start performing the upstream/downstream inter‐temporal matching period by period
-}
reconstructTemporalLinks :: Int -> [Period] -> PhyloSimilarity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reconstructTemporalLinks frame periods similarity thr docs coocs groups =
let groups' = groupByField _phylo_groupPeriod groups
in foldl' (\acc prd ->
let -- 1) find the parents/childs matching periods
periodsPar = getNextPeriods ToParents frame prd periods
periodsChi = getNextPeriods ToChilds frame prd periods
-- 2) find the parents/childs matching candidates
candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
-- 3) find the parents/childs number of docs by years
docsPar = filterDocs docs ([prd] ++ periodsPar)
docsChi = filterDocs docs ([prd] ++ periodsChi)
-- 4) find the parents/child diago by years
diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
-- 5) match in parallel all the groups (egos) to their possible candidates
egos = map (\ego ->
let pointersPar = phyloGroupMatching (getCandidates (getMinSharedNgrams similarity) ego candidatesPar) ToParents similarity docsPar diagoPar
thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
pointersChi = phyloGroupMatching (getCandidates (getMinSharedNgrams similarity) ego candidatesChi) ToChilds similarity docsChi diagoChi
thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
in addPointers ToChilds TemporalPointer pointersChi
$ addPointers ToParents TemporalPointer pointersPar
$ addMemoryPointers ToChildsMemory TemporalPointer thr pointersChi
$ addMemoryPointers ToParentsMemory TemporalPointer thr pointersPar ego)
$ findWithDefault [] prd groups'
egos' = egos `using` parList rdeepseq
in acc ++ egos'
) [] periods
{-
-- find all the groups matching a list of ngrams
-}
......
......@@ -7,12 +7,14 @@ type-class-roots = true
roots = [ '^Main\.main$'
, '^Paths_.*'
# I'm keeping definitions whose name starts with `test`, in order to
# avoid removing something that might have value, but TODO we should
# clarify what the purpose of each is, and whether the main source tree
# is the right place for them (rather than, say, in the tests)
, '^CLI\.FilterTermsAndCooc\.testCorpus$'
, '^CLI\.FilterTermsAndCooc\.testTermList$'
# Definitions whose name (or the name of the module they are in) suggests
# there is a good reason for them not to be included: "test", "example"
# Name begins with "test" or "trace":
, '.*\.test[^\.]*$'
, '.*\.trace[^\.]*$'
# Module is named "Example":
, '.*\.Example\.[^\.]*$'
# Useful in the REPL. TODO go through each function in this module ---
# I don't think we need that many variations around `runCmd`?
......@@ -23,6 +25,7 @@ roots = [ '^Main\.main$'
# Used by the tests
# TODO this should probably moved to the tests?
, '^Gargantext\.Core\.Text\.Corpus\.API\.Pubmed\.convertQuery$'
, '^Gargantext\.Core\.Text\.Corpus\.API\.Pubmed\.getESearch$'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment