Commit ec1b4dd1 authored by Grégoire Locqueville's avatar Grégoire Locqueville

Removed dead phylo-related code

parent 3d9b4e21
Pipeline #7136 passed with stages
in 68 minutes and 11 seconds
...@@ -401,7 +401,6 @@ library ...@@ -401,7 +401,6 @@ library
Gargantext.Core.Viz.Graph.Tools.Infomap Gargantext.Core.Viz.Graph.Tools.Infomap
Gargantext.Core.Viz.Graph.Utils Gargantext.Core.Viz.Graph.Utils
Gargantext.Core.Viz.LegacyPhylo Gargantext.Core.Viz.LegacyPhylo
Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
Gargantext.Core.Viz.Phylo.TemporalMatching Gargantext.Core.Viz.Phylo.TemporalMatching
Gargantext.Data.HashMap.Strict.Utils Gargantext.Data.HashMap.Strict.Utils
Gargantext.Database Gargantext.Database
......
...@@ -24,7 +24,7 @@ import Gargantext.API.Worker (WorkerAPI) ...@@ -24,7 +24,7 @@ import Gargantext.API.Worker (WorkerAPI)
import Gargantext.Core.Types (ListId, NodeId) import Gargantext.Core.Types (ListId, NodeId)
import Gargantext.Core.Viz.Graph.Types (Graph, GraphLegendAPI, GraphVersions(..), HyperdataGraphAPI) import Gargantext.Core.Viz.Graph.Types (Graph, GraphLegendAPI, GraphVersions(..), HyperdataGraphAPI)
import Gargantext.Core.Viz.LegacyPhylo (Level) import Gargantext.Core.Viz.LegacyPhylo (Level)
import Gargantext.Core.Viz.Phylo.Legacy.LegacyMain (MinSizeBranch) import Gargantext.Prelude (Int)
import Servant import Servant
import Servant.XML.Conduit (XML) import Servant.XML.Conduit (XML)
...@@ -38,7 +38,7 @@ data PhyloAPI mode = PhyloAPI ...@@ -38,7 +38,7 @@ data PhyloAPI mode = PhyloAPI
newtype GetPhylo mode = GetPhylo newtype GetPhylo mode = GetPhylo
{ getPhyloDataEp :: mode :- QueryParam "listId" ListId { getPhyloDataEp :: mode :- QueryParam "listId" ListId
:> QueryParam "level" Level :> QueryParam "level" Level
:> QueryParam "minSizeBranch" MinSizeBranch :> QueryParam "minSizeBranch" Int
:> Get '[JSON] PhyloData :> Get '[JSON] PhyloData
} deriving Generic } deriving Generic
......
...@@ -21,13 +21,10 @@ module Gargantext.Core.Viz.Phylo.API.Tools ...@@ -21,13 +21,10 @@ module Gargantext.Core.Viz.Phylo.API.Tools
import Control.Lens (to, view) import Control.Lens (to, view)
import Data.Aeson (Value, decodeFileStrict, encode, eitherDecodeFileStrict') import Data.Aeson (Value, decodeFileStrict, encode, eitherDecodeFileStrict')
import Data.ByteString.Lazy qualified as Lazy import Data.ByteString.Lazy qualified as Lazy
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text (pack) import Data.Text (pack)
import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian) import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
import Data.Time.Clock.POSIX(posixSecondsToUTCTime) import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
import Gargantext.API.Ngrams.Prelude (getTermList) import Gargantext.API.Ngrams.Prelude (getTermList)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core (withDefaultLanguage, Lang) import Gargantext.Core (withDefaultLanguage, Lang)
import Gargantext.Core.NodeStory.Types (HasNodeStory) import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
...@@ -40,12 +37,12 @@ import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} se ...@@ -40,12 +37,12 @@ import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} se
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus(..) ) import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus(..) )
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(..) ) import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(..) )
import Gargantext.Database.Admin.Types.Node (Context, CorpusId, ContextId, PhyloId, nodeId2ContextId) import Gargantext.Database.Admin.Types.Node (CorpusId, PhyloId)
import Gargantext.Database.Prelude (DBCmd) import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith) import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes) import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata, _context_id) ) import Gargantext.Database.Schema.Context ( ContextPoly(_context_hyperdata) )
import Gargantext.Database.Schema.Node ( NodePoly(_node_hyperdata), node_hyperdata ) import Gargantext.Database.Schema.Node ( NodePoly(_node_hyperdata), node_hyperdata )
import Gargantext.Prelude hiding (to) import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( MonadLogger, LogLevel(DEBUG), logLocM ) import Gargantext.System.Logging ( MonadLogger, LogLevel(DEBUG), logLocM )
...@@ -62,12 +59,6 @@ getPhyloData phyloId = do ...@@ -62,12 +59,6 @@ getPhyloData phyloId = do
nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo) nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo)
pure $ _hp_data $ _node_hyperdata nodePhylo pure $ _hp_data $ _node_hyperdata nodePhylo
putPhylo :: PhyloId -> DBCmd err Phylo
putPhylo = undefined
savePhylo :: PhyloId -> DBCmd err ()
savePhylo = undefined
-------------------------------------------------------------------- --------------------------------------------------------------------
maybePhylo2dot2json :: Maybe Phylo -> IO (Maybe Value) maybePhylo2dot2json :: Maybe Phylo -> IO (Maybe Value)
maybePhylo2dot2json Nothing = pure Nothing maybePhylo2dot2json Nothing = pure Nothing
...@@ -162,35 +153,6 @@ toPhyloDocs lang patterns time d = ...@@ -162,35 +153,6 @@ toPhyloDocs lang patterns time d =
(termsInText' lang patterns $ title <> " " <> abstr) Nothing [] time (termsInText' lang patterns $ title <> " " <> abstr) Nothing [] time
context2phyloDocument :: TimeUnit
-> Context HyperdataDocument
-> (Map ContextId (Set NgramsTerm), Map ContextId (Set NgramsTerm))
-> Maybe Document
context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
let contextId = _context_id context
(date, date') <- context2date context timeUnit
let
toText x = Set.toList $ Set.map unNgramsTerm x
text' = maybe [] toText $ Map.lookup (nodeId2ContextId contextId) ngs_terms
sources' = maybe [] toText $ Map.lookup (nodeId2ContextId contextId) ngs_sources
pure $ Document date date' text' Nothing sources' (Year 3 1 5)
-- TODO better default date and log the errors to improve data quality
context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
context2date context timeUnit = do
let hyperdata = _context_hyperdata context
let
year = fromMaybe 1 $ _hd_publication_year hyperdata
month = fromMaybe 1 $ _hd_publication_month hyperdata
day = fromMaybe 1 $ _hd_publication_day hyperdata
pure (toPhyloDate year month day timeUnit, toPhyloDate' year month day timeUnit)
--------------- ---------------
-- | Dates | -- -- | Dates | --
--------------- ---------------
......
{-|
Module : Gargantext.Core.Viz.Phylo.Main
Description : Phylomemy Main
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TypeApplications #-}
module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
where
import Control.Lens (to, view)
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Set qualified as Set
import Data.Text qualified as Text
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core (HasDBid, withDefaultLanguage)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList ( buildPatterns, termsInText, Patterns )
import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Admin.Types.Node ( NodeType, CorpusId )
import Gargantext.Core.Viz.LegacyPhylo hiding (Svg, Dot)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus(_hc_lang) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(_hd_abstract, _hd_publication_year) )
import Gargantext.Database.Query.Table.Node(defaultList, getNodeWith)
import Gargantext.Database.Query.Table.NodeContext (selectDocs)
import Gargantext.Database.Schema.Node ( node_hyperdata )
import Gargantext.Prelude hiding (to)
type MinSizeBranch = Int
flowPhylo :: (HasNodeStory env err m, HasDBid NodeType)
=> CorpusId
-> m Phylo
flowPhylo cId = do
corpus_node <- getNodeWith cId (Proxy @HyperdataCorpus)
let lang = withDefaultLanguage $ view (node_hyperdata . to _hc_lang) corpus_node
list' <- defaultList cId
termList <- HashMap.toList <$> getTermsWith (Text.words . unNgramsTerm) [list'] NgramsTerms (Set.singleton MapTerm)
docs' <- catMaybes
<$> map (\h -> (,) <$> _hd_publication_year h
<*> _hd_abstract h
)
<$> selectDocs cId
let
patterns = buildPatterns termList
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
filterTerms patterns' (y,d) = (y, fst <$> termsInText lang patterns' d)
docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs'
--liftBase $ flowPhylo' (List.sortOn date docs) termList l m fp
pure $ buildPhylo (List.sortOn date docs) termList
-- TODO SortedList Document
flowPhylo' :: [Document] -> TermList -- ^Build
-> Level -> MinSizeBranch -- ^View
-> FilePath
-> IO FilePath
flowPhylo' corpus terms l m fp = do
let
phylo = buildPhylo corpus terms
phVie = viewPhylo l m phylo
writePhylo fp phVie
defaultQuery :: PhyloQueryBuild
defaultQuery = undefined
-- defaultQuery = defaultQueryBuild'
-- "Default Title"
-- "Default Description"
buildPhylo :: [Document] -> TermList -> Phylo
buildPhylo = trace (show defaultQuery :: Text) $ buildPhylo' defaultQuery
buildPhylo' :: PhyloQueryBuild -> [Document] -> TermList -> Phylo
buildPhylo' _ _ _ = undefined
-- buildPhylo' q corpus termList = toPhylo q corpus termList Map.empty
-- refactor 2021
-- queryView :: Level -> MinSizeBranch -> PhyloQueryView
-- queryView level _minSizeBranch = PhyloQueryView level Merge False 2
-- [BranchAge]
-- []
-- -- [SizeBranch $ SBParams minSizeBranch]
-- [BranchPeakFreq,GroupLabelCooc]
-- (Just (ByBranchAge,Asc))
-- Json Flat True
queryView :: Level -> MinSizeBranch -> PhyloQueryView
queryView _level _minSizeBranch = undefined
viewPhylo :: Level -> MinSizeBranch -> Phylo -> PhyloView
viewPhylo _l _b _phylo = undefined
-- viewPhylo l b phylo = toPhyloView (queryView l b) phylo
writePhylo :: FilePath -> PhyloView -> IO FilePath
writePhylo _fp _phview = undefined
-- writePhylo fp phview = runGraphviz (viewToDot phview) Svg fp
-- refactor 2021
-- viewPhylo2Svg :: PhyloView -> IO DB.ByteString
-- viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
...@@ -18,7 +18,6 @@ import Control.Lens hiding (Level) ...@@ -18,7 +18,6 @@ import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.GraphViz hiding (DotGraph, Order) import Data.GraphViz hiding (DotGraph, Order)
import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order) import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
import Data.GraphViz.Attributes.HTML qualified as H
import Data.GraphViz.Types.Generalised (DotGraph) import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic import Data.GraphViz.Types.Monadic
import Data.List (nub, union, (\\), (!!), init, partition, nubBy, elemIndex) import Data.List (nub, union, (\\), (!!), init, partition, nubBy, elemIndex)
...@@ -44,20 +43,6 @@ dotToFile filePath dotG = writeFile filePath $ Text.pack $ dotToString dotG ...@@ -44,20 +43,6 @@ dotToFile filePath dotG = writeFile filePath $ Text.pack $ dotToString dotG
dotToString :: DotGraph DotId -> [Char] dotToString :: DotGraph DotId -> [Char]
dotToString dotG = unpack (printDotGraph dotG) dotToString dotG = unpack (printDotGraph dotG)
dynamicToColor :: Int -> H.Attribute
dynamicToColor d
| d == 0 = H.BGColor (toColor LightCoral)
| d == 1 = H.BGColor (toColor Khaki)
| d == 2 = H.BGColor (toColor SkyBlue)
| otherwise = H.Color (toColor Black)
pickLabelColor :: [Int] -> H.Attribute
pickLabelColor lst
| elem 0 lst = dynamicToColor 0
| elem 1 lst = dynamicToColor 1
| elem 2 lst = dynamicToColor 2
| otherwise = dynamicToColor 3
toDotLabel :: Text.Text -> Label toDotLabel :: Text.Text -> Label
toDotLabel lbl = StrLabel $ fromStrict lbl toDotLabel lbl = StrLabel $ fromStrict lbl
...@@ -132,17 +117,6 @@ groupToDotNode fdt g bId = ...@@ -132,17 +117,6 @@ groupToDotNode fdt g bId =
, toAttr "seaLvl" (show ((g ^. phylo_groupMeta) ! "seaLevels")) , toAttr "seaLvl" (show ((g ^. phylo_groupMeta) ! "seaLevels"))
]) ])
toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
toDotEdge' source target thr w edgeType = edge source target
(case edgeType of
GroupToGroup -> undefined
GroupToGroupMemory -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "memoryLink", toAttr "thr" (pack thr), toAttr "weight" (pack w)]
BranchToGroup -> undefined
BranchToBranch -> undefined
GroupToAncestor -> undefined
PeriodToPeriod -> undefined)
toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
toDotEdge source target lbl edgeType = edge source target toDotEdge source target lbl edgeType = edge source target
(case edgeType of (case edgeType of
...@@ -160,12 +134,6 @@ mergePointers groups = ...@@ -160,12 +134,6 @@ mergePointers groups =
toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
in unionWith (\w w' -> max w w') toChilds toParents in unionWith (\w w' -> max w w') toChilds toParents
mergePointersMemory :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId),(Double,Double))]
mergePointersMemory groups =
let toChilds = concat $ map (\g -> map (\(target,(t,w)) -> ((getGroupId g,target),(t,w))) $ g ^. phylo_groupPeriodMemoryChilds) groups
toParents = concat $ map (\g -> map (\(target,(t,w)) -> ((target,getGroupId g),(t,w))) $ g ^. phylo_groupPeriodMemoryParents) groups
in concat [toChilds,toParents]
mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)] mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
mergeAncestors groups = concat mergeAncestors groups = concat
$ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors) $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
...@@ -699,12 +667,6 @@ toPhyloExport phylo = exportToDot phylo ...@@ -699,12 +667,6 @@ toPhyloExport phylo = exportToDot phylo
$ getGroupsFromScale (phyloScale $ getConfig phylo) $ getGroupsFromScale (phyloScale $ getConfig phylo)
$ tracePhyloInfo phylo $ tracePhyloInfo phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
traceExportBranches branches =
tracePhylo ("\n"
<> "-- | Export " <> show(length branches) <> " branches" :: Text) branches
tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]] tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePhyloAncestors groups = tracePhyloAncestors groups =
tracePhylo ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) tracePhylo ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups)
......
...@@ -401,25 +401,6 @@ groupDocsByPeriodRec f prds docs acc = ...@@ -401,25 +401,6 @@ groupDocsByPeriodRec f prds docs acc =
docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs docs' = partition (\d -> (f d >= fst prd) && (f d <= snd prd)) docs
in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc) in groupDocsByPeriodRec f (tail prds) (snd docs') (insert prd (fst docs') acc)
-- To group a list of Documents by fixed periods
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = parMap rpar (inPeriode f docs') pds
in tracePhylo ("\n" <> "-- | Group "
<> show(length docs)
<> " docs by "
<> show(length pds) <> " periods" <> "\n" :: Text)
$ fromList $ zip pds periods
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [[t]] -> (b, b) -> [t]
inPeriode f' h (start,end) =
concat $ fst $ partition (\d -> f' (head' "inPeriode" d) >= start && f' (head' "inPeriode" d) <= end) h
-- To group a list of Documents by fixed periods -- To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods" groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
......
...@@ -11,7 +11,6 @@ Portability : POSIX ...@@ -11,7 +11,6 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-} {-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.PhyloTools where module Gargantext.Core.Viz.Phylo.PhyloTools where
...@@ -30,7 +29,6 @@ import Data.Vector (Vector, elemIndex) ...@@ -30,7 +29,6 @@ import Data.Vector (Vector, elemIndex)
import Data.Vector qualified as Vector import Data.Vector qualified as Vector
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Prelude hiding (empty) import Gargantext.Prelude hiding (empty)
import Text.Printf
------------ ------------
-- | Io | -- -- | Io | --
...@@ -55,35 +53,15 @@ printIOComment cmt = ...@@ -55,35 +53,15 @@ printIOComment cmt =
-- | Misc | -- -- | Misc | --
-------------- --------------
-- truncate' :: Double -> Int -> Double
-- truncate' x n = (fromIntegral (floor (x * t))) / t
-- where t = 10^n
truncate' :: Double -> Int -> Double
truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
where
--------------
t :: Double
t = 10 ^n
getInMap :: Int -> Map Int Double -> Double getInMap :: Int -> Map Int Double -> Double
getInMap k m = getInMap k m =
if (member k m) if (member k m)
then m ! k then m ! k
else 0 else 0
roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
roundToStr = printf "%0.*f"
countSup :: Double -> [Double] -> Int countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l countSup s l = length $ filter (>s) l
dropByIdx :: Int -> [a] -> [a]
dropByIdx k l = take k l ++ drop (k+1) l
elemIndex' :: Eq a => a -> [a] -> Int elemIndex' :: Eq a => a -> [a] -> Int
elemIndex' e l = case (List.elemIndex e l) of elemIndex' e l = case (List.elemIndex e l) of
Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list") Nothing -> panic ("[ERR][Viz.Phylo.PhyloTools] element not in list")
...@@ -265,23 +243,6 @@ traceFis msg mFis = tracePhylo ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ ...@@ -265,23 +243,6 @@ traceFis msg mFis = tracePhylo ( "\n" <> "-- | " <> msg <> " : " <> show (sum $
<> "Nb Ngrams : " <> traceClique mFis <> "\n" <> "Nb Ngrams : " <> traceClique mFis <> "\n"
) mFis ) mFis
----------------
-- | Cluster| --
----------------
getCliqueSupport :: Cluster -> Int
getCliqueSupport unit = case unit of
Fis s _ -> s
MaxClique _ _ _ -> 0
getCliqueSize :: Cluster -> Int
getCliqueSize unit = case unit of
Fis _ s -> s
MaxClique s _ _ -> s
-------------- --------------
-- | Cooc | -- -- | Cooc | --
-------------- --------------
...@@ -298,9 +259,6 @@ listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst) ...@@ -298,9 +259,6 @@ listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
listToMatrix :: [Int] -> Map (Int,Int) Double listToMatrix :: [Int] -> Map (Int,Int) Double
listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
listToMatrix' :: [Ngrams] -> Map (Ngrams,Ngrams) Int
listToMatrix' lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
listToSeq :: Eq a => [a] -> [(a,a)] listToSeq :: Eq a => [a] -> [(a,a)]
listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ] listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y <- rest ]
...@@ -313,9 +271,6 @@ getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc ...@@ -313,9 +271,6 @@ getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
coocToDiago :: Cooc -> Cooc coocToDiago :: Cooc -> Cooc
coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc coocToDiago cooc = filterWithKey (\(k,k') _ -> k == k') cooc
coocToAdjacency :: Cooc -> Cooc
coocToAdjacency cooc = Map.map (\_ -> 1) cooc
-- | To build the local cooc matrix of each phylogroup -- | To build the local cooc matrix of each phylogroup
ngramsToCooc :: [Int] -> [Cooc] -> Cooc ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc ngrams coocs = ngramsToCooc ngrams coocs =
...@@ -400,10 +355,6 @@ coocToConfidence phylo = ...@@ -400,10 +355,6 @@ coocToConfidence phylo =
confidence a b inter card = maximum [(inter / card ! a),(inter / card ! b)] confidence a b inter card = maximum [(inter / card ! a),(inter / card ! b)]
sumtest :: [Int] -> [Int] -> Int
sumtest l1 l2 = (head' "test" l1) + (head' "test" $ reverse l2)
findDefaultLevel :: Phylo -> Phylo findDefaultLevel :: Phylo -> Phylo
findDefaultLevel phylo = findDefaultLevel phylo =
let confidence = Map.filterWithKey (\(a,b) _ -> a /= b) let confidence = Map.filterWithKey (\(a,b) _ -> a /= b)
...@@ -445,13 +396,6 @@ filterSimilarity similarity thr local' = ...@@ -445,13 +396,6 @@ filterSimilarity similarity thr local' =
WeightedLogSim _ _ -> local' >= thr WeightedLogSim _ _ -> local' >= thr
Hamming _ _ -> undefined Hamming _ _ -> undefined
getSimilarityName :: PhyloSimilarity -> String
getSimilarityName similarity =
case similarity of
WeightedLogJaccard _ _ -> "WLJaccard"
WeightedLogSim _ _ -> "WeightedLogSim"
Hamming _ _ -> "Hamming"
--------------- ---------------
-- | Phylo | -- -- | Phylo | --
--------------- ---------------
...@@ -490,9 +434,6 @@ getPeriodIds phylo = sortOn fst ...@@ -490,9 +434,6 @@ getPeriodIds phylo = sortOn fst
$ keys $ keys
$ phylo ^. phylo_periods $ phylo ^. phylo_periods
getLastDate :: Phylo -> Date
getLastDate phylo = snd $ last' "lastDate" $ getPeriodIds phylo
getLevelParentId :: PhyloGroup -> PhyloGroupId getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
...@@ -566,9 +507,6 @@ setConfig config phylo = phylo ...@@ -566,9 +507,6 @@ setConfig config phylo = phylo
getRoots :: Phylo -> Vector Ngrams getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getRootsInGroups :: Phylo -> Map Int [PhyloGroupId]
getRootsInGroups phylo = (phylo ^. phylo_foundations) ^. foundations_rootsInGroups
getSources :: Phylo -> Vector Text getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources) getSources phylo = _sources (phylo ^. phylo_sources)
...@@ -600,16 +538,6 @@ getGroupsFromScalePeriods lvl periods phylo = ...@@ -600,16 +538,6 @@ getGroupsFromScalePeriods lvl periods phylo =
. filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl) . filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_scaleGroups ) phylo . phylo_scaleGroups ) phylo
getGroupsFromPeriods :: Scale -> Map Period PhyloPeriod -> [PhyloGroup]
getGroupsFromPeriods lvl periods =
elems $ view ( traverse
. phylo_periodScales
. traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_scaleScale == lvl)
. phylo_scaleGroups ) periods
updatePhyloGroups :: Scale -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo updatePhyloGroups :: Scale -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
updatePhyloGroups lvl m phylo = updatePhyloGroups lvl m phylo =
over ( phylo_periods over ( phylo_periods
...@@ -724,17 +652,6 @@ traceSynchronyStart phylo = ...@@ -724,17 +652,6 @@ traceSynchronyStart phylo =
<> " branches" <> "\n" :: Text <> " branches" <> "\n" :: Text
) phylo ) phylo
-------------------
-- | Similarity | --
-------------------
getSensibility :: PhyloSimilarity -> Double
getSensibility proxi = case proxi of
WeightedLogJaccard s _ -> s
WeightedLogSim s _ -> s
Hamming _ _ -> undefined
getMinSharedNgrams :: PhyloSimilarity -> Int getMinSharedNgrams :: PhyloSimilarity -> Int
getMinSharedNgrams proxi = case proxi of getMinSharedNgrams proxi = case proxi of
WeightedLogJaccard _ m -> m WeightedLogJaccard _ m -> m
...@@ -745,21 +662,6 @@ getMinSharedNgrams proxi = case proxi of ...@@ -745,21 +662,6 @@ getMinSharedNgrams proxi = case proxi of
-- | Branch | -- -- | Branch | --
---------------- ----------------
intersectInit :: Eq a => [a] -> [a] -> [a] -> [a]
intersectInit acc lst lst' =
if (null lst) || (null lst')
then acc
else if (head' "intersectInit" lst) == (head' "intersectInit" lst')
then intersectInit (acc ++ [head' "intersectInit" lst]) (tail lst) (tail lst')
else acc
branchIdsToSimilarity :: PhyloBranchId -> PhyloBranchId -> Double -> Double -> Double
branchIdsToSimilarity id id' thrInit thrStep = thrInit + thrStep * (fromIntegral $ length $ intersectInit [] (snd id) (snd id'))
ngramsInBranches :: [[PhyloGroup]] -> [Int]
ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgrams)) [] $ concat branches
traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]] traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
traceMatchSuccess thr qua qua' nextBranches = traceMatchSuccess thr qua qua' nextBranches =
tracePhylo ( "\n" <> "-- local branches : " tracePhylo ( "\n" <> "-- local branches : "
......
...@@ -17,7 +17,7 @@ import Control.Lens hiding (Level) ...@@ -17,7 +17,7 @@ import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (tail, intersect, nub, nubBy, union, partition) import Data.List (tail, intersect, nub, nubBy, union, partition)
import Data.List qualified as List import Data.List qualified as List
import Data.Map (fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), empty, mapKeys, adjust, filterWithKey) import Data.Map (fromList, elems, restrictKeys, unionWith, keys, (!), empty, mapKeys, adjust, filterWithKey)
import Data.Map qualified as Map import Data.Map qualified as Map
import Data.Ord import Data.Ord
import Data.Set qualified as Set import Data.Set qualified as Set
...@@ -315,53 +315,6 @@ getNextPeriods fil max' pId pIds = ...@@ -315,53 +315,6 @@ getNextPeriods fil max' pId pIds =
ToParentsMemory -> undefined ToParentsMemory -> undefined
{-
-- find all the candidates parents/childs of ego
-}
getCandidates :: Int -> PhyloGroup -> [[(PhyloGroupId,[Int])]] -> [[(PhyloGroupId,[Int])]]
getCandidates minNgrams ego targets =
if (length (ego ^. phylo_groupNgrams)) > 1
then
map (\groups' -> filter (\g' -> (> minNgrams) $ length $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets
else
map (\groups' -> filter (\g' -> (not . null) $ intersect (ego ^. phylo_groupNgrams) (snd g')) groups') targets
{-
-- set up and start performing the upstream/downstream inter‐temporal matching period by period
-}
reconstructTemporalLinks :: Int -> [Period] -> PhyloSimilarity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reconstructTemporalLinks frame periods similarity thr docs coocs groups =
let groups' = groupByField _phylo_groupPeriod groups
in foldl' (\acc prd ->
let -- 1) find the parents/childs matching periods
periodsPar = getNextPeriods ToParents frame prd periods
periodsChi = getNextPeriods ToChilds frame prd periods
-- 2) find the parents/childs matching candidates
candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
-- 3) find the parents/childs number of docs by years
docsPar = filterDocs docs ([prd] ++ periodsPar)
docsChi = filterDocs docs ([prd] ++ periodsChi)
-- 4) find the parents/child diago by years
diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
-- 5) match in parallel all the groups (egos) to their possible candidates
egos = map (\ego ->
let pointersPar = phyloGroupMatching (getCandidates (getMinSharedNgrams similarity) ego candidatesPar) ToParents similarity docsPar diagoPar
thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
pointersChi = phyloGroupMatching (getCandidates (getMinSharedNgrams similarity) ego candidatesChi) ToChilds similarity docsChi diagoChi
thr (getPeriodPointers ToChilds ego) (getGroupId ego, ego ^. phylo_groupNgrams)
in addPointers ToChilds TemporalPointer pointersChi
$ addPointers ToParents TemporalPointer pointersPar
$ addMemoryPointers ToChildsMemory TemporalPointer thr pointersChi
$ addMemoryPointers ToParentsMemory TemporalPointer thr pointersPar ego)
$ findWithDefault [] prd groups'
egos' = egos `using` parList rdeepseq
in acc ++ egos'
) [] periods
{- {-
-- find all the groups matching a list of ngrams -- find all the groups matching a list of ngrams
-} -}
......
...@@ -7,12 +7,14 @@ type-class-roots = true ...@@ -7,12 +7,14 @@ type-class-roots = true
roots = [ '^Main\.main$' roots = [ '^Main\.main$'
, '^Paths_.*' , '^Paths_.*'
# I'm keeping definitions whose name starts with `test`, in order to # Definitions whose name (or the name of the module they are in) suggests
# avoid removing something that might have value, but TODO we should # there is a good reason for them not to be included: "test", "example"
# clarify what the purpose of each is, and whether the main source tree
# is the right place for them (rather than, say, in the tests) # Name begins with "test" or "trace":
, '^CLI\.FilterTermsAndCooc\.testCorpus$' , '.*\.test[^\.]*$'
, '^CLI\.FilterTermsAndCooc\.testTermList$' , '.*\.trace[^\.]*$'
# Module is named "Example":
, '.*\.Example\.[^\.]*$'
# Useful in the REPL. TODO go through each function in this module --- # Useful in the REPL. TODO go through each function in this module ---
# I don't think we need that many variations around `runCmd`? # I don't think we need that many variations around `runCmd`?
...@@ -23,6 +25,7 @@ roots = [ '^Main\.main$' ...@@ -23,6 +25,7 @@ roots = [ '^Main\.main$'
# Used by the tests # Used by the tests
# TODO this should probably moved to the tests?
, '^Gargantext\.Core\.Text\.Corpus\.API\.Pubmed\.convertQuery$' , '^Gargantext\.Core\.Text\.Corpus\.API\.Pubmed\.convertQuery$'
, '^Gargantext\.Core\.Text\.Corpus\.API\.Pubmed\.getESearch$' , '^Gargantext\.Core\.Text\.Corpus\.API\.Pubmed\.getESearch$'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment