Commit e1b6117a authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] FlowPhylo implemented

parent 25cdbe65
......@@ -19,15 +19,16 @@ import Control.Lens hiding (elements, Indexed)
import Data.Aeson
import Data.Either (Either(..))
import Data.HashMap.Strict (HashMap)
import Data.Map (Map, toList, fromList)
import Data.Map (Map, toList)
import Data.Maybe (catMaybes)
import Data.Set (Set)
import Data.Text (Text, concat, pack)
import Data.Vector (Vector)
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams)
import Gargantext.API.Ngrams (setListNgrams)
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Prelude (getNgramsList)
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.NodeStory
......@@ -110,7 +111,7 @@ csvApi = csvPostAsync
get :: HasNodeStory env err m =>
ListId -> m (Headers '[Header "Content-Disposition" Text] NgramsList)
get lId = do
lst <- get' lId
lst <- getNgramsList lId
let (NodeId id') = lId
return $ addHeader (concat [ "attachment; filename=GarganText_NgramsList-"
, pack $ show id'
......@@ -118,12 +119,6 @@ get lId = do
]
) lst
get' :: HasNodeStory env err m
=> ListId -> m NgramsList
get' lId = fromList
<$> zip ngramsTypes
<$> mapM (getNgramsTableMap lId) ngramsTypes
------------------------------------------------------------------------
-- TODO : purge list
-- TODO talk
......
......@@ -16,19 +16,38 @@ module Gargantext.API.Ngrams.Prelude
import Data.Maybe (catMaybes)
import Control.Lens (view)
import Data.Map (fromList)
import Data.Hashable (Hashable)
import Data.Validity
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (ListType)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypes)
import Gargantext.Prelude
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.API.Ngrams (getNgramsTableMap)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Database.Admin.Types.Node (ListId)
import qualified Data.HashMap.Strict as HM
import qualified Data.Map.Strict as Map
import qualified Data.List as List
import qualified Data.Text as Text
------------------------------------------------------------------------
getNgramsList :: HasNodeStory env err m
=> ListId -> m NgramsList
getNgramsList lId = fromList
<$> zip ngramsTypes
<$> mapM (getNgramsTableMap lId) ngramsTypes
getTermList :: HasNodeStory env err m
=> ListId -> ListType -> NgramsType -> m (Maybe TermList)
getTermList lId listType ngramsType = do
ngramsList <- getNgramsList lId
pure $ toTermList listType ngramsType ngramsList
------------------------------------------------------------------------
-- | Tools
-- Usage example: toTermList MapTerm NgramsTerms ngramsList
......
......@@ -15,7 +15,7 @@ module Gargantext.Core.Viz.Phylo.PhyloExport where
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.GraphViz hiding (DotGraph, Order)
import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic
import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
......@@ -45,7 +45,7 @@ dotToString :: DotGraph DotId -> [Char]
dotToString dotG = unpack (printDotGraph dotG)
dynamicToColor :: Double -> H.Attribute
dynamicToColor d
dynamicToColor d
| d == 0 = H.BGColor (toColor LightCoral)
| d == 1 = H.BGColor (toColor Khaki)
| d == 2 = H.BGColor (toColor SkyBlue)
......@@ -56,7 +56,7 @@ pickLabelColor lst
| elem 0 lst = dynamicToColor 0
| elem 2 lst = dynamicToColor 2
| elem 1 lst = dynamicToColor 1
| otherwise = dynamicToColor 3
| otherwise = dynamicToColor 3
toDotLabel :: Text.Text -> Label
toDotLabel lbl = StrLabel $ fromStrict lbl
......@@ -82,30 +82,30 @@ groupToTable fdt g = H.Table H.HTable
, H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
, H.tableRows = [header]
<> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
<> ( map ngramsToRow $ splitEvery 4
<> ( map ngramsToRow $ splitEvery 4
$ reverse $ sortOn (snd . snd)
$ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
$ zip (ngramsToText fdt (g ^. phylo_groupNgrams))
$ zip ((g ^. phylo_groupMeta) ! "dynamics") ((g ^. phylo_groupMeta) ! "inclusion"))}
where
--------------------------------------
ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
ngramsToRow ns = H.Cells $ map (\(n,(d,_)) ->
H.LabelCell [H.Align H.HLeft,dynamicToColor d] $ H.Text [H.Str $ fromStrict n]) ns
--------------------------------------
header :: H.Row
header =
H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
header =
H.Cells [ H.LabelCell [pickLabelColor ((g ^. phylo_groupMeta) ! "dynamics")]
$ H.Text [H.Str $ (((fromStrict . Text.toUpper) $ g ^. phylo_groupLabel)
<> (fromStrict " ( ")
<> (pack $ show (fst $ g ^. phylo_groupPeriod))
<> (fromStrict " , ")
<> (pack $ show (snd $ g ^. phylo_groupPeriod))
<> (fromStrict " ) ")
<> (pack $ show (getGroupId g)))]]
<> (pack $ show (getGroupId g)))]]
--------------------------------------
branchToDotNode :: PhyloBranch -> Int -> Dot DotId
branchToDotNode b bId =
branchToDotNode b bId =
node (branchIdToDotId $ b ^. branch_id)
([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ b ^. branch_label)]
<> (metaToAttr $ b ^. branch_meta)
......@@ -116,20 +116,20 @@ branchToDotNode b bId =
, toAttr "branch_y" (fromStrict $ Text.pack $ (show $ b ^. branch_y))
, toAttr "label" (pack $ show $ b ^. branch_label)
])
periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
periodToDotNode prd prd' =
node (periodIdToDotId prd)
([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
<> [ toAttr "nodeType" "period"
, toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
, toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
, toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
, toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
, toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
groupToDotNode :: Vector Ngrams -> PhyloGroup -> Int -> Dot DotId
groupToDotNode fdt g bId =
groupToDotNode fdt g bId =
node (groupIdToDotId $ getGroupId g)
([FontName "Arial", Shape Square, penWidth 4, toLabel (groupToTable fdt g)]
<> [ toAttr "nodeType" "group"
......@@ -137,7 +137,7 @@ groupToDotNode fdt g bId =
, toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
, toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
, toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
, toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
, toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
, toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
, toAttr "bId" (pack $ show bId)
, toAttr "support" (pack $ show (g ^. phylo_groupSupport))
......@@ -149,7 +149,7 @@ groupToDotNode fdt g bId =
, toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
, toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
, toAttr "seaLvl" (pack $ show ((g ^. phylo_groupMeta) ! "seaLevels"))
])
])
toDotEdge' :: DotId -> DotId -> [Char] -> [Char] -> EdgeType -> Dot DotId
......@@ -175,7 +175,7 @@ toDotEdge source target lbl edgeType = edge source target
mergePointers :: [PhyloGroup] -> Map (PhyloGroupId,PhyloGroupId) Double
mergePointers groups =
mergePointers groups =
let toChilds = fromList $ concat $ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupPeriodChilds) groups
toParents = fromList $ concat $ map (\g -> map (\(target,w) -> ((target,getGroupId g),w)) $ g ^. phylo_groupPeriodParents) groups
in unionWith (\w w' -> max w w') toChilds toParents
......@@ -188,22 +188,22 @@ mergePointersMemory groups =
mergeAncestors :: [PhyloGroup] -> [((PhyloGroupId,PhyloGroupId), Double)]
mergeAncestors groups = concat
$ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
$ map (\g -> map (\(target,w) -> ((getGroupId g,target),w)) $ g ^. phylo_groupAncestors)
$ filter (\g -> (not . null) $ g ^. phylo_groupAncestors) groups
toBid :: PhyloGroup -> [PhyloBranch] -> Int
toBid g bs =
toBid g bs =
let b' = head' "toBid" (filter (\b -> b ^. branch_id == g ^. phylo_groupBranchId) bs)
in fromJust $ elemIndex b' bs
exportToDot :: Phylo -> PhyloExport -> DotGraph DotId
exportToDot phylo export =
exportToDot phylo export =
trace ("\n-- | Convert " <> show(length $ export ^. export_branches) <> " branches and "
<> show(length $ export ^. export_groups) <> " groups "
<> show(length $ export ^. export_groups) <> " groups "
<> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups) <> " terms to a dot file\n\n"
<> "##########################") $
digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
{- 1) init the dot graph -}
graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
......@@ -226,12 +226,12 @@ exportToDot phylo export =
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v
-- 2) create a layer for the branches labels -}
subgraph (Str "Branches peaks") $ do
subgraph (Str "Branches peaks") $ do
-- graphAttrs [Rank SameRank]
{-
-- 3) group the branches by hierarchy
-- mapM (\branches ->
-- mapM (\branches ->
-- subgraph (Str "Branches clade") $ do
-- graphAttrs [Rank SameRank]
......@@ -243,7 +243,7 @@ exportToDot phylo export =
{-- 5) create a layer for each period -}
_ <- mapM (\period ->
subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
graphAttrs [Rank SameRank]
periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod')
......@@ -253,16 +253,16 @@ exportToDot phylo export =
{-- 7) create the edges between a branch and its first groups -}
_ <- mapM (\(bId,groups) ->
mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
)
$ toList
$ map (\groups -> head' "toDot"
$ map (\groups -> head' "toDot"
$ groupBy (\g g' -> g' ^. phylo_groupPeriod == g ^. phylo_groupPeriod)
$ sortOn (fst . _phylo_groupPeriod) groups)
$ sortOn (fst . _phylo_groupPeriod) groups)
$ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
{- 8) create the edges between the groups -}
_ <- mapM (\((k,k'),v) ->
_ <- mapM (\((k,k'),v) ->
toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
) $ (toList . mergePointers) $ export ^. export_groups
......@@ -275,15 +275,15 @@ exportToDot phylo export =
toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
) $ mergeAncestors $ export ^. export_groups
-- 10) create the edges between the periods
-- 10) create the edges between the periods
_ <- mapM (\(prd,prd') ->
toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
{- 8) create the edges between the branches
{- 8) create the edges between the branches
-- _ <- mapM (\(bId,bId') ->
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToProximity bId bId'
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToProximity bId bId'
-- (getThresholdInit $ phyloProximity $ getConfig phylo)
-- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
-- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
......@@ -298,18 +298,18 @@ exportToDot phylo export =
----------------
filterByBranchSize :: Double -> PhyloExport -> PhyloExport
filterByBranchSize thr export =
filterByBranchSize thr export =
let splited = partition (\b -> head' "filter" ((b ^. branch_meta) ! "size") >= thr) $ export ^. export_branches
in export & export_branches .~ (fst splited)
& export_groups %~ (filter (\g -> not $ elem (g ^. phylo_groupBranchId) (map _branch_id $ snd splited)))
processFilters :: [Filter] -> Quality -> PhyloExport -> PhyloExport
processFilters filters qua export =
foldl (\export' f -> case f of
processFilters filters qua export =
foldl (\export' f -> case f of
ByBranchSize thr -> if (thr < (fromIntegral $ qua ^. qua_minBranch))
then filterByBranchSize (fromIntegral $ qua ^. qua_minBranch) export'
else filterByBranchSize thr export'
else filterByBranchSize thr export'
) export filters
--------------
......@@ -321,11 +321,11 @@ branchToIso branches =
let steps = map sum
$ inits
$ map (\(b,x) -> b ^. branch_y + 0.05 - x)
$ zip branches
$ ([0] ++ (map (\(b,b') ->
$ zip branches
$ ([0] ++ (map (\(b,b') ->
let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
in
lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
in
if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
then (b' ^. branch_seaLevel) !! (lmin - 1)
else (b' ^. branch_seaLevel) !! (idx - 1)
......@@ -334,10 +334,10 @@ branchToIso branches =
$ zip steps branches
branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
branchToIso' start step branches =
branchToIso' start step branches =
let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
$ inits
$ ([0] ++ (map (\(b,b') ->
$ ([0] ++ (map (\(b,b') ->
let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
in 1 - start - step * root) $ listToSeq branches))
in map (\(x,b) -> b & branch_x .~ x)
......@@ -348,17 +348,17 @@ sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
sortByHierarchy depth branches =
if (length branches == 1)
then branches
else concat
else concat
$ map (\branches' ->
let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
++ (sortByHierarchy (depth + 1) (snd partitions)))
++ (sortByHierarchy (depth + 1) (snd partitions)))
$ groupBy (\b b' -> ((take depth . snd) $ b ^. branch_id) == ((take depth . snd) $ b' ^. branch_id) )
$ sortOn (\b -> (take depth . snd) $ b ^. branch_id) branches
sortByBirthDate :: Order -> PhyloExport -> PhyloExport
sortByBirthDate order export =
sortByBirthDate order export =
let branches = sortOn (\b -> (b ^. branch_meta) ! "birth") $ export ^. export_branches
branches' = case order of
Asc -> branches
......@@ -367,8 +367,8 @@ sortByBirthDate order export =
processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
processSort sort' elev export = case sort' of
ByBirthDate o -> sortByBirthDate o export
ByHierarchy -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
ByBirthDate o -> sortByBirthDate o export
ByHierarchy -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
$ sortByHierarchy 0 (export ^. export_branches))
......@@ -376,26 +376,26 @@ processSort sort' elev export = case sort' of
-- | Metrics | --
-----------------
-- | Return the conditional probability of i knowing j
-- | Return the conditional probability of i knowing j
conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
conditional m i j = (findWithDefault 0 (i,j) m)
conditional m i j = (findWithDefault 0 (i,j) m)
/ (m ! (j,j))
-- | Return the genericity score of a given ngram
genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
- (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
-- | Return the specificity score of a given ngram
specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
- (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
- (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Return the inclusion score of a given ngram
inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
......@@ -404,14 +404,14 @@ ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
ngramsMetrics phylo export =
over ( export_groups
. traverse )
(\g -> g & phylo_groupMeta %~ insert "genericity"
(\g -> g & phylo_groupMeta %~ insert "genericity"
(map (\n -> genericity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "specificity"
& phylo_groupMeta %~ insert "specificity"
(map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "inclusion"
& phylo_groupMeta %~ insert "inclusion"
(map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "frequence"
(map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "frequence"
(map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
) export
......@@ -419,32 +419,32 @@ branchDating :: PhyloExport -> PhyloExport
branchDating export =
over ( export_branches
. traverse )
(\b ->
(\b ->
let groups = sortOn fst
$ foldl' (\acc g -> if (g ^. phylo_groupBranchId == b ^. branch_id)
then acc ++ [g ^. phylo_groupPeriod]
else acc ) [] $ export ^. export_groups
periods = nub groups
birth = fst $ head' "birth" groups
age = (snd $ last' "age" groups) - birth
in b & branch_meta %~ insert "birth" [fromIntegral birth]
age = (snd $ last' "age" groups) - birth
in b & branch_meta %~ insert "birth" [fromIntegral birth]
& branch_meta %~ insert "age" [fromIntegral age]
& branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
processMetrics :: Phylo -> PhyloExport -> PhyloExport
processMetrics phylo export = ngramsMetrics phylo
$ branchDating export
$ branchDating export
-----------------
-- | Taggers | --
-----------------
-----------------
nk :: Int -> [[Int]] -> Int
nk n groups = sum
$ map (\g -> if (elem n g)
then 1
else 0) groups
else 0) groups
tf :: Int -> [[Int]] -> Double
......@@ -463,7 +463,7 @@ findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
findEmergences groups freq =
let ngrams = map _phylo_groupNgrams groups
dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
$ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
in reverse $ sortOn snd
$ map (\(n,_) -> if (member n freq)
......@@ -471,18 +471,18 @@ findEmergences groups freq =
else (n,0)) emerging
mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
mostEmergentTfIdf nth freq foundations export =
mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
mostEmergentTfIdf nth freq foundations export =
over ( export_branches
. traverse )
(\b ->
(\b ->
let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
tfidf = findTfIdf (map _phylo_groupNgrams groups)
emergences = findEmergences groups freq
selected = if (null emergences)
then map fst $ take nth tfidf
else [fst $ head' "mostEmergentTfIdf" emergences]
++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
else [fst $ head' "mostEmergentTfIdf" emergences]
++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
in b & branch_label .~ (ngramsToLabel foundations selected)) export
......@@ -490,14 +490,14 @@ getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
$ take nth
$ reverse
$ sortOn snd $ zip [0..] meta
$ sortOn snd $ zip [0..] meta
mostInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
mostInclusive nth foundations export =
over ( export_branches
. traverse )
(\b ->
(\b ->
let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
cooc = foldl (\acc g -> unionWith (+) acc (g ^. phylo_groupCooc)) empty groups
ngrams = sort $ foldl (\acc g -> union acc (g ^. phylo_groupNgrams)) [] groups
......@@ -509,14 +509,14 @@ mostInclusive nth foundations export =
mostEmergentInclusive :: Int -> Vector Ngrams -> PhyloExport -> PhyloExport
mostEmergentInclusive nth foundations export =
over ( export_groups
. traverse )
(\g ->
. traverse )
(\g ->
let lbl = ngramsToLabel foundations
$ take nth
$ take nth
$ map (\(_,(_,idx)) -> idx)
$ concat
$ map (\groups -> sortOn (fst . snd) groups)
$ groupBy ((==) `on` fst) $ reverse $ sortOn fst
$ groupBy ((==) `on` fst) $ reverse $ sortOn fst
$ zip ((g ^. phylo_groupMeta) ! "inclusion")
$ zip ((g ^. phylo_groupMeta) ! "dynamics") (g ^. phylo_groupNgrams)
in g & phylo_groupLabel .~ lbl ) export
......@@ -524,26 +524,26 @@ mostEmergentInclusive nth foundations export =
processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
processLabels labels foundations freq export =
foldl (\export' label ->
foldl (\export' label ->
case label of
GroupLabel tagger nth ->
GroupLabel tagger nth ->
case tagger of
MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
MostEmergentInclusive -> mostEmergentInclusive nth foundations export'
_ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger"
BranchLabel tagger nth ->
case tagger of
MostInclusive -> mostInclusive nth foundations export'
MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
_ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
_ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
------------------
-- | Dynamics | --
------------------
------------------
toDynamics :: Int -> [PhyloGroup] -> PhyloGroup -> Map Int (Date,Date) -> Double
toDynamics n parents g m =
toDynamics n parents g m =
let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
......@@ -557,7 +557,7 @@ toDynamics n parents g m =
then 1
else 3
where
--------------------------------------
--------------------------------------
isNew :: Bool
isNew = not $ elem n $ concat $ map _phylo_groupNgrams parents
......@@ -571,11 +571,11 @@ processDynamics groups =
where
--------------------------------------
mapNgrams :: Map Int (Date,Date)
mapNgrams = map (\dates ->
mapNgrams = map (\dates ->
let dates' = sort dates
in (head' "dynamics" dates', last' "dynamics" dates'))
$ fromListWith (++)
$ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
$ foldl (\acc g -> acc ++ ( map (\n -> (n,[fst $ g ^. phylo_groupPeriod, snd $ g ^. phylo_groupPeriod]))
$ (g ^. phylo_groupNgrams))) [] groups
......@@ -584,17 +584,17 @@ processDynamics groups =
-----------------
getGroupThr :: Double -> PhyloGroup -> Double
getGroupThr step g =
getGroupThr step g =
let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
breaks = (g ^. phylo_groupMeta) ! "breaks"
in (last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl)) - step
toAncestor :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGroup] -> PhyloGroup -> PhyloGroup
toAncestor nbDocs diago proximity step candidates ego =
let curr = ego ^. phylo_groupAncestors
let curr = ego ^. phylo_groupAncestors
in ego & phylo_groupAncestors .~ (curr ++ (map (\(g,w) -> (getGroupId g,w))
$ filter (\(g,w) -> (w > 0) && (w >= (min (getGroupThr step ego) (getGroupThr step g))))
$ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
$ map (\g -> (g, toProximity nbDocs diago proximity (ego ^. phylo_groupNgrams) (g ^. phylo_groupNgrams) (g ^. phylo_groupNgrams)))
$ filter (\g -> g ^. phylo_groupBranchId /= ego ^. phylo_groupBranchId ) candidates))
......@@ -602,24 +602,24 @@ headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGro
headsToAncestors nbDocs diago proximity step heads acc =
if (null heads)
then acc
else
else
let ego = head' "headsToAncestors" heads
heads' = tail' "headsToAncestors" heads
in headsToAncestors nbDocs diago proximity step heads' (acc ++ [toAncestor nbDocs diago proximity step heads' ego])
toHorizon :: Phylo -> Phylo
toHorizon phylo =
let phyloAncestor = updatePhyloGroups
level
(fromList $ map (\g -> (getGroupId g, g))
$ concat
toHorizon phylo =
let phyloAncestor = updatePhyloGroups
level
(fromList $ map (\g -> (getGroupId g, g))
$ concat
$ tracePhyloAncestors newGroups) phylo
reBranched = fromList $ map (\g -> (getGroupId g, g)) $ concat
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ getGroupsFromLevel level phyloAncestor
in updatePhyloGroups level reBranched phylo
where
-- | 1) for each periods
-- | 1) for each periods
periods :: [PhyloPeriodId]
periods = getPeriodIds phylo
-- --
......@@ -630,64 +630,64 @@ toHorizon phylo =
frame = getTimeFrame $ timeUnit $ getConfig phylo
-- | 2) find ancestors between groups without parents
mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd ->
mapGroups = map (\prd ->
let groups = getGroupsFromLevelPeriods level [prd] phylo
childs = getPreviousChildIds level frame prd periods phylo
childs = getPreviousChildIds level frame prd periods phylo
-- maybe add a better filter for non isolated ancestors
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
$ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
noHeads = groups \\ heads
noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
proximity = (phyloProximity $ getConfig phylo)
step = case getSeaElevation phylo of
Constante _ s -> s
Constante _ s -> s
Adaptative _ -> undefined
-- in headsToAncestors nbDocs diago proximity heads groups []
in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
in map (\ego -> toAncestor nbDocs diago proximity step noHeads ego)
$ headsToAncestors nbDocs diago proximity step heads []
) periods
-- | 3) process this task concurrently
newGroups :: [[PhyloGroup]]
newGroups = mapGroups `using` parList rdeepseq
newGroups = mapGroups `using` parList rdeepseq
--------------------------------------
getPreviousChildIds :: Level -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> Phylo -> [PhyloGroupId]
getPreviousChildIds lvl frame curr prds phylo =
getPreviousChildIds lvl frame curr prds phylo =
concat $ map ((map fst) . _phylo_groupPeriodChilds)
$ getGroupsFromLevelPeriods lvl (getNextPeriods ToParents frame curr prds) phylo
---------------------
-- | phyloExport | --
---------------------
---------------------
toPhyloExport :: Phylo -> DotGraph DotId
toPhyloExport phylo = exportToDot phylo
$ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
$ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
$ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
$ processMetrics phylo export
$ processMetrics phylo export
where
export :: PhyloExport
export = PhyloExport groups branches
export = PhyloExport groups branches
--------------------------------------
branches :: [PhyloBranch]
branches = map (\g ->
branches = map (\g ->
let seaLvl = (g ^. phylo_groupMeta) ! "seaLevels"
breaks = (g ^. phylo_groupMeta) ! "breaks"
canonId = take (round $ (last' "export" breaks) + 2) (snd $ g ^. phylo_groupBranchId)
in PhyloBranch (g ^. phylo_groupBranchId)
in PhyloBranch (g ^. phylo_groupBranchId)
canonId
seaLvl
0
0
(last' "export" (take (round $ (last' "export" breaks) + 1) seaLvl))
0
0
"" empty)
"" empty)
$ map (\gs -> head' "export" gs)
$ groupBy (\g g' -> g ^. phylo_groupBranchId == g' ^. phylo_groupBranchId)
$ sortOn (\g -> g ^. phylo_groupBranchId) groups
--------------------------------------
--------------------------------------
groups :: [PhyloGroup]
groups = traceExportGroups
$ processDynamics
......
......@@ -67,11 +67,9 @@ toPhylo phyloStep = trace ("# phylo1 groups " <> show(length $ getGroupsFromLeve
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 phyloStep
-- > AD to db here
--------------------------------------
--------------------
-- | To Phylo 1 | --
--------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment