Commit b7ca113d authored by qlobbe's avatar qlobbe

add branch label

parent 7cd80ff2
Pipeline #512 failed with stage
......@@ -26,7 +26,7 @@ import System.Directory (doesFileExist)
import Data.Aeson
import Data.Text (Text, unwords, unlines)
import Data.List ((++))
import Data.List ((++),concat)
import GHC.Generics
import GHC.IO (FilePath)
import Gargantext.Prelude
......@@ -37,6 +37,8 @@ import Gargantext.Text.Parsers (FileFormat(..),parseFile)
import Gargantext.Text.Terms.WithList
import Gargantext.Text.Context (TermList)
import Control.Monad (mapM)
import System.Environment
import Gargantext.Viz.Phylo
......@@ -48,6 +50,8 @@ import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Database.Types.Node
import Data.Maybe
import Control.Concurrent.Async as CCA (mapConcurrently)
import qualified Data.Map as DM
import qualified Data.Vector as DV
import qualified Data.List as DL
......@@ -141,7 +145,8 @@ wosToCorpus limit path = DL.take limit
. filter (\d -> (isJust $_hyperdataDocument_publication_year d)
&& (isJust $_hyperdataDocument_title d)
&& (isJust $_hyperdataDocument_abstract d))
<$> parseFile WOS path
. concat
<$> mapConcurrently (\idx -> parseFile WOS (path <> show(idx) <> ".txt")) [1..20]
-- | To use the correct parser given a CorpusType
......@@ -211,7 +216,7 @@ main = do
(reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge] [SizeBranch $ SBParams (minSizeBranch conf)] [GroupLabelIncDyn,BranchPeakInc] (Just (ByBranchAge,Asc)) Json Flat True
let phylo = toPhylo query corpus termList fis'
......
......@@ -318,7 +318,8 @@ data Metric = BranchAge deriving (Generic, Show, Eq, Read)
-- | Tagger constructors
data Tagger = BranchPeakFreq | GroupLabelCooc | GroupDynamics deriving (Show,Generic,Read)
data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
| GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
--------------
......@@ -408,6 +409,7 @@ data PhyloNode = PhyloNode
, _pn_idx :: [Int]
, _pn_ngrams :: Maybe [Ngrams]
, _pn_metrics :: Map Text [Double]
, _pn_cooc :: Map (Int,Int) Double
, _pn_parents :: Maybe [PhyloGroupId]
, _pn_childs :: [PhyloNode]
} deriving (Generic, Show)
......
......@@ -28,6 +28,8 @@ TODO:
module Gargantext.Viz.Phylo.Example where
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.Text (Text, toLower)
import Data.List ((++))
import Data.Map (Map,empty)
......@@ -43,16 +45,23 @@ import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.Main (writePhylo)
import GHC.IO (FilePath)
import qualified Data.List as List
------------------------------------------------------
-- | STEP 12 | -- Create a PhyloView from a user Query
------------------------------------------------------
export :: IO ()
export = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView
phyloExport :: FilePath -> IO FilePath
phyloExport fp = writePhylo fp phyloView
......@@ -73,7 +82,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 2 Merge False 2 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
phyloQueryView = PhyloQueryView 2 Merge False 2 [BranchAge] [] [BranchPeakInc,GroupLabelIncDyn] (Just (ByBranchAge,Asc)) Json Flat True
--------------------------------------------------
......@@ -100,7 +109,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
3 1 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.6 20) 5 0.8 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.4 0)
3 1 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.5 20) 5 0.8 0.5 4 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0)
......
......@@ -20,7 +20,6 @@ module Gargantext.Viz.Phylo.Main
import Debug.Trace (trace)
import qualified Data.Text as Text
import Data.Map (Map)
import Data.Text (Text)
import Data.Maybe
import Servant
......@@ -30,18 +29,17 @@ import Gargantext.Prelude
import Gargantext.Text.Context (TermList)
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Set as Set
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Core.Types
import Gargantext.Text.Terms.WithList
import Gargantext.Database.Config (userMaster)
-- import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Schema.NodeNode (selectDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Node.Select (selectNodesWithUsername)
-- import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
-- import Gargantext.Database.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Flow
import Gargantext.API.Ngrams.Tools (getTermsWith)
-- TODO : git mv ViewMaker Maker
......@@ -60,7 +58,7 @@ flowPhylo :: FlowCmdM env ServantErr m
flowPhylo cId l m fp = do
list <- defaultList cId
listMaster <- selectNodesWithUsername NodeList userMaster
-- listMaster <- selectNodesWithUsername NodeList userMaster
termList <- Map.toList <$> getTermsWith Text.words [list] NgramsTerms GraphTerm
--printDebug "termList" termList
......
......@@ -24,7 +24,7 @@ import Gargantext.Viz.Phylo.Tools
import Control.Lens hiding (Level)
import Data.List ((\\), sortOn, concat, nub, take, union, intersect, null, (++), sort)
import Data.Map (Map, (!), foldlWithKey, toList, size, insert, unionWith, intersection, intersectionWith, filterWithKey, elems, fromList, findWithDefault, fromListWith)
import Data.Map (Map, (!), toList, size, insert, unionWith, intersection, intersectionWith, filterWithKey, elems, fromList, findWithDefault, fromListWith)
import Data.Text (Text)
-- import Debug.Trace (trace)
......@@ -37,27 +37,25 @@ import Data.Text (Text)
-- | Return the conditional probability of i knowing j
conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
conditional m i j = (findWithDefault 0 (i,j) m)
/ foldlWithKey (\s (x,_) v -> if x == j
then s + v
else s ) 0 m
/ (m ! (j,j))
-- | Return the genericity score of a given ngram
genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
- (sum $ map (\j -> conditional m j i) l)) / 2
- (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
-- | Return the specificity score of a given ngram
specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
- (sum $ map (\j -> conditional m i j) l)) / 2
- (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Return the coverage score of a given ngram
coverage :: Map (Int, Int) Double -> [Int] -> Int -> Double
coverage m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / 2
-- | Return the inclusion score of a given ngram
inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Process some metrics on top of ngrams
......@@ -65,7 +63,7 @@ getNgramsMeta :: Map (Int, Int) Double -> [Int] -> Map Text [Double]
getNgramsMeta m ngrams = fromList
[ ("genericity" , map (\n -> genericity m (ngrams \\ [n]) n) ngrams ),
("specificity", map (\n -> specificity m (ngrams \\ [n]) n) ngrams ),
("coverage" , map (\n -> coverage m (ngrams \\ [n]) n) ngrams )]
("inclusion" , map (\n -> inclusion m (ngrams \\ [n]) n) ngrams )]
-- | To get the nth most occurent elems in a coocurency matrix
......@@ -96,14 +94,14 @@ findDynamics n pv pn m =
bid = fromJust $ (pn ^. pn_bid)
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- | decrease
then 0
else if ((fst prd) == (fst $ m ! n))
-- | emergence
then 1
else if (not $ sharedWithParents (fst prd) bid n pv)
-- | recombination
then 2
else if ((fst prd) == (fst $ m ! n))
-- | recombination
then 0
else if (not $ sharedWithParents (fst prd) bid n pv)
-- | decrease
then 1
else 3
......
......@@ -23,7 +23,7 @@ import Data.GraphViz hiding (DotGraph)
import Data.GraphViz.Attributes.Complete hiding (EdgeType)
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic
import Data.List ((++),unwords,concat,sortOn,nub,sort,group)
import Data.List ((++),unwords,concat,sortOn,nub)
import Data.Map (Map,toList,(!))
import Data.Maybe (isNothing,fromJust)
import Data.Text.Lazy (fromStrict, pack, unpack)
......@@ -134,16 +134,18 @@ setPeakDotEdge bId nId = edge bId nId [Width 3, Color [toWColor Black], ArrowHea
colorFromDynamics :: Double -> H.Attribute
colorFromDynamics d
| d == 0 = H.BGColor (toColor LightPink)
| d == 1 = H.BGColor (toColor PaleGreen)
| d == 2 = H.BGColor (toColor SkyBlue)
| d == 0 = H.BGColor (toColor PaleGreen)
| d == 1 = H.BGColor (toColor SkyBlue)
| d == 2 = H.BGColor (toColor LightPink)
| otherwise = H.Color (toColor Black)
getGroupDynamic :: [Double] -> H.Attribute
getGroupDynamic dy = colorFromDynamics $ head' "getGroupDynamic" (head' "getGroupDynamic" $ reverse $ sortOn length $ group $ sort dy)
getGroupDynamic dy
| elem 0 dy = colorFromDynamics 0
| elem 1 dy = colorFromDynamics 1
| elem 2 dy = colorFromDynamics 2
| otherwise = colorFromDynamics 3
-- | To set an HTML table
......@@ -151,21 +153,33 @@ setHtmlTable :: PhyloNode -> H.Label
setHtmlTable pn = H.Table H.HTable
{ H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
, H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
, H.tableRows = [header] <> (if isNothing $ pn ^. pn_ngrams
then []
else map ngramsToRow $ splitEvery 4 $ zip (fromJust $ pn ^. pn_ngrams) dynamics) }
, H.tableRows = [header]
<> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
<> (if isNothing $ pn ^. pn_ngrams
then []
else map ngramsToRow $ splitEvery 4
$ reverse $ sortOn (snd . snd)
$ zip (fromJust $ pn ^. pn_ngrams) $ zip dynamics inclusion) }
where
--------------------------------------
ngramsToRow :: [(Ngrams,Double)] -> H.Row
ngramsToRow ns = H.Cells $ map (\(n,d) -> H.LabelCell [H.BAlign H.HLeft,colorFromDynamics d]
ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
ngramsToRow ns = H.Cells $ map (\(n,(d,_)) -> H.LabelCell [H.Align H.HLeft,colorFromDynamics d]
$ H.Text [H.Str $ fromStrict n]) ns
--------------------------------------
inclusion :: [Double]
inclusion = (pn ^. pn_metrics) ! "inclusion"
--------------------------------------
dynamics :: [Double]
dynamics = (pn ^. pn_metrics) ! "dynamics"
--------------------------------------
header :: H.Row
header = H.Cells [H.LabelCell [getGroupDynamic dynamics]
$ H.Text [H.Str $ (fromStrict . T.toUpper) $ pn ^. pn_label]]
$ H.Text [H.Str $ (((fromStrict . T.toUpper) $ pn ^. pn_label)
<> (fromStrict " ( ")
<> (pack $ show (fst $ getNodePeriod pn))
<> (fromStrict " , ")
<> (pack $ show (snd $ getNodePeriod pn))
<> (fromStrict " ) "))]]
--------------------------------------
......
......@@ -18,16 +18,18 @@ module Gargantext.Viz.Phylo.View.Taggers
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,groupBy,sortOn,sort, (!!), take)
import Data.List (concat,nub,groupBy,sortOn,sort, (!!), take, union, (\\))
import Data.Text (Text)
import Data.Tuple (fst, snd)
import Data.Vector (Vector)
import Data.Map (Map, (!))
import Data.Map (Map, (!), empty, unionWith)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.BranchMaker
import Gargantext.Viz.Phylo.Metrics
import qualified Data.Map as Map
import Control.Parallel.Strategies
-- import Debug.Trace (trace)
......@@ -82,26 +84,65 @@ branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ getNodesByBranches v
getNthMostMeta :: Int -> Text -> PhyloGroup -> [Int]
getNthMostMeta nth meta g = map (\(idx,_) -> (getGroupNgrams g !! idx))
$ take nth
$ sortOn snd $ zip [0..]
$ (g ^. phylo_groupNgramsMeta) ! meta
getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
$ take nth
$ reverse
$ sortOn snd $ zip [0..] meta
-- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes
nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelCooc v thr p = over (pv_nodes
. traverse)
(\n -> let g = head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p
lbl = ngramsToLabel (getFoundationsRoots p) $ getNthMostMeta thr "coverage" g
lbl = ngramsToLabel (getFoundationsRoots p) $ mostOccNgrams thr g
in n & pn_label .~ lbl) v
-- | To set the label of a PhyloNode as the nth most inclusives terms of its PhyloNodes
nodeLabelInc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelInc v thr p = over (pv_nodes
. traverse)
(\n -> let g = head' "inclusion" $ getGroupsFromIds [getNodeId n] p
lbl = ngramsToLabel (getFoundationsRoots p)
$ getNthMostMeta thr ((g ^. phylo_groupNgramsMeta) ! "inclusion") (getGroupNgrams g)
in n & pn_label .~ lbl) v
nodeLabelInc' :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelInc' v nth p = over (pv_nodes
. traverse)
(\pn -> let lbl = ngramsToLabel (getFoundationsRoots p)
$ take nth
$ map (\(_,(_,idx)) -> idx)
$ concat
$ map (\groups -> sortOn (fst . snd) groups)
$ groupBy ((==) `on` fst) $ reverse $ sortOn fst
$ zip ((pn ^. pn_metrics) ! "inclusion")
$ zip ((pn ^. pn_metrics) ! "dynamics") (pn ^. pn_idx)
in pn & pn_label .~ lbl) v
branchPeakInc :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakInc v nth p =
let labels = map (\(id,nodes) ->
let cooc = foldl (\mem pn -> unionWith (+) mem (pn ^. pn_cooc)) empty nodes
ngrams = sort $ foldl (\mem pn -> union mem (pn ^. pn_idx)) [] nodes
inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
lbl = ngramsToLabel (getFoundationsRoots p) $ getNthMostMeta nth inc ngrams
in (id, lbl))
$ getNodesByBranches v
labels' = labels `using` parList rdeepseq
in foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v labels'
-- | To process a sorted list of Taggers to a PhyloView
processTaggers :: [Tagger] -> Phylo -> PhyloView -> PhyloView
processTaggers ts p v = foldl (\v' t -> case t of
BranchPeakFreq -> branchPeakFreq v' 2 p
-- BranchPeakFreq -> branchPeakCooc v' 3 p
GroupLabelCooc -> nodeLabelCooc v' 2 p
_ -> panic "[ERR][Viz.Phylo.View.Taggers.processTaggers] tagger not found") v ts
BranchPeakFreq -> branchPeakFreq v' 2 p
BranchPeakCooc -> branchPeakCooc v' 2 p
BranchPeakInc -> branchPeakInc v' 2 p
GroupLabelInc -> nodeLabelInc v' 2 p
GroupLabelIncDyn -> nodeLabelInc' v' 2 p
GroupLabelCooc -> nodeLabelCooc v' 2 p) v ts
......@@ -73,6 +73,7 @@ groupsToNodes isR isV ns gs = map (\g -> let idxs = getGroupNgrams g
then Just (ngramsToText ns idxs)
else Nothing)
(g ^. phylo_groupNgramsMeta)
(g ^. phylo_groupCooc)
(if (not isR)
then Just (getGroupLevelParentsId g)
else Nothing)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment