Commit e0eb4aec authored by qlobbe's avatar qlobbe

add Callon's density

parent deee2cd3
...@@ -371,6 +371,7 @@ data PhyloFoundations = PhyloFoundations ...@@ -371,6 +371,7 @@ data PhyloFoundations = PhyloFoundations
data PhyloCounts = PhyloCounts data PhyloCounts = PhyloCounts
{ coocByDate :: !(Map Date Cooc) { coocByDate :: !(Map Date Cooc)
, docsByDate :: !(Map Date Double) , docsByDate :: !(Map Date Double)
, rootsCountByDate :: !(Map Date (Map Int Double))
, rootsCount :: !(Map Int Double) , rootsCount :: !(Map Int Double)
, rootsFreq :: !(Map Int Double) , rootsFreq :: !(Map Int Double)
, lastRootsFreq :: !(Map Int Double) , lastRootsFreq :: !(Map Int Double)
...@@ -487,8 +488,10 @@ data PhyloGroup = ...@@ -487,8 +488,10 @@ data PhyloGroup =
, _phylo_groupSources :: [Int] , _phylo_groupSources :: [Int]
, _phylo_groupNgrams :: [Int] , _phylo_groupNgrams :: [Int]
, _phylo_groupCooc :: !(Cooc) , _phylo_groupCooc :: !(Cooc)
, _phylo_groupDensity :: Double
, _phylo_groupBranchId :: PhyloBranchId , _phylo_groupBranchId :: PhyloBranchId
, _phylo_groupMeta :: Map Text [Double] , _phylo_groupMeta :: Map Text [Double]
, _phylo_groupRootsCount :: Map Int Double
, _phylo_groupScaleParents :: [Pointer] , _phylo_groupScaleParents :: [Pointer]
, _phylo_groupScaleChilds :: [Pointer] , _phylo_groupScaleChilds :: [Pointer]
, _phylo_groupPeriodParents :: [Pointer] , _phylo_groupPeriodParents :: [Pointer]
......
...@@ -144,6 +144,8 @@ groupToDotNode fdt g bId = ...@@ -144,6 +144,8 @@ groupToDotNode fdt g bId =
, toAttr "weight" (pack $ show (g ^. phylo_groupWeight)) , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
, toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources)) , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
, toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources)) , toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
, toAttr "density" (pack $ show (g ^. phylo_groupDensity))
, toAttr "cooc" (pack $ show (g ^. phylo_groupCooc))
, toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams))) , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
, toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams))) , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
, toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics"))) , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
...@@ -432,8 +434,10 @@ branchDating export = ...@@ -432,8 +434,10 @@ branchDating export =
else acc ) [] $ export ^. export_groups else acc ) [] $ export ^. export_groups
periods = nub groups periods = nub groups
birth = fst $ head' "birth" groups birth = fst $ head' "birth" groups
age = (snd $ last' "age" groups) - birth death = snd $ last' "death" groups
age = death - birth
in b & branch_meta %~ insert "birth" [fromIntegral birth] in b & branch_meta %~ insert "birth" [fromIntegral birth]
& branch_meta %~ insert "death" [fromIntegral death]
& branch_meta %~ insert "age" [fromIntegral age] & branch_meta %~ insert "age" [fromIntegral age]
& branch_meta %~ insert "size" [fromIntegral $ length periods] ) export & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
......
...@@ -34,6 +34,7 @@ import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatch ...@@ -34,6 +34,7 @@ import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatch
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
...@@ -191,7 +192,7 @@ findSeaLadder phylo = case getSeaElevation phylo of ...@@ -191,7 +192,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
in acc ++ (concat pairs') in acc ++ (concat pairs')
) [] $ keys $ phylo ^. phylo_periods ) [] $ keys $ phylo ^. phylo_periods
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods $ over ( phylo_periods
. traverse . traverse
...@@ -206,23 +207,28 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -206,23 +207,28 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
& phylo_scaleGroups .~ (fromList $ foldl (\groups obj -> & phylo_scaleGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj pId pId' lvl (length groups) , f obj pId pId' lvl (length groups)
(elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [pId])) -- select the cooc of the periods
(elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [pId])
-- select and merge the roots count of the periods
(foldl (\acc count -> unionWith (+) acc count) empty
$ elems $ restrictKeys (getRootsCountByDate phylo) $ periodsToYears [pId]))
] ) [] phyloCUnit) ] ) [] phyloCUnit)
else else
phyloLvl ) phyloLvl )
phylo phylo
clusterToGroup :: Clustering -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup clusterToGroup :: Clustering -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup
clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx "" clusterToGroup fis pId pId' lvl idx coocs rootsCount = PhyloGroup pId pId' lvl idx ""
(fis ^. clustering_support ) (fis ^. clustering_support )
(fis ^. clustering_visWeighting) (fis ^. clustering_visWeighting)
(fis ^. clustering_visFiltering) (fis ^. clustering_visFiltering)
(fis ^. clustering_roots) (fis ^. clustering_roots)
(ngramsToCooc (fis ^. clustering_roots) coocs) (ngramsToCooc (fis ^. clustering_roots) coocs)
(ngramsToDensity (fis ^. clustering_roots) coocs rootsCount)
(1,[0]) -- branchid (lvl,[path in the branching tree]) (1,[0]) -- branchid (lvl,[path in the branching tree])
(fromList [("breaks",[0]),("seaLevels",[0])]) (fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] [] rootsCount [] [] [] [] [] [] []
----------------------- -----------------------
...@@ -447,6 +453,16 @@ docsToTermCount docs roots = fromList ...@@ -447,6 +453,16 @@ docsToTermCount docs roots = fromList
docsToTimeTermCount :: [Document] -> Vector Ngrams -> (Map Date (Map Int Double))
docsToTimeTermCount docs roots =
let docs' = Map.map (\l -> fromList $ map (\lst -> (head' "docsToTimeTermCount" lst, fromIntegral $ length lst))
$ group $ sort l)
$ fromListWith (++)
$ map (\d -> (date d, nub $ ngramsToIdx (text d) roots)) docs
time = fromList $ map (\t -> (t,Map.empty)) $ toTimeScale (keys docs') 1
in unionWith (Map.union) time docs'
docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
docsToLastTermFreq n docs fdt = docsToLastTermFreq n docs fdt =
let last = take n $ reverse $ sort $ map date docs let last = take n $ reverse $ sort $ map date docs
...@@ -497,6 +513,7 @@ initPhylo docs conf = ...@@ -497,6 +513,7 @@ initPhylo docs conf =
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs) docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
docsCounts = PhyloCounts (docsToTimeScaleCooc docs (foundations ^. foundations_roots)) docsCounts = PhyloCounts (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs) (docsToTimeScaleNb docs)
(docsToTimeTermCount docs (foundations ^. foundations_roots))
(docsToTermCount docs (foundations ^. foundations_roots)) (docsToTermCount docs (foundations ^. foundations_roots))
(docsToTermFreq docs (foundations ^. foundations_roots)) (docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots)) (docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots))
......
...@@ -14,7 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloTools where ...@@ -14,7 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloTools where
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, iterate, transpose, partition, tails, nubBy, group, notElem, (!!)) import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, iterate, transpose, partition, tails, nubBy, group, notElem, (!!))
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys) import Data.Map (Map, elems, fromList, findWithDefault, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Set (Set, disjoint) import Data.Set (Set, disjoint)
import Data.String (String) import Data.String (String)
import Data.Text (Text,unpack) import Data.Text (Text,unpack)
...@@ -313,6 +313,27 @@ ngramsToCooc ngrams coocs = ...@@ -313,6 +313,27 @@ ngramsToCooc ngrams coocs =
in filterWithKey (\k _ -> elem k pairs) cooc in filterWithKey (\k _ -> elem k pairs) cooc
-----------------
-- | Density | --
-----------------
-- | To build the density of a phylogroup
-- density is defined in Callon M, Courtial JP, Laville F (1991) Co-word analysis as a tool for describing
-- the network of interaction between basic and technological research: The case of polymer chemistry.
-- Scientometric 22: 155–205.
ngramsToDensity :: [Int] -> [Cooc] -> (Map Int Double) -> Double
ngramsToDensity ngrams coocs rootsCount =
let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
pairs = listToCombi' ngrams
density = map (\(i,j) ->
let nij = findWithDefault 0 (i,j) cooc
in (nij * nij) / ((rootsCount ! i) * (rootsCount ! j))) pairs
in (sum density) / (fromIntegral $ length ngrams)
------------------ ------------------
-- | Defaults | -- -- | Defaults | --
------------------ ------------------
...@@ -458,6 +479,9 @@ getPeriodIds phylo = sortOn fst ...@@ -458,6 +479,9 @@ getPeriodIds phylo = sortOn fst
$ keys $ keys
$ phylo ^. phylo_periods $ phylo ^. phylo_periods
getLastDate :: Phylo -> Date
getLastDate phylo = snd $ last' "lastDate" $ getPeriodIds phylo
getLevelParentId :: PhyloGroup -> PhyloGroupId getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
...@@ -503,6 +527,9 @@ getLadder phylo = phylo ^. phylo_seaLadder ...@@ -503,6 +527,9 @@ getLadder phylo = phylo ^. phylo_seaLadder
getCoocByDate :: Phylo -> Map Date Cooc getCoocByDate :: Phylo -> Map Date Cooc
getCoocByDate phylo = coocByDate (phylo ^. phylo_counts) getCoocByDate phylo = coocByDate (phylo ^. phylo_counts)
getRootsCountByDate :: Phylo -> Map Date (Map Int Double)
getRootsCountByDate phylo = rootsCountByDate (phylo ^. phylo_counts)
getDocsByDate :: Phylo -> Map Date Double getDocsByDate :: Phylo -> Map Date Double
getDocsByDate phylo = docsByDate (phylo ^. phylo_counts) getDocsByDate phylo = docsByDate (phylo ^. phylo_counts)
......
...@@ -16,7 +16,7 @@ import Control.Lens hiding (Level) ...@@ -16,7 +16,7 @@ import Control.Lens hiding (Level)
import Control.Monad (sequence) import Control.Monad (sequence)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy) import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member) import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member, unionWith)
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics) import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
...@@ -32,6 +32,7 @@ import qualified Data.Map as Map ...@@ -32,6 +32,7 @@ import qualified Data.Map as Map
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs = mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
counts = foldl (\acc count -> unionWith (+) acc count) empty $ map _phylo_groupRootsCount childs
in PhyloGroup (fst $ fst id) (_phylo_groupPeriod' $ head' "mergeGroups" childs) in PhyloGroup (fst $ fst id) (_phylo_groupPeriod' $ head' "mergeGroups" childs)
(snd $ fst id) (snd id) "" (snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs) (sum $ map _phylo_groupSupport childs)
...@@ -40,8 +41,12 @@ mergeGroups coocs id mapIds childs = ...@@ -40,8 +41,12 @@ mergeGroups coocs id mapIds childs =
(concat $ map _phylo_groupSources childs) (concat $ map _phylo_groupSources childs)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(ngramsToDensity ngrams coocs counts)
-- todo add density here
((snd $ fst id),bId) ((snd $ fst id),bId)
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs) (mergeMeta bId childs)
counts
[] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs) (updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs) (updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
(mergeAncestors $ concat $ map _phylo_groupAncestors childs) (mergeAncestors $ concat $ map _phylo_groupAncestors childs)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment