Commit e0eb4aec authored by qlobbe's avatar qlobbe

add Callon's density

parent deee2cd3
Pipeline #3920 failed with stage
in 124 minutes and 42 seconds
......@@ -371,6 +371,7 @@ data PhyloFoundations = PhyloFoundations
data PhyloCounts = PhyloCounts
{ coocByDate :: !(Map Date Cooc)
, docsByDate :: !(Map Date Double)
, rootsCountByDate :: !(Map Date (Map Int Double))
, rootsCount :: !(Map Int Double)
, rootsFreq :: !(Map Int Double)
, lastRootsFreq :: !(Map Int Double)
......@@ -487,8 +488,10 @@ data PhyloGroup =
, _phylo_groupSources :: [Int]
, _phylo_groupNgrams :: [Int]
, _phylo_groupCooc :: !(Cooc)
, _phylo_groupDensity :: Double
, _phylo_groupBranchId :: PhyloBranchId
, _phylo_groupMeta :: Map Text [Double]
, _phylo_groupRootsCount :: Map Int Double
, _phylo_groupScaleParents :: [Pointer]
, _phylo_groupScaleChilds :: [Pointer]
, _phylo_groupPeriodParents :: [Pointer]
......
......@@ -144,6 +144,8 @@ groupToDotNode fdt g bId =
, toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
, toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
, toAttr "sourceFull" (pack $ show (g ^. phylo_groupSources))
, toAttr "density" (pack $ show (g ^. phylo_groupDensity))
, toAttr "cooc" (pack $ show (g ^. phylo_groupCooc))
, toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
, toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
, toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
......@@ -432,8 +434,10 @@ branchDating export =
else acc ) [] $ export ^. export_groups
periods = nub groups
birth = fst $ head' "birth" groups
age = (snd $ last' "age" groups) - birth
death = snd $ last' "death" groups
age = death - birth
in b & branch_meta %~ insert "birth" [fromIntegral birth]
& branch_meta %~ insert "death" [fromIntegral death]
& branch_meta %~ insert "age" [fromIntegral age]
& branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
......
......@@ -34,6 +34,7 @@ import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatch
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Vector as Vector
......@@ -191,7 +192,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
in acc ++ (concat pairs')
) [] $ keys $ phylo ^. phylo_periods
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods
. traverse
......@@ -206,23 +207,28 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
& phylo_scaleGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups)
, f obj pId pId' lvl (length groups)
(elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [pId]))
-- select the cooc of the periods
(elems $ restrictKeys (getCoocByDate phylo) $ periodsToYears [pId])
-- select and merge the roots count of the periods
(foldl (\acc count -> unionWith (+) acc count) empty
$ elems $ restrictKeys (getRootsCountByDate phylo) $ periodsToYears [pId]))
] ) [] phyloCUnit)
else
phyloLvl )
phylo
clusterToGroup :: Clustering -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> PhyloGroup
clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
clusterToGroup :: Clustering -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup
clusterToGroup fis pId pId' lvl idx coocs rootsCount = PhyloGroup pId pId' lvl idx ""
(fis ^. clustering_support )
(fis ^. clustering_visWeighting)
(fis ^. clustering_visFiltering)
(fis ^. clustering_roots)
(ngramsToCooc (fis ^. clustering_roots) coocs)
(ngramsToDensity (fis ^. clustering_roots) coocs rootsCount)
(1,[0]) -- branchid (lvl,[path in the branching tree])
(fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] []
rootsCount [] [] [] [] [] [] []
-----------------------
......@@ -447,6 +453,16 @@ docsToTermCount docs roots = fromList
docsToTimeTermCount :: [Document] -> Vector Ngrams -> (Map Date (Map Int Double))
docsToTimeTermCount docs roots =
let docs' = Map.map (\l -> fromList $ map (\lst -> (head' "docsToTimeTermCount" lst, fromIntegral $ length lst))
$ group $ sort l)
$ fromListWith (++)
$ map (\d -> (date d, nub $ ngramsToIdx (text d) roots)) docs
time = fromList $ map (\t -> (t,Map.empty)) $ toTimeScale (keys docs') 1
in unionWith (Map.union) time docs'
docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
docsToLastTermFreq n docs fdt =
let last = take n $ reverse $ sort $ map date docs
......@@ -497,6 +513,7 @@ initPhylo docs conf =
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
docsCounts = PhyloCounts (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs)
(docsToTimeTermCount docs (foundations ^. foundations_roots))
(docsToTermCount docs (foundations ^. foundations_roots))
(docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod $ timeUnit conf) docs (foundations ^. foundations_roots))
......
......@@ -14,7 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloTools where
import Control.Lens hiding (Level)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, iterate, transpose, partition, tails, nubBy, group, notElem, (!!))
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Map (Map, elems, fromList, findWithDefault, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Set (Set, disjoint)
import Data.String (String)
import Data.Text (Text,unpack)
......@@ -313,6 +313,27 @@ ngramsToCooc ngrams coocs =
in filterWithKey (\k _ -> elem k pairs) cooc
-----------------
-- | Density | --
-----------------
-- | To build the density of a phylogroup
-- density is defined in Callon M, Courtial JP, Laville F (1991) Co-word analysis as a tool for describing
-- the network of interaction between basic and technological research: The case of polymer chemistry.
-- Scientometric 22: 155–205.
ngramsToDensity :: [Int] -> [Cooc] -> (Map Int Double) -> Double
ngramsToDensity ngrams coocs rootsCount =
let cooc = foldl (\acc cooc' -> sumCooc acc cooc') empty coocs
pairs = listToCombi' ngrams
density = map (\(i,j) ->
let nij = findWithDefault 0 (i,j) cooc
in (nij * nij) / ((rootsCount ! i) * (rootsCount ! j))) pairs
in (sum density) / (fromIntegral $ length ngrams)
------------------
-- | Defaults | --
------------------
......@@ -458,6 +479,9 @@ getPeriodIds phylo = sortOn fst
$ keys
$ phylo ^. phylo_periods
getLastDate :: Phylo -> Date
getLastDate phylo = snd $ last' "lastDate" $ getPeriodIds phylo
getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupScaleParents
......@@ -503,6 +527,9 @@ getLadder phylo = phylo ^. phylo_seaLadder
getCoocByDate :: Phylo -> Map Date Cooc
getCoocByDate phylo = coocByDate (phylo ^. phylo_counts)
getRootsCountByDate :: Phylo -> Map Date (Map Int Double)
getRootsCountByDate phylo = rootsCountByDate (phylo ^. phylo_counts)
getDocsByDate :: Phylo -> Map Date Double
getDocsByDate phylo = docsByDate (phylo ^. phylo_counts)
......
......@@ -16,7 +16,7 @@ import Control.Lens hiding (Level)
import Control.Monad (sequence)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member, unionWith)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
import Gargantext.Core.Viz.Phylo.PhyloTools
......@@ -32,6 +32,7 @@ import qualified Data.Map as Map
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
counts = foldl (\acc count -> unionWith (+) acc count) empty $ map _phylo_groupRootsCount childs
in PhyloGroup (fst $ fst id) (_phylo_groupPeriod' $ head' "mergeGroups" childs)
(snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs)
......@@ -40,8 +41,12 @@ mergeGroups coocs id mapIds childs =
(concat $ map _phylo_groupSources childs)
ngrams
(ngramsToCooc ngrams coocs)
(ngramsToDensity ngrams coocs counts)
-- todo add density here
((snd $ fst id),bId)
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
(mergeMeta bId childs)
counts
[] (map (\g -> (getGroupId g, 1)) childs)
(updatePointers $ concat $ map _phylo_groupPeriodParents childs)
(updatePointers $ concat $ map _phylo_groupPeriodChilds childs)
(mergeAncestors $ concat $ map _phylo_groupAncestors childs)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment