Commit 5a5c6521 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-phylo' of...

Merge branch 'dev-phylo' of ssh://gitlab.iscpif.fr:20022/gargantext/haskell-gargantext into dev-phylo
parents d5ae1fc2 7d722d28
...@@ -172,12 +172,23 @@ main = do ...@@ -172,12 +172,23 @@ main = do
Hamming -> undefined Hamming -> undefined
WeightedLogJaccard s -> (show s) WeightedLogJaccard s -> (show s)
let sync = case (phyloSynchrony config) of
ByProximityThreshold t _ _ _ -> (show t)
ByProximityDistribution _ _ -> undefined
-- to be improved
-- let br_length = case (take 1 $ exportFilter config) of
-- ByBranchSize t -> (show t)
let output = (outputPath config) let output = (outputPath config)
<> (unpack $ phyloName config) <> (unpack $ phyloName config)
<> "-scale_" <> (show (_qua_granularity $ phyloQuality config))
<> "-level_" <> (show (phyloLevel config))
<> "-" <> clq <> "-" <> clq
<> "-level_" <> (show (phyloLevel config))
<> "-sens_" <> sensibility <> "-sens_" <> sensibility
-- <> "-lenght_" <> br_length
<> "-scale_" <> (show (_qua_granularity $ phyloQuality config))
<> "-sync_" <> sync
<> ".dot" <> ".dot"
dotToFile output dot dotToFile output dot
...@@ -47,7 +47,7 @@ cooc2graph' distance threshold myCooc = distanceMap ...@@ -47,7 +47,7 @@ cooc2graph' distance threshold myCooc = distanceMap
where where
(ti, _) = createIndices myCooc (ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc myCooc' = toIndex ti myCooc
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 0) myCooc' matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc distanceMat = measure distance matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat distanceMap = Map.filter (> threshold) $ mat2map distanceMat
......
...@@ -142,6 +142,7 @@ groupToDotNode fdt g bId = ...@@ -142,6 +142,7 @@ groupToDotNode fdt g bId =
, toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams))) , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
, toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams))) , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
, toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics"))) , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
, toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
]) ])
...@@ -192,7 +193,7 @@ exportToDot phylo export = ...@@ -192,7 +193,7 @@ exportToDot phylo export =
,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)) ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)) ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)) ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo)) -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
]) ])
{- {-
...@@ -201,7 +202,7 @@ exportToDot phylo export = ...@@ -201,7 +202,7 @@ exportToDot phylo export =
-- 2) create a layer for the branches labels -} -- 2) create a layer for the branches labels -}
subgraph (Str "Branches peaks") $ do subgraph (Str "Branches peaks") $ do
graphAttrs [Rank SameRank] -- graphAttrs [Rank SameRank]
{- {-
-- 3) group the branches by hierarchy -- 3) group the branches by hierarchy
-- mapM (\branches -> -- mapM (\branches ->
...@@ -368,8 +369,8 @@ inclusion m l i = ( (sum $ map (\j -> conditional m j i) l) ...@@ -368,8 +369,8 @@ inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1) + (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
ngramsMetrics :: PhyloExport -> PhyloExport ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
ngramsMetrics export = ngramsMetrics phylo export =
over ( export_groups over ( export_groups
. traverse ) . traverse )
(\g -> g & phylo_groupMeta %~ insert "genericity" (\g -> g & phylo_groupMeta %~ insert "genericity"
...@@ -378,6 +379,8 @@ ngramsMetrics export = ...@@ -378,6 +379,8 @@ ngramsMetrics export =
(map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams) (map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "inclusion" & phylo_groupMeta %~ insert "inclusion"
(map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams) (map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "frequence"
(map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
) export ) export
...@@ -397,8 +400,8 @@ branchDating export = ...@@ -397,8 +400,8 @@ branchDating export =
& branch_meta %~ insert "age" [fromIntegral age] & branch_meta %~ insert "age" [fromIntegral age]
& branch_meta %~ insert "size" [fromIntegral $ length periods] ) export & branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
processMetrics :: PhyloExport -> PhyloExport processMetrics :: Phylo -> PhyloExport -> PhyloExport
processMetrics export = ngramsMetrics processMetrics phylo export = ngramsMetrics phylo
$ branchDating export $ branchDating export
...@@ -630,7 +633,7 @@ toPhyloExport phylo = exportToDot phylo ...@@ -630,7 +633,7 @@ toPhyloExport phylo = exportToDot phylo
$ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo) $ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
$ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo) $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
$ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo) $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
$ processMetrics export $ processMetrics phylo export
where where
export :: PhyloExport export :: PhyloExport
export = PhyloExport groups branches export = PhyloExport groups branches
......
...@@ -202,7 +202,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -202,7 +202,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ foldl sumCooc empty $ foldl sumCooc empty
$ map listToMatrix $ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs $ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0.01 cooc)) in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0.001 cooc))
$ toList phyloDocs $ toList phyloDocs
mcl' = mcl `using` parList rdeepseq mcl' = mcl `using` parList rdeepseq
in fromList mcl' in fromList mcl'
...@@ -290,7 +290,8 @@ docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double ...@@ -290,7 +290,8 @@ docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
docsToLastTermFreq n docs fdt = docsToLastTermFreq n docs fdt =
let last = take n $ reverse $ sort $ map date docs let last = take n $ reverse $ sort $ map date docs
nbDocs = fromIntegral $ length $ filter (\d -> elem (date d) last) docs nbDocs = fromIntegral $ length $ filter (\d -> elem (date d) last) docs
freqs = map (/(nbDocs)) freqs = map (\x -> truncate' (x/nbDocs) 4)
-- $ map (/(nbDocs))
$ fromList $ fromList
$ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs $ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs
......
...@@ -19,6 +19,8 @@ import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithK ...@@ -19,6 +19,8 @@ import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithK
import Data.String (String) import Data.String (String)
import Data.Text (Text) import Data.Text (Text)
import Prelude (floor)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo import Gargantext.Core.Viz.AdaptativePhylo
import Text.Printf import Text.Printf
...@@ -56,6 +58,22 @@ printIOComment cmt = ...@@ -56,6 +58,22 @@ printIOComment cmt =
-- | Misc | -- -- | Misc | --
-------------- --------------
-- truncate' :: Double -> Int -> Double
-- truncate' x n = (fromIntegral (floor (x * t))) / t
-- where t = 10^n
truncate' :: Double -> Int -> Double
truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
where
--------------
t :: Double
t = 10 ^n
getInMap :: Int -> Map Int Double -> Double
getInMap k m =
if (member k m)
then m ! k
else 0
roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
roundToStr = printf "%0.*f" roundToStr = printf "%0.*f"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment