Commit 9863bd4f authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] merge with dev-phylo

parents 928c717f 6bddaf45
Pipeline #1460 failed with stage
...@@ -19,13 +19,14 @@ module Main where ...@@ -19,13 +19,14 @@ module Main where
import Data.Aeson import Data.Aeson
import Data.List (concat, nub, isSuffixOf) import Data.List (concat, nub, isSuffixOf)
import Data.String (String) import Data.String (String)
import Data.Text (Text, unwords, unpack, replace) import Data.Text (Text, unwords, unpack, replace, pack)
import Crypto.Hash.SHA256 (hash) import Crypto.Hash.SHA256 (hash)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year) import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day,
csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile) import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList) import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList) import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
...@@ -33,23 +34,25 @@ import Gargantext.Core.Viz.AdaptativePhylo ...@@ -33,23 +34,25 @@ import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep) import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig) import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile) import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import GHC.IO (FilePath) import GHC.IO (FilePath)
import Prelude (Either(Left, Right)) import Prelude (Either(Left, Right),toInteger)
import System.Environment import System.Environment
import System.Directory (listDirectory,doesFileExist) import System.Directory (listDirectory,doesFileExist)
import Control.Concurrent.Async (mapConcurrently) import Control.Concurrent.Async (mapConcurrently)
import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
import qualified Data.ByteString.Char8 as C8 import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy as Lazy import qualified Data.ByteString.Lazy as Lazy
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
import qualified Data.Text as T import qualified Data.Text as T
-- import Debug.Trace (trace)
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show) data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
--------------- ---------------
-- | Tools | -- -- | Tools | --
...@@ -60,9 +63,35 @@ data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show) ...@@ -60,9 +63,35 @@ data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
getFilesFromPath :: FilePath -> IO([FilePath]) getFilesFromPath :: FilePath -> IO([FilePath])
getFilesFromPath path = do getFilesFromPath path = do
if (isSuffixOf "/" path) if (isSuffixOf "/" path)
then (listDirectory path) then (listDirectory path)
else return [path] else return [path]
---------------
-- | Dates | --
---------------
toMonths :: Integer -> Int -> Int -> Date
toMonths y m d = fromIntegral $ cdMonths
$ diffGregorianDurationClip (fromGregorian y m d) (fromGregorian 0000 0 0)
toDays :: Integer -> Int -> Int -> Date
toDays y m d = fromIntegral
$ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
toPhyloDate y m d tu = case tu of
Year _ _ _ -> y
Month _ _ _ -> toMonths (toInteger y) m d
Week _ _ _ -> div (toDays (toInteger y) m d) 7
Day _ _ _ -> toDays (toInteger y) m d
toPhyloDate' :: Int -> Int -> Int -> Text
toPhyloDate' y m d = pack $ showGregorian $ fromGregorian (toInteger y) m d
-------------- --------------
-- | Json | -- -- | Json | --
...@@ -79,26 +108,28 @@ readJson path = Lazy.readFile path ...@@ -79,26 +108,28 @@ readJson path = Lazy.readFile path
---------------- ----------------
-- | To filter the Ngrams of a document based on the termList -- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (a, Text) -> (a, [Text]) termsInText :: Patterns -> Text -> [Text]
filterTerms patterns (y,d) = (y,termsInText patterns d) termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList pats txt
where
--------------------------------------
termsInText :: Patterns -> Text -> [Text] -- | To transform a Wos file (or [file]) into a list of Docs
termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList pats txt wosToDocs :: Int -> Patterns -> TimeUnit -> FilePath -> IO ([Document])
-------------------------------------- wosToDocs limit patterns time path = do
-- | To transform a Wos file (or [file]) into a readable corpus
wosToCorpus :: Int -> FilePath -> IO ([(Int,Text)])
wosToCorpus limit path = do
files <- getFilesFromPath path files <- getFilesFromPath path
take limit take limit
<$> map (\d -> let date' = fromJust $ _hd_publication_year d <$> map (\d -> let title = fromJust $ _hd_title d
title = fromJust $ _hd_title d
abstr = if (isJust $ _hd_abstract d) abstr = if (isJust $ _hd_abstract d)
then fromJust $ _hd_abstract d then fromJust $ _hd_abstract d
else "" else ""
in (date', title <> " " <> abstr)) in Document (toPhyloDate
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d) time)
(toPhyloDate'
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d))
(termsInText patterns $ title <> " " <> abstr) Nothing [])
<$> concat <$> concat
<$> mapConcurrently (\file -> <$> mapConcurrently (\file ->
filter (\d -> (isJust $ _hd_publication_year d) filter (\d -> (isJust $ _hd_publication_year d)
...@@ -106,33 +137,51 @@ wosToCorpus limit path = do ...@@ -106,33 +137,51 @@ wosToCorpus limit path = do
<$> parseFile WOS (path <> file) ) files <$> parseFile WOS (path <> file) ) files
-- | To transform a Csv file into a readable corpus -- To transform a Csv file into a list of Document
csvToCorpus :: Int -> FilePath -> IO ([(Int,Text)]) csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO ([Document])
csvToCorpus limit path = Vector.toList csvToDocs parser patterns time path =
<$> Vector.take limit case parser of
<$> Vector.map (\row -> (csv_publication_year row, (csv_title row) <> " " <> (csv_abstract row))) Wos _ -> undefined
<$> snd <$> Csv.readFile path Csv limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv_publication_year row) (csv_publication_month row) (csv_publication_day row) time)
-- | To use the correct parser given a CorpusType (toPhyloDate' (csv_publication_year row) (csv_publication_month row) (csv_publication_day row))
fileToCorpus :: CorpusParser -> FilePath -> IO ([(Int,Text)]) (termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
fileToCorpus parser path = case parser of Nothing
Wos limit -> wosToCorpus limit path []
Csv limit -> csvToCorpus limit path ) <$> snd <$> Csv.readFile path
Csv' limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row))
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
(Just $ csv'_weight row)
[csv'_source row]
) <$> snd <$> Csv.readWeightedCsv path
-- To parse a file into a list of Document
fileToDocs' :: CorpusParser -> FilePath -> TimeUnit -> TermList -> IO [Document]
fileToDocs' parser path time lst = do
let patterns = buildPatterns lst
case parser of
Wos limit -> wosToDocs limit patterns time path
Csv _ -> csvToDocs parser patterns time path
Csv' _ -> csvToDocs parser patterns time path
-- | To parse a file into a list of Document ---------------
fileToDocs :: CorpusParser -> FilePath -> TermList -> IO [Document] -- | Label | --
fileToDocs parser path lst = do ---------------
corpus <- fileToCorpus parser path
let patterns = buildPatterns lst
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
-- Config time parameters to label -- Config time parameters to label
timeToLabel :: Config -> [Char] timeToLabel :: Config -> [Char]
timeToLabel config = case (timeUnit config) of timeToLabel config = case (timeUnit config) of
Year p s f -> ("time"<> "_"<> (show p) <> "_" <> (show s) <> "_" <> (show f)) Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Month p s f -> ("time_months"<> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
seaToLabel :: Config -> [Char] seaToLabel :: Config -> [Char]
...@@ -235,7 +284,7 @@ main = do ...@@ -235,7 +284,7 @@ main = do
printIOMsg "Parse the corpus" printIOMsg "Parse the corpus"
mapList <- csvMapTermList (listPath config) mapList <- csvMapTermList (listPath config)
corpus <- fileToDocs (corpusParser config) (corpusPath config) mapList corpus <- fileToDocs' (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus") printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOMsg "Reconstruct the phylo" printIOMsg "Reconstruct the phylo"
......
...@@ -330,7 +330,8 @@ executables: ...@@ -330,7 +330,8 @@ executables:
- optparse-generic - optparse-generic
- split - split
- unordered-containers - unordered-containers
- cryptohash - cryptohash
- time
gargantext-import: gargantext-import:
main: Main.hs main: Main.hs
......
...@@ -395,3 +395,34 @@ parseCsv fp = V.toList <$> V.map csv2doc <$> snd <$> readFile fp ...@@ -395,3 +395,34 @@ parseCsv fp = V.toList <$> V.map csv2doc <$> snd <$> readFile fp
parseCsv' :: BL.ByteString -> [HyperdataDocument] parseCsv' :: BL.ByteString -> [HyperdataDocument]
parseCsv' bs = V.toList $ V.map csv2doc $ snd $ readCsvLazyBS bs parseCsv' bs = V.toList $ V.map csv2doc $ snd $ readCsvLazyBS bs
------------------------------------------------------------------------
-- Csv v3 weighted for phylo
data Csv' = Csv'
{ csv'_title :: !Text
, csv'_source :: !Text
, csv'_publication_year :: !Int
, csv'_publication_month :: !Int
, csv'_publication_day :: !Int
, csv'_abstract :: !Text
, csv'_authors :: !Text
, csv'_weight :: !Double } deriving (Show)
instance FromNamedRecord Csv' where
parseNamedRecord r = Csv' <$> r .: "title"
<*> r .: "source"
<*> r .: "publication_year"
<*> r .: "publication_month"
<*> r .: "publication_day"
<*> r .: "abstract"
<*> r .: "authors"
<*> r .: "weight"
readWeightedCsv :: FilePath -> IO (Header, Vector Csv')
readWeightedCsv fp =
fmap (\bs ->
case decodeByNameWith csvDecodeOptions bs of
Left e -> panic (pack e)
Right corpus -> corpus
) $ BL.readFile fp
\ No newline at end of file
...@@ -21,6 +21,7 @@ module Gargantext.Core.Text.Metrics.FrequentItemSet ...@@ -21,6 +21,7 @@ module Gargantext.Core.Text.Metrics.FrequentItemSet
, fisWithSizePoly , fisWithSizePoly
, fisWithSizePoly2 , fisWithSizePoly2
, fisWithSizePolyMap , fisWithSizePolyMap
, fisWithSizePolyMap'
, module HLCM , module HLCM
) )
where where
...@@ -35,6 +36,8 @@ import qualified Data.Map.Strict as Map ...@@ -35,6 +36,8 @@ import qualified Data.Map.Strict as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Vector as V import qualified Data.Vector as V
import Control.Monad (sequence)
data Size = Point Int | Segment Int Int data Size = Point Int | Segment Int Int
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -120,6 +123,28 @@ fisWithSizePolyMap n f is = ...@@ -120,6 +123,28 @@ fisWithSizePolyMap n f is =
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
---- Weighted [[Item]]
isSublistOf :: Ord a => [a] -> [a] -> Bool
isSublistOf sub lst = all (\i -> elem i lst) sub
reIndexFis :: Ord a => [([a],(b,c))] -> [Fis' a] -> [(Fis' a,([b],[c]))]
reIndexFis items fis = map (\f ->
let docs = filter (\(lst,_) -> isSublistOf (_fisItemSet f) lst) items
in (f, (map (fst . snd) docs,map (snd . snd) docs))) fis
wsum :: [Maybe Double] -> Maybe Double
wsum lst = fmap sum $ sequence lst
fisWithSizePolyMap' :: Ord a => Size -> Frequency -> [([a], (Maybe Double,[Int]))] -> Map (Set a) (Int, (Maybe Double,[Int]))
fisWithSizePolyMap' n f is = Map.fromList
$ map (\(fis,(ws,sources)) -> (Set.fromList (_fisItemSet fis),(_fisCount fis,(wsum ws,concat sources))))
$ reIndexFis is
$ fisWithSizePoly2 n f (map fst is)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- --
---- | /!\ indexes are not the same: ---- | /!\ indexes are not the same:
......
...@@ -50,8 +50,9 @@ import qualified Data.Text.Lazy as TextLazy ...@@ -50,8 +50,9 @@ import qualified Data.Text.Lazy as TextLazy
data CorpusParser = data CorpusParser =
Wos {_wos_limit :: Int} Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int} | Csv {_csv_limit :: Int}
| Csv' {_csv'_limit :: Int}
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data SeaElevation = data SeaElevation =
...@@ -106,6 +107,18 @@ data TimeUnit = ...@@ -106,6 +107,18 @@ data TimeUnit =
{ _year_period :: Int { _year_period :: Int
, _year_step :: Int , _year_step :: Int
, _year_matchingFrame :: Int } , _year_matchingFrame :: Int }
| Month
{ _month_period :: Int
, _month_step :: Int
, _month_matchingFrame :: Int }
| Week
{ _week_period :: Int
, _week_step :: Int
, _week_matchingFrame :: Int }
| Day
{ _day_period :: Int
, _day_step :: Int
, _day_matchingFrame :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq) data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
...@@ -231,17 +244,20 @@ defaultPhyloParam = ...@@ -231,17 +244,20 @@ defaultPhyloParam =
-- | Document | -- -- | Document | --
------------------ ------------------
-- | Date : a simple Integer -- | Date : a simple Integer
type Date = Int type Date = Int
-- | Ngrams : a contiguous sequence of n terms -- | Ngrams : a contiguous sequence of n terms
type Ngrams = Text type Ngrams = Text
-- | Document : a piece of Text linked to a Date -- Document : a piece of Text linked to a Date
-- date = computational date; date' = original string date yyyy-mm-dd
data Document = Document data Document = Document
{ date :: Date { date :: Date
, text :: [Ngrams] , date' :: Text
, text :: [Ngrams]
, weight :: Maybe Double
, sources :: [Text]
} deriving (Eq,Show,Generic,NFData) } deriving (Eq,Show,Generic,NFData)
...@@ -257,6 +273,10 @@ data PhyloFoundations = PhyloFoundations ...@@ -257,6 +273,10 @@ data PhyloFoundations = PhyloFoundations
} deriving (Generic, Show, Eq) } deriving (Generic, Show, Eq)
data PhyloSources = PhyloSources
{ _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
--------------------------- ---------------------------
-- | Coocurency Matrix | -- -- | Coocurency Matrix | --
--------------------------- ---------------------------
...@@ -279,6 +299,7 @@ type Cooc = Map (Int,Int) Double ...@@ -279,6 +299,7 @@ type Cooc = Map (Int,Int) Double
-- periods : the temporal steps of a phylomemy -- periods : the temporal steps of a phylomemy
data Phylo = data Phylo =
Phylo { _phylo_foundations :: PhyloFoundations Phylo { _phylo_foundations :: PhyloFoundations
, _phylo_sources :: PhyloSources
, _phylo_timeCooc :: !(Map Date Cooc) , _phylo_timeCooc :: !(Map Date Cooc)
, _phylo_timeDocs :: !(Map Date Double) , _phylo_timeDocs :: !(Map Date Double)
, _phylo_termFreq :: !(Map Int Double) , _phylo_termFreq :: !(Map Int Double)
...@@ -298,8 +319,9 @@ type PhyloPeriodId = (Date,Date) ...@@ -298,8 +319,9 @@ type PhyloPeriodId = (Date,Date)
-- id: tuple (start date, end date) of the temporal step of the phylomemy -- id: tuple (start date, end date) of the temporal step of the phylomemy
-- levels: levels of granularity -- levels: levels of granularity
data PhyloPeriod = data PhyloPeriod =
PhyloPeriod { _phylo_periodPeriod :: (Date,Date) PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
, _phylo_periodLevels :: Map PhyloLevelId PhyloLevel , _phylo_periodPeriod' :: (Text,Text)
, _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
} deriving (Generic, Show, Eq) } deriving (Generic, Show, Eq)
...@@ -315,9 +337,10 @@ type PhyloLevelId = (PhyloPeriodId,Level) ...@@ -315,9 +337,10 @@ type PhyloLevelId = (PhyloPeriodId,Level)
-- Level 1: First level of clustering (the Fis) -- Level 1: First level of clustering (the Fis)
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis) -- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data PhyloLevel = data PhyloLevel =
PhyloLevel { _phylo_levelPeriod :: (Date,Date) PhyloLevel { _phylo_levelPeriod :: (Date,Date)
, _phylo_levelLevel :: Level , _phylo_levelPeriod' :: (Text,Text)
, _phylo_levelGroups :: Map PhyloGroupId PhyloGroup , _phylo_levelLevel :: Level
, _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
...@@ -331,10 +354,13 @@ type PhyloBranchId = (Level, [Int]) ...@@ -331,10 +354,13 @@ type PhyloBranchId = (Level, [Int])
-- | PhyloGroup : group of ngrams at each level and period -- | PhyloGroup : group of ngrams at each level and period
data PhyloGroup = data PhyloGroup =
PhyloGroup { _phylo_groupPeriod :: (Date,Date) PhyloGroup { _phylo_groupPeriod :: (Date,Date)
, _phylo_groupPeriod' :: (Text,Text)
, _phylo_groupLevel :: Level , _phylo_groupLevel :: Level
, _phylo_groupIndex :: Int , _phylo_groupIndex :: Int
, _phylo_groupLabel :: Text , _phylo_groupLabel :: Text
, _phylo_groupSupport :: Support , _phylo_groupSupport :: Support
, _phylo_groupWeight :: Maybe Double
, _phylo_groupSources :: [Int]
, _phylo_groupNgrams :: [Int] , _phylo_groupNgrams :: [Int]
, _phylo_groupCooc :: !(Cooc) , _phylo_groupCooc :: !(Cooc)
, _phylo_groupBranchId :: PhyloBranchId , _phylo_groupBranchId :: PhyloBranchId
...@@ -368,6 +394,8 @@ data PhyloClique = PhyloClique ...@@ -368,6 +394,8 @@ data PhyloClique = PhyloClique
{ _phyloClique_nodes :: [Int] { _phyloClique_nodes :: [Int]
, _phyloClique_support :: Support , _phyloClique_support :: Support
, _phyloClique_period :: (Date,Date) , _phyloClique_period :: (Date,Date)
, _phyloClique_weight :: Maybe Double
, _phyloClique_sources :: [Int]
} deriving (Generic,NFData,Show,Eq) } deriving (Generic,NFData,Show,Eq)
---------------- ----------------
...@@ -441,6 +469,8 @@ makeLenses ''PhyloBranch ...@@ -441,6 +469,8 @@ makeLenses ''PhyloBranch
instance FromJSON Phylo instance FromJSON Phylo
instance ToJSON Phylo instance ToJSON Phylo
instance FromJSON PhyloSources
instance ToJSON PhyloSources
instance FromJSON PhyloParam instance FromJSON PhyloParam
instance ToJSON PhyloParam instance ToJSON PhyloParam
instance FromJSON PhyloPeriod instance FromJSON PhyloPeriod
......
...@@ -109,8 +109,12 @@ config = ...@@ -109,8 +109,12 @@ config =
docs :: [Document] docs :: [Document]
docs = map (\(d,t) docs = map (\(d,t)
-> Document d ( filter (\n -> isRoots n (foundations ^. foundations_roots)) -> Document d
$ monoTexts t)) corpus ""
(filter (\n -> isRoots n (foundations ^. foundations_roots)) $ monoTexts t)
Nothing
[]
) corpus
foundations :: PhyloFoundations foundations :: PhyloFoundations
......
...@@ -120,11 +120,13 @@ branchToDotNode b bId = ...@@ -120,11 +120,13 @@ branchToDotNode b bId =
, toAttr "label" (pack $ show $ b ^. branch_label) , toAttr "label" (pack $ show $ b ^. branch_label)
]) ])
periodToDotNode :: (Date,Date) -> Dot DotId periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
periodToDotNode prd = periodToDotNode prd prd' =
node (periodIdToDotId prd) node (periodIdToDotId prd)
([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))] ([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
<> [ toAttr "nodeType" "period" <> [ toAttr "nodeType" "period"
, toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
, toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
, toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd)) , toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
, toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))]) , toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
...@@ -136,9 +138,13 @@ groupToDotNode fdt g bId = ...@@ -136,9 +138,13 @@ groupToDotNode fdt g bId =
<> [ toAttr "nodeType" "group" <> [ toAttr "nodeType" "group"
, toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod)) , toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
, toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod)) , toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
, toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
, toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
, toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId)) , toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
, toAttr "bId" (pack $ show bId) , toAttr "bId" (pack $ show bId)
, toAttr "support" (pack $ show (g ^. phylo_groupSupport)) , toAttr "support" (pack $ show (g ^. phylo_groupSupport))
, toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
, toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
, toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams))) , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
, toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams))) , toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
, toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics"))) , toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
...@@ -193,6 +199,8 @@ exportToDot phylo export = ...@@ -193,6 +199,8 @@ exportToDot phylo export =
,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods)) ,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches)) ,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)) ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo)) -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
]) ])
...@@ -217,13 +225,13 @@ exportToDot phylo export = ...@@ -217,13 +225,13 @@ exportToDot phylo export =
{-- 5) create a layer for each period -} {-- 5) create a layer for each period -}
_ <- mapM (\period -> _ <- mapM (\period ->
subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
graphAttrs [Rank SameRank] graphAttrs [Rank SameRank]
periodToDotNode period periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod')
{-- 6) create a node for each group -} {-- 6) create a node for each group -}
mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups) mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
) $ getPeriodIds phylo ) $ phylo ^. phylo_periods
{-- 7) create the edges between a branch and its first groups -} {-- 7) create the edges between a branch and its first groups -}
_ <- mapM (\(bId,groups) -> _ <- mapM (\(bId,groups) ->
......
...@@ -14,6 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where ...@@ -14,6 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail) import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.Text (Text)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo import Gargantext.Core.Viz.AdaptativePhylo
...@@ -21,7 +22,7 @@ import Gargantext.Core.Viz.Phylo.PhyloTools ...@@ -21,7 +22,7 @@ import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity) import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..)) import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques) import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Methods.Distances (Distance(Conditional)) import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon) import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
...@@ -104,7 +105,7 @@ toGroupsProxi lvl phylo = ...@@ -104,7 +105,7 @@ toGroupsProxi lvl phylo =
in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi) in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi)
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> PhyloPeriodId -> (Text,Text) -> Level -> Int -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods $ over ( phylo_periods
. traverse . traverse
...@@ -112,12 +113,13 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -112,12 +113,13 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
. traverse) . traverse)
(\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel) (\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel)
then then
let pId = phyloLvl ^. phylo_levelPeriod let pId = phyloLvl ^. phylo_levelPeriod
pId' = phyloLvl ^. phylo_levelPeriod'
phyloCUnit = m ! pId phyloCUnit = m ! pId
in phyloLvl in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj -> & phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups) groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups) , f obj pId pId' lvl (length groups)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId])) (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloCUnit) ] ) [] phyloCUnit)
else else
...@@ -125,9 +127,11 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co ...@@ -125,9 +127,11 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo phylo
cliqueToGroup :: PhyloClique -> PhyloPeriodId -> Level -> Int -> [Cooc] -> PhyloGroup cliqueToGroup :: PhyloClique -> PhyloPeriodId -> (Text,Text) -> Level -> Int -> [Cooc] -> PhyloGroup
cliqueToGroup fis pId lvl idx coocs = PhyloGroup pId lvl idx "" cliqueToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
(fis ^. phyloClique_support) (fis ^. phyloClique_support)
(fis ^. phyloClique_weight)
(fis ^. phyloClique_sources)
(fis ^. phyloClique_nodes) (fis ^. phyloClique_nodes)
(ngramsToCooc (fis ^. phyloClique_nodes) coocs) (ngramsToCooc (fis ^. phyloClique_nodes) coocs)
(1,[0]) -- branchid (lvl,[path in the branching tree]) (1,[0]) -- branchid (lvl,[path in the branching tree])
...@@ -142,14 +146,27 @@ toPhylo1 phyloStep = case (getSeaElevation phyloStep) of ...@@ -142,14 +146,27 @@ toPhylo1 phyloStep = case (getSeaElevation phyloStep) of
----------------------- -----------------------
-- | To Phylo Step | -- -- | To Phylo Step | --
----------------------- -----------------------
indexDates' :: Map (Date,Date) [Document] -> Map (Date,Date) (Text,Text)
indexDates' m = map (\docs ->
let ds = map (\d -> date' d) docs
f = if (null ds)
then ""
else toFstDate ds
l = if (null ds)
then ""
else toLstDate ds
in (f,l)) m
-- To build the first phylo step from docs and terms -- To build the first phylo step from docs and terms
toPhyloStep :: [Document] -> TermList -> Config -> Phylo toPhyloStep :: [Document] -> TermList -> Config -> Phylo
toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique phyloBase Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique (updatePeriods (indexDates' docs') phyloBase)
Adaptative _ -> toGroupsProxi 1 $ appendGroups cliqueToGroup 1 phyloClique phyloBase Adaptative _ -> toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique (updatePeriods (indexDates' docs') phyloBase)
where where
-------------------------------------- --------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique] phyloClique :: Map (Date,Date) [PhyloClique]
...@@ -217,8 +234,14 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -217,8 +234,14 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
phyloClique = case (clique $ getConfig phylo) of phyloClique = case (clique $ getConfig phylo) of
Fis _ _ -> Fis _ _ ->
let fis = map (\(prd,docs) -> let fis = map (\(prd,docs) ->
let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs) case (corpusParser $ getConfig phylo) of
in (prd, map (\f -> PhyloClique (Set.toList $ fst f) (snd f) prd) lst)) Csv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
in (prd, map (\f -> PhyloClique (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
_ -> let lst = toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
in (prd, map (\f -> PhyloClique (Set.toList $ fst f) (snd f) prd Nothing []) lst)
)
$ toList phyloDocs $ toList phyloDocs
fis' = fis `using` parList rdeepseq fis' = fis `using` parList rdeepseq
in fromList fis' in fromList fis'
...@@ -228,7 +251,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -228,7 +251,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ foldl sumCooc empty $ foldl sumCooc empty
$ map listToMatrix $ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs $ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques filterType Conditional thr cooc)) in (prd, map (\cl -> PhyloClique cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs $ toList phyloDocs
mcl' = mcl `using` parList rdeepseq mcl' = mcl `using` parList rdeepseq
in fromList mcl' in fromList mcl'
...@@ -335,17 +358,20 @@ docsToTimeScaleNb docs = ...@@ -335,17 +358,20 @@ docsToTimeScaleNb docs =
initPhyloLevels :: Int -> PhyloPeriodId -> Map PhyloLevelId PhyloLevel initPhyloLevels :: Int -> PhyloPeriodId -> Map PhyloLevelId PhyloLevel
initPhyloLevels lvlMax pId = initPhyloLevels lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId lvl empty)) [1..lvlMax] fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId ("","") lvl empty)) [1..lvlMax]
-- To init the basic elements of a Phylo -- To init the basic elements of a Phylo
toPhyloBase :: [Document] -> TermList -> Config -> Phylo toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase docs lst conf = toPhyloBase docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
params = defaultPhyloParam { _phyloParam_config = conf } params = defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n") in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n")
$ Phylo foundations $ Phylo foundations
docsSources
(docsToTimeScaleCooc docs (foundations ^. foundations_roots)) (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs) (docsToTimeScaleNb docs)
(docsToTermFreq docs (foundations ^. foundations_roots)) (docsToTermFreq docs (foundations ^. foundations_roots))
...@@ -353,4 +379,4 @@ toPhyloBase docs lst conf = ...@@ -353,4 +379,4 @@ toPhyloBase docs lst conf =
empty empty
empty empty
params params
(fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels 1 prd))) periods) (fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloLevels 1 prd))) periods)
...@@ -17,9 +17,9 @@ import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tai ...@@ -17,9 +17,9 @@ import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tai
import Data.Set (Set, disjoint) import Data.Set (Set, disjoint)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.String (String) import Data.String (String)
import Data.Text (Text) import Data.Text (Text,unpack)
import Prelude (floor) import Prelude (floor,read)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo import Gargantext.Core.Viz.AdaptativePhylo
...@@ -115,6 +115,10 @@ isRoots n ns = Vector.elem n ns ...@@ -115,6 +115,10 @@ isRoots n ns = Vector.elem n ns
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int] ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
-- | To transform a list of sources into a list of sources' index
sourcesToIdx :: [Text] -> Vector Text -> [Int]
sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
-- | To transform a list of Ngrams Indexes into a Label -- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text ngramsToLabel :: Vector Ngrams -> [Int] -> Text
ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
...@@ -153,6 +157,32 @@ toPeriods dates p s = ...@@ -153,6 +157,32 @@ toPeriods dates p s =
$ chunkAlong p s [start .. end] $ chunkAlong p s [start .. end]
toFstDate :: [Text] -> Text
toFstDate ds = snd
$ head' "firstDate"
$ sortOn fst
$ map (\d ->
let d' = read (filter (\c -> c /= '-') $ unpack d)::Int
in (d',d)) ds
toLstDate :: [Text] -> Text
toLstDate ds = snd
$ head' "firstDate"
$ reverse
$ sortOn fst
$ map (\d ->
let d' = read (filter (\c -> c /= '-') $ unpack d)::Int
in (d',d)) ds
getTimeScale :: Phylo -> [Char]
getTimeScale p = case (timeUnit $ getConfig p) of
Year _ _ _ -> "year"
Month _ _ _ -> "month"
Week _ _ _ -> "week"
Day _ _ _ -> "day"
-- | Get a regular & ascendante timeScale from a given list of dates -- | Get a regular & ascendante timeScale from a given list of dates
toTimeScale :: [Date] -> Int -> [Date] toTimeScale :: [Date] -> Int -> [Date]
toTimeScale dates step = toTimeScale dates step =
...@@ -162,15 +192,24 @@ toTimeScale dates step = ...@@ -162,15 +192,24 @@ toTimeScale dates step =
getTimeStep :: TimeUnit -> Int getTimeStep :: TimeUnit -> Int
getTimeStep time = case time of getTimeStep time = case time of
Year _ s _ -> s Year _ s _ -> s
Month _ s _ -> s
Week _ s _ -> s
Day _ s _ -> s
getTimePeriod :: TimeUnit -> Int getTimePeriod :: TimeUnit -> Int
getTimePeriod time = case time of getTimePeriod time = case time of
Year p _ _ -> p Year p _ _ -> p
Month p _ _ -> p
Week p _ _ -> p
Day p _ _ -> p
getTimeFrame :: TimeUnit -> Int getTimeFrame :: TimeUnit -> Int
getTimeFrame time = case time of getTimeFrame time = case time of
Year _ _ f -> f Year _ _ f -> f
Month _ _ f -> f
Week _ _ f -> f
Day _ _ f -> f
------------- -------------
-- | Fis | -- -- | Fis | --
...@@ -359,6 +398,9 @@ setConfig config phylo = phylo ...@@ -359,6 +398,9 @@ setConfig config phylo = phylo
getRoots :: Phylo -> Vector Ngrams getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources)
phyloToLastBranches :: Phylo -> [[PhyloGroup]] phyloToLastBranches :: Phylo -> [[PhyloGroup]]
phyloToLastBranches phylo = elems phyloToLastBranches phylo = elems
$ fromListWith (++) $ fromListWith (++)
...@@ -411,6 +453,16 @@ updatePhyloGroups lvl m phylo = ...@@ -411,6 +453,16 @@ updatePhyloGroups lvl m phylo =
then m ! id then m ! id
else g ) phylo else g ) phylo
updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
updatePeriods periods' phylo =
over (phylo_periods . traverse)
(\prd ->
let prd' = periods' ! (prd ^. phylo_periodPeriod)
lvls = map (\lvl -> lvl & phylo_levelPeriod' .~ prd') $ prd ^. phylo_periodLevels
in prd & phylo_periodPeriod' .~ prd'
& phylo_periodLevels .~ lvls
) phylo
traceToPhylo :: Level -> Phylo -> Phylo traceToPhylo :: Level -> Phylo -> Phylo
traceToPhylo lvl phylo = traceToPhylo lvl phylo =
......
...@@ -22,6 +22,7 @@ import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, ...@@ -22,6 +22,7 @@ import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty,
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Control.Monad (sequence)
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -35,8 +36,13 @@ import qualified Data.Map as Map ...@@ -35,8 +36,13 @@ import qualified Data.Map as Map
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs = mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) "" in PhyloGroup (fst $ fst id) (_phylo_groupPeriod' $ head' "mergeGroups" childs)
(sum $ map _phylo_groupSupport childs) ngrams (snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs)
(fmap sum $ sequence
$ map _phylo_groupWeight childs)
(concat $ map _phylo_groupSources childs)
ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
((snd $ fst id),bId) ((snd $ fst id),bId)
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs) (mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
...@@ -54,12 +60,12 @@ mergeGroups coocs id mapIds childs = ...@@ -54,12 +60,12 @@ mergeGroups coocs id mapIds childs =
mergeAncestors :: [Pointer] -> [Pointer] mergeAncestors :: [Pointer] -> [Pointer]
mergeAncestors pointers = Map.toList $ fromListWith max pointers mergeAncestors pointers = Map.toList $ fromListWith max pointers
addPhyloLevel :: Level -> Phylo -> Phylo addPhyloLevel :: Level -> Phylo -> Phylo
addPhyloLevel lvl phylo = addPhyloLevel lvl phylo =
over ( phylo_periods . traverse ) over ( phylo_periods . traverse )
(\phyloPrd -> phyloPrd & phylo_periodLevels (\phyloPrd -> phyloPrd & phylo_periodLevels
%~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) (PhyloLevel (phyloPrd ^. phylo_periodPeriod) lvl empty))) phylo %~ (insert (phyloPrd ^. phylo_periodPeriod, lvl)
(PhyloLevel (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriod') lvl empty))) phylo
toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment