Commit 9863bd4f authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] merge with dev-phylo

parents 928c717f 6bddaf45
......@@ -19,13 +19,14 @@ module Main where
import Data.Aeson
import Data.List (concat, nub, isSuffixOf)
import Data.String (String)
import Data.Text (Text, unwords, unpack, replace)
import Data.Text (Text, unwords, unpack, replace, pack)
import Crypto.Hash.SHA256 (hash)
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day,
csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
......@@ -33,23 +34,25 @@ import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import GHC.IO (FilePath)
import Prelude (Either(Left, Right))
import Prelude (Either(Left, Right),toInteger)
import System.Environment
import System.Directory (listDirectory,doesFileExist)
import Control.Concurrent.Async (mapConcurrently)
import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.Vector as Vector
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
import qualified Data.Text as T
-- import Debug.Trace (trace)
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
---------------
-- | Tools | --
......@@ -60,9 +63,35 @@ data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
getFilesFromPath :: FilePath -> IO([FilePath])
getFilesFromPath path = do
if (isSuffixOf "/" path)
then (listDirectory path)
then (listDirectory path)
else return [path]
---------------
-- | Dates | --
---------------
toMonths :: Integer -> Int -> Int -> Date
toMonths y m d = fromIntegral $ cdMonths
$ diffGregorianDurationClip (fromGregorian y m d) (fromGregorian 0000 0 0)
toDays :: Integer -> Int -> Int -> Date
toDays y m d = fromIntegral
$ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
toPhyloDate y m d tu = case tu of
Year _ _ _ -> y
Month _ _ _ -> toMonths (toInteger y) m d
Week _ _ _ -> div (toDays (toInteger y) m d) 7
Day _ _ _ -> toDays (toInteger y) m d
toPhyloDate' :: Int -> Int -> Int -> Text
toPhyloDate' y m d = pack $ showGregorian $ fromGregorian (toInteger y) m d
--------------
-- | Json | --
......@@ -79,26 +108,28 @@ readJson path = Lazy.readFile path
----------------
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (a, Text) -> (a, [Text])
filterTerms patterns (y,d) = (y,termsInText patterns d)
where
--------------------------------------
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList pats txt
--------------------------------------
-- | To transform a Wos file (or [file]) into a readable corpus
wosToCorpus :: Int -> FilePath -> IO ([(Int,Text)])
wosToCorpus limit path = do
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList pats txt
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs :: Int -> Patterns -> TimeUnit -> FilePath -> IO ([Document])
wosToDocs limit patterns time path = do
files <- getFilesFromPath path
take limit
<$> map (\d -> let date' = fromJust $ _hd_publication_year d
title = fromJust $ _hd_title d
<$> map (\d -> let title = fromJust $ _hd_title d
abstr = if (isJust $ _hd_abstract d)
then fromJust $ _hd_abstract d
else ""
in (date', title <> " " <> abstr))
in Document (toPhyloDate
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d) time)
(toPhyloDate'
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d))
(termsInText patterns $ title <> " " <> abstr) Nothing [])
<$> concat
<$> mapConcurrently (\file ->
filter (\d -> (isJust $ _hd_publication_year d)
......@@ -106,33 +137,51 @@ wosToCorpus limit path = do
<$> parseFile WOS (path <> file) ) files
-- | To transform a Csv file into a readable corpus
csvToCorpus :: Int -> FilePath -> IO ([(Int,Text)])
csvToCorpus limit path = Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> (csv_publication_year row, (csv_title row) <> " " <> (csv_abstract row)))
<$> snd <$> Csv.readFile path
-- | To use the correct parser given a CorpusType
fileToCorpus :: CorpusParser -> FilePath -> IO ([(Int,Text)])
fileToCorpus parser path = case parser of
Wos limit -> wosToCorpus limit path
Csv limit -> csvToCorpus limit path
-- To transform a Csv file into a list of Document
csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO ([Document])
csvToDocs parser patterns time path =
case parser of
Wos _ -> undefined
Csv limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv_publication_year row) (csv_publication_month row) (csv_publication_day row) time)
(toPhyloDate' (csv_publication_year row) (csv_publication_month row) (csv_publication_day row))
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
) <$> snd <$> Csv.readFile path
Csv' limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row))
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
(Just $ csv'_weight row)
[csv'_source row]
) <$> snd <$> Csv.readWeightedCsv path
-- To parse a file into a list of Document
fileToDocs' :: CorpusParser -> FilePath -> TimeUnit -> TermList -> IO [Document]
fileToDocs' parser path time lst = do
let patterns = buildPatterns lst
case parser of
Wos limit -> wosToDocs limit patterns time path
Csv _ -> csvToDocs parser patterns time path
Csv' _ -> csvToDocs parser patterns time path
-- | To parse a file into a list of Document
fileToDocs :: CorpusParser -> FilePath -> TermList -> IO [Document]
fileToDocs parser path lst = do
corpus <- fileToCorpus parser path
let patterns = buildPatterns lst
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
---------------
-- | Label | --
---------------
-- Config time parameters to label
timeToLabel :: Config -> [Char]
timeToLabel config = case (timeUnit config) of
Year p s f -> ("time"<> "_"<> (show p) <> "_" <> (show s) <> "_" <> (show f))
Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Month p s f -> ("time_months"<> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
seaToLabel :: Config -> [Char]
......@@ -235,7 +284,7 @@ main = do
printIOMsg "Parse the corpus"
mapList <- csvMapTermList (listPath config)
corpus <- fileToDocs (corpusParser config) (corpusPath config) mapList
corpus <- fileToDocs' (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOMsg "Reconstruct the phylo"
......
......@@ -330,7 +330,8 @@ executables:
- optparse-generic
- split
- unordered-containers
- cryptohash
- cryptohash
- time
gargantext-import:
main: Main.hs
......
......@@ -395,3 +395,34 @@ parseCsv fp = V.toList <$> V.map csv2doc <$> snd <$> readFile fp
parseCsv' :: BL.ByteString -> [HyperdataDocument]
parseCsv' bs = V.toList $ V.map csv2doc $ snd $ readCsvLazyBS bs
------------------------------------------------------------------------
-- Csv v3 weighted for phylo
data Csv' = Csv'
{ csv'_title :: !Text
, csv'_source :: !Text
, csv'_publication_year :: !Int
, csv'_publication_month :: !Int
, csv'_publication_day :: !Int
, csv'_abstract :: !Text
, csv'_authors :: !Text
, csv'_weight :: !Double } deriving (Show)
instance FromNamedRecord Csv' where
parseNamedRecord r = Csv' <$> r .: "title"
<*> r .: "source"
<*> r .: "publication_year"
<*> r .: "publication_month"
<*> r .: "publication_day"
<*> r .: "abstract"
<*> r .: "authors"
<*> r .: "weight"
readWeightedCsv :: FilePath -> IO (Header, Vector Csv')
readWeightedCsv fp =
fmap (\bs ->
case decodeByNameWith csvDecodeOptions bs of
Left e -> panic (pack e)
Right corpus -> corpus
) $ BL.readFile fp
\ No newline at end of file
......@@ -21,6 +21,7 @@ module Gargantext.Core.Text.Metrics.FrequentItemSet
, fisWithSizePoly
, fisWithSizePoly2
, fisWithSizePolyMap
, fisWithSizePolyMap'
, module HLCM
)
where
......@@ -35,6 +36,8 @@ import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Vector as V
import Control.Monad (sequence)
data Size = Point Int | Segment Int Int
------------------------------------------------------------------------
......@@ -120,6 +123,28 @@ fisWithSizePolyMap n f is =
------------------------------------------------------------------------
------------------------------------------------------------------------
---- Weighted [[Item]]
isSublistOf :: Ord a => [a] -> [a] -> Bool
isSublistOf sub lst = all (\i -> elem i lst) sub
reIndexFis :: Ord a => [([a],(b,c))] -> [Fis' a] -> [(Fis' a,([b],[c]))]
reIndexFis items fis = map (\f ->
let docs = filter (\(lst,_) -> isSublistOf (_fisItemSet f) lst) items
in (f, (map (fst . snd) docs,map (snd . snd) docs))) fis
wsum :: [Maybe Double] -> Maybe Double
wsum lst = fmap sum $ sequence lst
fisWithSizePolyMap' :: Ord a => Size -> Frequency -> [([a], (Maybe Double,[Int]))] -> Map (Set a) (Int, (Maybe Double,[Int]))
fisWithSizePolyMap' n f is = Map.fromList
$ map (\(fis,(ws,sources)) -> (Set.fromList (_fisItemSet fis),(_fisCount fis,(wsum ws,concat sources))))
$ reIndexFis is
$ fisWithSizePoly2 n f (map fst is)
------------------------------------------------------------------------
------------------------------------------------------------------------
--
---- | /!\ indexes are not the same:
......
......@@ -50,8 +50,9 @@ import qualified Data.Text.Lazy as TextLazy
data CorpusParser =
Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int}
Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int}
| Csv' {_csv'_limit :: Int}
deriving (Show,Generic,Eq)
data SeaElevation =
......@@ -106,6 +107,18 @@ data TimeUnit =
{ _year_period :: Int
, _year_step :: Int
, _year_matchingFrame :: Int }
| Month
{ _month_period :: Int
, _month_step :: Int
, _month_matchingFrame :: Int }
| Week
{ _week_period :: Int
, _week_step :: Int
, _week_matchingFrame :: Int }
| Day
{ _day_period :: Int
, _day_step :: Int
, _day_matchingFrame :: Int }
deriving (Show,Generic,Eq)
data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
......@@ -231,17 +244,20 @@ defaultPhyloParam =
-- | Document | --
------------------
-- | Date : a simple Integer
type Date = Int
-- | Ngrams : a contiguous sequence of n terms
type Ngrams = Text
-- | Document : a piece of Text linked to a Date
-- Document : a piece of Text linked to a Date
-- date = computational date; date' = original string date yyyy-mm-dd
data Document = Document
{ date :: Date
, text :: [Ngrams]
{ date :: Date
, date' :: Text
, text :: [Ngrams]
, weight :: Maybe Double
, sources :: [Text]
} deriving (Eq,Show,Generic,NFData)
......@@ -257,6 +273,10 @@ data PhyloFoundations = PhyloFoundations
} deriving (Generic, Show, Eq)
data PhyloSources = PhyloSources
{ _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
---------------------------
-- | Coocurency Matrix | --
---------------------------
......@@ -279,6 +299,7 @@ type Cooc = Map (Int,Int) Double
-- periods : the temporal steps of a phylomemy
data Phylo =
Phylo { _phylo_foundations :: PhyloFoundations
, _phylo_sources :: PhyloSources
, _phylo_timeCooc :: !(Map Date Cooc)
, _phylo_timeDocs :: !(Map Date Double)
, _phylo_termFreq :: !(Map Int Double)
......@@ -298,8 +319,9 @@ type PhyloPeriodId = (Date,Date)
-- id: tuple (start date, end date) of the temporal step of the phylomemy
-- levels: levels of granularity
data PhyloPeriod =
PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
, _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
, _phylo_periodPeriod' :: (Text,Text)
, _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
} deriving (Generic, Show, Eq)
......@@ -315,9 +337,10 @@ type PhyloLevelId = (PhyloPeriodId,Level)
-- Level 1: First level of clustering (the Fis)
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data PhyloLevel =
PhyloLevel { _phylo_levelPeriod :: (Date,Date)
, _phylo_levelLevel :: Level
, _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
PhyloLevel { _phylo_levelPeriod :: (Date,Date)
, _phylo_levelPeriod' :: (Text,Text)
, _phylo_levelLevel :: Level
, _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
}
deriving (Generic, Show, Eq)
......@@ -331,10 +354,13 @@ type PhyloBranchId = (Level, [Int])
-- | PhyloGroup : group of ngrams at each level and period
data PhyloGroup =
PhyloGroup { _phylo_groupPeriod :: (Date,Date)
, _phylo_groupPeriod' :: (Text,Text)
, _phylo_groupLevel :: Level
, _phylo_groupIndex :: Int
, _phylo_groupIndex :: Int
, _phylo_groupLabel :: Text
, _phylo_groupSupport :: Support
, _phylo_groupWeight :: Maybe Double
, _phylo_groupSources :: [Int]
, _phylo_groupNgrams :: [Int]
, _phylo_groupCooc :: !(Cooc)
, _phylo_groupBranchId :: PhyloBranchId
......@@ -368,6 +394,8 @@ data PhyloClique = PhyloClique
{ _phyloClique_nodes :: [Int]
, _phyloClique_support :: Support
, _phyloClique_period :: (Date,Date)
, _phyloClique_weight :: Maybe Double
, _phyloClique_sources :: [Int]
} deriving (Generic,NFData,Show,Eq)
----------------
......@@ -441,6 +469,8 @@ makeLenses ''PhyloBranch
instance FromJSON Phylo
instance ToJSON Phylo
instance FromJSON PhyloSources
instance ToJSON PhyloSources
instance FromJSON PhyloParam
instance ToJSON PhyloParam
instance FromJSON PhyloPeriod
......
......@@ -109,8 +109,12 @@ config =
docs :: [Document]
docs = map (\(d,t)
-> Document d ( filter (\n -> isRoots n (foundations ^. foundations_roots))
$ monoTexts t)) corpus
-> Document d
""
(filter (\n -> isRoots n (foundations ^. foundations_roots)) $ monoTexts t)
Nothing
[]
) corpus
foundations :: PhyloFoundations
......
......@@ -120,11 +120,13 @@ branchToDotNode b bId =
, toAttr "label" (pack $ show $ b ^. branch_label)
])
periodToDotNode :: (Date,Date) -> Dot DotId
periodToDotNode prd =
periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
periodToDotNode prd prd' =
node (periodIdToDotId prd)
([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
<> [ toAttr "nodeType" "period"
<> [ toAttr "nodeType" "period"
, toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
, toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
, toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
, toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
......@@ -136,9 +138,13 @@ groupToDotNode fdt g bId =
<> [ toAttr "nodeType" "group"
, toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
, toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
, toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
, toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
, toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
, toAttr "bId" (pack $ show bId)
, toAttr "support" (pack $ show (g ^. phylo_groupSupport))
, toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
, toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
, toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
, toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
, toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
......@@ -193,6 +199,8 @@ exportToDot phylo export =
,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
])
......@@ -217,13 +225,13 @@ exportToDot phylo export =
{-- 5) create a layer for each period -}
_ <- mapM (\period ->
subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
graphAttrs [Rank SameRank]
periodToDotNode period
periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod')
{-- 6) create a node for each group -}
mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
) $ getPeriodIds phylo
mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
) $ phylo ^. phylo_periods
{-- 7) create the edges between a branch and its first groups -}
_ <- mapM (\(bId,groups) ->
......
......@@ -14,6 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Vector (Vector)
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
......@@ -21,7 +22,7 @@ import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
......@@ -104,7 +105,7 @@ toGroupsProxi lvl phylo =
in phylo & phylo_groupsProxi .~ ((traceGroupsProxi . fromList) groupsProxi)
appendGroups :: (a -> PhyloPeriodId -> Level -> Int -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups :: (a -> PhyloPeriodId -> (Text,Text) -> Level -> Int -> [Cooc] -> PhyloGroup) -> Level -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n")
$ over ( phylo_periods
. traverse
......@@ -112,12 +113,13 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
. traverse)
(\phyloLvl -> if lvl == (phyloLvl ^. phylo_levelLevel)
then
let pId = phyloLvl ^. phylo_levelPeriod
let pId = phyloLvl ^. phylo_levelPeriod
pId' = phyloLvl ^. phylo_levelPeriod'
phyloCUnit = m ! pId
in phyloLvl
& phylo_levelGroups .~ (fromList $ foldl (\groups obj ->
groups ++ [ (((pId,lvl),length groups)
, f obj pId lvl (length groups)
, f obj pId pId' lvl (length groups)
(elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [pId]))
] ) [] phyloCUnit)
else
......@@ -125,9 +127,11 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo
cliqueToGroup :: PhyloClique -> PhyloPeriodId -> Level -> Int -> [Cooc] -> PhyloGroup
cliqueToGroup fis pId lvl idx coocs = PhyloGroup pId lvl idx ""
cliqueToGroup :: PhyloClique -> PhyloPeriodId -> (Text,Text) -> Level -> Int -> [Cooc] -> PhyloGroup
cliqueToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
(fis ^. phyloClique_support)
(fis ^. phyloClique_weight)
(fis ^. phyloClique_sources)
(fis ^. phyloClique_nodes)
(ngramsToCooc (fis ^. phyloClique_nodes) coocs)
(1,[0]) -- branchid (lvl,[path in the branching tree])
......@@ -142,14 +146,27 @@ toPhylo1 phyloStep = case (getSeaElevation phyloStep) of
-----------------------
-- | To Phylo Step | --
-----------------------
-----------------------
indexDates' :: Map (Date,Date) [Document] -> Map (Date,Date) (Text,Text)
indexDates' m = map (\docs ->
let ds = map (\d -> date' d) docs
f = if (null ds)
then ""
else toFstDate ds
l = if (null ds)
then ""
else toLstDate ds
in (f,l)) m
-- To build the first phylo step from docs and terms
toPhyloStep :: [Document] -> TermList -> Config -> Phylo
toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique phyloBase
Adaptative _ -> toGroupsProxi 1 $ appendGroups cliqueToGroup 1 phyloClique phyloBase
Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique (updatePeriods (indexDates' docs') phyloBase)
Adaptative _ -> toGroupsProxi 1
$ appendGroups cliqueToGroup 1 phyloClique (updatePeriods (indexDates' docs') phyloBase)
where
--------------------------------------
phyloClique :: Map (Date,Date) [PhyloClique]
......@@ -217,8 +234,14 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
phyloClique = case (clique $ getConfig phylo) of
Fis _ _ ->
let fis = map (\(prd,docs) ->
let lst = toList $ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
in (prd, map (\f -> PhyloClique (Set.toList $ fst f) (snd f) prd) lst))
case (corpusParser $ getConfig phylo) of
Csv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
in (prd, map (\f -> PhyloClique (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
_ -> let lst = toList
$ fisWithSizePolyMap (Segment 1 20) 1 (map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs)
in (prd, map (\f -> PhyloClique (Set.toList $ fst f) (snd f) prd Nothing []) lst)
)
$ toList phyloDocs
fis' = fis `using` parList rdeepseq
in fromList fis'
......@@ -228,7 +251,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques filterType Conditional thr cooc))
in (prd, map (\cl -> PhyloClique cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs
mcl' = mcl `using` parList rdeepseq
in fromList mcl'
......@@ -335,17 +358,20 @@ docsToTimeScaleNb docs =
initPhyloLevels :: Int -> PhyloPeriodId -> Map PhyloLevelId PhyloLevel
initPhyloLevels lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId lvl empty)) [1..lvlMax]
fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId ("","") lvl empty)) [1..lvlMax]
-- To init the basic elements of a Phylo
toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs)
params = defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n")
$ Phylo foundations
docsSources
(docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs)
(docsToTermFreq docs (foundations ^. foundations_roots))
......@@ -353,4 +379,4 @@ toPhyloBase docs lst conf =
empty
empty
params
(fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels 1 prd))) periods)
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloLevels 1 prd))) periods)
......@@ -17,9 +17,9 @@ import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tai
import Data.Set (Set, disjoint)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.String (String)
import Data.Text (Text)
import Data.Text (Text,unpack)
import Prelude (floor)
import Prelude (floor,read)
import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
......@@ -115,6 +115,10 @@ isRoots n ns = Vector.elem n ns
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
-- | To transform a list of sources into a list of sources' index
sourcesToIdx :: [Text] -> Vector Text -> [Int]
sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text
ngramsToLabel ngrams l = Text.unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
......@@ -153,6 +157,32 @@ toPeriods dates p s =
$ chunkAlong p s [start .. end]
toFstDate :: [Text] -> Text
toFstDate ds = snd
$ head' "firstDate"
$ sortOn fst
$ map (\d ->
let d' = read (filter (\c -> c /= '-') $ unpack d)::Int
in (d',d)) ds
toLstDate :: [Text] -> Text
toLstDate ds = snd
$ head' "firstDate"
$ reverse
$ sortOn fst
$ map (\d ->
let d' = read (filter (\c -> c /= '-') $ unpack d)::Int
in (d',d)) ds
getTimeScale :: Phylo -> [Char]
getTimeScale p = case (timeUnit $ getConfig p) of
Year _ _ _ -> "year"
Month _ _ _ -> "month"
Week _ _ _ -> "week"
Day _ _ _ -> "day"
-- | Get a regular & ascendante timeScale from a given list of dates
toTimeScale :: [Date] -> Int -> [Date]
toTimeScale dates step =
......@@ -162,15 +192,24 @@ toTimeScale dates step =
getTimeStep :: TimeUnit -> Int
getTimeStep time = case time of
Year _ s _ -> s
Year _ s _ -> s
Month _ s _ -> s
Week _ s _ -> s
Day _ s _ -> s
getTimePeriod :: TimeUnit -> Int
getTimePeriod time = case time of
Year p _ _ -> p
Year p _ _ -> p
Month p _ _ -> p
Week p _ _ -> p
Day p _ _ -> p
getTimeFrame :: TimeUnit -> Int
getTimeFrame time = case time of
Year _ _ f -> f
Year _ _ f -> f
Month _ _ f -> f
Week _ _ f -> f
Day _ _ f -> f
-------------
-- | Fis | --
......@@ -359,6 +398,9 @@ setConfig config phylo = phylo
getRoots :: Phylo -> Vector Ngrams
getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots
getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources)
phyloToLastBranches :: Phylo -> [[PhyloGroup]]
phyloToLastBranches phylo = elems
$ fromListWith (++)
......@@ -411,6 +453,16 @@ updatePhyloGroups lvl m phylo =
then m ! id
else g ) phylo
updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
updatePeriods periods' phylo =
over (phylo_periods . traverse)
(\prd ->
let prd' = periods' ! (prd ^. phylo_periodPeriod)
lvls = map (\lvl -> lvl & phylo_levelPeriod' .~ prd') $ prd ^. phylo_periodLevels
in prd & phylo_periodPeriod' .~ prd'
& phylo_periodLevels .~ lvls
) phylo
traceToPhylo :: Level -> Phylo -> Phylo
traceToPhylo lvl phylo =
......
......@@ -22,6 +22,7 @@ import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty,
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Control.Monad (sequence)
-- import Debug.Trace (trace)
import qualified Data.Map as Map
......@@ -35,8 +36,13 @@ import qualified Data.Map as Map
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
in PhyloGroup (fst $ fst id) (snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs) ngrams
in PhyloGroup (fst $ fst id) (_phylo_groupPeriod' $ head' "mergeGroups" childs)
(snd $ fst id) (snd id) ""
(sum $ map _phylo_groupSupport childs)
(fmap sum $ sequence
$ map _phylo_groupWeight childs)
(concat $ map _phylo_groupSources childs)
ngrams
(ngramsToCooc ngrams coocs)
((snd $ fst id),bId)
(mergeMeta bId childs) [] (map (\g -> (getGroupId g, 1)) childs)
......@@ -54,12 +60,12 @@ mergeGroups coocs id mapIds childs =
mergeAncestors :: [Pointer] -> [Pointer]
mergeAncestors pointers = Map.toList $ fromListWith max pointers
addPhyloLevel :: Level -> Phylo -> Phylo
addPhyloLevel lvl phylo =
over ( phylo_periods . traverse )
(\phyloPrd -> phyloPrd & phylo_periodLevels
%~ (insert (phyloPrd ^. phylo_periodPeriod, lvl) (PhyloLevel (phyloPrd ^. phylo_periodPeriod) lvl empty))) phylo
%~ (insert (phyloPrd ^. phylo_periodPeriod, lvl)
(PhyloLevel (phyloPrd ^. phylo_periodPeriod) (phyloPrd ^. phylo_periodPeriod') lvl empty))) phylo
toNextLevel' :: Phylo -> [PhyloGroup] -> Phylo
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment