Commit 92b4221b authored by qlobbe's avatar qlobbe

add branching

parent d3097207
...@@ -128,16 +128,16 @@ csvToCorpus limit path = Vector.toList ...@@ -128,16 +128,16 @@ csvToCorpus limit path = Vector.toList
-- | To use the correct parser given a CorpusType -- | To use the correct parser given a CorpusType
fileToCorpus :: CorpusParser -> Int -> FilePath -> IO ([(Int,Text)]) fileToCorpus :: CorpusParser -> FilePath -> IO ([(Int,Text)])
fileToCorpus parser limit path = case parser of fileToCorpus parser path = case parser of
Wos -> wosToCorpus limit path Wos limit -> wosToCorpus limit path
Csv -> csvToCorpus limit path Csv limit -> csvToCorpus limit path
-- | To parse a file into a list of Document -- | To parse a file into a list of Document
fileToDocs :: CorpusParser -> Int -> FilePath -> TermList -> IO [Document] fileToDocs :: CorpusParser -> FilePath -> TermList -> IO [Document]
fileToDocs parser limit path lst = do fileToDocs parser path lst = do
corpus <- fileToCorpus parser limit path corpus <- fileToCorpus parser path
let patterns = buildPatterns lst let patterns = buildPatterns lst
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
...@@ -162,7 +162,7 @@ main = do ...@@ -162,7 +162,7 @@ main = do
printIOMsg "Parse the corpus" printIOMsg "Parse the corpus"
mapList <- csvGraphTermList (listPath config) mapList <- csvGraphTermList (listPath config)
corpus <- fileToDocs (corpusParser config) (corpusLimit config) (corpusPath config) mapList corpus <- fileToDocs (corpusParser config) (corpusPath config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus") printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOMsg "Reconstruct the Phylo" printIOMsg "Reconstruct the Phylo"
......
...@@ -50,46 +50,63 @@ import Control.Lens (makeLenses) ...@@ -50,46 +50,63 @@ import Control.Lens (makeLenses)
---------------- ----------------
data CorpusParser = Wos | Csv deriving (Show,Generic,Eq) data CorpusParser =
Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int}
deriving (Show,Generic,Eq)
data Proximity = WeightedLogJaccard {_sensibility :: Double} data Proximity =
WeightedLogJaccard
{ _wlj_sensibility :: Double
, _wlj_thresholdInit :: Double
, _wlj_thresholdStep :: Double }
| Hamming | Hamming
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data TimeUnit =
Year
{ _year_period :: Int
, _year_step :: Int
, _year_matchingFrame :: Int }
deriving (Show,Generic,Eq)
data ContextualUnit =
Fis
{ _fis_support :: Int
, _fis_size :: Int }
deriving (Show,Generic,Eq)
data Config = data Config =
Config { corpusPath :: FilePath Config { corpusPath :: FilePath
, listPath :: FilePath , listPath :: FilePath
, outputPath :: FilePath , outputPath :: FilePath
, corpusParser :: CorpusParser , corpusParser :: CorpusParser
, corpusLimit :: Int
, phyloName :: Text , phyloName :: Text
, phyloLevel :: Int , phyloLevel :: Int
, phyloQuality :: Double
, phyloProximity :: Proximity , phyloProximity :: Proximity
, timeUnit :: Int , timeUnit :: TimeUnit
, maxTimeMatch :: Int , contextualUnit :: ContextualUnit
, timePeriod :: Int
, timeStep :: Int
, fisSupport :: Int
, fisSize :: Int
, branchSize :: Int , branchSize :: Int
} deriving (Show,Generic,Eq) } deriving (Show,Generic,Eq)
defaultConfig :: Config defaultConfig :: Config
defaultConfig = defaultConfig =
Config { corpusPath = "" Config { corpusPath = ""
, listPath = "" , listPath = ""
, outputPath = "" , outputPath = ""
, corpusParser = Csv , corpusParser = Csv 1000
, corpusLimit = 1000
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 , phyloQuality = 0.5
, timeUnit = 1 , phyloProximity = WeightedLogJaccard 10 0 0.05
, maxTimeMatch = 5 , timeUnit = Year 3 1 5
, timePeriod = 3 , contextualUnit = Fis 2 4
, timeStep = 1
, fisSupport = 2
, fisSize = 4
, branchSize = 3 , branchSize = 3
} }
...@@ -99,6 +116,10 @@ instance FromJSON CorpusParser ...@@ -99,6 +116,10 @@ instance FromJSON CorpusParser
instance ToJSON CorpusParser instance ToJSON CorpusParser
instance FromJSON Proximity instance FromJSON Proximity
instance ToJSON Proximity instance ToJSON Proximity
instance FromJSON TimeUnit
instance ToJSON TimeUnit
instance FromJSON ContextualUnit
instance ToJSON ContextualUnit
-- | Software parameters -- | Software parameters
...@@ -237,7 +258,7 @@ data PhyloGroup = ...@@ -237,7 +258,7 @@ data PhyloGroup =
, _phylo_groupLevelChilds :: [Pointer] , _phylo_groupLevelChilds :: [Pointer]
, _phylo_groupPeriodParents :: [Pointer] , _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer] , _phylo_groupPeriodChilds :: [Pointer]
, _phylo_groupBreakPointer :: Maybe Pointer , _phylo_groupGhostPointers :: [Pointer]
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
...@@ -276,6 +297,9 @@ data PhyloFis = PhyloFis ...@@ -276,6 +297,9 @@ data PhyloFis = PhyloFis
---------------- ----------------
makeLenses ''Config makeLenses ''Config
makeLenses ''Proximity
makeLenses ''ContextualUnit
makeLenses ''TimeUnit
makeLenses ''PhyloFoundations makeLenses ''PhyloFoundations
makeLenses ''PhyloFis makeLenses ''PhyloFis
makeLenses ''Phylo makeLenses ''Phylo
......
...@@ -49,7 +49,7 @@ phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase ...@@ -49,7 +49,7 @@ phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase
phyloFis :: Map (Date,Date) [PhyloFis] phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis = toPhyloFis docsByPeriods (fisSupport config) (fisSize config) phyloFis = toPhyloFis docsByPeriods (getFisSupport $ contextualUnit config) (getFisSize $ contextualUnit config)
docsByPeriods :: Map (Date,Date) [Document] docsByPeriods :: Map (Date,Date) [Document]
...@@ -66,23 +66,22 @@ phyloBase = toPhyloBase docs mapList config ...@@ -66,23 +66,22 @@ phyloBase = toPhyloBase docs mapList config
phyloCooc :: Map Date Cooc phyloCooc :: Map Date Cooc
phyloCooc = docsToCoocByYear docs (foundations ^. foundations_roots) config phyloCooc = docsToTimeScaleCooc docs (foundations ^. foundations_roots) config
periods :: [(Date,Date)] periods :: [(Date,Date)]
periods = toPeriods (sort $ nub $ map date docs) (timePeriod config) (timeStep config) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit config) (getTimeStep $ timeUnit config)
nbDocsByYear :: Map Date Double nbDocsByYear :: Map Date Double
nbDocsByYear = nbDocsByTime docs (timeUnit config) nbDocsByYear = docsToTimeScaleNb docs
config :: Config config :: Config
config = config =
defaultConfig { phyloName = "Cesar et Cleopatre" defaultConfig { phyloName = "Cesar et Cleopatre"
, branchSize = 0 , branchSize = 0
, fisSupport = 0 , contextualUnit = Fis 0 0 }
, fisSize = 0 }
docs :: [Document] docs :: [Document]
......
...@@ -87,8 +87,7 @@ fisToGroup fis pId lvl idx fdt coocs = ...@@ -87,8 +87,7 @@ fisToGroup fis pId lvl idx fdt coocs =
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[]) (1,[])
[] [] [] [] [] [] [] [] []
Nothing
toPhylo1 :: [Document] -> Phylo -> Phylo toPhylo1 :: [Document] -> Phylo -> Phylo
...@@ -96,7 +95,7 @@ toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase ...@@ -96,7 +95,7 @@ toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase
where where
-------------------------------------- --------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis] phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis = toPhyloFis docs' (fisSupport $ getConfig phyloBase) (fisSize $ getConfig phyloBase) phyloFis = toPhyloFis docs' (getFisSupport $ contextualUnit $ getConfig phyloBase) (getFisSize $ contextualUnit $ getConfig phyloBase)
-------------------------------------- --------------------------------------
docs' :: Map (Date,Date) [Document] docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
...@@ -174,14 +173,14 @@ ngramsToCooc ngrams coocs = ...@@ -174,14 +173,14 @@ ngramsToCooc ngrams coocs =
-- | To transform the docs into a time map of coocurency matrix -- | To transform the docs into a time map of coocurency matrix
docsToCoocByYear :: [Document] -> Vector Ngrams -> Config -> Map Date Cooc docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Config -> Map Date Cooc
docsToCoocByYear docs fdt conf = docsToTimeScaleCooc docs fdt conf =
let mCooc = fromListWith sumCooc let mCooc = fromListWith sumCooc
$ map (\(_d,l) -> (_d, listToMatrix l)) $ map (\(_d,l) -> (_d, listToMatrix l))
$ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs $ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs
mCooc' = fromList mCooc' = fromList
$ map (\t -> (t,empty)) $ map (\t -> (t,empty))
$ toTimeScale (map date docs) (timeUnit conf) $ toTimeScale (map date docs) 1
in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n") in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n")
$ unionWith sumCooc mCooc mCooc' $ unionWith sumCooc mCooc mCooc'
...@@ -208,11 +207,11 @@ groupDocsByPeriod f pds es = ...@@ -208,11 +207,11 @@ groupDocsByPeriod f pds es =
-------------------------------------- --------------------------------------
-- | To count the number of docs by unit of time (like a year) -- | To count the number of docs by unit of time
nbDocsByTime :: [Document] -> Int -> Map Date Double docsToTimeScaleNb :: [Document] -> Map Date Double
nbDocsByTime docs step = docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') step time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n") in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n")
$ unionWith (+) time docs' $ unionWith (+) time docs'
...@@ -227,10 +226,10 @@ toPhyloBase :: [Document] -> TermList -> Config -> Phylo ...@@ -227,10 +226,10 @@ toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase docs lst conf = toPhyloBase docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
params = defaultPhyloParam { _phyloParam_config = conf } params = defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (timePeriod conf) (timeStep conf) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n") in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n")
$ Phylo foundations $ Phylo foundations
(docsToCoocByYear docs (foundations ^. foundations_roots) conf) (docsToTimeScaleCooc docs (foundations ^. foundations_roots) conf)
(nbDocsByTime docs $ timeUnit conf) (docsToTimeScaleNb docs)
params params
(fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels (phyloLevel conf) prd))) periods) (fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels (phyloLevel conf) prd))) periods)
...@@ -91,6 +91,18 @@ toTimeScale dates step = ...@@ -91,6 +91,18 @@ toTimeScale dates step =
in [start, (start + step) .. end] in [start, (start + step) .. end]
getTimeStep :: TimeUnit -> Int
getTimeStep time = case time of
Year _ s _ -> s
getTimePeriod :: TimeUnit -> Int
getTimePeriod time = case time of
Year p _ _ -> p
getTimeFrame :: TimeUnit -> Int
getTimeFrame time = case time of
Year _ _ f -> f
------------- -------------
-- | Fis | -- -- | Fis | --
------------- -------------
...@@ -136,6 +148,22 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l ...@@ -136,6 +148,22 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
<> "Clique : " <> (traceClique mFis) <> "\n" ) mFis <> "Clique : " <> (traceClique mFis) <> "\n" ) mFis
-------------------------
-- | Contextual unit | --
-------------------------
getFisSupport :: ContextualUnit -> Int
getFisSupport unit = case unit of
Fis s _ -> s
_ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
getFisSize :: ContextualUnit -> Int
getFisSize unit = case unit of
Fis _ s -> s
_ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
-------------- --------------
-- | Cooc | -- -- | Cooc | --
-------------- --------------
...@@ -223,6 +251,7 @@ updatePhyloGroups lvl m phylo = ...@@ -223,6 +251,7 @@ updatePhyloGroups lvl m phylo =
-- | Pointers | -- -- | Pointers | --
------------------ ------------------
pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link] pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link]
pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers
...@@ -230,3 +259,23 @@ mergeLinks :: [Link] -> [Link] -> [Link] ...@@ -230,3 +259,23 @@ mergeLinks :: [Link] -> [Link] -> [Link]
mergeLinks toChilds toParents = mergeLinks toChilds toParents =
let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds
in toList $ unionWith max (fromList toParents) toChilds' in toList $ unionWith max (fromList toParents) toChilds'
-------------------
-- | Proximity | --
-------------------
getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of
WeightedLogJaccard s _ _ -> s
Hamming -> undefined
getThresholdInit :: Proximity -> Double
getThresholdInit proxi = case proxi of
WeightedLogJaccard _ t _ -> t
Hamming -> undefined
getThresholdStep :: Proximity -> Double
getThresholdStep proxi = case proxi of
WeightedLogJaccard _ _ s -> s
Hamming -> undefined
\ No newline at end of file
...@@ -19,11 +19,19 @@ import Gargantext.Prelude ...@@ -19,11 +19,19 @@ import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools import Gargantext.Viz.Phylo.PhyloTools
import Data.List (foldl', (++), null, intersect, (\\), union, nub, concat)
-------------------- --------------------
-- | Clustering | -- -- | Clustering | --
-------------------- --------------------
relatedComponents :: [PhyloGroup] -> [[PhyloGroup]] relatedComponents :: Eq a => [[a]] -> [[a]]
relatedComponents groups = undefined relatedComponents graphs = foldl' (\mem groups ->
\ No newline at end of file if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment