Commit 92b4221b authored by qlobbe's avatar qlobbe

add branching

parent d3097207
Pipeline #548 failed with stage
......@@ -128,16 +128,16 @@ csvToCorpus limit path = Vector.toList
-- | To use the correct parser given a CorpusType
fileToCorpus :: CorpusParser -> Int -> FilePath -> IO ([(Int,Text)])
fileToCorpus parser limit path = case parser of
Wos -> wosToCorpus limit path
Csv -> csvToCorpus limit path
fileToCorpus :: CorpusParser -> FilePath -> IO ([(Int,Text)])
fileToCorpus parser path = case parser of
Wos limit -> wosToCorpus limit path
Csv limit -> csvToCorpus limit path
-- | To parse a file into a list of Document
fileToDocs :: CorpusParser -> Int -> FilePath -> TermList -> IO [Document]
fileToDocs parser limit path lst = do
corpus <- fileToCorpus parser limit path
fileToDocs :: CorpusParser -> FilePath -> TermList -> IO [Document]
fileToDocs parser path lst = do
corpus <- fileToCorpus parser path
let patterns = buildPatterns lst
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
......@@ -162,7 +162,7 @@ main = do
printIOMsg "Parse the corpus"
mapList <- csvGraphTermList (listPath config)
corpus <- fileToDocs (corpusParser config) (corpusLimit config) (corpusPath config) mapList
corpus <- fileToDocs (corpusParser config) (corpusPath config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOMsg "Reconstruct the Phylo"
......
......@@ -50,46 +50,63 @@ import Control.Lens (makeLenses)
----------------
data CorpusParser = Wos | Csv deriving (Show,Generic,Eq)
data CorpusParser =
Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int}
deriving (Show,Generic,Eq)
data Proximity =
WeightedLogJaccard
{ _wlj_sensibility :: Double
, _wlj_thresholdInit :: Double
, _wlj_thresholdStep :: Double }
| Hamming
deriving (Show,Generic,Eq)
data TimeUnit =
Year
{ _year_period :: Int
, _year_step :: Int
, _year_matchingFrame :: Int }
deriving (Show,Generic,Eq)
data ContextualUnit =
Fis
{ _fis_support :: Int
, _fis_size :: Int }
deriving (Show,Generic,Eq)
data Proximity = WeightedLogJaccard {_sensibility :: Double}
| Hamming
deriving (Show,Generic,Eq)
data Config =
Config { corpusPath :: FilePath
, listPath :: FilePath
, outputPath :: FilePath
, corpusParser :: CorpusParser
, corpusLimit :: Int
, phyloName :: Text
, phyloLevel :: Int
, phyloQuality :: Double
, phyloProximity :: Proximity
, timeUnit :: Int
, maxTimeMatch :: Int
, timePeriod :: Int
, timeStep :: Int
, fisSupport :: Int
, fisSize :: Int
, timeUnit :: TimeUnit
, contextualUnit :: ContextualUnit
, branchSize :: Int
} deriving (Show,Generic,Eq)
defaultConfig :: Config
defaultConfig =
Config { corpusPath = ""
, listPath = ""
, outputPath = ""
, corpusParser = Csv
, corpusLimit = 1000
, corpusParser = Csv 1000
, phyloName = pack "Default Phylo"
, phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10
, timeUnit = 1
, maxTimeMatch = 5
, timePeriod = 3
, timeStep = 1
, fisSupport = 2
, fisSize = 4
, phyloQuality = 0.5
, phyloProximity = WeightedLogJaccard 10 0 0.05
, timeUnit = Year 3 1 5
, contextualUnit = Fis 2 4
, branchSize = 3
}
......@@ -99,6 +116,10 @@ instance FromJSON CorpusParser
instance ToJSON CorpusParser
instance FromJSON Proximity
instance ToJSON Proximity
instance FromJSON TimeUnit
instance ToJSON TimeUnit
instance FromJSON ContextualUnit
instance ToJSON ContextualUnit
-- | Software parameters
......@@ -237,7 +258,7 @@ data PhyloGroup =
, _phylo_groupLevelChilds :: [Pointer]
, _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer]
, _phylo_groupBreakPointer :: Maybe Pointer
, _phylo_groupGhostPointers :: [Pointer]
}
deriving (Generic, Show, Eq)
......@@ -276,6 +297,9 @@ data PhyloFis = PhyloFis
----------------
makeLenses ''Config
makeLenses ''Proximity
makeLenses ''ContextualUnit
makeLenses ''TimeUnit
makeLenses ''PhyloFoundations
makeLenses ''PhyloFis
makeLenses ''Phylo
......
......@@ -49,7 +49,7 @@ phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase
phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis = toPhyloFis docsByPeriods (fisSupport config) (fisSize config)
phyloFis = toPhyloFis docsByPeriods (getFisSupport $ contextualUnit config) (getFisSize $ contextualUnit config)
docsByPeriods :: Map (Date,Date) [Document]
......@@ -66,23 +66,22 @@ phyloBase = toPhyloBase docs mapList config
phyloCooc :: Map Date Cooc
phyloCooc = docsToCoocByYear docs (foundations ^. foundations_roots) config
phyloCooc = docsToTimeScaleCooc docs (foundations ^. foundations_roots) config
periods :: [(Date,Date)]
periods = toPeriods (sort $ nub $ map date docs) (timePeriod config) (timeStep config)
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit config) (getTimeStep $ timeUnit config)
nbDocsByYear :: Map Date Double
nbDocsByYear = nbDocsByTime docs (timeUnit config)
nbDocsByYear = docsToTimeScaleNb docs
config :: Config
config =
defaultConfig { phyloName = "Cesar et Cleopatre"
, branchSize = 0
, fisSupport = 0
, fisSize = 0 }
, contextualUnit = Fis 0 0 }
docs :: [Document]
......
......@@ -87,8 +87,7 @@ fisToGroup fis pId lvl idx fdt coocs =
ngrams
(ngramsToCooc ngrams coocs)
(1,[])
[] [] [] []
Nothing
[] [] [] [] []
toPhylo1 :: [Document] -> Phylo -> Phylo
......@@ -96,7 +95,7 @@ toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase
where
--------------------------------------
phyloFis :: Map (Date,Date) [PhyloFis]
phyloFis = toPhyloFis docs' (fisSupport $ getConfig phyloBase) (fisSize $ getConfig phyloBase)
phyloFis = toPhyloFis docs' (getFisSupport $ contextualUnit $ getConfig phyloBase) (getFisSize $ contextualUnit $ getConfig phyloBase)
--------------------------------------
docs' :: Map (Date,Date) [Document]
docs' = groupDocsByPeriod date (getPeriodIds phyloBase) docs
......@@ -174,14 +173,14 @@ ngramsToCooc ngrams coocs =
-- | To transform the docs into a time map of coocurency matrix
docsToCoocByYear :: [Document] -> Vector Ngrams -> Config -> Map Date Cooc
docsToCoocByYear docs fdt conf =
docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Config -> Map Date Cooc
docsToTimeScaleCooc docs fdt conf =
let mCooc = fromListWith sumCooc
$ map (\(_d,l) -> (_d, listToMatrix l))
$ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs
mCooc' = fromList
$ map (\t -> (t,empty))
$ toTimeScale (map date docs) (timeUnit conf)
$ toTimeScale (map date docs) 1
in trace ("\n" <> "-- | Build the coocurency matrix for " <> show (length $ keys mCooc') <> " unit of time" <> "\n")
$ unionWith sumCooc mCooc mCooc'
......@@ -208,11 +207,11 @@ groupDocsByPeriod f pds es =
--------------------------------------
-- | To count the number of docs by unit of time (like a year)
nbDocsByTime :: [Document] -> Int -> Map Date Double
nbDocsByTime docs step =
-- | To count the number of docs by unit of time
docsToTimeScaleNb :: [Document] -> Map Date Double
docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') step
time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1
in trace ("\n" <> "-- | Group " <> show(length docs) <> " docs by " <> show(length time) <> " unit of time" <> "\n")
$ unionWith (+) time docs'
......@@ -227,10 +226,10 @@ toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
params = defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (timePeriod conf) (timeStep conf)
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod $ timeUnit conf) (getTimeStep $ timeUnit conf)
in trace ("\n" <> "-- | Create PhyloBase out of " <> show(length docs) <> " docs \n")
$ Phylo foundations
(docsToCoocByYear docs (foundations ^. foundations_roots) conf)
(nbDocsByTime docs $ timeUnit conf)
(docsToTimeScaleCooc docs (foundations ^. foundations_roots) conf)
(docsToTimeScaleNb docs)
params
(fromList $ map (\prd -> (prd, PhyloPeriod prd (initPhyloLevels (phyloLevel conf) prd))) periods)
......@@ -91,6 +91,18 @@ toTimeScale dates step =
in [start, (start + step) .. end]
getTimeStep :: TimeUnit -> Int
getTimeStep time = case time of
Year _ s _ -> s
getTimePeriod :: TimeUnit -> Int
getTimePeriod time = case time of
Year p _ _ -> p
getTimeFrame :: TimeUnit -> Int
getTimeFrame time = case time of
Year _ _ f -> f
-------------
-- | Fis | --
-------------
......@@ -136,6 +148,22 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
<> "Clique : " <> (traceClique mFis) <> "\n" ) mFis
-------------------------
-- | Contextual unit | --
-------------------------
getFisSupport :: ContextualUnit -> Int
getFisSupport unit = case unit of
Fis s _ -> s
_ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
getFisSize :: ContextualUnit -> Int
getFisSize unit = case unit of
Fis _ s -> s
_ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
--------------
-- | Cooc | --
--------------
......@@ -223,10 +251,31 @@ updatePhyloGroups lvl m phylo =
-- | Pointers | --
------------------
pointersToLinks :: PhyloGroupId -> [Pointer] -> [Link]
pointersToLinks id pointers = map (\p -> ((id,fst p),snd p)) pointers
mergeLinks :: [Link] -> [Link] -> [Link]
mergeLinks toChilds toParents =
let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds
in toList $ unionWith max (fromList toParents) toChilds'
\ No newline at end of file
in toList $ unionWith max (fromList toParents) toChilds'
-------------------
-- | Proximity | --
-------------------
getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of
WeightedLogJaccard s _ _ -> s
Hamming -> undefined
getThresholdInit :: Proximity -> Double
getThresholdInit proxi = case proxi of
WeightedLogJaccard _ t _ -> t
Hamming -> undefined
getThresholdStep :: Proximity -> Double
getThresholdStep proxi = case proxi of
WeightedLogJaccard _ _ s -> s
Hamming -> undefined
\ No newline at end of file
......@@ -19,11 +19,19 @@ import Gargantext.Prelude
import Gargantext.Viz.AdaptativePhylo
import Gargantext.Viz.Phylo.PhyloTools
import Data.List (foldl', (++), null, intersect, (\\), union, nub, concat)
--------------------
-- | Clustering | --
--------------------
relatedComponents :: [PhyloGroup] -> [[PhyloGroup]]
relatedComponents groups = undefined
\ No newline at end of file
relatedComponents :: Eq a => [[a]] -> [[a]]
relatedComponents graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment