Commit 74d2038a authored by qlobbe's avatar qlobbe Committed by Alexandre Delanoë

add a list parser param

parent 878907d0
...@@ -16,43 +16,40 @@ Adaptative Phylo binaries ...@@ -16,43 +16,40 @@ Adaptative Phylo binaries
module Main where module Main where
-- import Debug.Trace (trace)
import Control.Concurrent.Async (mapConcurrently) import Control.Concurrent.Async (mapConcurrently)
import Crypto.Hash.SHA256 (hash) import Crypto.Hash.SHA256 (hash)
import Data.Aeson import Data.Aeson
import Data.Either (Either(..)) import Data.Either (Either(..), fromRight)
import Data.List (concat, nub, isSuffixOf) import Data.List (concat, nub, isSuffixOf)
import Data.List.Split
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.String (String) import Data.String (String)
import GHC.IO (FilePath)
import qualified Prelude as Prelude
import System.Environment
import System.Directory (listDirectory,doesFileExist)
import Data.Text (Text, unwords, unpack, replace, pack) import Data.Text (Text, unwords, unpack, replace, pack)
import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian) import GHC.IO (FilePath)
import Gargantext.API.Ngrams.Prelude (toTermList)
import qualified Data.ByteString.Char8 as C8 import Gargantext.API.Ngrams.Types
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.Vector as Vector
import qualified Data.Text as T
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day,
csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile) import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList) import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList) import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep) import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig) import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
-- import Gargantext.API.Ngrams.Prelude (toTermList) import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Core.Viz.Phylo.API (toPhyloDate, toPhyloDate') import Gargantext.Prelude
import System.Directory (listDirectory,doesFileExist)
import System.Environment
-- import Debug.Trace (trace) import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.Text as T
import qualified Data.Vector as Vector
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show) data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
...@@ -60,24 +57,13 @@ data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show) ...@@ -60,24 +57,13 @@ data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
-- | Tools | -- -- | Tools | --
--------------- ---------------
-- | To get all the files in a directory or just a file -- | To get all the files in a directory or just a file
getFilesFromPath :: FilePath -> IO([FilePath]) getFilesFromPath :: FilePath -> IO [FilePath]
getFilesFromPath path = do getFilesFromPath path = do
if (isSuffixOf "/" path) if (isSuffixOf "/" path)
then (listDirectory path) then (listDirectory path)
else return [path] else return [path]
--------------
-- | Json | --
--------------
-- | To read and decode a Json file
readJson :: FilePath -> IO Lazy.ByteString
readJson path = Lazy.readFile path
---------------- ----------------
-- | Parser | -- -- | Parser | --
---------------- ----------------
...@@ -90,31 +76,26 @@ termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList p ...@@ -90,31 +76,26 @@ termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList p
-- | To transform a Wos file (or [file]) into a list of Docs -- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs :: Int -> Patterns -> TimeUnit -> FilePath -> IO [Document] wosToDocs :: Int -> Patterns -> TimeUnit -> FilePath -> IO [Document]
wosToDocs limit patterns time path = do wosToDocs limit patterns time path = do
files <- getFilesFromPath path files <- getFilesFromPath path
let parseFile' file = do take limit
eParsed <- parseFile WOS (path <> file) <$> map (\d -> let title = fromJust $ _hd_title d
case eParsed of abstr = if (isJust $ _hd_abstract d)
Right ps -> pure ps then fromJust $ _hd_abstract d
Left e -> panic $ "Error: " <> (pack e) else ""
take limit in Document (toPhyloDate
<$> map (\d -> let title = fromJust $ _hd_title d (fromIntegral $ fromJust $ _hd_publication_year d)
abstr = if (isJust $ _hd_abstract d) (fromJust $ _hd_publication_month d)
then fromJust $ _hd_abstract d (fromJust $ _hd_publication_day d) time)
else "" (toPhyloDate'
in Document (toPhyloDate (fromIntegral $ fromJust $ _hd_publication_year d)
(fromIntegral $ fromJust $ _hd_publication_year d) (fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_month d) (fromJust $ _hd_publication_day d) time)
(fromJust $ _hd_publication_day d) time) (termsInText patterns $ title <> " " <> abstr) Nothing [])
(toPhyloDate' <$> concat
(fromIntegral $ fromJust $ _hd_publication_year d) <$> mapConcurrently (\file ->
(fromJust $ _hd_publication_month d) filter (\d -> (isJust $ _hd_publication_year d)
(fromJust $ _hd_publication_day d)) && (isJust $ _hd_title d))
(termsInText patterns $ title <> " " <> abstr) Nothing []) <$> fromRight [] <$> parseFile WOS (path <> file) ) files
<$> concat
<$> mapConcurrently (\file ->
filter (\d -> (isJust $ _hd_publication_year d)
&& (isJust $ _hd_title d))
<$> parseFile' file) files
-- To transform a Csv file into a list of Document -- To transform a Csv file into a list of Document
...@@ -122,31 +103,21 @@ csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document] ...@@ -122,31 +103,21 @@ csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
csvToDocs parser patterns time path = csvToDocs parser patterns time path =
case parser of case parser of
Wos _ -> undefined Wos _ -> undefined
Csv limit -> do Csv limit -> Vector.toList
eR <- Csv.readFile path <$> Vector.take limit
case eR of <$> Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) (fromMaybe Csv.defaultMonth $ csv_publication_month row) (fromMaybe Csv.defaultDay $ csv_publication_day row) time)
Right r -> (toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) (fromMaybe Csv.defaultMonth $ csv_publication_month row) (fromMaybe Csv.defaultDay $ csv_publication_day row) time)
pure $ Vector.toList (termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
$ Vector.take limit Nothing
$ Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) []
(fromMaybe Csv.defaultMonth $ csv_publication_month row) ) <$> snd <$> either (\err -> panic $ cs $ "CSV error" <> (show err)) identity <$> Csv.readFile path
(fromMaybe Csv.defaultDay $ csv_publication_day row)
time)
(toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row))
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
) $ snd r
Left e -> panic $ "Error: " <> (pack e)
Csv' limit -> Vector.toList Csv' limit -> Vector.toList
<$> Vector.take limit <$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time) <$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row)) (toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row)) (termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
(Just $ csv'_weight row) (Just $ csv'_weight row)
[csv'_source row] (map (T.strip . pack) $ splitOn ";" (unpack $ (csv'_source row)))
) <$> snd <$> Csv.readWeightedCsv path ) <$> snd <$> Csv.readWeightedCsv path
...@@ -168,10 +139,11 @@ fileToDocs' parser path time lst = do ...@@ -168,10 +139,11 @@ fileToDocs' parser path time lst = do
-- Config time parameters to label -- Config time parameters to label
timeToLabel :: Config -> [Char] timeToLabel :: Config -> [Char]
timeToLabel config = case (timeUnit config) of timeToLabel config = case (timeUnit config) of
Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f)) Epoch p s f -> ("time_epochs" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Month p s f -> ("time_months"<> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f)) Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f)) Month p s f -> ("time_months" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f)) Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
seaToLabel :: Config -> [Char] seaToLabel :: Config -> [Char]
...@@ -182,7 +154,7 @@ seaToLabel config = case (seaElevation config) of ...@@ -182,7 +154,7 @@ seaToLabel config = case (seaElevation config) of
sensToLabel :: Config -> [Char] sensToLabel :: Config -> [Char]
sensToLabel config = case (phyloProximity config) of sensToLabel config = case (phyloProximity config) of
Hamming -> undefined Hamming _ -> undefined
WeightedLogJaccard s -> ("WeightedLogJaccard_" <> show s) WeightedLogJaccard s -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s -> ( "WeightedLogSim-sens_" <> show s) WeightedLogSim s -> ( "WeightedLogSim-sens_" <> show s)
...@@ -240,18 +212,23 @@ configToSha stage config = unpack ...@@ -240,18 +212,23 @@ configToSha stage config = unpack
<> (show (phyloLevel config)) <> (show (phyloLevel config))
writePhylo :: [Char] -> Phylo -> IO () readListV4 :: [Char] -> IO NgramsList
writePhylo path phylo = Lazy.writeFile path $ encode phylo readListV4 path = do
listJson <- (eitherDecode <$> readJson path) :: IO (Either String NgramsList)
case listJson of
readPhylo :: [Char] -> IO Phylo
readPhylo path = do
phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
case phyloJson of
Left err -> do Left err -> do
putStrLn err putStrLn err
undefined undefined
Right phylo -> pure phylo Right listV4 -> pure listV4
fileToList :: ListParser -> FilePath -> IO TermList
fileToList parser path =
case parser of
V3 -> csvMapTermList path
V4 -> fromJust
<$> toTermList MapTerm NgramsTerms
<$> readListV4 path
-------------- --------------
...@@ -273,7 +250,7 @@ main = do ...@@ -273,7 +250,7 @@ main = do
Right config -> do Right config -> do
printIOMsg "Parse the corpus" printIOMsg "Parse the corpus"
mapList <- csvMapTermList (listPath config) mapList <- fileToList (listParser config) (listPath config)
corpus <- fileToDocs' (corpusParser config) (corpusPath config) (timeUnit config) mapList corpus <- fileToDocs' (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus") printIOComment (show (length corpus) <> " parsed docs from the corpus")
...@@ -297,7 +274,6 @@ main = do ...@@ -297,7 +274,6 @@ main = do
-- let phylo = toPhylo (setConfig config phyloStep) -- let phylo = toPhylo (setConfig config phyloStep)
-- QL: 2 files read from disk
phyloWithLinks <- if phyloWithLinksExists phyloWithLinks <- if phyloWithLinksExists
then do then do
printIOMsg "Reconstruct the phylo from an existing file with intertemporal links" printIOMsg "Reconstruct the phylo from an existing file with intertemporal links"
......
...@@ -54,6 +54,7 @@ library: ...@@ -54,6 +54,7 @@ library:
- Gargantext.API.Ngrams - Gargantext.API.Ngrams
- Gargantext.API.Ngrams.Tools - Gargantext.API.Ngrams.Tools
- Gargantext.API.Ngrams.Types - Gargantext.API.Ngrams.Types
- Gargantext.API.Ngrams.Prelude
- Gargantext.API.Admin.Settings - Gargantext.API.Admin.Settings
- Gargantext.API.Admin.EnvTypes - Gargantext.API.Admin.EnvTypes
- Gargantext.API.Admin.Types - Gargantext.API.Admin.Types
...@@ -73,6 +74,7 @@ library: ...@@ -73,6 +74,7 @@ library:
- Gargantext.Database.Query.Table.Node - Gargantext.Database.Query.Table.Node
- Gargantext.Database.Query.Table.Node.UpdateOpaleye - Gargantext.Database.Query.Table.Node.UpdateOpaleye
- Gargantext.Database.Query.Table.NgramsPostag - Gargantext.Database.Query.Table.NgramsPostag
- Gargantext.Database.Schema.Ngrams
- Gargantext.Database.Prelude - Gargantext.Database.Prelude
- Gargantext.Database.Admin.Trigger.Init - Gargantext.Database.Admin.Trigger.Init
- Gargantext.Database.Admin.Config - Gargantext.Database.Admin.Config
......
{-| {-
Module : Gargantext.Core.Viz.AdaptativePhylo Module : Gargantext.Core.Viz.AdaptativePhylo
Description : Phylomemy definitions and types. Description : Phylomemy definitions and types.
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
...@@ -26,6 +26,8 @@ one 8, e54847. ...@@ -26,6 +26,8 @@ one 8, e54847.
module Gargantext.Core.Viz.Phylo where module Gargantext.Core.Viz.Phylo where
import Data.Swagger
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Control.DeepSeq (NFData) import Control.DeepSeq (NFData)
import Control.Lens (makeLenses) import Control.Lens (makeLenses)
import Data.Aeson import Data.Aeson
...@@ -50,6 +52,14 @@ data CorpusParser = ...@@ -50,6 +52,14 @@ data CorpusParser =
| Csv' {_csv'_limit :: Int} | Csv' {_csv'_limit :: Int}
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema CorpusParser where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
data ListParser = V3 | V4 deriving (Show,Generic,Eq)
instance ToSchema ListParser
data SeaElevation = data SeaElevation =
Constante Constante
{ _cons_start :: Double { _cons_start :: Double
...@@ -58,6 +68,8 @@ data SeaElevation = ...@@ -58,6 +68,8 @@ data SeaElevation =
{ _adap_granularity :: Double } { _adap_granularity :: Double }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema SeaElevation
data Proximity = data Proximity =
WeightedLogJaccard WeightedLogJaccard
{ _wlj_sensibility :: Double { _wlj_sensibility :: Double
...@@ -77,13 +89,23 @@ data Proximity = ...@@ -77,13 +89,23 @@ data Proximity =
-- , _wlj_elevation :: Double -- , _wlj_elevation :: Double
-} -}
} }
| Hamming | Hamming { _wlj_sensibility :: Double }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema Proximity where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
data SynchronyScope = SingleBranch | SiblingBranches | AllBranches
deriving (Show,Generic,Eq, ToSchema)
data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq) data SynchronyStrategy = MergeRegularGroups | MergeAllGroups
deriving (Show,Generic,Eq)
instance ToSchema SynchronyStrategy where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
data Synchrony = data Synchrony =
ByProximityThreshold ByProximityThreshold
...@@ -96,9 +118,17 @@ data Synchrony = ...@@ -96,9 +118,17 @@ data Synchrony =
, _bpd_strategy :: SynchronyStrategy } , _bpd_strategy :: SynchronyStrategy }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema Synchrony where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
data TimeUnit = data TimeUnit =
Year Epoch
{ _epoch_period :: Int
, _epoch_step :: Int
, _epoch_matchingFrame :: Int }
| Year
{ _year_period :: Int { _year_period :: Int
, _year_step :: Int , _year_step :: Int
, _year_matchingFrame :: Int } , _year_matchingFrame :: Int }
...@@ -116,8 +146,17 @@ data TimeUnit = ...@@ -116,8 +146,17 @@ data TimeUnit =
, _day_matchingFrame :: Int } , _day_matchingFrame :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema TimeUnit where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq) data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
instance ToSchema CliqueFilter where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
data Clique = data Clique =
Fis Fis
{ _fis_support :: Int { _fis_support :: Int
...@@ -128,18 +167,26 @@ data Clique = ...@@ -128,18 +167,26 @@ data Clique =
, _mcl_filter :: CliqueFilter } , _mcl_filter :: CliqueFilter }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema Clique where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
data Quality = data Quality =
Quality { _qua_granularity :: Double Quality { _qua_granularity :: Double
, _qua_minBranch :: Int } , _qua_minBranch :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema Quality where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_qua_")
data Config = data Config =
Config { corpusPath :: FilePath Config { corpusPath :: FilePath
, listPath :: FilePath , listPath :: FilePath
, outputPath :: FilePath , outputPath :: FilePath
, corpusParser :: CorpusParser , corpusParser :: CorpusParser
, listParser :: ListParser
, phyloName :: Text , phyloName :: Text
, phyloLevel :: Int , phyloLevel :: Int
, phyloProximity :: Proximity , phyloProximity :: Proximity
...@@ -154,25 +201,28 @@ data Config = ...@@ -154,25 +201,28 @@ data Config =
, exportFilter :: [Filter] , exportFilter :: [Filter]
} deriving (Show,Generic,Eq) } deriving (Show,Generic,Eq)
instance ToSchema Config
defaultConfig :: Config defaultConfig :: Config
defaultConfig = defaultConfig =
Config { corpusPath = "" Config { corpusPath = "corpus.csv" -- useful for commandline only
, listPath = "" , listPath = "list.csv" -- useful for commandline only
, outputPath = "" , outputPath = "data/"
, corpusParser = Csv 1000 , corpusParser = Csv 100000
, phyloName = pack "Default Phylo" , listParser = V4
, phyloName = pack "Phylo Name"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 , phyloProximity = WeightedLogJaccard 0.5
, seaElevation = Constante 0.1 0.1 , seaElevation = Constante 0.1 0.1
, findAncestors = True , findAncestors = False
, phyloSynchrony = ByProximityThreshold 0.1 10 SiblingBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups
, phyloQuality = Quality 0 1 , phyloQuality = Quality 0.5 1
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, clique = MaxClique 0 3 ByNeighbours , clique = MaxClique 5 0.0001 ByThreshold
, exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy , exportSort = ByHierarchy Desc
, exportFilter = [ByBranchSize 2] , exportFilter = [ByBranchSize 3]
} }
instance FromJSON Config instance FromJSON Config
...@@ -181,6 +231,9 @@ instance ToJSON Config ...@@ -181,6 +231,9 @@ instance ToJSON Config
instance FromJSON CorpusParser instance FromJSON CorpusParser
instance ToJSON CorpusParser instance ToJSON CorpusParser
instance FromJSON ListParser
instance ToJSON ListParser
instance FromJSON Proximity instance FromJSON Proximity
instance ToJSON Proximity instance ToJSON Proximity
...@@ -230,6 +283,11 @@ data Software = ...@@ -230,6 +283,11 @@ data Software =
, _software_version :: Text , _software_version :: Text
} deriving (Generic, Show, Eq) } deriving (Generic, Show, Eq)
instance ToSchema Software where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
defaultSoftware :: Software defaultSoftware :: Software
defaultSoftware = defaultSoftware =
Software { _software_name = pack "Gargantext" Software { _software_name = pack "Gargantext"
...@@ -243,6 +301,11 @@ data PhyloParam = ...@@ -243,6 +301,11 @@ data PhyloParam =
, _phyloParam_config :: Config , _phyloParam_config :: Config
} deriving (Generic, Show, Eq) } deriving (Generic, Show, Eq)
instance ToSchema PhyloParam where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
defaultPhyloParam :: PhyloParam defaultPhyloParam :: PhyloParam
defaultPhyloParam = defaultPhyloParam =
PhyloParam { _phyloParam_version = pack "v2.adaptative" PhyloParam { _phyloParam_version = pack "v2.adaptative"
...@@ -283,10 +346,16 @@ data PhyloFoundations = PhyloFoundations ...@@ -283,10 +346,16 @@ data PhyloFoundations = PhyloFoundations
, _foundations_mapList :: TermList , _foundations_mapList :: TermList
} deriving (Generic, Show, Eq) } deriving (Generic, Show, Eq)
instance ToSchema PhyloFoundations where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_foundations_")
data PhyloSources = PhyloSources data PhyloSources = PhyloSources
{ _sources :: !(Vector Text) } deriving (Generic, Show, Eq) { _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
instance ToSchema PhyloSources where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
--------------------------- ---------------------------
-- | Coocurency Matrix | -- -- | Coocurency Matrix | --
...@@ -322,6 +391,9 @@ data Phylo = ...@@ -322,6 +391,9 @@ data Phylo =
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
instance ToSchema Phylo where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
-- | PhyloPeriodId : the id of a given period -- | PhyloPeriodId : the id of a given period
type PhyloPeriodId = (Date,Date) type PhyloPeriodId = (Date,Date)
...@@ -335,6 +407,10 @@ data PhyloPeriod = ...@@ -335,6 +407,10 @@ data PhyloPeriod =
, _phylo_periodLevels :: Map PhyloLevelId PhyloLevel , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
} deriving (Generic, Show, Eq) } deriving (Generic, Show, Eq)
instance ToSchema PhyloPeriod where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
-- | Level : a level of clustering -- | Level : a level of clustering
type Level = Int type Level = Int
...@@ -355,6 +431,9 @@ data PhyloLevel = ...@@ -355,6 +431,9 @@ data PhyloLevel =
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
instance ToSchema PhyloLevel where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
type PhyloGroupId = (PhyloLevelId, Int) type PhyloGroupId = (PhyloLevelId, Int)
...@@ -381,16 +460,25 @@ data PhyloGroup = ...@@ -381,16 +460,25 @@ data PhyloGroup =
, _phylo_groupPeriodParents :: [Pointer] , _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer] , _phylo_groupPeriodChilds :: [Pointer]
, _phylo_groupAncestors :: [Pointer] , _phylo_groupAncestors :: [Pointer]
, _phylo_groupPeriodMemoryParents :: [Pointer']
, _phylo_groupPeriodMemoryChilds :: [Pointer']
} }
deriving (Generic, Show, Eq, NFData) deriving (Generic, Show, Eq, NFData)
instance ToSchema PhyloGroup where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
-- | Weight : A generic mesure that can be associated with an Id -- | Weight : A generic mesure that can be associated with an Id
type Weight = Double type Weight = Double
type Thr = Double
-- | Pointer : A weighted pointer to a given PhyloGroup -- | Pointer : A weighted pointer to a given PhyloGroup
type Pointer = (PhyloGroupId, Weight) type Pointer = (PhyloGroupId, Weight)
-- | Pointer' : A weighted pointer to a given PhyloGroup with a lower bounded threshold
type Pointer' = (PhyloGroupId, (Thr,Weight))
data Filiation = ToParents | ToChilds deriving (Generic, Show) data Filiation = ToParents | ToChilds | ToParentsMemory | ToChildsMemory deriving (Generic, Show)
data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show) data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
...@@ -415,15 +503,24 @@ data PhyloClique = PhyloClique ...@@ -415,15 +503,24 @@ data PhyloClique = PhyloClique
type DotId = TextLazy.Text type DotId = TextLazy.Text
data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq) data EdgeType = GroupToGroup | GroupToGroupMemory | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq) data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
instance ToSchema Filter where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
data Order = Asc | Desc deriving (Show,Generic,Eq, ToSchema)
data Order = Asc | Desc deriving (Show,Generic,Eq) data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy {_sort_order :: Order } deriving (Show,Generic,Eq)
instance ToSchema Sort where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sort_")
data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq) data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
instance ToSchema Tagger where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
data PhyloLabel = data PhyloLabel =
BranchLabel BranchLabel
...@@ -434,6 +531,10 @@ data PhyloLabel = ...@@ -434,6 +531,10 @@ data PhyloLabel =
, _group_labelSize :: Int } , _group_labelSize :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema PhyloLabel where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
data PhyloBranch = data PhyloBranch =
PhyloBranch PhyloBranch
{ _branch_id :: PhyloBranchId { _branch_id :: PhyloBranchId
...@@ -447,11 +548,17 @@ data PhyloBranch = ...@@ -447,11 +548,17 @@ data PhyloBranch =
, _branch_meta :: Map Text [Double] , _branch_meta :: Map Text [Double]
} deriving (Generic, Show, Eq) } deriving (Generic, Show, Eq)
instance ToSchema PhyloBranch where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_branch_")
data PhyloExport = data PhyloExport =
PhyloExport PhyloExport
{ _export_groups :: [PhyloGroup] { _export_groups :: [PhyloGroup]
, _export_branches :: [PhyloBranch] , _export_branches :: [PhyloBranch]
} deriving (Generic, Show) } deriving (Generic, Show)
instance ToSchema PhyloExport where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_export_")
---------------- ----------------
-- | Lenses | -- -- | Lenses | --
......
...@@ -13,7 +13,7 @@ Portability : POSIX ...@@ -13,7 +13,7 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloTools where module Gargantext.Core.Viz.Phylo.PhyloTools where
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group) import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Set (Set, disjoint) import Data.Set (Set, disjoint)
import Data.String (String) import Data.String (String)
...@@ -36,9 +36,9 @@ import qualified Data.Vector as Vector ...@@ -36,9 +36,9 @@ import qualified Data.Vector as Vector
-- | To print an important message as an IO() -- | To print an important message as an IO()
printIOMsg :: String -> IO () printIOMsg :: String -> IO ()
printIOMsg msg = printIOMsg msg =
putStrLn ( "\n" putStrLn ( "\n"
<> "------------" <> "------------"
<> "\n" <> "\n"
<> "-- | " <> msg <> "\n" ) <> "-- | " <> msg <> "\n" )
...@@ -59,13 +59,13 @@ printIOComment cmt = ...@@ -59,13 +59,13 @@ printIOComment cmt =
truncate' :: Double -> Int -> Double truncate' :: Double -> Int -> Double
truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
where where
-------------- --------------
t :: Double t :: Double
t = 10 ^n t = 10 ^n
getInMap :: Int -> Map Int Double -> Double getInMap :: Int -> Map Int Double -> Double
getInMap k m = getInMap k m =
if (member k m) if (member k m)
then m ! k then m ! k
else 0 else 0
...@@ -140,15 +140,15 @@ periodsToYears periods = (Set.fromList . sort . concat) ...@@ -140,15 +140,15 @@ periodsToYears periods = (Set.fromList . sort . concat)
findBounds :: [Date] -> (Date,Date) findBounds :: [Date] -> (Date,Date)
findBounds dates = findBounds dates =
let dates' = sort dates let dates' = sort dates
in (head' "findBounds" dates', last' "findBounds" dates') in (head' "findBounds" dates', last' "findBounds" dates')
toPeriods :: [Date] -> Int -> Int -> [(Date,Date)] toPeriods :: [Date] -> Int -> Int -> [(Date,Date)]
toPeriods dates p s = toPeriods dates p s =
let (start,end) = findBounds dates let (start,end) = findBounds dates
in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates')) in map (\dates' -> (head' "toPeriods" dates', last' "toPeriods" dates'))
$ chunkAlong p s [start .. end] $ chunkAlong p s [start .. end]
...@@ -156,8 +156,8 @@ toFstDate :: [Text] -> Text ...@@ -156,8 +156,8 @@ toFstDate :: [Text] -> Text
toFstDate ds = snd toFstDate ds = snd
$ head' "firstDate" $ head' "firstDate"
$ sortOn fst $ sortOn fst
$ map (\d -> $ map (\d ->
let d' = read (filter (\c -> c /= '-') $ unpack d)::Int let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
in (d',d)) ds in (d',d)) ds
toLstDate :: [Text] -> Text toLstDate :: [Text] -> Text
...@@ -165,46 +165,50 @@ toLstDate ds = snd ...@@ -165,46 +165,50 @@ toLstDate ds = snd
$ head' "firstDate" $ head' "firstDate"
$ reverse $ reverse
$ sortOn fst $ sortOn fst
$ map (\d -> $ map (\d ->
let d' = read (filter (\c -> c /= '-') $ unpack d)::Int let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
in (d',d)) ds in (d',d)) ds
getTimeScale :: Phylo -> [Char] getTimeScale :: Phylo -> [Char]
getTimeScale p = case (timeUnit $ getConfig p) of getTimeScale p = case (timeUnit $ getConfig p) of
Epoch _ _ _ -> "epoch"
Year _ _ _ -> "year" Year _ _ _ -> "year"
Month _ _ _ -> "month" Month _ _ _ -> "month"
Week _ _ _ -> "week" Week _ _ _ -> "week"
Day _ _ _ -> "day" Day _ _ _ -> "day"
-- | Get a regular & ascendante timeScale from a given list of dates -- | Get a regular & ascendante timeScale from a given list of dates
toTimeScale :: [Date] -> Int -> [Date] toTimeScale :: [Date] -> Int -> [Date]
toTimeScale dates step = toTimeScale dates step =
let (start,end) = findBounds dates let (start,end) = findBounds dates
in [start, (start + step) .. end] in [start, (start + step) .. end]
getTimeStep :: TimeUnit -> Int getTimeStep :: TimeUnit -> Int
getTimeStep time = case time of getTimeStep time = case time of
Epoch _ s _ -> s
Year _ s _ -> s Year _ s _ -> s
Month _ s _ -> s Month _ s _ -> s
Week _ s _ -> s Week _ s _ -> s
Day _ s _ -> s Day _ s _ -> s
getTimePeriod :: TimeUnit -> Int getTimePeriod :: TimeUnit -> Int
getTimePeriod time = case time of getTimePeriod time = case time of
Epoch p _ _ -> p
Year p _ _ -> p Year p _ _ -> p
Month p _ _ -> p Month p _ _ -> p
Week p _ _ -> p Week p _ _ -> p
Day p _ _ -> p Day p _ _ -> p
getTimeFrame :: TimeUnit -> Int getTimeFrame :: TimeUnit -> Int
getTimeFrame time = case time of getTimeFrame time = case time of
Epoch _ _ f -> f
Year _ _ f -> f Year _ _ f -> f
Month _ _ f -> f Month _ _ f -> f
Week _ _ f -> f Week _ _ f -> f
Day _ _ f -> f Day _ _ f -> f
------------- -------------
-- | Fis | -- -- | Fis | --
...@@ -217,7 +221,7 @@ isNested l l' ...@@ -217,7 +221,7 @@ isNested l l'
| null l' = True | null l' = True
| length l' > length l = False | length l' > length l = False
| (union l l') == l = True | (union l l') == l = True
| otherwise = False | otherwise = False
-- | To filter Fis with small Support but by keeping non empty Periods -- | To filter Fis with small Support but by keeping non empty Periods
...@@ -233,7 +237,7 @@ traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (> ...@@ -233,7 +237,7 @@ traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>
-------------------------------------- --------------------------------------
cliques :: [Double] cliques :: [Double]
cliques = sort $ map (fromIntegral . length . _phyloClique_nodes) $ concat $ elems mFis cliques = sort $ map (fromIntegral . length . _phyloClique_nodes) $ concat $ elems mFis
-------------------------------------- --------------------------------------
traceSupport :: Map (Date, Date) [PhyloClique] -> String traceSupport :: Map (Date, Date) [PhyloClique] -> String
...@@ -242,7 +246,7 @@ traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " ...@@ -242,7 +246,7 @@ traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> "
-------------------------------------- --------------------------------------
supports :: [Double] supports :: [Double]
supports = sort $ map (fromIntegral . _phyloClique_support) $ concat $ elems mFis supports = sort $ map (fromIntegral . _phyloClique_support) $ concat $ elems mFis
-------------------------------------- --------------------------------------
traceFis :: [Char] -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique] traceFis :: [Char] -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
...@@ -257,12 +261,12 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l ...@@ -257,12 +261,12 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
getCliqueSupport :: Clique -> Int getCliqueSupport :: Clique -> Int
getCliqueSupport unit = case unit of getCliqueSupport unit = case unit of
Fis s _ -> s Fis s _ -> s
MaxClique _ _ _ -> 0 MaxClique _ _ _ -> 0
getCliqueSize :: Clique -> Int getCliqueSize :: Clique -> Int
getCliqueSize unit = case unit of getCliqueSize unit = case unit of
Fis _ s -> s Fis _ s -> s
MaxClique s _ _ -> s MaxClique s _ _ -> s
...@@ -292,7 +296,7 @@ listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y ...@@ -292,7 +296,7 @@ listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y
sumCooc :: Cooc -> Cooc -> Cooc sumCooc :: Cooc -> Cooc -> Cooc
sumCooc cooc cooc' = unionWith (+) cooc cooc' sumCooc cooc cooc' = unionWith (+) cooc cooc'
getTrace :: Cooc -> Double getTrace :: Cooc -> Double
getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
coocToDiago :: Cooc -> Cooc coocToDiago :: Cooc -> Cooc
...@@ -310,7 +314,7 @@ ngramsToCooc ngrams coocs = ...@@ -310,7 +314,7 @@ ngramsToCooc ngrams coocs =
-- | PhyloGroup | -- -- | PhyloGroup | --
-------------------- --------------------
getGroupId :: PhyloGroup -> PhyloGroupId getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupLevel), g ^. phylo_groupIndex) getGroupId g = ((g ^. phylo_groupPeriod, g ^. phylo_groupLevel), g ^. phylo_groupIndex)
idToPrd :: PhyloGroupId -> PhyloPeriodId idToPrd :: PhyloGroupId -> PhyloPeriodId
...@@ -320,38 +324,58 @@ groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup ...@@ -320,38 +324,58 @@ groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup
groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups groupByField toField groups = fromListWith (++) $ map (\g -> (toField g, [g])) groups
getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer] getPeriodPointers :: Filiation -> PhyloGroup -> [Pointer]
getPeriodPointers fil g = getPeriodPointers fil g =
case fil of case fil of
ToChilds -> g ^. phylo_groupPeriodChilds ToChilds -> g ^. phylo_groupPeriodChilds
ToParents -> g ^. phylo_groupPeriodParents ToParents -> g ^. phylo_groupPeriodParents
ToChildsMemory -> undefined
ToParentsMemory -> undefined
filterProximity :: Proximity -> Double -> Double -> Bool filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local = filterProximity proximity thr local =
case proximity of case proximity of
WeightedLogJaccard _ -> local >= thr WeightedLogJaccard _ -> local >= thr
WeightedLogSim _ -> local >= thr WeightedLogSim _ -> local >= thr
Hamming -> undefined Hamming _ -> undefined
getProximityName :: Proximity -> String getProximityName :: Proximity -> String
getProximityName proximity = getProximityName proximity =
case proximity of case proximity of
WeightedLogJaccard _ -> "WLJaccard" WeightedLogJaccard _ -> "WLJaccard"
WeightedLogSim _ -> "WeightedLogSim" WeightedLogSim _ -> "WeightedLogSim"
Hamming -> "Hamming" Hamming _ -> "Hamming"
--------------- ---------------
-- | Phylo | -- -- | Phylo | --
--------------- ---------------
addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup addPointers :: Filiation -> PointerType -> [Pointer] -> PhyloGroup -> PhyloGroup
addPointers fil pty pointers g = addPointers fil pty pointers g =
case pty of case pty of
TemporalPointer -> case fil of TemporalPointer -> case fil of
ToChilds -> g & phylo_groupPeriodChilds .~ pointers ToChilds -> g & phylo_groupPeriodChilds .~ pointers
ToParents -> g & phylo_groupPeriodParents .~ pointers ToParents -> g & phylo_groupPeriodParents .~ pointers
LevelPointer -> case fil of ToChildsMemory -> undefined
ToParentsMemory -> undefined
LevelPointer -> case fil of
ToChilds -> g & phylo_groupLevelChilds .~ pointers ToChilds -> g & phylo_groupLevelChilds .~ pointers
ToParents -> g & phylo_groupLevelParents .~ pointers ToParents -> g & phylo_groupLevelParents .~ pointers
ToChildsMemory -> undefined
ToParentsMemory -> undefined
toPointer' :: Double -> Pointer -> Pointer'
toPointer' thr pt = (fst pt,(thr,snd pt))
addMemoryPointers :: Filiation -> PointerType -> Double -> [Pointer] -> PhyloGroup -> PhyloGroup
addMemoryPointers fil pty thr pointers g =
case pty of
TemporalPointer -> case fil of
ToChilds -> undefined
ToParents -> undefined
ToChildsMemory -> g & phylo_groupPeriodMemoryChilds .~ (concat [(g ^. phylo_groupPeriodMemoryChilds),(map (\pt -> toPointer' thr pt) pointers)])
ToParentsMemory -> g & phylo_groupPeriodMemoryParents .~ (concat [(g ^. phylo_groupPeriodMemoryParents),(map (\pt -> toPointer' thr pt) pointers)])
LevelPointer -> undefined
getPeriodIds :: Phylo -> [(Date,Date)] getPeriodIds :: Phylo -> [(Date,Date)]
...@@ -359,14 +383,14 @@ getPeriodIds phylo = sortOn fst ...@@ -359,14 +383,14 @@ getPeriodIds phylo = sortOn fst
$ keys $ keys
$ phylo ^. phylo_periods $ phylo ^. phylo_periods
getLevelParentId :: PhyloGroup -> PhyloGroupId getLevelParentId :: PhyloGroup -> PhyloGroupId
getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupLevelParents getLevelParentId g = fst $ head' "getLevelParentId" $ g ^. phylo_groupLevelParents
getLastLevel :: Phylo -> Level getLastLevel :: Phylo -> Level
getLastLevel phylo = last' "lastLevel" $ getLevels phylo getLastLevel phylo = last' "lastLevel" $ getLevels phylo
getLevels :: Phylo -> [Level] getLevels :: Phylo -> [Level]
getLevels phylo = nub getLevels phylo = nub
$ map snd $ map snd
$ keys $ view ( phylo_periods $ keys $ view ( phylo_periods
. traverse . traverse
...@@ -381,10 +405,10 @@ getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config ...@@ -381,10 +405,10 @@ getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
setConfig :: Config -> Phylo -> Phylo setConfig :: Config -> Phylo -> Phylo
setConfig config phylo = phylo setConfig config phylo = phylo
& phylo_param .~ (PhyloParam & phylo_param .~ (PhyloParam
((phylo ^. phylo_param) ^. phyloParam_version) ((phylo ^. phylo_param) ^. phyloParam_version)
((phylo ^. phylo_param) ^. phyloParam_software) ((phylo ^. phylo_param) ^. phyloParam_software)
config) config)
-- & phylo_param & phyloParam_config & phyloParam_config .~ config -- & phylo_param & phyloParam_config & phyloParam_config .~ config
...@@ -397,13 +421,13 @@ getSources :: Phylo -> Vector Text ...@@ -397,13 +421,13 @@ getSources :: Phylo -> Vector Text
getSources phylo = _sources (phylo ^. phylo_sources) getSources phylo = _sources (phylo ^. phylo_sources)
phyloToLastBranches :: Phylo -> [[PhyloGroup]] phyloToLastBranches :: Phylo -> [[PhyloGroup]]
phyloToLastBranches phylo = elems phyloToLastBranches phylo = elems
$ fromListWith (++) $ fromListWith (++)
$ map (\g -> (g ^. phylo_groupBranchId, [g])) $ map (\g -> (g ^. phylo_groupBranchId, [g]))
$ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo $ getGroupsFromLevel (last' "byBranches" $ getLevels phylo) phylo
getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup] getGroupsFromLevel :: Level -> Phylo -> [PhyloGroup]
getGroupsFromLevel lvl phylo = getGroupsFromLevel lvl phylo =
elems $ view ( phylo_periods elems $ view ( phylo_periods
. traverse . traverse
. phylo_periodLevels . phylo_periodLevels
...@@ -413,18 +437,18 @@ getGroupsFromLevel lvl phylo = ...@@ -413,18 +437,18 @@ getGroupsFromLevel lvl phylo =
getGroupsFromLevelPeriods :: Level -> [PhyloPeriodId] -> Phylo -> [PhyloGroup] getGroupsFromLevelPeriods :: Level -> [PhyloPeriodId] -> Phylo -> [PhyloGroup]
getGroupsFromLevelPeriods lvl periods phylo = getGroupsFromLevelPeriods lvl periods phylo =
elems $ view ( phylo_periods elems $ view ( phylo_periods
. traverse . traverse
. filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods) . filtered (\phyloPrd -> elem (phyloPrd ^. phylo_periodPeriod) periods)
. phylo_periodLevels . phylo_periodLevels
. traverse . traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl) . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups ) phylo . phylo_levelGroups ) phylo
getGroupsFromPeriods :: Level -> Map PhyloPeriodId PhyloPeriod -> [PhyloGroup] getGroupsFromPeriods :: Level -> Map PhyloPeriodId PhyloPeriod -> [PhyloGroup]
getGroupsFromPeriods lvl periods = getGroupsFromPeriods lvl periods =
elems $ view ( traverse elems $ view ( traverse
. phylo_periodLevels . phylo_periodLevels
. traverse . traverse
...@@ -433,25 +457,25 @@ getGroupsFromPeriods lvl periods = ...@@ -433,25 +457,25 @@ getGroupsFromPeriods lvl periods =
updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo updatePhyloGroups :: Level -> Map PhyloGroupId PhyloGroup -> Phylo -> Phylo
updatePhyloGroups lvl m phylo = updatePhyloGroups lvl m phylo =
over ( phylo_periods over ( phylo_periods
. traverse . traverse
. phylo_periodLevels . phylo_periodLevels
. traverse . traverse
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl) . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == lvl)
. phylo_levelGroups . phylo_levelGroups
. traverse . traverse
) (\g -> ) (\g ->
let id = getGroupId g let id = getGroupId g
in in
if member id m if member id m
then m ! id then m ! id
else g ) phylo else g ) phylo
updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo updatePeriods :: Map (Date,Date) (Text,Text) -> Phylo -> Phylo
updatePeriods periods' phylo = updatePeriods periods' phylo =
over (phylo_periods . traverse) over (phylo_periods . traverse)
(\prd -> (\prd ->
let prd' = periods' ! (prd ^. phylo_periodPeriod) let prd' = periods' ! (prd ^. phylo_periodPeriod)
lvls = map (\lvl -> lvl & phylo_levelPeriod' .~ prd') $ prd ^. phylo_periodLevels lvls = map (\lvl -> lvl & phylo_levelPeriod' .~ prd') $ prd ^. phylo_periodLevels
in prd & phylo_periodPeriod' .~ prd' in prd & phylo_periodPeriod' .~ prd'
...@@ -460,10 +484,10 @@ updatePeriods periods' phylo = ...@@ -460,10 +484,10 @@ updatePeriods periods' phylo =
traceToPhylo :: Level -> Phylo -> Phylo traceToPhylo :: Level -> Phylo -> Phylo
traceToPhylo lvl phylo = traceToPhylo lvl phylo =
trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with " trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with "
<> show (length $ getGroupsFromLevel lvl phylo) <> " groups and " <> show (length $ getGroupsFromLevel lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel lvl phylo) <> " branches" <> "\n") phylo
-------------------- --------------------
-- | Clustering | -- -- | Clustering | --
...@@ -474,28 +498,28 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids ...@@ -474,28 +498,28 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
where where
-- | 2) find the most Up Left ids in the hierarchy of similarity -- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]] -- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' = -- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids' -- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds -- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids -- | 1) find the most frequent ids
mostFreq' :: [[Int]] -> [[Int]] mostFreq' :: [[Int]] -> [[Int]]
mostFreq' ids' = mostFreq' ids' =
let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids' let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
sup = (fst . maximum) groupIds sup = (fst . maximum) groupIds
in map snd $ filter (\gIds -> fst gIds == sup) groupIds in map snd $ filter (\gIds -> fst gIds == sup) groupIds
mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double] mergeMeta :: [Int] -> [PhyloGroup] -> Map Text [Double]
mergeMeta bId groups = mergeMeta bId groups =
let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups let ego = head' "mergeMeta" $ filter (\g -> (snd (g ^. phylo_groupBranchId)) == bId) groups
in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")] in fromList [("breaks",(ego ^. phylo_groupMeta) ! "breaks"),("seaLevels",(ego ^. phylo_groupMeta) ! "seaLevels")]
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches groups = groupsToBranches groups =
{- run the related component algorithm -} {- run the related component algorithm -}
let egos = map (\g -> [getGroupId g] let egos = map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents) ++ (map fst $ g ^. phylo_groupPeriodParents)
++ (map fst $ g ^. phylo_groupPeriodChilds) ++ (map fst $ g ^. phylo_groupPeriodChilds)
++ (map fst $ g ^. phylo_groupAncestors)) $ elems groups ++ (map fst $ g ^. phylo_groupAncestors)) $ elems groups
...@@ -510,30 +534,30 @@ relatedComponents :: Ord a => [[a]] -> [[a]] ...@@ -510,30 +534,30 @@ relatedComponents :: Ord a => [[a]] -> [[a]]
relatedComponents graph = foldl' (\acc groups -> relatedComponents graph = foldl' (\acc groups ->
if (null acc) if (null acc)
then acc ++ [groups] then acc ++ [groups]
else else
let acc' = partition (\groups' -> disjoint (Set.fromList groups') (Set.fromList groups)) acc let acc' = partition (\groups' -> disjoint (Set.fromList groups') (Set.fromList groups)) acc
in (fst acc') ++ [nub $ concat $ (snd acc') ++ [groups]]) [] graph in (fst acc') ++ [nub $ concat $ (snd acc') ++ [groups]]) [] graph
toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]] toRelatedComponents :: [PhyloGroup] -> [((PhyloGroup,PhyloGroup),Double)] -> [[PhyloGroup]]
toRelatedComponents nodes edges = toRelatedComponents nodes edges =
let ref = fromList $ map (\g -> (getGroupId g, g)) nodes let ref = fromList $ map (\g -> (getGroupId g, g)) nodes
clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes)) clusters = relatedComponents $ ((map (\((g,g'),_) -> [getGroupId g, getGroupId g']) edges) ++ (map (\g -> [getGroupId g]) nodes))
in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters in map (\cluster -> map (\gId -> ref ! gId) cluster) clusters
traceSynchronyEnd :: Phylo -> Phylo traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo = traceSynchronyEnd phylo =
trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo) trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches" <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo <> "\n" ) phylo
traceSynchronyStart :: Phylo -> Phylo traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo = traceSynchronyStart phylo =
trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo) trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches" <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromLevel (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo <> "\n" ) phylo
------------------- -------------------
...@@ -541,10 +565,10 @@ traceSynchronyStart phylo = ...@@ -541,10 +565,10 @@ traceSynchronyStart phylo =
------------------- -------------------
getSensibility :: Proximity -> Double getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of getSensibility proxi = case proxi of
WeightedLogJaccard s -> s WeightedLogJaccard s -> s
WeightedLogSim s -> s WeightedLogSim s -> s
Hamming -> undefined Hamming _ -> undefined
---------------- ----------------
-- | Branch | -- -- | Branch | --
...@@ -599,7 +623,7 @@ traceMatchLimit branches = ...@@ -599,7 +623,7 @@ traceMatchLimit branches =
<> ",(1.." <> show (length branches) <> ")]" <> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n" <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
<> " - unable to increase the threshold above 1" <> "\n" <> " - unable to increase the threshold above 1" <> "\n"
) branches ) branches
traceMatchEnd :: [PhyloGroup] -> [PhyloGroup] traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
...@@ -609,10 +633,10 @@ traceMatchEnd groups = ...@@ -609,10 +633,10 @@ traceMatchEnd groups =
traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup] traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
traceTemporalMatching groups = traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n") groups
traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double traceGroupsProxi :: Map (PhyloGroupId,PhyloGroupId) Double -> Map (PhyloGroupId,PhyloGroupId) Double
traceGroupsProxi m = traceGroupsProxi m =
trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m trace ( "\n" <> "-- | " <> show(Map.size m) <> " computed pairs of groups proximity" <> "\n") m
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment