Commit 7be480f3 authored by Quentin Lobbé's avatar Quentin Lobbé

adapt Main to example

parent 83d4e9cd
Pipeline #354 canceled with stage
...@@ -23,16 +23,26 @@ Phylo binaries ...@@ -23,16 +23,26 @@ Phylo binaries
module Main where module Main where
import Data.Aeson import Data.Aeson
import Data.ByteString.Lazy (writeFile)
import Data.Text (Text) import Data.Text (Text)
import GHC.Generics import GHC.Generics
import GHC.IO (FilePath)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.List.CSV (csvGraphTermList) import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year) import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.Terms.WithList import Gargantext.Text.Terms.WithList
import System.Environment import System.Environment
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.View.ViewMaker
import qualified Data.Map as DM import qualified Data.Map as DM
import qualified Data.Vector as DV import qualified Data.Vector as DV
import qualified Data.List as DL
import qualified Prelude as P
import qualified Data.ByteString.Lazy as L
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Format to produce the Phylo -- Format to produce the Phylo
...@@ -42,30 +52,60 @@ data TextsByYear = ...@@ -42,30 +52,60 @@ data TextsByYear =
} deriving (Show, Generic) } deriving (Show, Generic)
instance ToJSON TextsByYear instance ToJSON TextsByYear
instance ToJSON Document
------------------------------------------------------------------------ ------------------------------------------------------------------------
filterTerms :: Patterns -> (a, [Text]) -> (a, [[Text]]) filterTerms :: Patterns -> (a, Text) -> (a, [Text])
filterTerms patterns (year', docs) = (year', map (termsInText patterns) docs) filterTerms patterns (year', doc) = (year',termsInText patterns doc)
where where
termsInText :: Patterns -> Text -> [Text] termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = extractTermsWithList' pats txt termsInText pats txt = extractTermsWithList' pats txt
-- csvToCorpus :: Int -> FilePath -> IO (DM.Map Int [Text])
csvToCorpus :: Int -> FilePath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList
-- DM.fromListWith (<>)
. DV.take limit
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> readCsv csv
main :: IO () main :: IO ()
main = do main = do
[corpusFile, termListFile, outputFile] <- getArgs
corpus <- DM.fromListWith (<>) -- [corpusFile, termListFile, outputFile] <- getArgs
. DV.toList
. DV.map (\n -> (csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)])) let corpusPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/texts/fullCorpus.csv"
. snd let termListPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/termList.csv"
<$> readCsv corpusFile let outputPath = "/home/qlobbe/data/epique/output/cultural_evolution.dot"
corpus <- csvToCorpus 10 corpusPath
termList <- csvGraphTermList termListPath
termList <- csvGraphTermList termListFile
putStrLn $ show $ length termList putStrLn $ show $ length termList
let patterns = buildPatterns termList let patterns = buildPatterns termList
let corpusParsed = map ( (\(y,t) -> TextsByYear y t) . filterTerms patterns) (DM.toList corpus) let corpusParsed = map ( (\(y,t) -> Document y (filter (\e -> e /= "") t)) . filterTerms patterns) corpus
let query = PhyloQueryBuild "cultural_evolution" "Test" 5 3 defaultFis [] [] defaultWeightedLogJaccard 3 defaultRelatedComponents
let tree = []
let foundations = DL.nub $ DL.concat $ map _pat_terms patterns
let phylo = toPhylo query corpusParsed foundations tree
let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let view = toPhyloView queryView phylo
-- TODO Phylo here -- TODO Phylo here
writeFile outputFile $ encode corpusParsed P.writeFile outputPath $ dotToString $ viewToDot view
-- L.writeFile outputPath $ encode corpusParsed
...@@ -193,7 +193,7 @@ type Ngrams = Text ...@@ -193,7 +193,7 @@ type Ngrams = Text
data Document = Document data Document = Document
{ date :: Date { date :: Date
, text :: [Ngrams] , text :: [Ngrams]
} deriving (Show) } deriving (Show,Generic)
-- | Clique : Set of ngrams cooccurring in the same Document -- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams type Clique = Set Ngrams
......
...@@ -72,6 +72,5 @@ parseDocs fds roots c = map (\(d,t) ...@@ -72,6 +72,5 @@ parseDocs fds roots c = map (\(d,t)
-- | To transform a Corpus of texts into a Map of aggregated Documents grouped by Periods -- | To transform a Corpus of texts into a Map of aggregated Documents grouped by Periods
corpusToDocs :: [(Date,Text)] -> Phylo -> Map (Date,Date) [Document]
corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p)
$ parseDocs (getFoundations p) (getRoots p) c
...@@ -181,8 +181,39 @@ toPhylo0 d p = addPhyloLevel 0 d p ...@@ -181,8 +181,39 @@ toPhylo0 d p = addPhyloLevel 0 d p
-- | To reconstruct the Base of a Phylo -- | To reconstruct the Base of a Phylo
toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [(Date, Text)] -> [Ngrams] -> [Tree Ngrams] -> Phylo
toPhyloBase q p c a ts = initPhyloBase periods foundations roots p -- | To reconstruct a Phylomemy from a PhyloQueryBuild, a Corpus and a list of actants
class PhyloMaker corpus
where
toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> [Tree Ngrams] -> Phylo
toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> [Tree Ngrams] -> Phylo
corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
instance PhyloMaker [(Date, Text)]
where
--------------------------------------
toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs c phyloBase
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c a ts
--------------------------------------
--------------------------------------
toPhyloBase q p c a ts = initPhyloBase periods foundations roots p
where where
-------------------------------------- --------------------------------------
roots :: PhyloRoots roots :: PhyloRoots
...@@ -195,11 +226,14 @@ toPhyloBase q p c a ts = initPhyloBase periods foundations roots p ...@@ -195,11 +226,14 @@ toPhyloBase q p c a ts = initPhyloBase periods foundations roots p
foundations :: Vector Ngrams foundations :: Vector Ngrams
foundations = initFoundations a foundations = initFoundations a
-------------------------------------- --------------------------------------
--------------------------------------
corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundations p) (getRoots p) c
-- | To reconstruct a Phylomemy from a PhyloQueryBuild, a Corpus and a list of actants instance PhyloMaker [Document]
toPhylo :: PhyloQueryBuild -> [(Date, Text)] -> [Ngrams] -> [Tree Ngrams] -> Phylo where
toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1 --------------------------------------
toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
where where
-------------------------------------- --------------------------------------
phylo1 :: Phylo phylo1 :: Phylo
...@@ -214,3 +248,19 @@ toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getN ...@@ -214,3 +248,19 @@ toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getN
phyloBase :: Phylo phyloBase :: Phylo
phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c a ts phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c a ts
-------------------------------------- --------------------------------------
--------------------------------------
toPhyloBase q p c a ts = initPhyloBase periods foundations roots p
where
--------------------------------------
roots :: PhyloRoots
roots = initRoots (map (\t -> alterLabels phyloAnalyzer t) ts) foundations
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both date (head' "LevelMaker" c,last c)
--------------------------------------
foundations :: Vector Ngrams
foundations = initFoundations a
--------------------------------------
--------------------------------------
corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
\ No newline at end of file
...@@ -48,7 +48,10 @@ type DotId = T'.Text ...@@ -48,7 +48,10 @@ type DotId = T'.Text
--------------------- ---------------------
dotToFile :: FilePath -> FilePath -> DotGraph DotId -> IO () dotToFile :: FilePath -> FilePath -> DotGraph DotId -> IO ()
dotToFile filePath fileName dotG = writeFile (combine filePath fileName) $ unpack (printDotGraph dotG) dotToFile filePath fileName dotG = writeFile (combine filePath fileName) $ dotToString dotG
dotToString :: DotGraph DotId -> [Char]
dotToString dotG = unpack (printDotGraph dotG)
-------------------------- --------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment