Commit 972ed4c0 authored by Quentin Lobbé's avatar Quentin Lobbé

adapt Main to example

parent 17749dcf
...@@ -23,16 +23,26 @@ Phylo binaries ...@@ -23,16 +23,26 @@ Phylo binaries
module Main where module Main where
import Data.Aeson import Data.Aeson
import Data.ByteString.Lazy (writeFile)
import Data.Text (Text) import Data.Text (Text)
import GHC.Generics import GHC.Generics
import GHC.IO (FilePath)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.List.CSV (csvGraphTermList) import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year) import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.Terms.WithList import Gargantext.Text.Terms.WithList
import System.Environment import System.Environment
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.View.ViewMaker
import qualified Data.Map as DM import qualified Data.Map as DM
import qualified Data.Vector as DV import qualified Data.Vector as DV
import qualified Data.List as DL
import qualified Prelude as P
import qualified Data.ByteString.Lazy as L
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Format to produce the Phylo -- Format to produce the Phylo
...@@ -42,30 +52,60 @@ data TextsByYear = ...@@ -42,30 +52,60 @@ data TextsByYear =
} deriving (Show, Generic) } deriving (Show, Generic)
instance ToJSON TextsByYear instance ToJSON TextsByYear
instance ToJSON Document
------------------------------------------------------------------------ ------------------------------------------------------------------------
filterTerms :: Patterns -> (a, [Text]) -> (a, [[Text]]) filterTerms :: Patterns -> (a, Text) -> (a, [Text])
filterTerms patterns (year', docs) = (year', map (termsInText patterns) docs) filterTerms patterns (year', doc) = (year',termsInText patterns doc)
where where
termsInText :: Patterns -> Text -> [Text] termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = extractTermsWithList' pats txt termsInText pats txt = extractTermsWithList' pats txt
-- csvToCorpus :: Int -> FilePath -> IO (DM.Map Int [Text])
csvToCorpus :: Int -> FilePath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList
-- DM.fromListWith (<>)
. DV.take limit
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> readCsv csv
main :: IO () main :: IO ()
main = do main = do
[corpusFile, termListFile, outputFile] <- getArgs
-- [corpusFile, termListFile, outputFile] <- getArgs
let corpusPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/texts/fullCorpus.csv"
let termListPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/termList.csv"
let outputPath = "/home/qlobbe/data/epique/output/cultural_evolution.dot"
corpus <- csvToCorpus 10 corpusPath
corpus <- DM.fromListWith (<>) termList <- csvGraphTermList termListPath
. DV.toList
. DV.map (\n -> (csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)]))
. snd
<$> readCsv corpusFile
termList <- csvGraphTermList termListFile
putStrLn $ show $ length termList putStrLn $ show $ length termList
let patterns = buildPatterns termList let patterns = buildPatterns termList
let corpusParsed = map ( (\(y,t) -> TextsByYear y t) . filterTerms patterns) (DM.toList corpus) let corpusParsed = map ( (\(y,t) -> Document y (filter (\e -> e /= "") t)) . filterTerms patterns) corpus
let query = PhyloQueryBuild "cultural_evolution" "Test" 5 3 defaultFis [] [] defaultWeightedLogJaccard 3 defaultRelatedComponents
let tree = []
let foundations = DL.nub $ DL.concat $ map _pat_terms patterns
let phylo = toPhylo query corpusParsed foundations tree
let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let view = toPhyloView queryView phylo
-- TODO Phylo here
P.writeFile outputPath $ dotToString $ viewToDot view
-- L.writeFile outputPath $ encode corpusParsed
-- TODO Phylo here
writeFile outputFile $ encode corpusParsed
...@@ -194,7 +194,7 @@ type Ngrams = Text ...@@ -194,7 +194,7 @@ type Ngrams = Text
data Document = Document data Document = Document
{ date :: Date { date :: Date
, text :: [Ngrams] , text :: [Ngrams]
} deriving (Show) } deriving (Show,Generic)
-- | Clique : Set of ngrams cooccurring in the same Document -- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams type Clique = Set Ngrams
......
...@@ -72,6 +72,5 @@ parseDocs fds roots c = map (\(d,t) ...@@ -72,6 +72,5 @@ parseDocs fds roots c = map (\(d,t)
-- | To transform a Corpus of texts into a Map of aggregated Documents grouped by Periods -- | To transform a Corpus of texts into a Map of aggregated Documents grouped by Periods
corpusToDocs :: [(Date,Text)] -> Phylo -> Map (Date,Date) [Document]
corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p)
$ parseDocs (getFoundations p) (getRoots p) c
...@@ -181,36 +181,86 @@ toPhylo0 d p = addPhyloLevel 0 d p ...@@ -181,36 +181,86 @@ toPhylo0 d p = addPhyloLevel 0 d p
-- | To reconstruct the Base of a Phylo -- | To reconstruct the Base of a Phylo
toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [(Date, Text)] -> [Ngrams] -> [Tree Ngrams] -> Phylo
toPhyloBase q p c a ts = initPhyloBase periods foundations roots p -- | To reconstruct a Phylomemy from a PhyloQueryBuild, a Corpus and a list of actants
class PhyloMaker corpus
where
toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> [Tree Ngrams] -> Phylo
toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> [Tree Ngrams] -> Phylo
corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
instance PhyloMaker [(Date, Text)]
where where
-------------------------------------- --------------------------------------
roots :: PhyloRoots toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
roots = initRoots (map (\t -> alterLabels phyloAnalyzer t) ts) foundations where
-------------------------------------- --------------------------------------
periods :: [(Date,Date)] phylo1 :: Phylo
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q) phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
$ both fst (head' "LevelMaker" c,last c) --------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs c phyloBase
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c a ts
--------------------------------------
-------------------------------------- --------------------------------------
foundations :: Vector Ngrams toPhyloBase q p c a ts = initPhyloBase periods foundations roots p
foundations = initFoundations a where
--------------------------------------
roots :: PhyloRoots
roots = initRoots (map (\t -> alterLabels phyloAnalyzer t) ts) foundations
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both fst (head' "LevelMaker" c,last c)
--------------------------------------
foundations :: Vector Ngrams
foundations = initFoundations a
--------------------------------------
-------------------------------------- --------------------------------------
corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundations p) (getRoots p) c
-- | To reconstruct a Phylomemy from a PhyloQueryBuild, a Corpus and a list of actants instance PhyloMaker [Document]
toPhylo :: PhyloQueryBuild -> [(Date, Text)] -> [Ngrams] -> [Tree Ngrams] -> Phylo
toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
where where
-------------------------------------- --------------------------------------
phylo1 :: Phylo toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0 where
-------------------------------------- --------------------------------------
phylo0 :: Phylo phylo1 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
-------------------------------------- --------------------------------------
phyloDocs :: Map (Date, Date) [Document] phylo0 :: Phylo
phyloDocs = corpusToDocs c phyloBase phylo0 = toPhylo0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs c phyloBase
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c a ts
--------------------------------------
-------------------------------------- --------------------------------------
phyloBase :: Phylo toPhyloBase q p c a ts = initPhyloBase periods foundations roots p
phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c a ts where
--------------------------------------
roots :: PhyloRoots
roots = initRoots (map (\t -> alterLabels phyloAnalyzer t) ts) foundations
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both date (head' "LevelMaker" c,last c)
--------------------------------------
foundations :: Vector Ngrams
foundations = initFoundations a
--------------------------------------
-------------------------------------- --------------------------------------
corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
\ No newline at end of file
...@@ -48,7 +48,10 @@ type DotId = T'.Text ...@@ -48,7 +48,10 @@ type DotId = T'.Text
--------------------- ---------------------
dotToFile :: FilePath -> FilePath -> DotGraph DotId -> IO () dotToFile :: FilePath -> FilePath -> DotGraph DotId -> IO ()
dotToFile filePath fileName dotG = writeFile (combine filePath fileName) $ unpack (printDotGraph dotG) dotToFile filePath fileName dotG = writeFile (combine filePath fileName) $ dotToString dotG
dotToString :: DotGraph DotId -> [Char]
dotToString dotG = unpack (printDotGraph dotG)
-------------------------- --------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment