Commit 972ed4c0 authored by Quentin Lobbé's avatar Quentin Lobbé

adapt Main to example

parent 17749dcf
......@@ -23,16 +23,26 @@ Phylo binaries
module Main where
import Data.Aeson
import Data.ByteString.Lazy (writeFile)
import Data.Text (Text)
import GHC.Generics
import GHC.IO (FilePath)
import Gargantext.Prelude
import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.Terms.WithList
import System.Environment
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.View.ViewMaker
import qualified Data.Map as DM
import qualified Data.Vector as DV
import qualified Data.List as DL
import qualified Prelude as P
import qualified Data.ByteString.Lazy as L
------------------------------------------------------------------------
-- Format to produce the Phylo
......@@ -42,30 +52,60 @@ data TextsByYear =
} deriving (Show, Generic)
instance ToJSON TextsByYear
instance ToJSON Document
------------------------------------------------------------------------
filterTerms :: Patterns -> (a, [Text]) -> (a, [[Text]])
filterTerms patterns (year', docs) = (year', map (termsInText patterns) docs)
filterTerms :: Patterns -> (a, Text) -> (a, [Text])
filterTerms patterns (year', doc) = (year',termsInText patterns doc)
where
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = extractTermsWithList' pats txt
-- csvToCorpus :: Int -> FilePath -> IO (DM.Map Int [Text])
csvToCorpus :: Int -> FilePath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList
-- DM.fromListWith (<>)
. DV.take limit
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> readCsv csv
main :: IO ()
main = do
[corpusFile, termListFile, outputFile] <- getArgs
-- [corpusFile, termListFile, outputFile] <- getArgs
let corpusPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/texts/fullCorpus.csv"
let termListPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/termList.csv"
let outputPath = "/home/qlobbe/data/epique/output/cultural_evolution.dot"
corpus <- csvToCorpus 10 corpusPath
corpus <- DM.fromListWith (<>)
. DV.toList
. DV.map (\n -> (csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)]))
. snd
<$> readCsv corpusFile
termList <- csvGraphTermList termListPath
termList <- csvGraphTermList termListFile
putStrLn $ show $ length termList
let patterns = buildPatterns termList
let corpusParsed = map ( (\(y,t) -> TextsByYear y t) . filterTerms patterns) (DM.toList corpus)
let corpusParsed = map ( (\(y,t) -> Document y (filter (\e -> e /= "") t)) . filterTerms patterns) corpus
let query = PhyloQueryBuild "cultural_evolution" "Test" 5 3 defaultFis [] [] defaultWeightedLogJaccard 3 defaultRelatedComponents
let tree = []
let foundations = DL.nub $ DL.concat $ map _pat_terms patterns
let phylo = toPhylo query corpusParsed foundations tree
let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
let view = toPhyloView queryView phylo
-- TODO Phylo here
P.writeFile outputPath $ dotToString $ viewToDot view
-- L.writeFile outputPath $ encode corpusParsed
-- TODO Phylo here
writeFile outputFile $ encode corpusParsed
......@@ -194,7 +194,7 @@ type Ngrams = Text
data Document = Document
{ date :: Date
, text :: [Ngrams]
} deriving (Show)
} deriving (Show,Generic)
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams
......
......@@ -72,6 +72,5 @@ parseDocs fds roots c = map (\(d,t)
-- | To transform a Corpus of texts into a Map of aggregated Documents grouped by Periods
corpusToDocs :: [(Date,Text)] -> Phylo -> Map (Date,Date) [Document]
corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p)
$ parseDocs (getFoundations p) (getRoots p) c
......@@ -181,36 +181,86 @@ toPhylo0 d p = addPhyloLevel 0 d p
-- | To reconstruct the Base of a Phylo
toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [(Date, Text)] -> [Ngrams] -> [Tree Ngrams] -> Phylo
toPhyloBase q p c a ts = initPhyloBase periods foundations roots p
-- | To reconstruct a Phylomemy from a PhyloQueryBuild, a Corpus and a list of actants
class PhyloMaker corpus
where
toPhylo :: PhyloQueryBuild -> corpus -> [Ngrams] -> [Tree Ngrams] -> Phylo
toPhyloBase :: PhyloQueryBuild -> PhyloParam -> corpus -> [Ngrams] -> [Tree Ngrams] -> Phylo
corpusToDocs :: corpus -> Phylo -> Map (Date,Date) [Document]
instance PhyloMaker [(Date, Text)]
where
--------------------------------------
roots :: PhyloRoots
roots = initRoots (map (\t -> alterLabels phyloAnalyzer t) ts) foundations
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both fst (head' "LevelMaker" c,last c)
toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs c phyloBase
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c a ts
--------------------------------------
--------------------------------------
foundations :: Vector Ngrams
foundations = initFoundations a
toPhyloBase q p c a ts = initPhyloBase periods foundations roots p
where
--------------------------------------
roots :: PhyloRoots
roots = initRoots (map (\t -> alterLabels phyloAnalyzer t) ts) foundations
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both fst (head' "LevelMaker" c,last c)
--------------------------------------
foundations :: Vector Ngrams
foundations = initFoundations a
--------------------------------------
--------------------------------------
corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) $ parseDocs (getFoundations p) (getRoots p) c
-- | To reconstruct a Phylomemy from a PhyloQueryBuild, a Corpus and a list of actants
toPhylo :: PhyloQueryBuild -> [(Date, Text)] -> [Ngrams] -> [Tree Ngrams] -> Phylo
toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
instance PhyloMaker [Document]
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs c phyloBase
toPhylo q c a ts = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) (getContextualUnitMetrics q) (getContextualUnitFilters q) phyloDocs phylo0
--------------------------------------
phylo0 :: Phylo
phylo0 = toPhylo0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = corpusToDocs c phyloBase
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c a ts
--------------------------------------
--------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c a ts
toPhyloBase q p c a ts = initPhyloBase periods foundations roots p
where
--------------------------------------
roots :: PhyloRoots
roots = initRoots (map (\t -> alterLabels phyloAnalyzer t) ts) foundations
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both date (head' "LevelMaker" c,last c)
--------------------------------------
foundations :: Vector Ngrams
foundations = initFoundations a
--------------------------------------
--------------------------------------
corpusToDocs c p = groupDocsByPeriod date (getPhyloPeriods p) c
\ No newline at end of file
......@@ -48,7 +48,10 @@ type DotId = T'.Text
---------------------
dotToFile :: FilePath -> FilePath -> DotGraph DotId -> IO ()
dotToFile filePath fileName dotG = writeFile (combine filePath fileName) $ unpack (printDotGraph dotG)
dotToFile filePath fileName dotG = writeFile (combine filePath fileName) $ dotToString dotG
dotToString :: DotGraph DotId -> [Char]
dotToString dotG = unpack (printDotGraph dotG)
--------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment