Commit bfa50733 authored by Alexandre Delanoë

[CLI] cooc by year threaded.

parent 481ca536
......@@ -23,9 +23,14 @@ Main specifications to index a corpus with a term list
module Main where
import qualified Data.Vector as DV
import qualified Data.Maybe as DMaybe
import Control.Monad (zipWithM)
import Control.Monad.IO.Class
import qualified Data.Map.Strict as DM
import Data.Map (Map)
import Data.Text (Text)
import Data.List (cycle)
import System.IO (hPutStr, hFlush, stderr)
......@@ -33,13 +38,14 @@ import System.Environment
import Control.Concurrent.Async as CCA (mapConcurrently)
import Gargantext.Prelude
import Gargantext.Text.Context
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Text.Terms
import Gargantext.Text.Terms.WithList
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract)
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Terms (terms)
import Gargantext.Text.Metrics.Count (coocOn)
import Gargantext.Text.Metrics.Count (coocOn, Coocs)
mapMP :: MonadIO m => (a -> m b) -> [a] -> m [b]
mapMP f xs = do
......@@ -52,26 +58,37 @@ mapMP f xs = do
liftIO $ hFlush stderr
f x
main :: IO ()
-- | Extract terms from every text with the given term-type configuration,
-- then count the co-occurrences over the extracted term lists.
--
-- Each text is processed by 'terms' (in 'IO'); the per-text results are then
-- handed, unchanged ('identity'), to 'coocOn'.
filterTermsAndCooc
  :: TermType Lang                  -- ^ how terms are recognised in a text
  -> [Text]                         -- ^ texts to index
  -> IO (Map (Terms, Terms) Coocs)  -- ^ co-occurrence count per pair of terms
filterTermsAndCooc patterns ts = countCooc <$> extractedTerms
  where
    -- Terms found in each text, in the same order as the input texts.
    extractedTerms = mapM (terms patterns) ts
    -- Co-occurrences are counted on the terms themselves.
    countCooc termLists = coocOn identity termLists
-- Entry point. Usage: <program> corpusFile termListFile outputFile
--
-- Reads the CSV corpus, groups the "title abstract" text of every document by
-- publication year, then (one concurrent job per year) extracts the terms of
-- the term list and counts their co-occurrences. The resulting
-- (year, co-occurrences) pairs are printed to stdout. The third argument is
-- accepted but not used yet (see the trailing note).
main = do
  -- Exactly three arguments are expected; any other number fails this pattern
  -- match and aborts the program.
  [corpusFile, termListFile, _] <- getArgs

  -- readCsv yields a pair; only its second component (the documents) is used.
  (_, docs) <- readCsv corpusFile

  -- Group the documents' texts by publication year. Every year is seeded with
  -- its own document: seeding with an empty list (as an
  -- 'insertWith (\_ x -> ...) year []' fold does) loses the first document
  -- met for each year. Later documents are prepended to earlier ones.
  let corpus = DM.fromListWith (<>)
        [ (csv_publication_year doc, [csv_title doc <> " " <> csv_abstract doc])
        | doc <- DV.toList docs
        ]
  -- Number of distinct publication years found in the corpus.
  putStrLn $ show $ length corpus

  termList <- csvGraphTermList termListFile
  putStrLn $ show $ length termList

  let patterns = WithList $ buildPatterns termList
      -- keys and elems are both in ascending key order, so they line up
      -- when zipped back together below.
      years    = DM.keys corpus
      corpus'  = DM.elems corpus

  -- One concurrent term-extraction + co-occurrence job per year.
  r <- zip years <$> mapConcurrently (filterTermsAndCooc patterns) corpus'
  putStrLn $ show r
  -- TODO: write the result to the output file instead of printing it.
  --writeFile outputFile cooc
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment