Commit bfa50733 authored by Alexandre Delanoë

[CLI] cooc by year threaded.

parent 481ca536
...@@ -23,9 +23,14 @@ Main specifications to index a corpus with a term list ...@@ -23,9 +23,14 @@ Main specifications to index a corpus with a term list
module Main where module Main where
import qualified Data.Vector as DV import qualified Data.Vector as DV
import qualified Data.Maybe as DMaybe
import Control.Monad (zipWithM) import Control.Monad (zipWithM)
import Control.Monad.IO.Class import Control.Monad.IO.Class
import qualified Data.Map.Strict as DM
import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import Data.List (cycle) import Data.List (cycle)
import System.IO (hPutStr, hFlush, stderr) import System.IO (hPutStr, hFlush, stderr)
...@@ -33,13 +38,14 @@ import System.Environment ...@@ -33,13 +38,14 @@ import System.Environment
import Control.Concurrent.Async as CCA (mapConcurrently) import Control.Concurrent.Async as CCA (mapConcurrently)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Context import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Text.Terms import Gargantext.Text.Terms
import Gargantext.Text.Terms.WithList import Gargantext.Text.Terms.WithList
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract) import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.List.CSV (csvGraphTermList) import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Terms (terms) import Gargantext.Text.Terms (terms)
import Gargantext.Text.Metrics.Count (coocOn) import Gargantext.Text.Metrics.Count (coocOn, Coocs)
mapMP :: MonadIO m => (a -> m b) -> [a] -> m [b] mapMP :: MonadIO m => (a -> m b) -> [a] -> m [b]
mapMP f xs = do mapMP f xs = do
...@@ -52,26 +58,37 @@ mapMP f xs = do ...@@ -52,26 +58,37 @@ mapMP f xs = do
liftIO $ hFlush stderr liftIO $ hFlush stderr
f x f x
main :: IO ()
-- | Index every text of @ts@ with 'terms' under the given @patterns@, then
-- count cooccurrences over the indexed texts with 'coocOn' 'identity'.
filterTermsAndCooc
  :: TermType Lang
  -> [Text]
  -> IO (Map (Terms, Terms) Coocs)
filterTermsAndCooc patterns ts = countCooc <$> indexedTexts
  where
    -- One 'terms' run per input text, sequenced in input order.
    indexedTexts = mapM (terms patterns) ts
    -- Cooccurrences over the indexed texts, keyed on the terms themselves.
    countCooc indexed = coocOn identity indexed
-- Entry point: reads a CSV corpus and a CSV term list, groups the corpus
-- texts by publication year, then computes and prints the cooccurrences of
-- each year (one concurrent job per year).
--
-- Expects exactly three command-line arguments: the corpus file, the
-- term-list file and an output file. Any other argument count makes the
-- pattern match below fail. The third argument is currently ignored: the
-- result is only printed (see the commented-out 'writeFile' at the end).
main = do
  [corpusFile, termListFile, _] <- getArgs

  -- corpus: map from publication year to the "title abstract" texts of the
  -- documents of that year.
  -- The value inserted for a year seen for the first time must be the
  -- document's own text list: inserting an empty list there would silently
  -- drop the first document of every year. For an already-known year the new
  -- texts are prepended to the existing ones.
  corpus <- foldl' (\m (year, texts) -> DM.insertWith (<>) year texts m) DM.empty
        <$> DV.toList
        <$> DV.map (\n -> (csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)]))
        <$> snd
        <$> readCsv corpusFile

  -- Number of distinct publication years found in the corpus.
  putStrLn $ show $ length corpus

  -- termList: the term list loaded from termListFile.
  termList <- csvGraphTermList termListFile

  putStrLn $ show $ length termList

  let years    = DM.keys corpus
  let patterns = WithList $ buildPatterns termList
  -- Texts of each year, in the same order as 'years'.
  let corpus'  = DMaybe.mapMaybe (\k -> DM.lookup k corpus) years

  -- Pair every year with its cooccurrences; the years are processed
  -- concurrently.
  r <- zip years <$> mapConcurrently (filterTermsAndCooc patterns) corpus'
  putStrLn $ show r
  --writeFile outputFile cooc
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment