Commit 5be28fb7 authored by qlobbe's avatar qlobbe

add a list parser param

parent 6bddaf45
...@@ -34,7 +34,11 @@ import Gargantext.Core.Viz.AdaptativePhylo ...@@ -34,7 +34,11 @@ import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep) import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig) import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile) import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
-- import Gargantext.API.Ngrams.Prelude (toTermList) import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.API.Ngrams.Prelude (toTermList)
-- import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
import GHC.IO (FilePath) import GHC.IO (FilePath)
import Prelude (Either(Left, Right),toInteger) import Prelude (Either(Left, Right),toInteger)
...@@ -43,6 +47,7 @@ import System.Directory (listDirectory,doesFileExist) ...@@ -43,6 +47,7 @@ import System.Directory (listDirectory,doesFileExist)
import Control.Concurrent.Async (mapConcurrently) import Control.Concurrent.Async (mapConcurrently)
import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian) import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
import qualified Data.ByteString.Char8 as C8 import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy as Lazy import qualified Data.ByteString.Lazy as Lazy
...@@ -50,6 +55,8 @@ import qualified Data.Vector as Vector ...@@ -50,6 +55,8 @@ import qualified Data.Vector as Vector
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
import qualified Data.Text as T import qualified Data.Text as T
import Data.List.Split
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show) data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
...@@ -83,14 +90,21 @@ toDays y m d = fromIntegral ...@@ -83,14 +90,21 @@ toDays y m d = fromIntegral
toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
toPhyloDate y m d tu = case tu of toPhyloDate y m d tu = case tu of
Epoch _ _ _ -> y
Year _ _ _ -> y Year _ _ _ -> y
Month _ _ _ -> toMonths (toInteger y) m d Month _ _ _ -> toMonths (toInteger y) m d
Week _ _ _ -> div (toDays (toInteger y) m d) 7 Week _ _ _ -> div (toDays (toInteger y) m d) 7
Day _ _ _ -> toDays (toInteger y) m d Day _ _ _ -> toDays (toInteger y) m d
toPhyloDate' :: Int -> Int -> Int -> Text toPhyloDate' :: Int -> Int -> Int -> TimeUnit -> Text
toPhyloDate' y m d = pack $ showGregorian $ fromGregorian (toInteger y) m d toPhyloDate' y m d tu = case tu of
Epoch _ _ _ -> pack $ show $ posixSecondsToUTCTime $ fromIntegral y
Year _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
Month _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
Week _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
Day _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
-------------- --------------
...@@ -128,7 +142,7 @@ wosToDocs limit patterns time path = do ...@@ -128,7 +142,7 @@ wosToDocs limit patterns time path = do
(toPhyloDate' (toPhyloDate'
(fromIntegral $ fromJust $ _hd_publication_year d) (fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d) (fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d)) (fromJust $ _hd_publication_day d) time)
(termsInText patterns $ title <> " " <> abstr) Nothing []) (termsInText patterns $ title <> " " <> abstr) Nothing [])
<$> concat <$> concat
<$> mapConcurrently (\file -> <$> mapConcurrently (\file ->
...@@ -145,7 +159,7 @@ csvToDocs parser patterns time path = ...@@ -145,7 +159,7 @@ csvToDocs parser patterns time path =
Csv limit -> Vector.toList Csv limit -> Vector.toList
<$> Vector.take limit <$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv_publication_year row) (csv_publication_month row) (csv_publication_day row) time) <$> Vector.map (\row -> Document (toPhyloDate (csv_publication_year row) (csv_publication_month row) (csv_publication_day row) time)
(toPhyloDate' (csv_publication_year row) (csv_publication_month row) (csv_publication_day row)) (toPhyloDate' (csv_publication_year row) (csv_publication_month row) (csv_publication_day row) time)
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row)) (termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing Nothing
[] []
...@@ -153,10 +167,10 @@ csvToDocs parser patterns time path = ...@@ -153,10 +167,10 @@ csvToDocs parser patterns time path =
Csv' limit -> Vector.toList Csv' limit -> Vector.toList
<$> Vector.take limit <$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time) <$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row)) (toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row)) (termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
(Just $ csv'_weight row) (Just $ csv'_weight row)
[csv'_source row] (map (T.strip . pack) $ splitOn ";" (unpack $ (csv'_source row)))
) <$> snd <$> Csv.readWeightedCsv path ) <$> snd <$> Csv.readWeightedCsv path
...@@ -178,10 +192,11 @@ fileToDocs' parser path time lst = do ...@@ -178,10 +192,11 @@ fileToDocs' parser path time lst = do
-- Config time parameters to label -- Config time parameters to label
timeToLabel :: Config -> [Char] timeToLabel :: Config -> [Char]
timeToLabel config = case (timeUnit config) of timeToLabel config = case (timeUnit config) of
Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f)) Epoch p s f -> ("time_epochs" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Month p s f -> ("time_months"<> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f)) Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f)) Month p s f -> ("time_months" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f)) Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
seaToLabel :: Config -> [Char] seaToLabel :: Config -> [Char]
...@@ -264,6 +279,25 @@ readPhylo path = do ...@@ -264,6 +279,25 @@ readPhylo path = do
Right phylo -> pure phylo Right phylo -> pure phylo
readListV4 :: [Char] -> IO NgramsList
readListV4 path = do
listJson <- (eitherDecode <$> readJson path) :: IO (Either String NgramsList)
case listJson of
Left err -> do
putStrLn err
undefined
Right listV4 -> pure listV4
fileToList :: ListParser -> FilePath -> IO TermList
fileToList parser path =
case parser of
V3 -> csvMapTermList path
V4 -> fromJust
<$> toTermList MapTerm NgramsTerms
<$> readListV4 path
-------------- --------------
-- | Main | -- -- | Main | --
-------------- --------------
...@@ -283,7 +317,7 @@ main = do ...@@ -283,7 +317,7 @@ main = do
Right config -> do Right config -> do
printIOMsg "Parse the corpus" printIOMsg "Parse the corpus"
mapList <- csvMapTermList (listPath config) mapList <- fileToList (listParser config) (listPath config)
corpus <- fileToDocs' (corpusParser config) (corpusPath config) (timeUnit config) mapList corpus <- fileToDocs' (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus") printIOComment (show (length corpus) <> " parsed docs from the corpus")
......
...@@ -44,6 +44,7 @@ library: ...@@ -44,6 +44,7 @@ library:
- Gargantext.API.Node.File - Gargantext.API.Node.File
- Gargantext.API.Ngrams - Gargantext.API.Ngrams
- Gargantext.API.Ngrams.Types - Gargantext.API.Ngrams.Types
- Gargantext.API.Ngrams.Prelude
- Gargantext.API.Admin.Settings - Gargantext.API.Admin.Settings
- Gargantext.API.Admin.EnvTypes - Gargantext.API.Admin.EnvTypes
- Gargantext.API.Admin.Types - Gargantext.API.Admin.Types
...@@ -59,6 +60,7 @@ library: ...@@ -59,6 +60,7 @@ library:
- Gargantext.Database.Query.Table.Node - Gargantext.Database.Query.Table.Node
- Gargantext.Database.Query.Table.Node.UpdateOpaleye - Gargantext.Database.Query.Table.Node.UpdateOpaleye
- Gargantext.Database.Query.Table.NgramsPostag - Gargantext.Database.Query.Table.NgramsPostag
- Gargantext.Database.Schema.Ngrams
- Gargantext.Database.Prelude - Gargantext.Database.Prelude
- Gargantext.Database.Admin.Trigger.Init - Gargantext.Database.Admin.Trigger.Init
- Gargantext.Database.Admin.Config - Gargantext.Database.Admin.Config
......
...@@ -53,7 +53,9 @@ data CorpusParser = ...@@ -53,7 +53,9 @@ data CorpusParser =
Wos {_wos_limit :: Int} Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int} | Csv {_csv_limit :: Int}
| Csv' {_csv'_limit :: Int} | Csv' {_csv'_limit :: Int}
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data ListParser = V3 | V4 deriving (Show,Generic,Eq)
data SeaElevation = data SeaElevation =
Constante Constante
...@@ -102,8 +104,12 @@ data Synchrony = ...@@ -102,8 +104,12 @@ data Synchrony =
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data TimeUnit = data TimeUnit =
Year Epoch
{ _epoch_period :: Int
, _epoch_step :: Int
, _epoch_matchingFrame :: Int }
| Year
{ _year_period :: Int { _year_period :: Int
, _year_step :: Int , _year_step :: Int
, _year_matchingFrame :: Int } , _year_matchingFrame :: Int }
...@@ -145,6 +151,7 @@ data Config = ...@@ -145,6 +151,7 @@ data Config =
, listPath :: FilePath , listPath :: FilePath
, outputPath :: FilePath , outputPath :: FilePath
, corpusParser :: CorpusParser , corpusParser :: CorpusParser
, listParser :: ListParser
, phyloName :: Text , phyloName :: Text
, phyloLevel :: Int , phyloLevel :: Int
, phyloProximity :: Proximity , phyloProximity :: Proximity
...@@ -166,6 +173,7 @@ defaultConfig = ...@@ -166,6 +173,7 @@ defaultConfig =
, listPath = "" , listPath = ""
, outputPath = "" , outputPath = ""
, corpusParser = Csv 1000 , corpusParser = Csv 1000
, listParser = V3
, phyloName = pack "Default Phylo" , phyloName = pack "Default Phylo"
, phyloLevel = 2 , phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10 , phyloProximity = WeightedLogJaccard 10
...@@ -184,6 +192,8 @@ instance FromJSON Config ...@@ -184,6 +192,8 @@ instance FromJSON Config
instance ToJSON Config instance ToJSON Config
instance FromJSON CorpusParser instance FromJSON CorpusParser
instance ToJSON CorpusParser instance ToJSON CorpusParser
instance FromJSON ListParser
instance ToJSON ListParser
instance FromJSON Proximity instance FromJSON Proximity
instance ToJSON Proximity instance ToJSON Proximity
instance FromJSON SeaElevation instance FromJSON SeaElevation
......
...@@ -13,7 +13,7 @@ Portability : POSIX ...@@ -13,7 +13,7 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloTools where module Gargantext.Core.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex) import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group) import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem)
import Data.Set (Set, disjoint) import Data.Set (Set, disjoint)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.String (String) import Data.String (String)
...@@ -162,7 +162,7 @@ toFstDate ds = snd ...@@ -162,7 +162,7 @@ toFstDate ds = snd
$ head' "firstDate" $ head' "firstDate"
$ sortOn fst $ sortOn fst
$ map (\d -> $ map (\d ->
let d' = read (filter (\c -> c /= '-') $ unpack d)::Int let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
in (d',d)) ds in (d',d)) ds
toLstDate :: [Text] -> Text toLstDate :: [Text] -> Text
...@@ -171,12 +171,13 @@ toLstDate ds = snd ...@@ -171,12 +171,13 @@ toLstDate ds = snd
$ reverse $ reverse
$ sortOn fst $ sortOn fst
$ map (\d -> $ map (\d ->
let d' = read (filter (\c -> c /= '-') $ unpack d)::Int let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
in (d',d)) ds in (d',d)) ds
getTimeScale :: Phylo -> [Char] getTimeScale :: Phylo -> [Char]
getTimeScale p = case (timeUnit $ getConfig p) of getTimeScale p = case (timeUnit $ getConfig p) of
Epoch _ _ _ -> "epoch"
Year _ _ _ -> "year" Year _ _ _ -> "year"
Month _ _ _ -> "month" Month _ _ _ -> "month"
Week _ _ _ -> "week" Week _ _ _ -> "week"
...@@ -192,6 +193,7 @@ toTimeScale dates step = ...@@ -192,6 +193,7 @@ toTimeScale dates step =
getTimeStep :: TimeUnit -> Int getTimeStep :: TimeUnit -> Int
getTimeStep time = case time of getTimeStep time = case time of
Epoch _ s _ -> s
Year _ s _ -> s Year _ s _ -> s
Month _ s _ -> s Month _ s _ -> s
Week _ s _ -> s Week _ s _ -> s
...@@ -199,6 +201,7 @@ getTimeStep time = case time of ...@@ -199,6 +201,7 @@ getTimeStep time = case time of
getTimePeriod :: TimeUnit -> Int getTimePeriod :: TimeUnit -> Int
getTimePeriod time = case time of getTimePeriod time = case time of
Epoch p _ _ -> p
Year p _ _ -> p Year p _ _ -> p
Month p _ _ -> p Month p _ _ -> p
Week p _ _ -> p Week p _ _ -> p
...@@ -206,6 +209,7 @@ getTimePeriod time = case time of ...@@ -206,6 +209,7 @@ getTimePeriod time = case time of
getTimeFrame :: TimeUnit -> Int getTimeFrame :: TimeUnit -> Int
getTimeFrame time = case time of getTimeFrame time = case time of
Epoch _ _ f -> f
Year _ _ f -> f Year _ _ f -> f
Month _ _ f -> f Month _ _ f -> f
Week _ _ f -> f Week _ _ f -> f
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment