Commit 5be28fb7 authored by qlobbe's avatar qlobbe

add a list parser param

parent 6bddaf45
......@@ -34,7 +34,11 @@ import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.API.Ngrams.Prelude (toTermList)
-- import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
import GHC.IO (FilePath)
import Prelude (Either(Left, Right),toInteger)
......@@ -43,6 +47,7 @@ import System.Directory (listDirectory,doesFileExist)
import Control.Concurrent.Async (mapConcurrently)
import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy as Lazy
......@@ -50,6 +55,8 @@ import qualified Data.Vector as Vector
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
import qualified Data.Text as T
import Data.List.Split
-- import Debug.Trace (trace)
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
......@@ -83,14 +90,21 @@ toDays y m d = fromIntegral
toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
toPhyloDate y m d tu = case tu of
Epoch _ _ _ -> y
Year _ _ _ -> y
Month _ _ _ -> toMonths (toInteger y) m d
Week _ _ _ -> div (toDays (toInteger y) m d) 7
Day _ _ _ -> toDays (toInteger y) m d
toPhyloDate' :: Int -> Int -> Int -> Text
toPhyloDate' y m d = pack $ showGregorian $ fromGregorian (toInteger y) m d
toPhyloDate' :: Int -> Int -> Int -> TimeUnit -> Text
toPhyloDate' y m d tu = case tu of
Epoch _ _ _ -> pack $ show $ posixSecondsToUTCTime $ fromIntegral y
Year _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
Month _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
Week _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
Day _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
--------------
......@@ -128,7 +142,7 @@ wosToDocs limit patterns time path = do
(toPhyloDate'
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d))
(fromJust $ _hd_publication_day d) time)
(termsInText patterns $ title <> " " <> abstr) Nothing [])
<$> concat
<$> mapConcurrently (\file ->
......@@ -145,7 +159,7 @@ csvToDocs parser patterns time path =
Csv limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv_publication_year row) (csv_publication_month row) (csv_publication_day row) time)
(toPhyloDate' (csv_publication_year row) (csv_publication_month row) (csv_publication_day row))
(toPhyloDate' (csv_publication_year row) (csv_publication_month row) (csv_publication_day row) time)
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
......@@ -153,10 +167,10 @@ csvToDocs parser patterns time path =
Csv' limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row))
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
(Just $ csv'_weight row)
[csv'_source row]
(map (T.strip . pack) $ splitOn ";" (unpack $ (csv'_source row)))
) <$> snd <$> Csv.readWeightedCsv path
......@@ -178,10 +192,11 @@ fileToDocs' parser path time lst = do
-- Config time parameters to label
timeToLabel :: Config -> [Char]
timeToLabel config = case (timeUnit config) of
Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Month p s f -> ("time_months"<> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Epoch p s f -> ("time_epochs" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Month p s f -> ("time_months" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
seaToLabel :: Config -> [Char]
......@@ -264,6 +279,25 @@ readPhylo path = do
Right phylo -> pure phylo
readListV4 :: [Char] -> IO NgramsList
readListV4 path = do
listJson <- (eitherDecode <$> readJson path) :: IO (Either String NgramsList)
case listJson of
Left err -> do
putStrLn err
undefined
Right listV4 -> pure listV4
fileToList :: ListParser -> FilePath -> IO TermList
fileToList parser path =
case parser of
V3 -> csvMapTermList path
V4 -> fromJust
<$> toTermList MapTerm NgramsTerms
<$> readListV4 path
--------------
-- | Main | --
--------------
......@@ -283,7 +317,7 @@ main = do
Right config -> do
printIOMsg "Parse the corpus"
mapList <- csvMapTermList (listPath config)
mapList <- fileToList (listParser config) (listPath config)
corpus <- fileToDocs' (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
......
......@@ -44,6 +44,7 @@ library:
- Gargantext.API.Node.File
- Gargantext.API.Ngrams
- Gargantext.API.Ngrams.Types
- Gargantext.API.Ngrams.Prelude
- Gargantext.API.Admin.Settings
- Gargantext.API.Admin.EnvTypes
- Gargantext.API.Admin.Types
......@@ -59,6 +60,7 @@ library:
- Gargantext.Database.Query.Table.Node
- Gargantext.Database.Query.Table.Node.UpdateOpaleye
- Gargantext.Database.Query.Table.NgramsPostag
- Gargantext.Database.Schema.Ngrams
- Gargantext.Database.Prelude
- Gargantext.Database.Admin.Trigger.Init
- Gargantext.Database.Admin.Config
......
......@@ -53,7 +53,9 @@ data CorpusParser =
Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int}
| Csv' {_csv'_limit :: Int}
deriving (Show,Generic,Eq)
deriving (Show,Generic,Eq)
data ListParser = V3 | V4 deriving (Show,Generic,Eq)
data SeaElevation =
Constante
......@@ -102,8 +104,12 @@ data Synchrony =
deriving (Show,Generic,Eq)
data TimeUnit =
Year
data TimeUnit =
Epoch
{ _epoch_period :: Int
, _epoch_step :: Int
, _epoch_matchingFrame :: Int }
| Year
{ _year_period :: Int
, _year_step :: Int
, _year_matchingFrame :: Int }
......@@ -145,6 +151,7 @@ data Config =
, listPath :: FilePath
, outputPath :: FilePath
, corpusParser :: CorpusParser
, listParser :: ListParser
, phyloName :: Text
, phyloLevel :: Int
, phyloProximity :: Proximity
......@@ -166,6 +173,7 @@ defaultConfig =
, listPath = ""
, outputPath = ""
, corpusParser = Csv 1000
, listParser = V3
, phyloName = pack "Default Phylo"
, phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10
......@@ -184,6 +192,8 @@ instance FromJSON Config
instance ToJSON Config
instance FromJSON CorpusParser
instance ToJSON CorpusParser
instance FromJSON ListParser
instance ToJSON ListParser
instance FromJSON Proximity
instance ToJSON Proximity
instance FromJSON SeaElevation
......
......@@ -13,7 +13,7 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group, notElem)
import Data.Set (Set, disjoint)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.String (String)
......@@ -162,7 +162,7 @@ toFstDate ds = snd
$ head' "firstDate"
$ sortOn fst
$ map (\d ->
let d' = read (filter (\c -> c /= '-') $ unpack d)::Int
let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
in (d',d)) ds
toLstDate :: [Text] -> Text
......@@ -171,12 +171,13 @@ toLstDate ds = snd
$ reverse
$ sortOn fst
$ map (\d ->
let d' = read (filter (\c -> c /= '-') $ unpack d)::Int
let d' = read (filter (\c -> notElem c ['U','T','C',' ',':','-']) $ unpack d)::Int
in (d',d)) ds
getTimeScale :: Phylo -> [Char]
getTimeScale p = case (timeUnit $ getConfig p) of
Epoch _ _ _ -> "epoch"
Year _ _ _ -> "year"
Month _ _ _ -> "month"
Week _ _ _ -> "week"
......@@ -192,6 +193,7 @@ toTimeScale dates step =
getTimeStep :: TimeUnit -> Int
getTimeStep time = case time of
Epoch _ s _ -> s
Year _ s _ -> s
Month _ s _ -> s
Week _ s _ -> s
......@@ -199,6 +201,7 @@ getTimeStep time = case time of
getTimePeriod :: TimeUnit -> Int
getTimePeriod time = case time of
Epoch p _ _ -> p
Year p _ _ -> p
Month p _ _ -> p
Week p _ _ -> p
......@@ -206,6 +209,7 @@ getTimePeriod time = case time of
getTimeFrame :: TimeUnit -> Int
getTimeFrame time = case time of
Epoch _ _ f -> f
Year _ _ f -> f
Month _ _ f -> f
Week _ _ f -> f
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment