Commit 6cb3efe5 authored by Przemyslaw Kaminski

[CsvDoc] implement Maybe for year/month/date

parent 5bee1178
Pipeline #1739 passed with stage
in 36 minutes and 21 seconds
......@@ -21,6 +21,7 @@ import Crypto.Hash.SHA256 (hash)
import Data.Aeson
import Data.Either (Either(..))
import Data.List (concat, nub, isSuffixOf)
import Data.Maybe (fromMaybe)
import Data.String (String)
import GHC.IO (FilePath)
import qualified Prelude as Prelude
......@@ -152,8 +153,13 @@ csvToDocs parser patterns time path =
Right r ->
pure $ Vector.toList
$ Vector.take limit
$ Vector.map (\row -> Document (toPhyloDate (Csv.unIntOrDec $ csv_publication_year row) (csv_publication_month row) (csv_publication_day row) time)
(toPhyloDate' (Csv.unIntOrDec $ csv_publication_year row) (csv_publication_month row) (csv_publication_day row))
$ Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row)
time)
(toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row))
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
......
......@@ -42,7 +42,7 @@ import Gargantext.Core.Types
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year, unIntOrDec)
import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year, unIntOrDec, fromMIntOrDec, defaultYear)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms (terms)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
......@@ -91,7 +91,7 @@ main = do
Right cf -> do
let corpus = DM.fromListWith (<>)
. DV.toList
. DV.map (\n -> (unIntOrDec $ csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)]))
. DV.map (\n -> (fromMIntOrDec defaultYear $ csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)]))
. snd $ cf
-- termListMap :: [Text]
......
......@@ -283,7 +283,7 @@ csvPost l m = do
let lst = readCsvText m
let p = parseCsvData lst
--printDebug "[csvPost] lst" lst
--printDebug "[csvPost] p" p
printDebug "[csvPost] p" p
_ <- setListNgrams l NgramsTerms p
pure True
------------------------------------------------------------------------
......
......@@ -20,6 +20,7 @@ import qualified Data.ByteString.Lazy as BL
import Data.Char (ord)
import Data.Csv
import Data.Either (Either(..))
import Data.Maybe (fromMaybe)
import Data.Text (Text, pack, length, intercalate)
import Data.Time.Segment (jour)
import qualified Data.Vector as V
......@@ -85,8 +86,10 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
-- | Types Conversions
toDocs :: Vector CsvDoc -> [CsvGargV3]
toDocs v = V.toList
$ V.zipWith (\nId (CsvDoc t s (IntOrDec py) pm pd abst auth)
-> CsvGargV3 nId t s py pm pd abst auth )
$ V.zipWith (\nId (CsvDoc t s mPy pm pd abst auth)
-> CsvGargV3 nId t s
(fromMIntOrDec defaultYear mPy) (fromMaybe defaultMonth pm) (fromMaybe defaultDay pd)
abst auth )
(V.enumFromN 1 (V.length v'')) v''
where
v'' = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
......@@ -96,7 +99,7 @@ toDocs v = V.toList
-- | Convert every 'CsvGargV3' row into a 'CsvDoc', discarding the row's first
-- field (the id that 'toDocs' assigns).
fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
fromDocs docs = V.map fromDocs' docs
where
-- Diff view: the first equation is the pre-change line, the second is its
-- replacement. The replacement wraps year, month and day in 'Just' because the
-- corresponding 'CsvDoc' fields became 'Maybe' in this commit.
fromDocs' (CsvGargV3 _ t s py pm pd abst auth) = (CsvDoc t s (IntOrDec py) pm pd abst auth)
fromDocs' (CsvGargV3 _ t s py pm pd abst auth) = (CsvDoc t s (Just $ IntOrDec py) (Just pm) (Just pd) abst auth)
---------------------------------------------------------------
-- | Split a document in its context
......@@ -150,12 +153,21 @@ instance FromField IntOrDec where
-- | Encode an 'IntOrDec' as a CSV field by unwrapping it and delegating to the
-- 'ToField' instance of the wrapped value.
instance ToField IntOrDec where
  toField = toField . unIntOrDec
-- | Unwrap an optional 'IntOrDec' to a plain 'Int'.
--
-- Returns the supplied fallback when the value is absent, otherwise the
-- number carried by the 'IntOrDec'.
fromMIntOrDec :: Int -> Maybe IntOrDec -> Int
fromMIntOrDec fallback Nothing  = fallback
fromMIntOrDec _        (Just v) = unIntOrDec v
-- | Year substituted when a CSV row has no publication year (passed as the
-- fallback to 'fromMIntOrDec').
-- NOTE(review): the visible code does not explain why 1973 was chosen.
defaultYear :: Int
defaultYear = 1973
-- | Month substituted (via 'fromMaybe') when a CSV row has no publication month.
defaultMonth :: Int
defaultMonth = 1
-- | Day substituted (via 'fromMaybe') when a CSV row has no publication day.
defaultDay :: Int
defaultDay = 1
-- | One document row: title, source, publication year/month/day, abstract and
-- authors.
--
-- Diff view: two field groups follow. The first (year, month and day as plain
-- 'IntOrDec' / 'Int') is the pre-change version; the second, where those three
-- fields are wrapped in 'Maybe', is its replacement.
data CsvDoc = CsvDoc
{ csv_title :: !Text
, csv_source :: !Text
, csv_publication_year :: !IntOrDec
, csv_publication_month :: !Int
, csv_publication_day :: !Int
{ csv_title :: !Text
, csv_source :: !Text
, csv_publication_year :: !(Maybe IntOrDec)
, csv_publication_month :: !(Maybe Int)
, csv_publication_day :: !(Maybe Int)
, csv_abstract :: !Text
, csv_authors :: !Text
}
......@@ -172,21 +184,21 @@ instance FromNamedRecord CsvDoc where
-- | Serialise a 'CsvDoc' as a named CSV record with the columns title, source,
-- publication_year, publication_month, publication_day, abstract and authors.
--
-- Diff view: the repeated "title"/"source" lines and the repeated closing
-- bracket are the pre-change and post-change versions of the same lines.
-- NOTE(review): with the date fields now 'Maybe', an absent value is presumably
-- written as an empty field by the library's 'Maybe' instance; confirm.
instance ToNamedRecord CsvDoc where
toNamedRecord (CsvDoc t s py pm pd abst aut) =
namedRecord [ "title" .= t
, "source" .= s
namedRecord [ "title" .= t
, "source" .= s
, "publication_year" .= py
, "publication_month" .= pm
, "publication_day" .= pd
, "abstract" .= abst
, "authors" .= aut
]
]
hyperdataDocument2csvDoc :: HyperdataDocument -> CsvDoc
hyperdataDocument2csvDoc h = CsvDoc (m $ _hd_title h)
(m $ _hd_source h)
(IntOrDec $ mI $ _hd_publication_year h)
(mI $ _hd_publication_month h)
(mI $ _hd_publication_day h)
(Just $ IntOrDec $ mI $ _hd_publication_year h)
(Just $ mI $ _hd_publication_month h)
(Just $ mI $ _hd_publication_day h)
(m $ _hd_abstract h)
(m $ _hd_authors h)
......@@ -368,7 +380,7 @@ csvHal2doc (CsvHal title source
csv2doc :: CsvDoc -> HyperdataDocument
csv2doc (CsvDoc title source
(IntOrDec pub_year) pub_month pub_day
mPubYear mPubMonth mPubDay
abstract authors ) = HyperdataDocument (Just "CsvHal")
Nothing
Nothing
......@@ -380,14 +392,18 @@ csv2doc (CsvDoc title source
Nothing
(Just source)
(Just abstract)
(Just $ pack . show $ jour (fromIntegral pub_year) pub_month pub_day)
(Just $ fromIntegral pub_year)
(Just pub_month)
(Just pub_day)
(Just $ pack . show $ jour (fromIntegral pubYear) pubMonth pubDay)
(Just pubYear)
(Just pubMonth)
(Just pubDay)
Nothing
Nothing
Nothing
Nothing
where
pubYear = fromMIntOrDec defaultYear mPubYear
pubMonth = fromMaybe defaultMonth mPubMonth
pubDay = fromMaybe defaultDay mPubDay
------------------------------------------------------------------------
parseHal :: FilePath -> IO (Either Prelude.String [HyperdataDocument])
......
......@@ -28,7 +28,7 @@ import System.IO (FilePath)
import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
import Data.Vector (fromList)
data Patent = Patent { _patent_title :: Text
data Patent = Patent { _patent_title :: Text
, _patent_abstract :: Text
, _patent_year :: Text
, _patent_id :: Text
......@@ -49,7 +49,7 @@ json2csv fin fout = do
-- | Build a 'CsvDoc' from a 'Patent'.
--
-- Title and abstract are copied, source and authors are fixed placeholder
-- strings, month and day are both set to 1, and the patent's fourth field is
-- ignored.
--
-- Diff view: the first body line is the pre-change version, the second (with
-- the date parts wrapped in 'Just') is its replacement.
-- NOTE(review): the year is parsed with 'read' on the unpacked text. If this is
-- the standard partial 'read', a non-numeric year fails at runtime; consider
-- 'readMaybe' with a fallback.
patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc (Patent title abstract year _) =
CsvDoc title "Source" (read (unpack year)) 1 1 abstract "Authors"
CsvDoc title "Source" (Just $ read (unpack year)) (Just 1) (Just 1) abstract "Authors"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment