[CsvDoc] implement Maybe for publication year/month/day

6cb3efe5 · Przemyslaw Kaminski · 5bee1178
Commit 6cb3efe5 authored Aug 24, 2021 by Przemyslaw Kaminski
5 changed files
--- a/bin/gargantext-adaptative-phylo/Main.hs
+++ b/bin/gargantext-adaptative-phylo/Main.hs
@@ -21,6 +21,7 @@ import Crypto.Hash.SHA256 (hash)
 import Data.Aeson
 import Data.Either (Either(..))
 import Data.List  (concat, nub, isSuffixOf)
+import Data.Maybe (fromMaybe)
 import Data.String (String)
 import GHC.IO (FilePath) 
 import qualified Prelude as Prelude
@@ -152,8 +153,13 @@ csvToDocs parser patterns time path =
        Right r ->
          pure $ Vector.toList
            $ Vector.take limit
-            $ Vector.map (\row -> Document (toPhyloDate  (Csv.unIntOrDec $ csv_publication_year row) (csv_publication_month row) (csv_publication_day row) time)
+            $ Vector.map (\row -> Document (toPhyloDate  (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
-                                           (toPhyloDate' (Csv.unIntOrDec $ csv_publication_year row) (csv_publication_month row) (csv_publication_day row))
+                                                         (fromMaybe Csv.defaultMonth $ csv_publication_month row)
+                                                         (fromMaybe Csv.defaultDay $ csv_publication_day row)
+                                                         time)
+                                           (toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
+                                                         (fromMaybe Csv.defaultMonth $ csv_publication_month row)
+                                                         (fromMaybe Csv.defaultDay $ csv_publication_day row))
                                           (termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
                                           Nothing
                                           []

--- a/bin/gargantext-cli/Main.hs
+++ b/bin/gargantext-cli/Main.hs
@@ -42,7 +42,7 @@ import Gargantext.Core.Types
 import Gargantext.Core.Text.Terms
 import Gargantext.Core.Text.Context
 import Gargantext.Core.Text.Terms.WithList
-import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year, unIntOrDec)
+import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year, unIntOrDec, fromMIntOrDec, defaultYear)
 import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
 import Gargantext.Core.Text.Terms (terms)
 import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
@@ -91,7 +91,7 @@ main = do
    Right cf -> do
      let corpus = DM.fromListWith (<>)
                   . DV.toList
-                   . DV.map (\n -> (unIntOrDec $ csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)]))
+                   . DV.map (\n -> (fromMIntOrDec defaultYear $ csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)]))
                   . snd $ cf
      -- termListMap :: [Text]

--- a/src/Gargantext/API/Ngrams/List.hs
+++ b/src/Gargantext/API/Ngrams/List.hs
@@ -283,7 +283,7 @@ csvPost l m  = do
  let lst = readCsvText m
  let p = parseCsvData lst
  --printDebug "[csvPost] lst" lst
-  --printDebug "[csvPost] p" p
+  printDebug "[csvPost] p" p
  _ <- setListNgrams l NgramsTerms p
  pure True
 ------------------------------------------------------------------------

--- a/src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+++ b/src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
@@ -20,6 +20,7 @@ import qualified Data.ByteString.Lazy as BL
 import Data.Char (ord)
 import Data.Csv
 import Data.Either (Either(..))
+import Data.Maybe (fromMaybe)
 import Data.Text (Text, pack, length, intercalate)
 import Data.Time.Segment (jour)
 import qualified Data.Vector          as V
@@ -85,8 +86,10 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
 -- | Types Conversions
 toDocs :: Vector CsvDoc -> [CsvGargV3]
 toDocs v = V.toList
-         $ V.zipWith (\nId (CsvDoc t s (IntOrDec py) pm pd abst auth)
+         $ V.zipWith (\nId (CsvDoc t s mPy pm pd abst auth)
-                       -> CsvGargV3 nId t s py pm pd abst auth )
+                       -> CsvGargV3 nId t s
+                          (fromMIntOrDec defaultYear mPy) (fromMaybe defaultMonth pm) (fromMaybe defaultDay pd)
+                          abst auth )
                       (V.enumFromN 1 (V.length v'')) v''
          where
            v'' = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
@@ -96,7 +99,7 @@ toDocs v = V.toList
 fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
 fromDocs docs = V.map fromDocs' docs
  where
-    fromDocs' (CsvGargV3 _ t s py pm pd abst auth) = (CsvDoc t s (IntOrDec py) pm pd abst auth)
+    fromDocs' (CsvGargV3 _ t s py pm pd abst auth) = (CsvDoc t s (Just $ IntOrDec py) (Just pm) (Just pd) abst auth)
 ---------------------------------------------------------------
 -- | Split a document in its context
@@ -150,12 +153,21 @@ instance FromField IntOrDec where
 instance ToField IntOrDec where
  toField (IntOrDec i) = toField i
+fromMIntOrDec :: Int -> Maybe IntOrDec -> Int
+fromMIntOrDec default' mVal = unIntOrDec $ fromMaybe (IntOrDec default') mVal
+defaultYear :: Int
+defaultYear = 1973
+defaultMonth :: Int
+defaultMonth = 1
+defaultDay :: Int
+defaultDay = 1
 data CsvDoc = CsvDoc
-    { csv_title  :: !Text
+    { csv_title             :: !Text
-    , csv_source :: !Text
+    , csv_source            :: !Text
-    , csv_publication_year  :: !IntOrDec
+    , csv_publication_year  :: !(Maybe IntOrDec)
-    , csv_publication_month :: !Int
+    , csv_publication_month :: !(Maybe Int)
-    , csv_publication_day   :: !Int
+    , csv_publication_day   :: !(Maybe Int)
    , csv_abstract          :: !Text
    , csv_authors           :: !Text
    }
@@ -172,21 +184,21 @@ instance FromNamedRecord CsvDoc where
 instance ToNamedRecord CsvDoc where
  toNamedRecord (CsvDoc t s py pm pd abst aut) =
-    namedRecord [ "title"  .= t
+    namedRecord [ "title"             .= t
-                , "source" .= s
+                , "source"            .= s
                , "publication_year"  .= py
                , "publication_month" .= pm
                , "publication_day"   .= pd
                , "abstract"          .= abst
                , "authors"           .= aut
-               ]
+                ]
 hyperdataDocument2csvDoc :: HyperdataDocument -> CsvDoc
 hyperdataDocument2csvDoc h = CsvDoc (m  $ _hd_title h)
                                    (m  $ _hd_source h)
-                                    (IntOrDec $ mI $ _hd_publication_year h)
+                                    (Just $ IntOrDec $ mI $ _hd_publication_year h)
-                                    (mI $ _hd_publication_month h)
+                                    (Just $ mI $ _hd_publication_month h)
-                                    (mI $ _hd_publication_day   h)
+                                    (Just $ mI $ _hd_publication_day   h)
                                    (m  $ _hd_abstract h)
                                    (m  $ _hd_authors h)
@@ -368,7 +380,7 @@ csvHal2doc (CsvHal title source
 csv2doc :: CsvDoc -> HyperdataDocument
 csv2doc (CsvDoc title source
-       (IntOrDec pub_year) pub_month pub_day
+       mPubYear mPubMonth mPubDay
       abstract authors ) = HyperdataDocument (Just "CsvHal")
                               Nothing
                               Nothing
@@ -380,14 +392,18 @@ csv2doc (CsvDoc title source
                               Nothing
                               (Just source)
                               (Just abstract)
-                               (Just $ pack . show $ jour (fromIntegral pub_year) pub_month pub_day)
+                               (Just $ pack . show $ jour (fromIntegral pubYear) pubMonth pubDay)
-                               (Just $ fromIntegral pub_year)
+                               (Just pubYear)
-                               (Just pub_month)
+                               (Just pubMonth)
-                               (Just pub_day)
+                               (Just pubDay)
                               Nothing
                               Nothing
                               Nothing
                               Nothing
+  where
+    pubYear = fromMIntOrDec defaultYear mPubYear
+    pubMonth = fromMaybe defaultMonth mPubMonth
+    pubDay = fromMaybe defaultDay mPubDay
 ------------------------------------------------------------------------
 parseHal :: FilePath -> IO (Either Prelude.String [HyperdataDocument])

--- a/src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
+++ b/src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
@@ -28,7 +28,7 @@ import System.IO (FilePath)
 import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
 import Data.Vector (fromList)
-data Patent = Patent { _patent_title :: Text
+data Patent = Patent { _patent_title    :: Text
                     , _patent_abstract :: Text
                     , _patent_year     :: Text
                     , _patent_id       :: Text
@@ -49,7 +49,7 @@ json2csv fin fout = do
 patent2csvDoc :: Patent -> CsvDoc
 patent2csvDoc (Patent title abstract year _) =
-  CsvDoc title "Source" (read (unpack year)) 1 1 abstract "Authors"
+  CsvDoc title "Source" (Just $ read (unpack year)) (Just 1) (Just 1) abstract "Authors"