Commit f35d84d9 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] merge

parent 9a29b3fc
...@@ -21,6 +21,7 @@ import Crypto.Hash.SHA256 (hash) ...@@ -21,6 +21,7 @@ import Crypto.Hash.SHA256 (hash)
import Data.Aeson import Data.Aeson
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.List (concat, nub, isSuffixOf) import Data.List (concat, nub, isSuffixOf)
import Data.Maybe (fromMaybe)
import Data.String (String) import Data.String (String)
import GHC.IO (FilePath) import GHC.IO (FilePath)
import qualified Prelude as Prelude import qualified Prelude as Prelude
...@@ -152,8 +153,13 @@ csvToDocs parser patterns time path = ...@@ -152,8 +153,13 @@ csvToDocs parser patterns time path =
Right r -> Right r ->
pure $ Vector.toList pure $ Vector.toList
$ Vector.take limit $ Vector.take limit
$ Vector.map (\row -> Document (toPhyloDate (Csv.unIntOrDec $ csv_publication_year row) (csv_publication_month row) (csv_publication_day row) time) $ Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(toPhyloDate' (Csv.unIntOrDec $ csv_publication_year row) (csv_publication_month row) (csv_publication_day row)) (fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row)
time)
(toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row))
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row)) (termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing Nothing
[] []
......
...@@ -42,7 +42,7 @@ import Gargantext.Core.Types ...@@ -42,7 +42,7 @@ import Gargantext.Core.Types
import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Context import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.WithList import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year, unIntOrDec) import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year, unIntOrDec, fromMIntOrDec, defaultYear)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList) import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms (terms) import Gargantext.Core.Text.Terms (terms)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs) import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
...@@ -91,7 +91,7 @@ main = do ...@@ -91,7 +91,7 @@ main = do
Right cf -> do Right cf -> do
let corpus = DM.fromListWith (<>) let corpus = DM.fromListWith (<>)
. DV.toList . DV.toList
. DV.map (\n -> (unIntOrDec $ csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)])) . DV.map (\n -> (fromMIntOrDec defaultYear $ csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)]))
. snd $ cf . snd $ cf
-- termListMap :: [Text] -- termListMap :: [Text]
......
...@@ -17,24 +17,23 @@ module Gargantext.API.Ngrams.List ...@@ -17,24 +17,23 @@ module Gargantext.API.Ngrams.List
import Control.Lens hiding (elements, Indexed) import Control.Lens hiding (elements, Indexed)
import Data.Aeson import Data.Aeson
import Data.Either (Either(..))
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Map (toList, fromList) import Data.Map (Map, toList, fromList)
import Data.Maybe (catMaybes) import Data.Maybe (catMaybes)
import Data.Set (Set) import Data.Set (Set)
import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
import Data.Text (Text, concat, pack) import Data.Text (Text, concat, pack)
import GHC.Generics (Generic) import Data.Vector (Vector)
import Gargantext.API.Admin.Orchestrator.Types import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams) import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams)
import Gargantext.API.Ngrams.Tools (getTermsWith) import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.New.File (FileType(..)) import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Prelude (GargServer) import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Terms (ExtractedNgrams(..)) import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText) import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Database.Action.Flow (saveDocNgramsWith) import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast') import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
...@@ -48,29 +47,25 @@ import Gargantext.Prelude ...@@ -48,29 +47,25 @@ import Gargantext.Prelude
import Network.HTTP.Media ((//), (/:)) import Network.HTTP.Media ((//), (/:))
import Servant import Servant
import Servant.Job.Async import Servant.Job.Async
import Servant.Job.Utils (jsonOptions) import qualified Data.ByteString.Lazy as BSL
import Web.FormUrlEncoded (FromForm) import qualified Data.Csv as Csv
import qualified Data.HashMap.Strict as HashMap import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vec
import qualified Prelude as Prelude
import qualified Protolude as P
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO refactor -- | TODO refactor
{-
type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList) type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool -- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|> PostAPI :<|> PostAPI
:<|> CSVPostAPI :<|> CSVPostAPI
data HTML
instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8")
instance ToJSON a => MimeRender HTML a where
mimeRender _ = encode
api :: ListId -> GargServer API api :: ListId -> GargServer API
api l = get l :<|> postAsync l :<|> csvPostAsync l api l = get l :<|> postAsync l :<|> csvPostAsync l
-}
---------------------- ----------------------
type GETAPI = Summary "Get List" type GETAPI = Summary "Get List"
...@@ -80,6 +75,12 @@ type GETAPI = Summary "Get List" ...@@ -80,6 +75,12 @@ type GETAPI = Summary "Get List"
getApi :: GargServer GETAPI getApi :: GargServer GETAPI
getApi = get getApi = get
data HTML
instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8")
instance ToJSON a => MimeRender HTML a where
mimeRender _ = encode
---------------------- ----------------------
type JSONAPI = Summary "Update List" type JSONAPI = Summary "Update List"
:> "lists" :> "lists"
...@@ -100,15 +101,11 @@ type CSVAPI = Summary "Update List (legacy v3 CSV)" ...@@ -100,15 +101,11 @@ type CSVAPI = Summary "Update List (legacy v3 CSV)"
:> "add" :> "add"
:> "form" :> "form"
:> "async" :> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog :> AsyncJobs JobLog '[FormUrlEncoded] WithTextFile JobLog
csvApi :: GargServer CSVAPI csvApi :: GargServer CSVAPI
csvApi = csvPostAsync csvApi = csvPostAsync
----------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
get :: HasNodeStory env err m => get :: HasNodeStory env err m =>
ListId -> m (Headers '[Header "Content-Disposition" Text] NgramsList) ListId -> m (Headers '[Header "Content-Disposition" Text] NgramsList)
...@@ -142,16 +139,6 @@ post l m = do ...@@ -142,16 +139,6 @@ post l m = do
pure True pure True
------------------------------------------------------------------------ ------------------------------------------------------------------------
csvPost :: FlowCmdM env err m
=> ListId
-> NgramsList
-> m Bool
csvPost l m = do
printDebug "[csvPost] l" l
printDebug "[csvPost] m" m
pure True
-----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here) -- | Re-index documents of a corpus with new ngrams (called orphans here)
reIndexWith :: ( HasNodeStory env err m reIndexWith :: ( HasNodeStory env err m
, FlowCmdM env err m , FlowCmdM env err m
...@@ -254,6 +241,7 @@ postAsync' l (WithFile _ m _) logStatus = do ...@@ -254,6 +241,7 @@ postAsync' l (WithFile _ m _) logStatus = do
, _scst_events = Just [] , _scst_events = Just []
} }
------------------------------------------------------------------------ ------------------------------------------------------------------------
type CSVPostAPI = Summary "Update List (legacy v3 CSV)" type CSVPostAPI = Summary "Update List (legacy v3 CSV)"
:> "csv" :> "csv"
:> "add" :> "add"
...@@ -261,20 +249,61 @@ type CSVPostAPI = Summary "Update List (legacy v3 CSV)" ...@@ -261,20 +249,61 @@ type CSVPostAPI = Summary "Update List (legacy v3 CSV)"
:> "async" :> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog :> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog
readCsvText :: Text -> [(Text, Text, Text)]
readCsvText t = case eDec of
Left _ -> []
Right dec -> Vec.toList dec
where
lt = BSL.fromStrict $ P.encodeUtf8 t
eDec = Csv.decodeWith
(Csv.defaultDecodeOptions { Csv.decDelimiter = fromIntegral (P.ord '\t') })
Csv.HasHeader lt :: Either Prelude.String (Vector (Text, Text, Text))
parseCsvData :: [(Text, Text, Text)] -> Map NgramsTerm NgramsRepoElement
parseCsvData lst = Map.fromList $ conv <$> lst
where
conv (_status, label, _forms) =
(NgramsTerm label, NgramsRepoElement { _nre_size = 1
, _nre_list = CandidateTerm
, _nre_root = Nothing
, _nre_parent = Nothing
, _nre_children = MSet Map.empty })
csvPost :: FlowCmdM env err m
=> ListId
-> Text
-> m Bool
csvPost l m = do
printDebug "[csvPost] l" l
-- printDebug "[csvPost] m" m
-- status label forms
let lst = readCsvText m
let p = parseCsvData lst
--printDebug "[csvPost] lst" lst
printDebug "[csvPost] p" p
_ <- setListNgrams l NgramsTerms p
pure True
------------------------------------------------------------------------
csvPostAsync :: GargServer CSVAPI csvPostAsync :: GargServer CSVAPI
csvPostAsync lId = csvPostAsync lId =
serveJobsAPI $ serveJobsAPI $
JobFunction $ \f@(WithFile ft _ n) log' -> do JobFunction $ \f@(WithTextFile ft _ n) log' -> do
printDebug "[csvPostAsync] filetype" ft let log'' x = do
printDebug "[csvPostAsync] name" n printDebug "[csvPostAsync] filetype" ft
csvPostAsync' lId f (liftBase . log') printDebug "[csvPostAsync] name" n
liftBase $ log' x
csvPostAsync' lId f log''
csvPostAsync' :: FlowCmdM env err m csvPostAsync' :: FlowCmdM env err m
=> ListId => ListId
-> WithFile -> WithTextFile
-> (JobLog -> m ()) -> (JobLog -> m ())
-> m JobLog -> m JobLog
csvPostAsync' l (WithFile _ m _) logStatus = do csvPostAsync' l (WithTextFile _ m _) logStatus = do
logStatus JobLog { _scst_succeeded = Just 0 logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just 1 , _scst_remaining = Just 1
...@@ -288,18 +317,3 @@ csvPostAsync' l (WithFile _ m _) logStatus = do ...@@ -288,18 +317,3 @@ csvPostAsync' l (WithFile _ m _) logStatus = do
, _scst_events = Just [] , _scst_events = Just []
} }
------------------------------------------------------------------------ ------------------------------------------------------------------------
data WithFile = WithFile
{ _wf_filetype :: !FileType
, _wf_data :: !NgramsList
, _wf_name :: !Text
} deriving (Eq, Show, Generic)
makeLenses ''WithFile
instance FromForm WithFile
instance FromJSON WithFile where
parseJSON = genericParseJSON $ jsonOptions "_wf_"
instance ToJSON WithFile where
toJSON = genericToJSON $ jsonOptions "_wf_"
instance ToSchema WithFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
...@@ -20,6 +20,7 @@ import qualified Data.ByteString.Lazy as BL ...@@ -20,6 +20,7 @@ import qualified Data.ByteString.Lazy as BL
import Data.Char (ord) import Data.Char (ord)
import Data.Csv import Data.Csv
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.Maybe (fromMaybe)
import Data.Text (Text, pack, length, intercalate) import Data.Text (Text, pack, length, intercalate)
import Data.Time.Segment (jour) import Data.Time.Segment (jour)
import qualified Data.Vector as V import qualified Data.Vector as V
...@@ -85,8 +86,10 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) = ...@@ -85,8 +86,10 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
-- | Types Conversions -- | Types Conversions
toDocs :: Vector CsvDoc -> [CsvGargV3] toDocs :: Vector CsvDoc -> [CsvGargV3]
toDocs v = V.toList toDocs v = V.toList
$ V.zipWith (\nId (CsvDoc t s (IntOrDec py) pm pd abst auth) $ V.zipWith (\nId (CsvDoc t s mPy pm pd abst auth)
-> CsvGargV3 nId t s py pm pd abst auth ) -> CsvGargV3 nId t s
(fromMIntOrDec defaultYear mPy) (fromMaybe defaultMonth pm) (fromMaybe defaultDay pd)
abst auth )
(V.enumFromN 1 (V.length v'')) v'' (V.enumFromN 1 (V.length v'')) v''
where where
v'' = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps v'' = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
...@@ -96,7 +99,7 @@ toDocs v = V.toList ...@@ -96,7 +99,7 @@ toDocs v = V.toList
fromDocs :: Vector CsvGargV3 -> Vector CsvDoc fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
fromDocs docs = V.map fromDocs' docs fromDocs docs = V.map fromDocs' docs
where where
fromDocs' (CsvGargV3 _ t s py pm pd abst auth) = (CsvDoc t s (IntOrDec py) pm pd abst auth) fromDocs' (CsvGargV3 _ t s py pm pd abst auth) = (CsvDoc t s (Just $ IntOrDec py) (Just pm) (Just pd) abst auth)
--------------------------------------------------------------- ---------------------------------------------------------------
-- | Split a document in its context -- | Split a document in its context
...@@ -150,12 +153,21 @@ instance FromField IntOrDec where ...@@ -150,12 +153,21 @@ instance FromField IntOrDec where
instance ToField IntOrDec where instance ToField IntOrDec where
toField (IntOrDec i) = toField i toField (IntOrDec i) = toField i
fromMIntOrDec :: Int -> Maybe IntOrDec -> Int
fromMIntOrDec default' mVal = unIntOrDec $ fromMaybe (IntOrDec default') mVal
defaultYear :: Int
defaultYear = 1973
defaultMonth :: Int
defaultMonth = 1
defaultDay :: Int
defaultDay = 1
data CsvDoc = CsvDoc data CsvDoc = CsvDoc
{ csv_title :: !Text { csv_title :: !Text
, csv_source :: !Text , csv_source :: !Text
, csv_publication_year :: !IntOrDec , csv_publication_year :: !(Maybe IntOrDec)
, csv_publication_month :: !Int , csv_publication_month :: !(Maybe Int)
, csv_publication_day :: !Int , csv_publication_day :: !(Maybe Int)
, csv_abstract :: !Text , csv_abstract :: !Text
, csv_authors :: !Text , csv_authors :: !Text
} }
...@@ -172,21 +184,21 @@ instance FromNamedRecord CsvDoc where ...@@ -172,21 +184,21 @@ instance FromNamedRecord CsvDoc where
instance ToNamedRecord CsvDoc where instance ToNamedRecord CsvDoc where
toNamedRecord (CsvDoc t s py pm pd abst aut) = toNamedRecord (CsvDoc t s py pm pd abst aut) =
namedRecord [ "title" .= t namedRecord [ "title" .= t
, "source" .= s , "source" .= s
, "publication_year" .= py , "publication_year" .= py
, "publication_month" .= pm , "publication_month" .= pm
, "publication_day" .= pd , "publication_day" .= pd
, "abstract" .= abst , "abstract" .= abst
, "authors" .= aut , "authors" .= aut
] ]
hyperdataDocument2csvDoc :: HyperdataDocument -> CsvDoc hyperdataDocument2csvDoc :: HyperdataDocument -> CsvDoc
hyperdataDocument2csvDoc h = CsvDoc (m $ _hd_title h) hyperdataDocument2csvDoc h = CsvDoc (m $ _hd_title h)
(m $ _hd_source h) (m $ _hd_source h)
(IntOrDec $ mI $ _hd_publication_year h) (Just $ IntOrDec $ mI $ _hd_publication_year h)
(mI $ _hd_publication_month h) (Just $ mI $ _hd_publication_month h)
(mI $ _hd_publication_day h) (Just $ mI $ _hd_publication_day h)
(m $ _hd_abstract h) (m $ _hd_abstract h)
(m $ _hd_authors h) (m $ _hd_authors h)
...@@ -368,7 +380,7 @@ csvHal2doc (CsvHal title source ...@@ -368,7 +380,7 @@ csvHal2doc (CsvHal title source
csv2doc :: CsvDoc -> HyperdataDocument csv2doc :: CsvDoc -> HyperdataDocument
csv2doc (CsvDoc title source csv2doc (CsvDoc title source
(IntOrDec pub_year) pub_month pub_day mPubYear mPubMonth mPubDay
abstract authors ) = HyperdataDocument (Just "CsvHal") abstract authors ) = HyperdataDocument (Just "CsvHal")
Nothing Nothing
Nothing Nothing
...@@ -380,14 +392,18 @@ csv2doc (CsvDoc title source ...@@ -380,14 +392,18 @@ csv2doc (CsvDoc title source
Nothing Nothing
(Just source) (Just source)
(Just abstract) (Just abstract)
(Just $ pack . show $ jour (fromIntegral pub_year) pub_month pub_day) (Just $ pack . show $ jour (fromIntegral pubYear) pubMonth pubDay)
(Just $ fromIntegral pub_year) (Just pubYear)
(Just pub_month) (Just pubMonth)
(Just pub_day) (Just pubDay)
Nothing Nothing
Nothing Nothing
Nothing Nothing
Nothing Nothing
where
pubYear = fromMIntOrDec defaultYear mPubYear
pubMonth = fromMaybe defaultMonth mPubMonth
pubDay = fromMaybe defaultDay mPubDay
------------------------------------------------------------------------ ------------------------------------------------------------------------
parseHal :: FilePath -> IO (Either Prelude.String [HyperdataDocument]) parseHal :: FilePath -> IO (Either Prelude.String [HyperdataDocument])
......
...@@ -28,7 +28,7 @@ import System.IO (FilePath) ...@@ -28,7 +28,7 @@ import System.IO (FilePath)
import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3) import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
import Data.Vector (fromList) import Data.Vector (fromList)
data Patent = Patent { _patent_title :: Text data Patent = Patent { _patent_title :: Text
, _patent_abstract :: Text , _patent_abstract :: Text
, _patent_year :: Text , _patent_year :: Text
, _patent_id :: Text , _patent_id :: Text
...@@ -49,7 +49,7 @@ json2csv fin fout = do ...@@ -49,7 +49,7 @@ json2csv fin fout = do
patent2csvDoc :: Patent -> CsvDoc patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc (Patent title abstract year _) = patent2csvDoc (Patent title abstract year _) =
CsvDoc title "Source" (read (unpack year)) 1 1 abstract "Authors" CsvDoc title "Source" (Just $ read (unpack year)) (Just 1) (Just 1) abstract "Authors"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment