Commit f35d84d9 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] merge

parent 9a29b3fc
......@@ -21,6 +21,7 @@ import Crypto.Hash.SHA256 (hash)
import Data.Aeson
import Data.Either (Either(..))
import Data.List (concat, nub, isSuffixOf)
import Data.Maybe (fromMaybe)
import Data.String (String)
import GHC.IO (FilePath)
import qualified Prelude as Prelude
......@@ -152,8 +153,13 @@ csvToDocs parser patterns time path =
Right r ->
pure $ Vector.toList
$ Vector.take limit
$ Vector.map (\row -> Document (toPhyloDate (Csv.unIntOrDec $ csv_publication_year row) (csv_publication_month row) (csv_publication_day row) time)
(toPhyloDate' (Csv.unIntOrDec $ csv_publication_year row) (csv_publication_month row) (csv_publication_day row))
$ Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row)
time)
(toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row))
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
......
......@@ -42,7 +42,7 @@ import Gargantext.Core.Types
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year, unIntOrDec)
import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year, unIntOrDec, fromMIntOrDec, defaultYear)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms (terms)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
......@@ -91,7 +91,7 @@ main = do
Right cf -> do
let corpus = DM.fromListWith (<>)
. DV.toList
. DV.map (\n -> (unIntOrDec $ csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)]))
. DV.map (\n -> (fromMIntOrDec defaultYear $ csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)]))
. snd $ cf
-- termListMap :: [Text]
......
......@@ -17,24 +17,23 @@ module Gargantext.API.Ngrams.List
import Control.Lens hiding (elements, Indexed)
import Data.Aeson
import Data.Either (Either(..))
import Data.HashMap.Strict (HashMap)
import Data.Map (toList, fromList)
import Data.Map (Map, toList, fromList)
import Data.Maybe (catMaybes)
import Data.Set (Set)
import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
import Data.Text (Text, concat, pack)
import GHC.Generics (Generic)
import Data.Vector (Vector)
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams)
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.New.File (FileType(..))
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
......@@ -48,29 +47,25 @@ import Gargantext.Prelude
import Network.HTTP.Media ((//), (/:))
import Servant
import Servant.Job.Async
import Servant.Job.Utils (jsonOptions)
import Web.FormUrlEncoded (FromForm)
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Csv as Csv
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Text as Text
import qualified Data.Vector as Vec
import qualified Prelude as Prelude
import qualified Protolude as P
------------------------------------------------------------------------
-- | TODO refactor
{-
type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|> PostAPI
:<|> CSVPostAPI
data HTML
instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8")
instance ToJSON a => MimeRender HTML a where
mimeRender _ = encode
api :: ListId -> GargServer API
api l = get l :<|> postAsync l :<|> csvPostAsync l
-}
----------------------
type GETAPI = Summary "Get List"
......@@ -80,6 +75,12 @@ type GETAPI = Summary "Get List"
getApi :: GargServer GETAPI
getApi = get
data HTML
instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8")
instance ToJSON a => MimeRender HTML a where
mimeRender _ = encode
----------------------
type JSONAPI = Summary "Update List"
:> "lists"
......@@ -100,15 +101,11 @@ type CSVAPI = Summary "Update List (legacy v3 CSV)"
:> "add"
:> "form"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog
:> AsyncJobs JobLog '[FormUrlEncoded] WithTextFile JobLog
csvApi :: GargServer CSVAPI
csvApi = csvPostAsync
----------------------
------------------------------------------------------------------------
get :: HasNodeStory env err m =>
ListId -> m (Headers '[Header "Content-Disposition" Text] NgramsList)
......@@ -142,16 +139,6 @@ post l m = do
pure True
------------------------------------------------------------------------
csvPost :: FlowCmdM env err m
=> ListId
-> NgramsList
-> m Bool
csvPost l m = do
printDebug "[csvPost] l" l
printDebug "[csvPost] m" m
pure True
-----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here)
reIndexWith :: ( HasNodeStory env err m
, FlowCmdM env err m
......@@ -254,6 +241,7 @@ postAsync' l (WithFile _ m _) logStatus = do
, _scst_events = Just []
}
------------------------------------------------------------------------
type CSVPostAPI = Summary "Update List (legacy v3 CSV)"
:> "csv"
:> "add"
......@@ -261,20 +249,61 @@ type CSVPostAPI = Summary "Update List (legacy v3 CSV)"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog
readCsvText :: Text -> [(Text, Text, Text)]
readCsvText t = case eDec of
Left _ -> []
Right dec -> Vec.toList dec
where
lt = BSL.fromStrict $ P.encodeUtf8 t
eDec = Csv.decodeWith
(Csv.defaultDecodeOptions { Csv.decDelimiter = fromIntegral (P.ord '\t') })
Csv.HasHeader lt :: Either Prelude.String (Vector (Text, Text, Text))
parseCsvData :: [(Text, Text, Text)] -> Map NgramsTerm NgramsRepoElement
parseCsvData lst = Map.fromList $ conv <$> lst
where
conv (_status, label, _forms) =
(NgramsTerm label, NgramsRepoElement { _nre_size = 1
, _nre_list = CandidateTerm
, _nre_root = Nothing
, _nre_parent = Nothing
, _nre_children = MSet Map.empty })
csvPost :: FlowCmdM env err m
=> ListId
-> Text
-> m Bool
csvPost l m = do
printDebug "[csvPost] l" l
-- printDebug "[csvPost] m" m
-- status label forms
let lst = readCsvText m
let p = parseCsvData lst
--printDebug "[csvPost] lst" lst
printDebug "[csvPost] p" p
_ <- setListNgrams l NgramsTerms p
pure True
------------------------------------------------------------------------
csvPostAsync :: GargServer CSVAPI
csvPostAsync lId =
serveJobsAPI $
JobFunction $ \f@(WithFile ft _ n) log' -> do
printDebug "[csvPostAsync] filetype" ft
printDebug "[csvPostAsync] name" n
csvPostAsync' lId f (liftBase . log')
JobFunction $ \f@(WithTextFile ft _ n) log' -> do
let log'' x = do
printDebug "[csvPostAsync] filetype" ft
printDebug "[csvPostAsync] name" n
liftBase $ log' x
csvPostAsync' lId f log''
csvPostAsync' :: FlowCmdM env err m
=> ListId
-> WithFile
-> WithTextFile
-> (JobLog -> m ())
-> m JobLog
csvPostAsync' l (WithFile _ m _) logStatus = do
csvPostAsync' l (WithTextFile _ m _) logStatus = do
logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
, _scst_remaining = Just 1
......@@ -288,18 +317,3 @@ csvPostAsync' l (WithFile _ m _) logStatus = do
, _scst_events = Just []
}
------------------------------------------------------------------------
data WithFile = WithFile
{ _wf_filetype :: !FileType
, _wf_data :: !NgramsList
, _wf_name :: !Text
} deriving (Eq, Show, Generic)
makeLenses ''WithFile
instance FromForm WithFile
instance FromJSON WithFile where
parseJSON = genericParseJSON $ jsonOptions "_wf_"
instance ToJSON WithFile where
toJSON = genericToJSON $ jsonOptions "_wf_"
instance ToSchema WithFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
......@@ -20,6 +20,7 @@ import qualified Data.ByteString.Lazy as BL
import Data.Char (ord)
import Data.Csv
import Data.Either (Either(..))
import Data.Maybe (fromMaybe)
import Data.Text (Text, pack, length, intercalate)
import Data.Time.Segment (jour)
import qualified Data.Vector as V
......@@ -85,8 +86,10 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
-- | Types Conversions
toDocs :: Vector CsvDoc -> [CsvGargV3]
toDocs v = V.toList
$ V.zipWith (\nId (CsvDoc t s (IntOrDec py) pm pd abst auth)
-> CsvGargV3 nId t s py pm pd abst auth )
$ V.zipWith (\nId (CsvDoc t s mPy pm pd abst auth)
-> CsvGargV3 nId t s
(fromMIntOrDec defaultYear mPy) (fromMaybe defaultMonth pm) (fromMaybe defaultDay pd)
abst auth )
(V.enumFromN 1 (V.length v'')) v''
where
v'' = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
......@@ -96,7 +99,7 @@ toDocs v = V.toList
fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
fromDocs docs = V.map fromDocs' docs
where
fromDocs' (CsvGargV3 _ t s py pm pd abst auth) = (CsvDoc t s (IntOrDec py) pm pd abst auth)
fromDocs' (CsvGargV3 _ t s py pm pd abst auth) = (CsvDoc t s (Just $ IntOrDec py) (Just pm) (Just pd) abst auth)
---------------------------------------------------------------
-- | Split a document in its context
......@@ -150,12 +153,21 @@ instance FromField IntOrDec where
instance ToField IntOrDec where
toField (IntOrDec i) = toField i
fromMIntOrDec :: Int -> Maybe IntOrDec -> Int
fromMIntOrDec default' mVal = unIntOrDec $ fromMaybe (IntOrDec default') mVal
defaultYear :: Int
defaultYear = 1973
defaultMonth :: Int
defaultMonth = 1
defaultDay :: Int
defaultDay = 1
data CsvDoc = CsvDoc
{ csv_title :: !Text
, csv_source :: !Text
, csv_publication_year :: !IntOrDec
, csv_publication_month :: !Int
, csv_publication_day :: !Int
{ csv_title :: !Text
, csv_source :: !Text
, csv_publication_year :: !(Maybe IntOrDec)
, csv_publication_month :: !(Maybe Int)
, csv_publication_day :: !(Maybe Int)
, csv_abstract :: !Text
, csv_authors :: !Text
}
......@@ -172,21 +184,21 @@ instance FromNamedRecord CsvDoc where
instance ToNamedRecord CsvDoc where
toNamedRecord (CsvDoc t s py pm pd abst aut) =
namedRecord [ "title" .= t
, "source" .= s
namedRecord [ "title" .= t
, "source" .= s
, "publication_year" .= py
, "publication_month" .= pm
, "publication_day" .= pd
, "abstract" .= abst
, "authors" .= aut
]
]
hyperdataDocument2csvDoc :: HyperdataDocument -> CsvDoc
hyperdataDocument2csvDoc h = CsvDoc (m $ _hd_title h)
(m $ _hd_source h)
(IntOrDec $ mI $ _hd_publication_year h)
(mI $ _hd_publication_month h)
(mI $ _hd_publication_day h)
(Just $ IntOrDec $ mI $ _hd_publication_year h)
(Just $ mI $ _hd_publication_month h)
(Just $ mI $ _hd_publication_day h)
(m $ _hd_abstract h)
(m $ _hd_authors h)
......@@ -368,7 +380,7 @@ csvHal2doc (CsvHal title source
csv2doc :: CsvDoc -> HyperdataDocument
csv2doc (CsvDoc title source
(IntOrDec pub_year) pub_month pub_day
mPubYear mPubMonth mPubDay
abstract authors ) = HyperdataDocument (Just "CsvHal")
Nothing
Nothing
......@@ -380,14 +392,18 @@ csv2doc (CsvDoc title source
Nothing
(Just source)
(Just abstract)
(Just $ pack . show $ jour (fromIntegral pub_year) pub_month pub_day)
(Just $ fromIntegral pub_year)
(Just pub_month)
(Just pub_day)
(Just $ pack . show $ jour (fromIntegral pubYear) pubMonth pubDay)
(Just pubYear)
(Just pubMonth)
(Just pubDay)
Nothing
Nothing
Nothing
Nothing
where
pubYear = fromMIntOrDec defaultYear mPubYear
pubMonth = fromMaybe defaultMonth mPubMonth
pubDay = fromMaybe defaultDay mPubDay
------------------------------------------------------------------------
parseHal :: FilePath -> IO (Either Prelude.String [HyperdataDocument])
......
......@@ -28,7 +28,7 @@ import System.IO (FilePath)
import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
import Data.Vector (fromList)
data Patent = Patent { _patent_title :: Text
data Patent = Patent { _patent_title :: Text
, _patent_abstract :: Text
, _patent_year :: Text
, _patent_id :: Text
......@@ -49,7 +49,7 @@ json2csv fin fout = do
patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc (Patent title abstract year _) =
CsvDoc title "Source" (read (unpack year)) 1 1 abstract "Authors"
CsvDoc title "Source" (Just $ read (unpack year)) (Just 1) (Just 1) abstract "Authors"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment