{-|
Module      : Gargantext.API.Node.DocumentsFromWriteNodes
Description : Build corpus documents from the contents of child Write (Notes) nodes
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.DocumentsFromWriteNodes
where
import Conduit
import Control.Lens ((^.))
import Data.Aeson
import Data.Either (Either(..), rights)
import Data.Maybe (fromMaybe)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Prelude (GargM, GargError)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
import Gargantext.Core.Text.List.Social (FlowSocialListWith)
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Gargantext.Core.Text.Corpus.Parsers.Date (split')
import Servant
import Text.Read (readMaybe)
import qualified Data.List as List
import qualified Data.Text as T
-- import qualified Gargantext.Defaults as Defaults
------------------------------------------------------------------------
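-- | Asynchronous job endpoint: turns the Write (Notes) children of a node
--   into documents of its parent corpus.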
type API = Summary "Documents from Write nodes."
           :> AsyncJobs JobLog '[JSON] Params JobLog
------------------------------------------------------------------------
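-- | Parameters of the job, decoded from the JSON body with generic
--   Aeson options:
--
--     * @id@ is sent by the client but not used by
--       'documentsFromWriteNodes' (the target node comes from the route)
--     * @paragraphs@ is the number of paragraphs per generated document,
--       sent as text (defaults to 7 when not a readable 'Int')
--     * @lang@ is the language used for term extraction
--     * @selection@ is the social-list selection passed to 'flowDataText'
--
--   Illustrative payload only; the exact encodings of 'Lang' and
--   'FlowSocialListWith' depend on their JSON instances:
--   @{ "id": 42, "paragraphs": "7", "lang": "EN", "selection": ... }@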
data Params = Params
  { id         :: Int
  , paragraphs :: Text
  , lang       :: Lang
  , selection  :: FlowSocialListWith
  }
  deriving (Generic, Show)

instance FromJSON Params where
  parseJSON = genericParseJSON defaultOptions

instance ToJSON Params where
  toJSON = genericToJSON defaultOptions

instance ToSchema Params
------------------------------------------------------------------------
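-- | Serve the asynchronous job ('DocumentFromWriteNodeJob') for the given
--   user and node.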
api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
api uId nId =
  serveJobsAPI DocumentFromWriteNodeJob $ \jHandle p ->
    documentsFromWriteNodes uId nId p jHandle
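
-- | Job worker: find the closest 'NodeCorpus' parent of the node, download
--   the contents of its Write (Notes) children, split them into
--   paragraph-sized 'HyperdataDocument's and flow them into that corpus.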
documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m, MonadJobStatus m)
                        => UserId
                        -> NodeId
                        -> Params
                        -> JobHandle m
                        -> m ()
documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } jobHandle = do
  markStarted 2 jobHandle
  markProgress 1 jobHandle
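  -- Resolve the corpus that will receive the documents: the closest
  -- NodeCorpus ancestor of the current node. Fail the job if there is none.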
  mcId <- getClosestParentIdByType' nId NodeCorpus
  cId <- case mcId of
    Just cId -> pure cId
    Nothing -> do
      let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
      markFailed (Just msg) jobHandle
      panic msg
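  -- Collect the Write (Notes) children of the node and download the
  -- contents of each frame.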
  frameWriteIds <- getChildrenByType nId Notes
  -- https://write.frame.gargantext.org/<frame_id>/download
  frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
  frameWritesWithContents <- liftBase $
    mapM (\node -> do
           contents <- getHyperdataFrameContents (node ^. node_hyperdata)
           pure (node, contents)
         ) frameWrites
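  -- Parse each frame into per-paragraph documents; the paragraph count is
  -- sent as text and falls back to 7 when unreadable. Frames that fail to
  -- parse are dropped by 'rights'.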
  let paragraphs' = fromMaybe (7 :: Int) $ readMaybe $ T.unpack paragraphs
  let parsedE = (\(node, contents)
                  -> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents
  let parsed = List.concat $ rights parsedE
  -- printDebug "DocumentsFromWriteNodes: uId" uId
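  -- Flow the parsed documents into the corpus as a DataNew stream
  -- (document count plus a conduit yielding the documents).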
  _ <- flowDataText (RootId (NodeId uId))
                    (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
                    (Multi lang)
                    cId
                    (Just selection)
                    jobHandle

  markProgress 1 jobHandle
------------------------------------------------------------------------
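-- | Parse the contents of one frame write with 'parseLines' and turn it
--   into a list of 'HyperdataDocument's, one per group of @paragraphSize@
--   paragraphs (split with 'text2titleParagraphs'). Authors come from the
--   parsed header; the publication date is the node's 'node_date'.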
hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
  case parseLines contents of
    Left _ -> Left "Error parsing node"
    Right (Parsed { authors, contents = ctxts }) ->
      let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
          authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
          authors' = T.concat $ authorJoinSingle <$> authors
          --{-
          (year', month', day') = split' (node ^. node_date)
          date' = Just $ T.concat [ T.pack $ show year', "-"
                                  , T.pack $ show month', "-"
                                  , T.pack $ show day'
                                  ]
          --}
          {-
          date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year', "-"
                                                            , T.pack $ show month', "-"
                                                            , T.pack $ show day' ]) <$> date
          year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
          month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
          day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date
          --}
      in
        Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just $ cs $ show Notes
                                                         , _hd_doi = Nothing
                                                         , _hd_url = Nothing
                                                         , _hd_uniqId = Nothing
                                                         , _hd_uniqIdBdd = Nothing
                                                         , _hd_page = Nothing
                                                         , _hd_title = Just t
                                                         , _hd_authors = Just authors'
                                                         , _hd_institutes = Nothing
                                                         , _hd_source = Just $ node ^. node_name
                                                         , _hd_abstract = Just ctxt
                                                         , _hd_publication_date = date'
                                                         , _hd_publication_year = Just year'
                                                         , _hd_publication_month = Just month'
                                                         , _hd_publication_day = Just day'
                                                         , _hd_publication_hour = Nothing
                                                         , _hd_publication_minute = Nothing
                                                         , _hd_publication_second = Nothing
                                                         , _hd_language_iso2 = Just $ T.pack $ show lang
                                                         }
                        ) (text2titleParagraphs paragraphSize ctxts)
               )