DocumentsFromWriteNodes.hs 5.78 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
{-|
Module      : Gargantext.API.Node.DocumentsFromWriteNodes
Description :
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# LANGUAGE MonoLocalBinds     #-}
{-# LANGUAGE TemplateHaskell    #-}
{-# LANGUAGE TypeOperators      #-}

module Gargantext.API.Node.DocumentsFromWriteNodes
      where

19
import Control.Lens ((^.))
20
import Data.Aeson
21
import Data.Either (Either(..), rights)
22
import Data.Swagger
23
import qualified Data.Text as T
24 25
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
26
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
27
import Gargantext.API.Prelude (GargServer)
28 29 30 31 32
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
33
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
34 35
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame
36
import Gargantext.Database.Admin.Types.Node
37
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
38
import Gargantext.Database.Schema.Node (node_hyperdata)
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
import Gargantext.Prelude
import GHC.Generics (Generic)
import Servant
import Servant.Job.Async (JobFunction(..), serveJobsAPI)

------------------------------------------------------------------------
type API = Summary " Documents from Write nodes."
         :> AsyncJobs JobLog '[JSON] Params JobLog
------------------------------------------------------------------------
newtype Params = Params { id :: Int }
  deriving (Generic, Show)

instance FromJSON Params where
  parseJSON = genericParseJSON defaultOptions
instance ToJSON Params where
  toJSON = genericToJSON defaultOptions
instance ToSchema Params
------------------------------------------------------------------------
api :: UserId -> NodeId -> GargServer API
api uId nId =
  serveJobsAPI $
    JobFunction (\p log'' ->
      let
        log' x = do
          liftBase $ log'' x
      in documentsFromWriteNodes uId nId p (liftBase . log')
      )

documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
    => UserId
    -> NodeId
    -> Params
    -> (JobLog -> m ())
    -> m JobLog
73
documentsFromWriteNodes uId nId _p logStatus = do
74 75 76 77 78 79
  let jobLog = JobLog { _scst_succeeded = Just 1
                      , _scst_failed    = Just 0
                      , _scst_remaining = Just 1
                      , _scst_events    = Just []
                      }
  logStatus jobLog
80

81
  mcId <- getClosestParentIdByType' nId NodeCorpus
82 83 84 85 86 87
  cId <- case mcId of
    Just cId -> pure cId
    Nothing -> do
      let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
      logStatus $ jobLogFailTotalWithMessage msg jobLog
      panic msg
88

89 90 91 92 93 94 95 96 97 98 99 100 101 102
  frameWriteIds <- getChildrenByType nId NodeFrameWrite

  -- https://write.frame.gargantext.org/<frame_id>/download
  frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds

  frameWritesWithContents <- liftBase $
    mapM (\node -> do
             contents <- getHyperdataFrameContents (node ^. node_hyperdata)
             pure (node, contents)
         ) frameWrites

  let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
  let parsed = rights parsedE

103
  _ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing logStatus
104

105
  pure $ jobLogSuccess jobLog
106
------------------------------------------------------------------------
107 108 109 110 111
hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
  case parseLines contents of
    Left _ -> Left "Error parsing node"
    Right (Parsed { authors, contents = c, date, source, title = t }) ->
112 113 114 115 116 117 118 119
      let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
          authors' = T.concat $ authorJoinSingle <$> authors 
          date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
                                                            , T.pack $ show month, "-"
                                                            , T.pack $ show day ]) <$> date
          year' = fromIntegral $ maybe 2021 (\(Date { year }) -> year) date
          month' = fromIntegral $ maybe 10 (\(Date { month }) -> month) date
          day' = fromIntegral $ maybe 4 (\(Date { day }) -> day) date in
120
      Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
121 122 123 124 125 126 127 128 129 130
                              , _hd_doi = Nothing
                              , _hd_url = Nothing
                              , _hd_uniqId = Nothing
                              , _hd_uniqIdBdd = Nothing
                              , _hd_page = Nothing
                              , _hd_title = Just t
                              , _hd_authors = Just authors'
                              , _hd_institutes = Nothing
                              , _hd_source = source
                              , _hd_abstract = Just c
131 132 133 134
                              , _hd_publication_date = date'
                              , _hd_publication_year = Just year'
                              , _hd_publication_month = Just month'
                              , _hd_publication_day = Just day'
135 136 137 138
                              , _hd_publication_hour = Nothing
                              , _hd_publication_minute = Nothing
                              , _hd_publication_second = Nothing
                              , _hd_language_iso2 = Just $ T.pack $ show EN }