Commit 1b1b4a07 authored by Alexandre Delanoë

[MERGE]

parents fee10d43 45c0817e
use_nix
......@@ -11,6 +11,7 @@ profiling
# Stack
.stack-work
dist-newstyle
# Emacs
TAGS
......@@ -33,5 +34,7 @@ _darcs
# Runtime
# Repo
repos
repo.json*
tmp*repo*json
data
......@@ -13,6 +13,11 @@ source-repository-package
location: https://gitlab.iscpif.fr/gargantext/patches-class.git
tag: 271ba32d6c940029dc653354dd7974a819f48e77
source-repository-package
type: git
location: https://gitlab.iscpif.fr/cgenie/haskell-gargantext-prelude.git
tag: 35b09629a658fc16cc9ff63e7591e58511cd98a7
-- External Data API connectors
source-repository-package
type: git
......@@ -76,7 +81,7 @@ source-repository-package
source-repository-package
type: git
location: https://github.com/delanoe/haskell-opaleye.git
tag: 63ee65d974e9d20eaaf17a2e83652175988cbb79
tag: d3ab7acd5ede737478763630035aa880f7e34444
source-repository-package
type: git
......@@ -100,4 +105,4 @@ source-repository-package
location: https://gitlab.iscpif.fr/anoe/accelerate-utility.git
tag: 83ada76e78ac10d9559af8ed6bd4064ec81308e4
constraints: unordered-containers==0.2.13.*
\ No newline at end of file
constraints: unordered-containers==0.2.14.*
\ No newline at end of file
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="bootstrap-1.4.0.min.css">
<link rel="stylesheet" href="monitor.css" type="text/css">
<script type="text/javascript" src="jquery-1.6.4.min.js"></script>
<script type="text/javascript" src="jquery.flot.min.js"></script>
<title>ekg</title>
</head>
<body>
<div class="topbar">
<div class="topbar-inner">
<div class="container-fluid">
<span class="brand">ekg</span>
<p class="pull-right">Polling interval:
<select id="updateInterval" class="small">
<option value="100">100 ms</option>
<option value="200">200 ms</option>
<option value="500">500 ms</option>
<option value="1000" selected="selected">1 s</option>
<option value="2000">2 s</option>
<option value="5000">5 s</option>
<option value="10000">10 s</option>
</select> |
<button id="pause-ui" class="btn">Pause UI</button>
</p>
</div>
</div>
</div>
<div class="container">
<div class="row">
<div class="alert-message error fade in hide" data-alert="alert">
<p>Lost connection to server.</p>
</div>
</div>
<div class="row">
<div id="plots" class="span11">
<div id="current-bytes-used-plot" class="plot-container">
<h3>Current residency</h3>
<div class="plot"></div>
</div>
<div id="allocation-rate-plot" class="plot-container">
<h3>Allocation rate</h3>
<div class="plot"></div>
</div>
<div id="productivity-plot" class="plot-container">
<h3>Productivity</h3>
<div class="plot"></div>
</div>
</div>
<div class="span5">
<h3>GC and memory statistics</h3>
<table class="condensed-table">
<thead>
<tr>
<th>Statistic</th>
<th>Value</th>
</tr>
</thead>
<tbody>
<tr>
<td>Maximum residency</td>
<td id="max-bytes-used" class="span3 value">0</td>
</tr>
<tr>
<td>Current residency</td>
<td id="current-bytes-used" class="value">0</td>
</tr>
<tr>
<td>Maximum slop</td>
<td id="max-bytes-slop" class="value">0</td>
</tr>
<tr>
<td>Current slop</td>
<td id="current-bytes-slop" class="value">0</td>
</tr>
<tr>
<td>Productivity (wall clock time)</td>
<td id="productivity-wall" class="value">0</td>
</tr>
<tr>
<td>Productivity (cpu time)</td>
<td id="productivity-cpu" class="value">0</td>
</tr>
<tr>
<td>Allocation rate</td>
<td id="allocation-rate" class="value">0</td>
</tr>
</tbody>
</table>
<h3>Metrics</h3>
<table id="metric-table" class="condensed-table">
<thead>
<tr>
<th class="span4">Name</th>
<th class="span1">Value</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
</div>
</div>
</div>
<script type="text/javascript" src="monitor.js"></script>
</body>
</html>
/**
* Blueprint/flot compatibility
*
* Resets some styles back to the browser default.
*/
.plot table {
width: auto;
border-spacing: 2px;
}
.plot th,
.plot td,
.plot caption {
padding: 0;
}
/**
* Body margin
*/
body {
padding-top: 60px;
}
/**
* Plots
*/
.plot {
width: 600px;
height: 300px;
margin-bottom: 1.5em;
}
.close-button {
float: right;
cursor: pointer;
}
/**
* Table
*/
.value {
text-align: right;
}
.string {
text-align: left;
}
.graph-button {
cursor: pointer;
vertical-align: middle;
}
......@@ -33,6 +33,7 @@ FRAME_VISIO_URL = URL_TO_CHANGE
FRAME_SEARX_URL = URL_TO_CHANGE
FRAME_ISTEX_URL = URL_TO_CHANGE
MAX_DOCS_PARSERS = 1000000
MAX_DOCS_SCRAPERS = 10000
[server]
......
......@@ -26,6 +26,8 @@ rec {
blas
gfortran7
# gfortran7.cc.lib
expat
icu
];
libPaths = pkgs.lib.makeLibraryPath nonhsBuildInputs;
shellHook = ''
......
name: gargantext
version: '0.0.4.3'
version: '0.0.4.6'
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -25,6 +25,15 @@ default-extensions:
- OverloadedStrings
- RankNTypes
- RecordWildCards
data-files:
- ekg-assets/index.html
- ekg-assets/monitor.js
- ekg-assets/monitor.css
- ekg-assets/jquery.flot.min.js
- ekg-assets/jquery-1.6.4.min.js
- ekg-assets/bootstrap-1.4.0.min.css
- ekg-assets/chart_line_add.png
- ekg-assets/cross.png
library:
source-dirs: src
ghc-options:
......@@ -137,6 +146,8 @@ library:
- deepseq
- directory
- duckling
- ekg-core
- ekg-json
- exceptions
- fast-logger
- fclabels
......@@ -206,6 +217,7 @@ library:
- servant-blaze
- servant-cassava
- servant-client
- servant-ekg
- servant-job
- servant-mock
- servant-multipart
......@@ -259,6 +271,7 @@ executables:
- -rtsopts
- -threaded
- -with-rtsopts=-N
- -with-rtsopts=-T
- -fprof-auto
dependencies:
- base
......
......@@ -27,7 +27,7 @@ Pouillard (who mainly made it).
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API
where
......@@ -43,6 +43,7 @@ import GHC.Generics (Generic)
import Gargantext.API.Admin.Auth.Types (AuthContext)
import Gargantext.API.Admin.Settings (newEnv)
import Gargantext.API.Admin.Types (FireWall(..), PortNumber, cookieSettings, jwtSettings, settings)
import Gargantext.API.EKG
import Gargantext.API.Ngrams (saveNodeStory)
import Gargantext.API.Prelude
import Gargantext.API.Routes
......@@ -54,11 +55,11 @@ import Network.Wai
import Network.Wai.Handler.Warp hiding (defaultSettings)
import Network.Wai.Middleware.Cors
import Network.Wai.Middleware.RequestLogger
import Paths_gargantext (getDataDir)
import Servant
import System.IO (FilePath)
import System.FilePath
data Mode = Dev | Mock | Prod
data Mode = Dev | Mock | Prod
deriving (Show, Read, Generic)
-- | startGargantext takes as parameters port number and Ini file.
......@@ -191,8 +192,15 @@ serverGargAdminAPI = roots
--gargMock :: Server GargAPI
--gargMock = mock apiGarg Proxy
---------------------------------------------------------------------
makeApp :: EnvC env => env -> IO Application
makeApp env = serveWithContext api cfg <$> server env
makeApp env = do
serv <- server env
(ekgStore, ekgMid) <- newEkgStore api
ekgDir <- (</> "ekg-assets") <$> getDataDir
return $ ekgMid $ serveWithContext apiWithEkg cfg
(ekgServer ekgDir ekgStore :<|> serv)
where
cfg :: Servant.Context AuthContext
cfg = env ^. settings . jwtSettings
......@@ -206,6 +214,9 @@ makeApp env = serveWithContext api cfg <$> server env
api :: Proxy API
api = Proxy
apiWithEkg :: Proxy (EkgAPI :<|> API)
apiWithEkg = Proxy
apiGarg :: Proxy GargAPI
apiGarg = Proxy
---------------------------------------------------------------------
......
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.API.EKG where
import Data.HashMap.Strict as HM
import Data.Text as T
import Data.Text.IO as T
import Data.Time.Clock.POSIX (getPOSIXTime)
import Network.Wai
import Protolude
import Servant
import Servant.Auth
import Servant.Ekg
import System.Metrics
import qualified System.Metrics.Json as J
-- Mimics https://github.com/tibbe/ekg/blob/master/System/Remote/Snap.hs#L98
type EkgAPI =
"ekg" :>
( "api" :>
( Get '[JSON] J.Sample :<|>
CaptureAll "segments" Text :> Get '[JSON] J.Value
) :<|>
Raw
)
ekgServer :: FilePath -> Store -> Server EkgAPI
ekgServer assetsDir store = (getAll :<|> getOne) :<|> serveDirectoryFileServer assetsDir
where getAll = J.Sample <$> liftIO (sampleAll store)
getOne segments = do
let metric = T.intercalate "." segments
metrics <- liftIO (sampleAll store)
maybe (liftIO (T.putStrLn "not found boohoo") >> throwError err404) (return . J.Value) (HM.lookup metric metrics)
newEkgStore :: HasEndpoint api => Proxy api -> IO (Store, Middleware)
newEkgStore api = do
s <- newStore
registerGcMetrics s
registerCounter "ekg.server_timestamp_ms" getTimeMs s -- used by UI
mid <- monitorEndpoints api s
return (s, mid)
where getTimeMs = (round . (* 1000)) `fmap` getPOSIXTime
instance HasEndpoint api => HasEndpoint (Auth xs a :> api) where
getEndpoint _ = getEndpoint (Proxy :: Proxy api)
enumerateEndpoints _ = enumerateEndpoints (Proxy :: Proxy api)
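
The EKG wiring above exposes the bundled dashboard plus the GC metrics registered by newEkgStore. As a minimal sketch (the metric name and helper below are hypothetical, not part of this commit), an application-level counter could be registered on the same Store with ekg-core before the store is handed to ekgServer:

import qualified System.Metrics         as Metrics
import qualified System.Metrics.Counter as Counter

-- Assumed metric name; register it on the Store returned by newEkgStore,
-- before that store is passed to ekgServer.
registerUploadsCounter :: Metrics.Store -> IO Counter.Counter
registerUploadsCounter store = Metrics.createCounter "garg.uploads" store

-- Later, inside a handler:
--   liftIO (Counter.inc uploadsCounter)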
......@@ -3,6 +3,7 @@ module Gargantext.API.Job where
import Control.Lens (over, _Just)
import Data.IORef
import Data.Maybe
import qualified Data.Text as T
import Gargantext.Prelude
......@@ -16,6 +17,14 @@ jobLogInit rem =
, _scst_failed = Just 0
, _scst_events = Just [] }
addEvent :: T.Text -> T.Text -> JobLog -> JobLog
addEvent level message (JobLog { _scst_events = mEvts, .. }) = JobLog { _scst_events = Just (evts <> [ newEvt ]), .. }
where
evts = fromMaybe [] mEvts
newEvt = ScraperEvent { _scev_message = Just message
, _scev_level = Just level
, _scev_date = Nothing }
jobLogSuccess :: JobLog -> JobLog
jobLogSuccess jl = over (scst_succeeded . _Just) (+ 1) $
over (scst_remaining . _Just) (\x -> x - 1) jl
......@@ -38,6 +47,9 @@ jobLogFailTotal (JobLog { _scst_succeeded = mSucc
Nothing -> (Nothing, mFail)
Just rem -> (Just 0, (+ rem) <$> mFail)
jobLogFailTotalWithMessage :: T.Text -> JobLog -> JobLog
jobLogFailTotalWithMessage message jl = addEvent "ERROR" message $ jobLogFailTotal jl
jobLogEvt :: JobLog -> ScraperEvent -> JobLog
jobLogEvt jl evt = over (scst_events . _Just) (\evts -> (evt:evts)) jl
......
......@@ -285,7 +285,7 @@ updatePie cId maybeListId tabType maybeLimit = do
pure ()
updatePie' :: FlowCmdM env err m =>
CorpusId
CorpusId
-> Maybe ListId
-> TabType
-> Maybe Limit
......
......@@ -46,7 +46,6 @@ import Gargantext.API.Metrics
import Gargantext.API.Ngrams (TableNgramsApi, apiNgramsTableCorpus)
import Gargantext.API.Ngrams.Types (TabType(..))
import Gargantext.API.Node.File
import Gargantext.API.Node.FrameCalcUpload (FrameCalcUploadAPI, frameCalcUploadAPI)
import Gargantext.API.Node.New
import Gargantext.API.Prelude
import Gargantext.API.Table
......@@ -69,6 +68,8 @@ import Gargantext.Database.Query.Tree (tree, TreeMode(..))
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo.Legacy.LegacyAPI (PhyloAPI, phyloAPI)
import qualified Gargantext.API.Node.DocumentsFromWriteNodes as DocumentsFromWriteNodes
import qualified Gargantext.API.Node.DocumentUpload as DocumentUpload
import qualified Gargantext.API.Node.FrameCalcUpload as FrameCalcUpload
import qualified Gargantext.API.Node.Share as Share
import qualified Gargantext.API.Node.Update as Update
import qualified Gargantext.API.Search as Search
......@@ -125,7 +126,7 @@ type NodeAPI a = Get '[JSON] (Node a)
:<|> "rename" :> RenameApi
:<|> PostNodeApi -- TODO move to children POST
:<|> PostNodeAsync
:<|> FrameCalcUploadAPI
:<|> FrameCalcUpload.API
:<|> ReqBody '[JSON] a :> Put '[JSON] Int
:<|> "update" :> Update.API
:<|> Delete '[JSON] Int
......@@ -159,6 +160,7 @@ type NodeAPI a = Get '[JSON] (Node a)
:<|> "async" :> FileAsyncApi
:<|> "documents-from-write-nodes" :> DocumentsFromWriteNodes.API
:<|> DocumentUpload.API
-- TODO-ACCESS: check userId CanRenameNode nodeId
-- TODO-EVENTS: NodeRenamed RenameNode or re-use some more general NodeEdited...
......@@ -210,7 +212,7 @@ nodeAPI p uId id' = withAccess (Proxy :: Proxy (NodeAPI a)) Proxy uId (PathNode
:<|> rename id'
:<|> postNode uId id'
:<|> postNodeAsyncAPI uId id'
:<|> frameCalcUploadAPI uId id'
:<|> FrameCalcUpload.api uId id'
:<|> putNode id'
:<|> Update.api uId id'
:<|> Action.deleteNode (RootId $ NodeId uId) id'
......@@ -244,6 +246,7 @@ nodeAPI p uId id' = withAccess (Proxy :: Proxy (NodeAPI a)) Proxy uId (PathNode
:<|> fileAsyncApi uId id'
:<|> DocumentsFromWriteNodes.api uId id'
:<|> DocumentUpload.api uId id'
------------------------------------------------------------------------
......
......@@ -21,25 +21,25 @@ module Gargantext.API.Node.Corpus.New
import Control.Lens hiding (elements, Empty)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import qualified Data.ByteString.Base64 as BSB64
import Data.Either
import Data.Maybe (fromMaybe)
import Data.Swagger
import Data.Text (Text)
import qualified Data.Text as T
import GHC.Generics (Generic)
import qualified Prelude as Prelude
import Protolude (readFile)
import Servant
import Servant.Job.Utils (jsonOptions)
-- import Servant.Multipart
import qualified Data.Text.Encoding as TE
-- import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.Prelude
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs, ScraperEvent(..), scst_events)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotal)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotal, jobLogFailTotalWithMessage)
import Gargantext.API.Node.Corpus.New.File
import Gargantext.API.Node.Corpus.Searx
import Gargantext.API.Node.Corpus.Types
......@@ -57,11 +57,12 @@ import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), UserId)
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (getNodeWith)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Node (node_hyperdata)
import qualified Gargantext.Database.GargDB as GargDB
import Gargantext.Prelude.Config (gc_max_docs_parsers)
------------------------------------------------------------------------
{-
data Query = Query { query_query :: Text
......@@ -240,7 +241,7 @@ type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] NewWithForm JobLog
addToCorpusWithForm :: FlowCmdM env err m
addToCorpusWithForm :: (FlowCmdM env err m)
=> User
-> CorpusId
-> NewWithForm
......@@ -258,12 +259,33 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
WOS -> Parser.parseFormat Parser.WOS
PresseRIS -> Parser.parseFormat Parser.RisPresse
ZIP -> Parser.parseFormat Parser.ZIP
-- TODO granularity of the logStatus
eDocs <- liftBase $ parse $ cs d
let data' = case ft of
ZIP -> case BSB64.decode $ TE.encodeUtf8 d of
Left err -> panic $ T.pack "[addToCorpusWithForm] error decoding base64: " <> T.pack err
Right decoded -> decoded
_ -> cs d
eDocs <- liftBase $ parse data'
case eDocs of
Right docs' -> do
let docs = splitEvery 500 $ take 1000000 docs'
-- TODO Add progress (jobStatus) update for docs - this is a
-- long action
limit' <- view $ hasConfig . gc_max_docs_parsers
let limit = fromIntegral limit'
if length docs' > limit then do
printDebug "[addToCorpusWithForm] number of docs exceeds the limit" (show $ length docs')
let panicMsg' = [ "[addToCorpusWithForm] number of docs ("
, show $ length docs'
, ") exceeds the MAX_DOCS_PARSERS limit ("
, show limit
, ")" ]
let panicMsg = T.concat $ T.pack <$> panicMsg'
logStatus $ jobLogFailTotalWithMessage panicMsg jobLog
panic panicMsg
else
pure ()
let docs = splitEvery 500 $ take limit docs'
printDebug "Parsing corpus finished : " cid
logStatus jobLog2
......@@ -283,20 +305,19 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
logStatus jobLog3
pure $ jobLog3
Left e -> do
printDebug "Error" e
printDebug "[addToCorpusWithForm] parse error" e
logStatus jobLogE
let evt = ScraperEvent { _scev_message = Just $ T.pack e
, _scev_level = Just "ERROR"
, _scev_date = Nothing }
logStatus $ over (scst_events . _Just) (\evt' -> evt' <> [evt]) jobLogE
pure jobLogE
where
jobLog2 = jobLogSuccess jobLog
jobLog3 = jobLogSuccess jobLog2
jobLogE = jobLogFailTotal jobLog
parseCsvGargV3Path :: [Char] -> IO (Either Prelude.String [HyperdataDocument])
parseCsvGargV3Path fp = do
contents <- readFile fp
Parser.parseFormat Parser.CsvGargV3 $ cs contents
{-
addToCorpusWithFile :: FlowCmdM env err m
=> CorpusId
......
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.DocumentUpload where
import Control.Lens (makeLenses, view)
import Data.Aeson
import Data.Swagger (ToSchema)
import qualified Data.Text as T
import Data.Time.Clock
import Data.Time.Calendar
import GHC.Generics (Generic)
import Servant
import Servant.Job.Async
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Job (jobLogSuccess)
import Gargantext.API.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType')
import Gargantext.Prelude
data DocumentUpload = DocumentUpload
{ _du_abstract :: T.Text
, _du_authors :: T.Text
, _du_sources :: T.Text
, _du_title :: T.Text }
deriving (Generic)
$(makeLenses ''DocumentUpload)
instance ToSchema DocumentUpload
instance FromJSON DocumentUpload
where
parseJSON = genericParseJSON
( defaultOptions { sumEncoding = ObjectWithSingleField
, fieldLabelModifier = unCapitalize . dropPrefix "_du_"
, omitNothingFields = True
}
)
instance ToJSON DocumentUpload
where
toJSON = genericToJSON
( defaultOptions { sumEncoding = ObjectWithSingleField
, fieldLabelModifier = unCapitalize . dropPrefix "_du_"
, omitNothingFields = True
}
)
type API = Summary " Document upload"
:> "document"
:> "upload"
:> "async"
:> AsyncJobs JobLog '[JSON] DocumentUpload JobLog
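
Given the aeson options above (unCapitalize . dropPrefix "_du_"), the async job expects a flat JSON object with abstract/authors/sources/title keys. A sketch of such a payload, with placeholder values, assuming the record constructor is used directly:

-- Sketch only: shows the wire format implied by the FromJSON/ToJSON options.
exampleUpload :: DocumentUpload
exampleUpload = DocumentUpload
  { _du_abstract = "An abstract"   -- placeholder values
  , _du_authors  = "Doe, Jane"
  , _du_sources  = "Some source"
  , _du_title    = "A title"
  }

-- Data.Aeson.encode exampleUpload
--   ==> {"abstract":"An abstract","authors":"Doe, Jane","sources":"Some source","title":"A title"}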
api :: UserId -> NodeId -> GargServer API
api uId nId =
serveJobsAPI $
JobFunction (\q log' -> do
documentUpload uId nId q (liftBase . log')
)
documentUpload :: (FlowCmdM env err m)
=> UserId
-> NodeId
-> DocumentUpload
-> (JobLog -> m ())
-> m JobLog
documentUpload uId nId doc logStatus = do
let jl = JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just [] }
logStatus jl
mcId <- getClosestParentIdByType' nId NodeCorpus
let cId = case mcId of
Just c -> c
Nothing -> panic $ T.pack $ "[G.A.N.DU] Node has no corpus parent: " <> show nId
(year, month, day) <- liftBase $ getCurrentTime >>= return . toGregorian . utctDay
let nowS = T.pack $ show year <> "-" <> show month <> "-" <> show day
let hd = HyperdataDocument { _hd_bdd = Nothing
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ view du_title doc
, _hd_authors = Just $ view du_authors doc
, _hd_institutes = Nothing
, _hd_source = Just $ view du_sources doc
, _hd_abstract = Just $ view du_abstract doc
, _hd_publication_date = Just nowS
, _hd_publication_year = Just $ fromIntegral year
, _hd_publication_month = Just month
, _hd_publication_day = Just day
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ T.pack $ show EN }
_ <- flowDataText (RootId (NodeId uId)) (DataNew [[hd]]) (Multi EN) cId Nothing
pure $ jobLogSuccess jl
......@@ -12,7 +12,6 @@ Portability : POSIX
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.API.Node.DocumentsFromWriteNodes
where
......@@ -24,6 +23,7 @@ import Data.Swagger
import qualified Data.Text as T
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
......@@ -71,15 +71,20 @@ documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
-> (JobLog -> m ())
-> m JobLog
documentsFromWriteNodes uId nId _p logStatus = do
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
let jobLog = JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
logStatus jobLog
mcId <- getClosestParentIdByType' nId NodeCorpus
let cId = maybe (panic "[G.A.N.DFWN] Node has no parent") identity mcId
cId <- case mcId of
Just cId -> pure cId
Nothing -> do
let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
logStatus $ jobLogFailTotalWithMessage msg jobLog
panic msg
frameWriteIds <- getChildrenByType nId NodeFrameWrite
......@@ -97,11 +102,7 @@ documentsFromWriteNodes uId nId _p logStatus = do
_ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
pure $ jobLogSuccess jobLog
------------------------------------------------------------------------
hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
......
......@@ -27,6 +27,7 @@ import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (HasConfig)
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
......@@ -39,21 +40,21 @@ instance FromJSON FrameCalcUpload
instance ToJSON FrameCalcUpload
instance ToSchema FrameCalcUpload
type FrameCalcUploadAPI = Summary " FrameCalc upload"
:> "add"
:> "framecalc"
:> "async"
:> AsyncJobs JobLog '[JSON] FrameCalcUpload JobLog
type API = Summary " FrameCalc upload"
:> "add"
:> "framecalc"
:> "async"
:> AsyncJobs JobLog '[JSON] FrameCalcUpload JobLog
frameCalcUploadAPI :: UserId -> NodeId -> GargServer FrameCalcUploadAPI
frameCalcUploadAPI uId nId =
api :: UserId -> NodeId -> GargServer API
api uId nId =
serveJobsAPI $
JobFunction (\p logs ->
frameCalcUploadAsync uId nId p (liftBase . logs) (jobLogInit 5)
)
frameCalcUploadAsync :: FlowCmdM env err m
frameCalcUploadAsync :: (HasConfig env, FlowCmdM env err m)
=> UserId
-> NodeId
-> FrameCalcUpload
......
......@@ -24,7 +24,7 @@ import Gargantext.API.Node.Corpus.New.File (FileType)
-------------------------------------------------------
data NewWithForm = NewWithForm
{ _wf_filetype :: !FileType
, _wf_data :: !Text
, _wf_data :: !Text -- NOTE for binary files, this represents base-64 data
, _wf_lang :: !(Maybe Lang)
, _wf_name :: !Text
} deriving (Eq, Show, Generic)
......
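
Since addToCorpusWithForm now base64-decodes _wf_data for ZIP uploads, a client would encode binary content before building a NewWithForm. A minimal sketch mirroring the decode path (BSB64.decode . TE.encodeUtf8); the helper name is hypothetical:

import qualified Data.ByteString        as BS
import qualified Data.ByteString.Base64 as BSB64
import           Data.Text (Text)
import qualified Data.Text.Encoding     as TE

-- Encode a binary file (e.g. a ZIP archive) into the Text expected in _wf_data.
zipToWfData :: FilePath -> IO Text
zipToWfData fp = TE.decodeUtf8 . BSB64.encode <$> BS.readFile fp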
......@@ -23,17 +23,19 @@ import Data.Swagger
import GHC.Generics (Generic)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import qualified Gargantext.API.Metrics as Metrics
import Gargantext.API.Ngrams.List (reIndexWith)
import qualified Gargantext.API.Ngrams.Types as NgramsTypes
import Gargantext.API.Prelude (GargServer, simuLogs)
import Gargantext.Core.Methods.Distances (GraphMetric(..))
import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Schema.Node (node_parent_id)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Schema.Node (node_parent_id)
import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Prelude (Ord, Eq, (<$>), ($), liftBase, (.), printDebug, pure, show, cs, (<>), panic)
import qualified Gargantext.Utils.Aeson as GUA
import Prelude (Enum, Bounded, minBound, maxBound)
......@@ -119,6 +121,35 @@ updateNode _uId nid1 (LinkNodeReq nt nid2) logStatus = do
, _scst_events = Just []
}
-- | `Advanced` to update graphs
updateNode _uId lId (UpdateNodeParamsList Advanced) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 2
, _scst_events = Just []
}
corpusId <- view node_parent_id <$> getNode lId
logStatus JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
_ <- case corpusId of
Just cId -> do
_ <- Metrics.updatePie' cId (Just lId) NgramsTypes.Authors Nothing
_ <- Metrics.updateTree' cId (Just lId) NgramsTypes.Institutes MapTerm
_ <- Metrics.updatePie' cId (Just lId) NgramsTypes.Sources Nothing
pure ()
Nothing -> pure ()
pure JobLog { _scst_succeeded = Just 3
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
......
{-# OPTIONS_GHC -freduction-depth=400 #-}
{-|
Module : Gargantext.API.Swagger
Description : Swagger API generation
......
......@@ -7,6 +7,9 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
A Node Story is a Map between NodeId and an Archive (with state,
version and history) for that node.
TODO:
- remove
- filter
......
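
As a rough illustration of that description only (names are invented and the module's actual definitions may differ), the shape amounts to a map from node id to a versioned archive:

-- Illustrative shape only, paraphrasing the module header above.
import Data.Map.Strict (Map)

type NodeIdSketch = Int                 -- stands in for the real NodeId

data ArchiveSketch state patch = ArchiveSketch
  { archiveState   :: state             -- current state
  , archiveVersion :: Int               -- version
  , archiveHistory :: [patch]           -- history of applied patches
  }

newtype NodeStorySketch state patch
  = NodeStorySketch (Map NodeIdSketch (ArchiveSketch state patch))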
......@@ -25,7 +25,8 @@ module Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), clean, parseFile, cl
import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Monad (join)
import Control.Monad (join, sequence)
import Control.Monad.IO.Class (liftIO)
import Data.Attoparsec.ByteString (parseOnly, Parser)
import Data.Either(Either(..))
import Data.Either.Extra (partitionEithers)
......@@ -43,6 +44,7 @@ import qualified Data.ByteString.Lazy as DBL
import qualified Data.Map as DM
import qualified Data.Text as DT
import qualified Prelude as Prelude
import System.IO.Temp (emptySystemTempFile)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
......@@ -95,9 +97,14 @@ parseFormat WOS bs = do
$ partitionEithers
$ [runParser' WOS bs]
pure $ Right docs
parseFormat ZIP _bs = do
printDebug "[parseFormat]" ZIP
pure $ Left "Not implemented for ZIP"
parseFormat ZIP bs = do
path <- emptySystemTempFile "parsed.zip"
DB.writeFile path bs
withArchive path $ do
files <- DM.keys <$> getEntries
filesContents <- mapM getEntry files
ddocs <- liftIO $ mapM (parseFormat CsvGargV3) filesContents
pure $ concat <$> sequence ddocs
parseFormat _ _ = undefined
-- | Parse file into documents
......
......@@ -178,7 +178,7 @@ datePrefixP = do
dateP :: Parser Date
dateP = try datePrefixP
*> dateISOP
-- *> many (noneOf "\n")
-- *> many (noneOf "\n")
dateISOP :: Parser Date
dateISOP = do
......
......@@ -69,7 +69,11 @@ chartData cId nt lt = do
(_total,mapTerms) <- countNodesByNgramsWith (group dico)
<$> getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms
let (dates, count) = V.unzip $ fmap (\(NgramsTerm t,(d,_)) -> (t, d)) $ V.fromList $ HashMap.toList mapTerms
let (dates, count) = V.unzip $
V.fromList $
List.sortOn snd $
(\(NgramsTerm t,(d,_)) -> (t, d)) <$>
HashMap.toList mapTerms
pure (Histo dates (round <$> count))
......
......@@ -33,6 +33,7 @@ import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.GEXF ()
import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph)
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node
......@@ -78,7 +79,11 @@ graphAPI u n = getGraph u n
:<|> graphVersionsAPI u n
------------------------------------------------------------------------
getGraph :: UserId -> NodeId -> GargNoServer HyperdataGraphAPI
--getGraph :: UserId -> NodeId -> GargServer HyperdataGraphAPI
getGraph :: FlowCmdM env err m
=> UserId
-> NodeId
-> m HyperdataGraphAPI
getGraph _uId nId = do
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
......@@ -109,7 +114,12 @@ getGraph _uId nId = do
HyperdataGraphAPI graph' camera
recomputeGraph :: UserId -> NodeId -> Maybe GraphMetric -> GargNoServer Graph
--recomputeGraph :: UserId -> NodeId -> Maybe GraphMetric -> GargNoServer Graph
recomputeGraph :: FlowCmdM env err m
=> UserId
-> NodeId
-> Maybe GraphMetric
-> m Graph
recomputeGraph _uId nId maybeDistance = do
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
let
......@@ -122,7 +132,7 @@ recomputeGraph _uId nId maybeDistance = do
_ -> maybeDistance
let
cId = maybe (panic "[G.V.G.API.recomputeGraph] Node has no parent")
cId = maybe (panic "[G.C.V.G.API.recomputeGraph] Node has no parent")
identity
$ nodeGraph ^. node_parent_id
similarity = case graphMetric of
......@@ -151,12 +161,18 @@ recomputeGraph _uId nId maybeDistance = do
-- TODO use Database Monad only here ?
computeGraph :: HasNodeError err
--computeGraph :: HasNodeError err
-- => CorpusId
-- -> Distance
-- -> NgramsType
-- -> NodeListStory
-- -> Cmd err Graph
computeGraph :: FlowCmdM env err m
=> CorpusId
-> Distance
-> NgramsType
-> NodeListStory
-> Cmd err Graph
-> m Graph
computeGraph cId d nt repo = do
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster
......@@ -172,9 +188,11 @@ computeGraph cId d nt repo = do
-- printDebug "myCooc" myCooc
-- saveAsFileDebug "debug/my-cooc" myCooc
listNgrams <- getListNgrams [lId] nt
graph <- liftBase $ cooc2graphWith Spinglass d 0 myCooc
-- saveAsFileDebug "debug/graph" graph
pure graph
pure $ mergeGraphNgrams graph (Just listNgrams)
defaultGraphMetadata :: HasNodeError err
......@@ -214,10 +232,15 @@ graphAsync u n =
JobFunction (\_ log' -> graphRecompute u n (liftBase . log'))
graphRecompute :: UserId
--graphRecompute :: UserId
-- -> NodeId
-- -> (JobLog -> GargNoServer ())
-- -> GargNoServer JobLog
graphRecompute :: FlowCmdM env err m
=> UserId
-> NodeId
-> (JobLog -> GargNoServer ())
-> GargNoServer JobLog
-> (JobLog -> m ())
-> m JobLog
graphRecompute u n logStatus = do
logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
......@@ -274,7 +297,11 @@ graphVersions n nId = do
pure $ GraphVersions { gv_graph = listVersion
, gv_repo = v }
recomputeVersions :: UserId -> NodeId -> GargNoServer Graph
--recomputeVersions :: UserId -> NodeId -> GargNoServer Graph
recomputeVersions :: FlowCmdM env err m
=> UserId
-> NodeId
-> m Graph
recomputeVersions uId nId = recomputeGraph uId nId Nothing
------------------------------------------------------------
......@@ -300,9 +327,13 @@ graphClone uId pId (HyperdataGraphAPI { _hyperdataAPIGraph = graph
pure nId
------------------------------------------------------------
getGraphGexf :: UserId
--getGraphGexf :: UserId
-- -> NodeId
-- -> GargNoServer (Headers '[Servant.Header "Content-Disposition" Text] Graph)
getGraphGexf :: FlowCmdM env err m
=> UserId
-> NodeId
-> GargNoServer (Headers '[Servant.Header "Content-Disposition" Text] Graph)
-> m (Headers '[Servant.Header "Content-Disposition" Text] Graph)
getGraphGexf uId nId = do
HyperdataGraphAPI { _hyperdataAPIGraph = graph } <- getGraph uId nId
pure $ addHeader "attachment; filename=graph.gexf" graph
......
......@@ -205,21 +205,24 @@ data2graph :: ToComId a
-> Map (Int, Int) Double
-> [a]
-> Graph
data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing
data2graph labels coocs bridge conf partitions = Graph { _graph_nodes = nodes
, _graph_edges = edges
, _graph_metadata = Nothing }
where
community_id_by_node_id = Map.fromList $ map nodeId2comId partitions
nodes = map (setCoord ForceAtlas labels bridge)
[ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
, node_type = Terms -- or Unknown
, node_id = cs (show n)
, node_label = l
, node_x_coord = 0
, node_y_coord = 0
, node_attributes =
Attributes { clust_default = maybe 0 identity
(Map.lookup n community_id_by_node_id) } }
, node_type = Terms -- or Unknown
, node_id = cs (show n)
, node_label = l
, node_x_coord = 0
, node_y_coord = 0
, node_attributes =
Attributes { clust_default = maybe 0 identity
(Map.lookup n community_id_by_node_id) }
, node_children = [] }
)
| (l, n) <- labels
, Set.member n $ Set.fromList
......
......@@ -360,7 +360,7 @@ viewDocuments cId t ntId mQuery = proc () -> do
restrict -< if query == ""
then pgBool True
--else (n^.ns_search) @@ (pgTSQuery (T.unpack query))
else (n^.ns_search) @@ (toTSQuery $ T.unpack query)
else (n^.ns_search) @@ (plaintoTSQuery $ T.unpack query)
returnA -< FacetDoc (_ns_id n)
(_ns_date n)
......
......@@ -11,6 +11,7 @@ packages:
docker:
enable: false
#enable: true
repo: 'cgenie/stack-build:lts-18.12-garg'
run-args:
- '--publish=8008:8008'
......@@ -26,8 +27,9 @@ allow-newer: true
# "$everything": -haddock
extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit: 3e32ec3aca71eb326805355d3a99b9288dc342ee
- #git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
git: https://gitlab.iscpif.fr/cgenie/haskell-gargantext-prelude.git
commit: 35b09629a658fc16cc9ff63e7591e58511cd98a7
# Data Mining Libs
- git: https://github.com/delanoe/data-time-segment.git
commit: 10a416b9f6c443866b36479c3441ebb3bcdeb7ef
......@@ -44,7 +46,7 @@ extra-deps:
# Databases libs
- git: https://github.com/delanoe/haskell-opaleye.git
commit: 806da7f9fb6fe1032f51c1822fc224b281cdd84f
commit: d3ab7acd5ede737478763630035aa880f7e34444
- git: https://github.com/delanoe/hsparql.git
commit: 2acbbc55ac9bbd4bf1a713c586b8b8e8b82892eb
- git: https://github.com/robstewart57/rdf4h.git
......@@ -119,3 +121,4 @@ extra-deps:
- taggy-lens-0.1.2@sha256:091ca81d02bd3d7fb493dce0148e1a38f25eb178a1ebd751043a23239e5e3265,3009
- taggy-0.2.1@sha256:7bc55ddba178971dc6052163597f0445a0a2b5b0ca0e84ce651d53d722e3c265,4662
- servant-ekg-0.3.1@sha256:19bd9dc3943983da8e79d6f607614c68faea4054fb889d508c8a2b67b6bdd448,2203