Commit 802e0cdf authored by Grégoire Locqueville's avatar Grégoire Locqueville

Remove useless code, dependencies, warnings

- Re-enable orphan warnings that were manually disabled in many modules
- Remove non-Haskell files that were just sitting in the source tree
- Remove modules that were not called from anywhere
- Remove unused dependencies

This is not exhaustive by any means. In particular, some more weeding out
can be achieved by looking at individual functions and even branches of
functions. Weeder can help with that.
parent 0b5ce744
Pipeline #6611 passed with stages
in 76 minutes and 30 seconds
import Prelude (IO, id, (.))
import Data.Aeson (encode)
import Codec.Serialise (deserialise)
import qualified Data.ByteString.Lazy as L
import Gargantext.Core.NodeStory (NodeListStory)
main :: IO ()
main = L.interact (encode . (id :: NodeListStory -> NodeListStory) . deserialise)
{-|
Module : CleanCsvCorpus.hs
Description : Gargantext starter
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Given a Gargantext CSV File and its Query This script cleans and
compress the contexts around the main terms of the query.
-}
module CLI.CleanCsvCorpus where
import Data.SearchEngine qualified as S
import Data.Set qualified as S
import Data.Text (pack)
import Data.Vector (Vector)
import Data.Vector qualified as V
import Gargantext.Core.Text.Corpus.Parsers.TSV qualified as TSV
import Gargantext.Core.Text.Search
import Gargantext.Prelude
------------------------------------------------------------------------
type Query = [S.Term]
filterDocs :: [DocId] -> Vector TSV.TsvGargV3 -> Vector TSV.TsvGargV3
filterDocs docIds = V.filter (\doc -> S.member (TSV.d_docId doc) $ S.fromList docIds )
main :: IO ()
main = do
let rPath = "/tmp/Gargantext_Corpus.csv"
let wPath = "/tmp/Gargantext_Corpus_bis.csv"
--let q = ["water", "scarcity", "morocco", "shortage","flood"]
let q = ["gratuit", "gratuité", "culture", "culturel"]
eDocs <- TSV.readTSVFile rPath
case eDocs of
Right (h, tsvDocs) -> do
putStrLn ("Number of documents before:" <> show (V.length tsvDocs) :: Text)
putStrLn ("Mean size of docs:" <> show ( TSV.docsSize tsvDocs) :: Text)
let docs = TSV.toDocs tsvDocs
let engine = S.insertDocs docs initialDocSearchEngine
let docIds = S.query engine (map pack q)
let docs' = TSV.fromDocs $ filterDocs docIds (V.fromList docs)
putStrLn ("Number of documents after:" <> show (V.length docs') :: Text)
putStrLn ("Mean size of docs:" <> show (TSV.docsSize docs') :: Text)
TSV.writeFile wPath (h, docs')
Left e -> panicTrace $ "Error: " <> e
module CLI.Utils (
mapMP
, mapConcurrentlyChunked
) where
import Control.Concurrent.Async as CCA (mapConcurrently)
import Data.List.Split (chunksOf)
import Gargantext.Prelude
import System.IO (hFlush)
------------------------------------------------------------------------
-- | Tools
mapMP :: MonadIO m => (a -> m b) -> [a] -> m [b]
mapMP f xs = do
bs <- zipWithM g (cycle "-\\|/") xs
liftIO $ hPutStr stderr ("\rDone\n" :: Text)
pure bs
where
g c x = do
liftIO $ hPutStr stderr ['\r',c]
liftIO $ hFlush stderr
f x
-- | Optimi that need further developments (not used yet)
mapConcurrentlyChunked :: (a -> IO b) -> [a] -> IO [b]
mapConcurrentlyChunked f ts = do
caps <- getNumCapabilities
let n = 1 `max` (length ts `div` caps)
concat <$> mapConcurrently (mapM f) (chunksOf n ts)
......@@ -19,7 +19,7 @@ fi
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in.
expected_cabal_project_hash="ed85064669c844e43ebc723ed707b7d320d2133dd5d93d3750257e368d7fe254"
expected_cabal_project_freeze_hash="50f3ccea242400c48bd9cec7286bd07c8223c87c043e09576dd5fef0949f982a"
expected_cabal_project_freeze_hash="60f10a3fab634a95294568d09926b258d8976027532304460ff1a9f9d1c10fdd"
......
......@@ -20,7 +20,6 @@ constraints: any.Cabal ==3.8.1.0,
any.SHA ==1.6.4.4,
SHA -exe,
any.StateVar ==1.2.2,
any.Unique ==0.4.7.8,
any.abstract-deque ==0.3,
abstract-deque -usecas,
any.accelerate ==1.3.0.0,
......@@ -79,7 +78,6 @@ constraints: any.Cabal ==3.8.1.0,
any.blaze-builder ==0.4.2.3,
any.blaze-html ==0.9.1.2,
any.blaze-markup ==0.8.3.0,
any.blaze-svg ==0.3.7,
any.boolexpr ==0.3,
any.boring ==0.2.1,
boring +tagged,
......@@ -131,8 +129,6 @@ constraints: any.Cabal ==3.8.1.0,
any.contravariant ==1.5.5,
contravariant +semigroups +statevar +tagged,
any.cookie ==0.4.6,
any.cpphs ==1.20.9.1,
cpphs -old-locale,
any.cprng-aes ==0.6.1,
any.crawlerArxiv ==0.1.0.0,
any.crawlerHAL ==0.1.0.0,
......@@ -169,7 +165,6 @@ constraints: any.Cabal ==3.8.1.0,
any.deepseq ==1.4.8.0,
any.deferred-folds ==0.9.18.6,
any.dense-linear-algebra ==0.1.0.0,
any.deriving-aeson ==0.2.9,
any.digest ==0.0.1.7,
digest +pkg-config,
any.directory ==1.3.7.1,
......@@ -197,14 +192,11 @@ constraints: any.Cabal ==3.8.1.0,
any.extra ==1.7.14,
any.fail ==4.9.0.0,
any.fast-logger ==3.2.2,
any.fclabels ==2.0.5.1,
any.fgl ==5.8.0.0,
fgl +containers042,
any.file-embed ==0.0.15.0,
any.file-embed-lzma ==0.0.1,
any.filelock ==0.1.1.7,
any.filepath ==1.4.2.2,
any.filepattern ==0.1.3,
any.fmt ==0.6.3.0,
any.focus ==1.0.3.2,
any.foldable1-classes-compat ==0.1,
......@@ -226,15 +218,7 @@ constraints: any.Cabal ==3.8.1.0,
any.ghc-boot ==9.4.7,
any.ghc-boot-th ==9.4.7,
any.ghc-heap ==9.4.7,
any.ghc-lib-parser ==9.4.7.20230826,
ghc-lib-parser +threaded-rts,
any.ghc-lib-parser-ex ==9.4.0.0,
ghc-lib-parser-ex -auto -no-ghc-lib,
any.ghc-parser ==0.2.6.0,
any.ghc-paths ==0.1.0.12,
any.ghc-prim ==0.9.1,
any.ghc-syntax-highlighter ==0.0.9.0,
ghc-syntax-highlighter -dev,
any.ghci ==9.4.7,
any.githash ==0.1.7.0,
any.graphviz ==2999.20.1.0,
......@@ -248,7 +232,6 @@ constraints: any.Cabal ==3.8.1.0,
hashable +integer-gmp -random-initial-seed,
any.hashtables ==1.3.1,
hashtables -bounds-checking -debug -detailed-profiling -portable -sse42 +unsafe-tricks,
any.haskeline ==0.8.2,
any.haskell-igraph ==0.10.4,
any.haskell-lexer ==1.1.1,
any.haskell-src-exts ==1.23.1,
......@@ -257,8 +240,6 @@ constraints: any.Cabal ==3.8.1.0,
any.hedgehog ==1.2,
any.hgal ==2.0.0.2,
any.hlcm ==0.2.2,
any.hlint ==3.5,
hlint +ghc-lib +gpl -hsyaml +threaded,
any.hmatrix ==0.20.2,
hmatrix -disable-default-paths -no-random_r -openblas,
any.hmatrix-gsl-stats ==0.4.1.8,
......@@ -267,7 +248,6 @@ constraints: any.Cabal ==3.8.1.0,
any.hpc ==0.6.1.0,
any.hsc2hs ==0.68.10,
hsc2hs -in-ghc-tree,
any.hscolour ==1.24.4,
any.hsinfomap ==0.1,
any.hslogger ==1.3.1.0,
hslogger +network--gt-3_0_0,
......@@ -300,8 +280,6 @@ constraints: any.Cabal ==3.8.1.0,
any.hxt-regex-xmlschema ==9.2.0.7,
hxt-regex-xmlschema -profile,
any.hxt-unicode ==9.0.2.4,
any.ihaskell ==0.11.0.0,
ihaskell +use-hlint,
any.indexed-profunctors ==0.1.1.1,
any.indexed-traversable ==0.1.3,
any.indexed-traversable-instances ==0.1.1.2,
......@@ -313,8 +291,6 @@ constraints: any.Cabal ==3.8.1.0,
any.invariant ==0.6.2,
any.iproute ==1.7.12,
any.ipynb ==0.2,
any.ipython-kernel ==0.11.0.0,
ipython-kernel -examples,
any.iso639 ==0.1.0.3,
any.jira-wiki-markup ==1.5.1,
any.jose ==0.10,
......@@ -339,9 +315,7 @@ constraints: any.Cabal ==3.8.1.0,
any.llvm-hs ==12.0.0,
llvm-hs -debug -llvm-with-rtti +shared-llvm,
any.llvm-hs-pure ==12.0.0,
any.located-base ==0.1.1.1,
any.lockfree-queue ==0.2.4,
any.logging-effect ==1.3.12,
any.logict ==0.8.0.0,
any.loop ==0.3.0,
any.lzma ==0.0.1.0,
......@@ -377,7 +351,6 @@ constraints: any.Cabal ==3.8.1.0,
any.morpheus-graphql-core ==0.24.3,
any.morpheus-graphql-server ==0.24.3,
any.morpheus-graphql-subscriptions ==0.24.3,
any.morpheus-graphql-tests ==0.24.3,
any.mtl ==2.2.2,
any.mtl-compat ==0.2.2,
mtl-compat -two-point-one -two-point-two,
......@@ -419,10 +392,6 @@ constraints: any.Cabal ==3.8.1.0,
any.password-types ==1.0.0.0,
any.patches-class ==0.1.0.1,
any.patches-map ==0.1.0.1,
any.path ==0.9.5,
path -dev,
any.path-io ==1.8.1,
path-io -dev,
any.pem ==0.2.4,
any.polyparse ==1.13,
any.port-utils ==0.2.1.0,
......@@ -440,16 +409,12 @@ constraints: any.Cabal ==3.8.1.0,
any.primitive ==0.8.0.0,
any.primitive-extras ==0.10.1.10,
any.primitive-unlifted ==2.1.0.0,
any.probability ==0.2.8,
probability +splitbase,
any.process ==1.6.17.0,
any.product-profunctors ==0.11.1.1,
any.profunctors ==5.6.2,
any.promises ==0.3,
any.protolude ==0.3.3,
any.psqueues ==0.2.7.3,
any.pureMD5 ==2.1.4,
pureMD5 -test,
any.qrcode-core ==0.9.9,
any.qrcode-juicypixels ==0.8.5,
any.quickcheck-instances ==0.3.30,
......@@ -462,7 +427,6 @@ constraints: any.Cabal ==3.8.1.0,
any.rdf4h ==3.1.1,
any.recover-rtti ==0.4.3,
any.recv ==0.1.0,
any.refact ==0.3.0.2,
any.reflection ==2.1.7,
reflection -slow +template-haskell,
any.regex ==1.1.0.2,
......@@ -502,7 +466,6 @@ constraints: any.Cabal ==3.8.1.0,
any.servant-auth-server ==0.4.8.0,
any.servant-auth-swagger ==0.2.10.2,
any.servant-blaze ==0.9.1,
any.servant-cassava ==0.10.2,
any.servant-client ==0.19,
any.servant-client-core ==0.20,
any.servant-docs ==0.12,
......@@ -521,7 +484,6 @@ constraints: any.Cabal ==3.8.1.0,
any.servant-xml-conduit ==0.1.0.4,
any.shelly ==1.12.1,
shelly -build-examples -lifted,
any.simple-reflect ==0.3.3,
any.simple-sendfile ==0.2.32,
simple-sendfile +allow-bsd -fallback,
any.singleton-bool ==0.1.6,
......@@ -551,7 +513,6 @@ constraints: any.Cabal ==3.8.1.0,
any.stm ==2.5.1.0,
any.stm-chans ==3.0.0.9,
any.stm-containers ==1.2.0.3,
any.stm-delay ==0.1.1.1,
any.stm-hamt ==1.2.0.14,
any.storable-complex ==0.2.3.0,
any.streaming-commons ==0.2.2.6,
......@@ -609,7 +570,6 @@ constraints: any.Cabal ==3.8.1.0,
any.time-locale-compat ==0.1.1.5,
time-locale-compat -old-locale,
any.time-manager ==0.0.1,
any.timezone-series ==0.1.13,
any.tls ==1.6.0,
tls +compat -hans +network,
any.tmp-postgres ==1.34.1.0,
......@@ -693,7 +653,6 @@ constraints: any.Cabal ==3.8.1.0,
any.xml-types ==0.3.8,
any.yaml ==0.11.11.2,
yaml +no-examples +no-exe,
any.zeromq4-haskell ==0.8.0,
any.zip ==2.0.0,
zip -dev -disable-bzip2 -disable-zstd,
any.zip-archive ==0.4.3,
......
This diff is collapsed.
{-|
Module : Gargantext.API.Admin.Orchestrator
Description : Jobs Orchestrator
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.API.Admin.Orchestrator where
import Control.Lens hiding (elements)
import Data.Aeson
import Data.ByteString.Lazy.Char8 qualified as LBS
import Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.Prelude hiding (to)
import Servant
import Servant.Job.Async
import Servant.Job.Client
callJobScrapy :: (ToJSON e, FromJSON e, FromJSON o, MonadClientJob m)
=> JobServerURL e Schedule o
-> (URL -> Schedule)
-> m o
callJobScrapy jurl schedule = do
progress $ NewTask jurl
out <- view job_output <$>
retryOnTransientFailure (clientCallbackJob' jurl
(fmap (const ()) . scrapySchedule . schedule))
progress $ Finished jurl Nothing
pure out
logConsole :: ToJSON a => a -> IO ()
logConsole = LBS.putStrLn . encode
callScraper :: MonadClientJob m => URL -> ScraperInput -> m JobLog
callScraper url input =
callJobScrapy jurl $ \cb ->
Schedule
{ s_project = "gargantext"
, s_spider = input ^. scin_spider
, s_setting = []
, s_jobid = Nothing
, s_version = Nothing
, s_extra =
[("query", input ^.. scin_query . _Just)
,("user", [input ^. scin_user])
,("corpus", [input ^. scin_corpus . to toUrlPiece])
,("report_every", input ^.. scin_report_every . _Just . to toUrlPiece)
,("limit", input ^.. scin_limit . _Just . to toUrlPiece)
,("url", input ^.. scin_local_file . _Just)
,("count_only", input ^.. scin_count_only . _Just . to toUrlPiece)
,("callback", [toUrlPiece cb])]
}
where
jurl :: JobServerURL JobLog Schedule JobLog
jurl = JobServerURL url Callback
pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
-> (e -> IO ()) -> IO JobLog
pipeline scrapyurl client_env input log_status = do
e <- runJobMLog client_env log_status $ callScraper scrapyurl input
either (panicTrace . show) pure e -- TODO throwError
-- TODO integrate to ServerT
-- use:
-- * serveJobsAPI instead of simpleServeJobsAPI
-- * JobFunction instead of simpleJobFunction
-- TODO:
-- * HasSelfUrl or move self_url to settings
-- * HasScrapers or move scrapers to settings
-- * EnvC env
{- NOT USED YET
import Data.Text
import Servant.Job.Server
import Servant.Job.Utils (extendBaseUrl)
import Gargantext.API.Admin.Types
scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
scrapyOrchestrator env = do
apiWithCallbacksServer (Proxy :: Proxy ScraperAPI)
defaultSettings (extendBaseUrl ("scraper" :: Text) $ env ^. env_self_url)
(env ^. env_manager) (LogEvent logConsole) $
simpleServeJobsAPI (env ^. env_scrapers) .
simpleJobFunction . pipeline (URL $ env ^. settings . scrapydUrl)
-}
{-|
Module : Gargantext.API.Admin.Orchestartor.Scrapy.Schedule
Description : Server API Auth Module
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Admin.Orchestrator.Scrapy.Schedule
where
import Control.Lens
import Data.Aeson
import GHC.Generics
import Protolude
import Servant
import Servant.Client
import Servant.Job.Utils (jsonOptions)
import Web.FormUrlEncoded hiding (parseMaybe)
import qualified Data.HashMap.Strict as H
------------------------------------------------------------------------
data Schedule = Schedule
{ s_project :: !Text
, s_spider :: !Text
, s_setting :: ![Text]
, s_jobid :: !(Maybe Text)
, s_version :: !(Maybe Text)
, s_extra :: ![(Text,[Text])]
}
deriving (Generic)
data ScheduleResponse = ScheduleResponse
{ r_status :: !Text
, r_jobid :: !Text
}
deriving (Generic)
instance FromJSON ScheduleResponse where
parseJSON = genericParseJSON (jsonOptions "r_")
instance ToForm Schedule where
toForm s =
Form . H.fromList $
[("project", [s_project s])
,("spider", [s_spider s])
,("setting", s_setting s)
,("jobid", s_jobid s ^.. _Just)
,("_version", s_version s ^.. _Just)
] ++ s_extra s
type Scrapy =
"schedule.json" :> ReqBody '[FormUrlEncoded] Schedule
:> Post '[JSON] ScheduleResponse
scrapyAPI :: Proxy Scrapy
scrapyAPI = Proxy
scrapySchedule :: Schedule -> ClientM ScheduleResponse
scrapySchedule = client scrapyAPI
{-|
Module : Gargantext.API.Admin.Utils
Description : Server API main Types
Copyright : (c) CNRS, 2017-Present
License : BSD3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mainly copied from Servant.Job.Utils (Thanks)
-}
module Gargantext.API.Admin.Utils
where
import Gargantext.Prelude
import Prelude (String)
import qualified Data.Text as T
infixr 4 ?|
-- Reverse infix form of "fromMaybe"
(?|) :: Maybe a -> a -> a
(?|) = flip fromMaybe
infixr 4 ?!
-- Reverse infix form of "fromJust" with a custom error message
(?!) :: Maybe a -> String -> a
(?!) ma' msg = ma' ?| panicTrace (T.pack msg)
......@@ -9,8 +9,6 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
......
{-|
Module : Gargantext.API.Flow
Description : Main Flow API DataTypes
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
module Gargantext.API.Flow
where
-- import Gargantext.API.Prelude
import Gargantext.Prelude
data InputFlow = TextsInput
| NgramsInput
| ListInput
data Flow = EndFlow
| Texts InputFlow [Flow]
| Ngrams InputFlow [Flow]
| Lists InputFlow [Flow]
data OutputFlow
flow :: Flow -> OutputFlow
flow = undefined
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DuplicateRecordFields #-}
module Gargantext.API.GraphQL.Contact where
import Control.Lens
import Data.Morpheus.Types
( GQLType
, Resolver
, ResolverM
, QUERY
, lift
)
import Data.Text (Text)
import qualified Data.Text as T
import Gargantext.API.Prelude (GargM, GargError)
import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Database.Admin.Types.Hyperdata
( HyperdataUser(..)
, hc_source
, hc_title
, hu_shared)
import Gargantext.Database.Admin.Types.Hyperdata.Contact
( HyperdataContact
, ContactWho
, ContactWhere
, cw_city
, cw_country
, cw_firstName
, cw_lastName
, cw_labTeamDepts
, cw_office
, cw_organization
, cw_role
, cw_touch
, ct_mail
, ct_phone
, hc_who
, hc_where)
import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Query.Table.User (getUsersWithHyperdata, getUsersWithNodeHyperdata)
import Gargantext.Database.Schema.User (UserLight(..))
import Gargantext.Database.Schema.Node (node_id, node_hyperdata)
import Gargantext.Prelude
import GHC.Generics (Generic)
data UserInfo = UserInfo
{ ui_id :: Int
, ui_username :: Text
, ui_email :: Text
, ui_title :: Maybe Text
, ui_source :: Maybe Text
, ui_cwFirstName :: Maybe Text
, ui_cwLastName :: Maybe Text
, ui_cwCity :: Maybe Text
, ui_cwCountry :: Maybe Text
, ui_cwOrganization :: [Text]
, ui_cwLabTeamDepts :: [Text]
, ui_cwOffice :: Maybe Text
, ui_cwRole :: Maybe Text
, ui_cwTouchPhone :: Maybe Text
, ui_cwTouchMail :: Maybe Text }
deriving (Generic, GQLType, Show)
-- | Arguments to the "user info" query.
data UserInfoArgs
= UserInfoArgs
{ user_id :: Int
} deriving (Generic, GQLType)
-- | Arguments to the "user info" mutation,
data UserInfoMArgs
= UserInfoMArgs
{ ui_id :: Int
, ui_username :: Maybe Text
, ui_email :: Maybe Text
, ui_title :: Maybe Text
, ui_source :: Maybe Text
, ui_cwFirstName :: Maybe Text
, ui_cwLastName :: Maybe Text
, ui_cwCity :: Maybe Text
, ui_cwCountry :: Maybe Text
, ui_cwOrganization :: Maybe [Text]
, ui_cwLabTeamDepts :: Maybe [Text]
, ui_cwOffice :: Maybe Text
, ui_cwRole :: Maybe Text
, ui_cwTouchPhone :: Maybe Text
, ui_cwTouchMail :: Maybe Text
} deriving (Generic, GQLType)
type GqlM e env = Resolver QUERY e (GargM env GargError)
-- | Function to resolve user from a query.
resolveUserInfos
:: (HasConnectionPool env, HasConfig env, HasMail env)
=> UserInfoArgs -> GqlM e env [UserInfo]
resolveUserInfos UserInfoArgs { user_id } = do
lift $ printDebug "[resolveUserInfo] ui_id" user_id
dbUsers user_id
-- | Mutation for user info
updateUserInfo
:: (HasConnectionPool env, HasConfig env, HasMail env)
=> UserInfoMArgs -> ResolverM e (GargM env GargError) Int
updateUserInfo (UserInfoMArgs { ui_id, .. }) = do
lift $ printDebug "[updateUserInfo] ui_id" ui_id
users <- lift (getUsersWithNodeHyperdata ui_id)
case users of
[] -> panicTrace $ "[updateUserInfo] User with id " <> (T.pack $ show ui_id) <> " doesn't exist."
((_u, node_u):_) -> do
let u_hyperdata = node_u ^. node_hyperdata
-- lift $ printDebug "[updateUserInfo] u" u
let u_hyperdata' = uh ui_titleL ui_title $
uh ui_sourceL ui_source $
uh ui_cwFirstNameL ui_cwFirstName $
uh ui_cwLastNameL ui_cwLastName $
uh ui_cwCityL ui_cwCity $
uh ui_cwCountryL ui_cwCountry $
uh' ui_cwLabTeamDeptsL ui_cwLabTeamDepts $
uh' ui_cwOrganizationL ui_cwOrganization $
uh ui_cwOfficeL ui_cwOffice $
uh ui_cwRoleL ui_cwRole $
uh ui_cwTouchMailL ui_cwTouchMail $
uh ui_cwTouchPhoneL ui_cwTouchPhone $
u_hyperdata
-- lift $ printDebug "[updateUserInfo] with firstName" u_hyperdata'
_ <- lift $ updateHyperdata (node_u ^. node_id) u_hyperdata'
--let _newUser = toUser (u, u_hyperdata')
pure 1
where
uh _ Nothing u_hyperdata = u_hyperdata
uh lens' (Just val) u_hyperdata = u_hyperdata & lens' .~ Just val
uh' _ Nothing u_hyperdata = u_hyperdata
uh' lens' (Just val) u_hyperdata = u_hyperdata & lens' .~ val
-- | Inner function to fetch the user from DB.
dbUsers
:: (HasConnectionPool env, HasConfig env, HasMail env)
=> Int -> GqlM e env [UserInfo]
dbUsers user_id = do
-- lift $ printDebug "[dbUsers]" user_id
-- user <- getUsersWithId user_id
-- hyperdata <- getUserHyperdata user_id
-- lift (map toUser <$> zip user hyperdata)
lift (map toUser <$> (getUsersWithHyperdata user_id))
toUser :: (UserLight, HyperdataUser) -> UserInfo
toUser (UserLight { .. }, u_hyperdata) =
UserInfo { ui_id = userLight_id
, ui_username = userLight_username
, ui_email = userLight_email
, ui_title = u_hyperdata ^. ui_titleL
, ui_source = u_hyperdata ^. ui_sourceL
, ui_cwFirstName = u_hyperdata ^. ui_cwFirstNameL
, ui_cwLastName = u_hyperdata ^. ui_cwLastNameL
, ui_cwCity = u_hyperdata ^. ui_cwCityL
, ui_cwCountry = u_hyperdata ^. ui_cwCountryL
, ui_cwLabTeamDepts = u_hyperdata ^. ui_cwLabTeamDeptsL
, ui_cwOrganization = u_hyperdata ^. ui_cwOrganizationL
, ui_cwOffice = u_hyperdata ^. ui_cwOfficeL
, ui_cwRole = u_hyperdata ^. ui_cwRoleL
, ui_cwTouchMail = u_hyperdata ^. ui_cwTouchMailL
, ui_cwTouchPhone = u_hyperdata ^. ui_cwTouchPhoneL }
sharedL :: Traversal' HyperdataUser HyperdataContact
sharedL = hu_shared . _Just
ui_titleL :: Traversal' HyperdataUser (Maybe Text)
ui_titleL = sharedL . hc_title
ui_sourceL :: Traversal' HyperdataUser (Maybe Text)
ui_sourceL = sharedL . hc_source
contactWhoL :: Traversal' HyperdataUser ContactWho
contactWhoL = sharedL . hc_who . _Just
ui_cwFirstNameL :: Traversal' HyperdataUser (Maybe Text)
ui_cwFirstNameL = contactWhoL . cw_firstName
ui_cwLastNameL :: Traversal' HyperdataUser (Maybe Text)
ui_cwLastNameL = contactWhoL . cw_lastName
contactWhereL :: Traversal' HyperdataUser ContactWhere
contactWhereL = sharedL . hc_where . (ix 0)
ui_cwCityL :: Traversal' HyperdataUser (Maybe Text)
ui_cwCityL = contactWhereL . cw_city
ui_cwCountryL :: Traversal' HyperdataUser (Maybe Text)
ui_cwCountryL = contactWhereL . cw_country
ui_cwLabTeamDeptsL :: Traversal' HyperdataUser [Text]
ui_cwLabTeamDeptsL = hu_shared . _Just . (hc_where . (ix 0) . cw_labTeamDepts)
ui_cwOrganizationL :: Traversal' HyperdataUser [Text]
ui_cwOrganizationL = hu_shared . _Just . (hc_where . (ix 0) . cw_organization)
ui_cwOfficeL :: Traversal' HyperdataUser (Maybe Text)
ui_cwOfficeL = contactWhereL . cw_office
ui_cwRoleL :: Traversal' HyperdataUser (Maybe Text)
ui_cwRoleL = contactWhereL . cw_role
ui_cwTouchMailL :: Traversal' HyperdataUser (Maybe Text)
ui_cwTouchMailL = hu_shared . _Just . (hc_where . (ix 0) . cw_touch . _Just . ct_mail)
--ui_cwTouchMailL = contactWhereL . cw_touch . _Just . ct_mail
ui_cwTouchPhoneL :: Traversal' HyperdataUser (Maybe Text)
ui_cwTouchPhoneL = hu_shared . _Just . (hc_where . (ix 0) . cw_touch . _Just . ct_phone)
--ui_cwTouchPhoneL = contactWhereL . cw_touch . _Just . ct_phone
......@@ -11,9 +11,7 @@ Metrics API
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Metrics
where
......
......@@ -9,8 +9,6 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
......
{-|
Module : Gargantext.API.Node.Corpus.New.File
Description : Server API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Corpus.New.File
where
import Control.Lens ((?~))
import Data.Swagger
import Gargantext.API.Node.Corpus.New.Types (FileFormat, FileType)
import Gargantext.Core.Types (TODO)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Prelude hiding (hash)
import Gargantext.Prelude.Crypto.Hash (hash)
import Servant ( JSON, type (:>), Post, QueryParam, Summary )
import Servant.Multipart ( Input(iName), Mem, MultipartData(inputs), MultipartForm )
import Servant.Swagger.Internal ( addParam, HasSwagger(..) )
-------------------------------------------------------------
type Hash = Text
instance ToParamSchema (MultipartData Mem) where toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance (ToParamSchema a, HasSwagger sub) =>
HasSwagger (MultipartForm tag a :> sub) where
-- TODO
toSwagger _ = toSwagger (Proxy :: Proxy sub)
& addParam param
where
param = mempty
& required ?~ True
& schema .~ ParamOther sch
sch = mempty
& in_ .~ ParamFormData
& paramSchema .~ toParamSchema (Proxy :: Proxy a)
type WithUpload' = Summary "Upload file(s) to a corpus"
:> QueryParam "fileType" FileType
:> QueryParam "fileFormat" FileFormat
:> MultipartForm Mem (MultipartData Mem)
:> Post '[JSON] [Hash]
--postUpload :: NodeId -> Maybe FileType -> GargServer UploadAPI
--postUpload :: NodeId -> GargServer UploadAPI
postUpload :: NodeId
-> Maybe FileType
-> Maybe FileFormat
-> MultipartData Mem
-> Cmd err [Hash]
postUpload _ Nothing _ _ = panicTrace "fileType is a required parameter"
postUpload _ _ Nothing _ = panicTrace "fileFormat is a required parameter"
postUpload _ (Just _fileType) (Just _fileFormat) multipartData = do
-- printDebug "File Type: " fileType
-- printDebug "File format: " fileFormat
is <- liftBase $ do
-- printDebug "Inputs:" ()
forM (inputs multipartData) $ \input -> do
-- printDebug "iName " (iName input)
-- printDebug "iValue " (iValue input)
pure $ iName input
{-
_ <- forM (files multipartData) $ \file -> do
-- let content = fdPayload file
-- printDebug "XXX " (fdFileName file)
-- printDebug "YYY " content
pure () -- $ cs content
-- is <- inputs multipartData
-}
pure $ map hash is
-------------------------------------------------------------------
......@@ -8,7 +8,7 @@ Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-matches #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE TypeOperators #-}
......
......@@ -11,7 +11,6 @@ Polymorphic Get Node API
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE TemplateHaskell #-}
......
......@@ -14,7 +14,6 @@ Async new node feature
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE IncoherentInstances #-}
module Gargantext.API.Node.New
......
......@@ -9,8 +9,7 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Update
where
......
......@@ -16,7 +16,6 @@ Portability : POSIX
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-} -- instance HasSwagger (WithCustomErrorScheme GargAPI)
module Gargantext.API.Routes
where
......
......@@ -22,8 +22,6 @@ Node API
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
......
This diff is collapsed.
{-|
Module : Gargantext.Core.Methods.Similarities.Distributional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Motivation and definition of the @Distributional@ distance.
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE Strict #-}
module Gargantext.Core.Methods.Similarities.Distributional
where
import Data.Matrix hiding (identity)
import qualified Data.Map as M
import Data.Vector (Vector)
import qualified Data.Vector as V
import Gargantext.Prelude
import Gargantext.Core.Viz.Graph.Utils
distributional' :: (Floating a, Ord a) => Matrix a -> [((Int, Int), a)]
distributional' m = filter (\((x,y), d) -> foldl' (&&) True (conditions x y d) ) distriList
where
conditions x y d = [ (x /= y)
, (d > miniMax')
, ((M.lookup (x,y) distriMap) > (M.lookup (y,x) distriMap))
]
distriList = toListsWithIndex distriMatrix
distriMatrix = ri (mi m)
distriMap = M.fromList $ distriList
miniMax' = miniMax distriMatrix
ri :: (Ord a, Fractional a) => Matrix a -> Matrix a
ri m = matrix c r doRi
where
doRi (x,y) = doRi' x y m
doRi' x y mi'' = sumMin x y mi'' / (V.sum $ ax Col x y mi'')
sumMin x y mi' = V.sum $ V.map (\(a,b) -> min a b )
$ V.zip (ax Col x y mi') (ax Row x y mi')
(c,r) = (nOf Col m, nOf Row m)
mi :: (Ord a, Floating a) => Matrix a -> Matrix a
mi m = matrix c r createMat
where
(c,r) = (nOf Col m, nOf Row m)
createMat (x,y) = doMi x y m
doMi x y m' = if x == y then 0 else (max (log (doMi' x y m')) 0 )
doMi' x y m' = (getElem x y m) / ( cross x y m / total m' )
cross x y m' = (V.sum $ ax Col x y m) * (V.sum $ ax Row x y m')
ax :: Axis -> Int -> Int -> Matrix a -> Vector a
ax a i j m = dropAt j' $ axis a i' m
where
i' = div i c + 1
j' = mod r j + 1
(c,r) = (nOf Col m, nOf Row m)
miniMax :: (Ord a) => Matrix a -> a
miniMax m = V.minimum $ V.map (\c -> V.maximum $ getCol c m) (V.enumFromTo 1 (nOf Col m))
{-|
Module : Gargantext.API.Ngrams.Types
Description : Ngrams List Types
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
NOTE This is legacy code. It keeps node stories in a directory
repo. We now have migrated to the DB. However this code is needed to
make the migration (see Gargantext.API.Ngrams.Tools)
-}
module Gargantext.Core.NodeStoryFile where
import Codec.Serialise (serialise, deserialise)
import Codec.Serialise.Class
import Control.Debounce (mkDebounce, defaultDebounceSettings, debounceFreq, debounceAction)
import Control.Lens (view)
import Data.ByteString.Lazy qualified as DBL
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Gargantext.Core.NodeStory hiding (fromDBNodeStoryEnv)
import Gargantext.Core.Types (ListId, NodeId(..))
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Prelude
import Gargantext.Core.Config (gc_repofilepath)
import System.Directory (renameFile, createDirectoryIfMissing, doesFileExist, removeFile)
import System.IO (hClose)
import System.IO.Temp (withTempFile)
getRepo :: HasNodeStory env err m
=> [ListId] -> m NodeListStory
getRepo listIds = do
g <- getNodeListStory
liftBase $ do
v <- g listIds
readMVar v
-- v <- liftBase $ f listIds
-- v' <- liftBase $ readMVar v
-- pure $ v'
getRepoReadConfig :: (HasNodeStory env err m)
=> [ListId] -> m NodeListStory
getRepoReadConfig listIds = do
repoFP <- view $ hasConfig . gc_repofilepath
env <- liftBase $ readNodeStoryEnv repoFP
let g = view nse_getter env
liftBase $ do
v <- g listIds
readMVar v
getNodeListStory :: HasNodeStory env err m
=> m ([NodeId] -> IO (MVar NodeListStory))
getNodeListStory = do
env <- view hasNodeStory
pure $ view nse_getter env
readNodeStoryEnv :: NodeStoryDir -> IO NodeStoryEnv
readNodeStoryEnv nsd = do
mvar <- nodeStoryVar nsd Nothing []
saver <- mkNodeStorySaver nsd mvar
let saver_immediate = withMVar mvar (writeNodeStories nsd)
let archive_saver_immediate ns = pure ns
pure $ NodeStoryEnv { _nse_var = mvar
, _nse_saver = saver
, _nse_saver_immediate = saver_immediate
, _nse_archive_saver_immediate = archive_saver_immediate
, _nse_getter = nodeStoryVar nsd (Just mvar) }
------------------------------------------------------------------------
mkNodeStorySaver :: NodeStoryDir -> MVar NodeListStory -> IO (IO ())
mkNodeStorySaver nsd mvns = mkDebounce settings
where
settings = defaultDebounceSettings
{ debounceAction = withMVar mvns (writeNodeStories nsd)
, debounceFreq = 1 * minute
-- , debounceEdge = trailingEdge -- Trigger on the trailing edge
}
minute = 60 * sec
sec = 10^(6 :: Int)
nodeStoryVar :: NodeStoryDir
-> Maybe (MVar NodeListStory)
-> [NodeId]
-> IO (MVar NodeListStory)
nodeStoryVar nsd Nothing ni = nodeStoryIncs nsd Nothing ni >>= newMVar
nodeStoryVar nsd (Just mv) ni = do
_ <- modifyMVar_ mv $ \mv' -> (nodeStoryIncs nsd (Just mv') ni)
pure mv
nodeStoryInc :: NodeStoryDir -> Maybe NodeListStory -> NodeId -> IO NodeListStory
nodeStoryInc nsd (Just ns@(NodeStory nls)) ni = do
case Map.lookup ni nls of
Nothing -> do
(NodeStory nls') <- nodeStoryRead nsd ni
pure $ NodeStory $ Map.union nls nls'
Just _ -> pure ns
nodeStoryInc nsd Nothing ni = nodeStoryRead nsd ni
nodeStoryIncs :: NodeStoryDir
-> Maybe NodeListStory
-> [NodeId]
-> IO NodeListStory
nodeStoryIncs _ Nothing [] = pure $ NodeStory $ Map.empty
nodeStoryIncs nsd (Just nls) ns = foldM (\m n -> nodeStoryInc nsd (Just m) n) nls ns
nodeStoryIncs nsd Nothing (ni:ns) = do
m <- nodeStoryRead nsd ni
nodeStoryIncs nsd (Just m) ns
nodeStoryDec :: NodeStoryDir
-> NodeListStory
-> NodeId
-> IO NodeListStory
nodeStoryDec nsd ns@(NodeStory nls) ni = do
case Map.lookup ni nls of
Nothing -> do
-- we make sure the corresponding file repo is really removed
_ <- nodeStoryRemove nsd ni
pure ns
Just _ -> do
let ns' = Map.filterWithKey (\k _v -> k /= ni) nls
_ <- nodeStoryRemove nsd ni
pure $ NodeStory ns'
-- | TODO lock
nodeStoryRead :: NodeStoryDir -> NodeId -> IO NodeListStory
nodeStoryRead nsd ni = do
_repoDir <- createDirectoryIfMissing True nsd
let nsp = nodeStoryPath nsd ni
exists <- doesFileExist nsp
if exists
then deserialise <$> DBL.readFile nsp
else pure (initNodeStory ni)
nodeStoryRemove :: NodeStoryDir -> NodeId -> IO ()
nodeStoryRemove nsd ni = do
let nsp = nodeStoryPath nsd ni
exists <- doesFileExist nsp
if exists
then removeFile nsp
else pure ()
nodeStoryRead_test :: NodeStoryDir -> NodeId -> IO (Maybe [ TableNgrams.NgramsType ])
nodeStoryRead_test nsd ni = nodeStoryRead nsd ni >>= \n -> pure
$ fmap Map.keys
$ fmap _a_state
$ Map.lookup ni
$ _unNodeStory n
------------------------------------------------------------------------
type NodeStoryDir = FilePath
writeNodeStories :: NodeStoryDir -> NodeListStory -> IO ()
writeNodeStories fp nls = do
_done <- mapM (writeNodeStory fp) $ splitByNode nls
-- printDebug "[writeNodeStories]" done
pure ()
writeNodeStory :: NodeStoryDir -> (NodeId, NodeListStory) -> IO ()
writeNodeStory rdfp (n, ns) = saverAction' rdfp n ns
splitByNode :: NodeListStory -> [(NodeId, NodeListStory)]
splitByNode (NodeStory m) =
List.map (\(n,a) -> (n, NodeStory $ Map.singleton n a)) $ Map.toList m
saverAction' :: Serialise a => NodeStoryDir -> NodeId -> a -> IO ()
saverAction' repoDir nId a = do
withTempFile repoDir ((show nId) <> "-tmp-repo.cbor") $ \fp h -> do
-- printDebug "[repoSaverAction]" fp
DBL.hPut h $ serialise a
hClose h
renameFile fp (nodeStoryPath repoDir nId)
nodeStoryPath :: NodeStoryDir -> NodeId -> FilePath
nodeStoryPath repoDir nId = repoDir <> "/" <> filename
where
filename = "repo" <> "-" <> (show nId) <> ".cbor"
------------------------------------------------------------------------
-- TODO : repo Migration TODO TESTS
{-
repoMigration :: NodeStoryDir -> NgramsRepo -> IO ()
repoMigration fp r = writeNodeStories fp (repoToNodeListStory r)
repoToNodeListStory :: NgramsRepo -> NodeListStory
repoToNodeListStory (Repo _v s h) = NodeStory $ Map.fromList ns
where
s' = ngramsState_migration s
h' = ngramsStatePatch_migration h
ns = List.map (\(n,ns')
-> (n, let hs = fromMaybe [] (Map.lookup n h') in
Archive { _a_version = List.length hs
, _a_state = ns'
, _a_history = hs }
)
) $ Map.toList s'
ngramsState_migration :: NgramsState
-> Map NodeId NgramsState'
ngramsState_migration ns =
Map.fromListWith (Map.union) $
List.concat $
map (\(nt, nTable)
-> map (\(nid, table)
-> (nid, Map.singleton nt table)
) $ Map.toList nTable
) $ Map.toList ns
ngramsStatePatch_migration :: [NgramsStatePatch]
-> Map NodeId [NgramsStatePatch']
ngramsStatePatch_migration np' = Map.fromListWith (<>)
$ List.concat
$ map toPatch np'
where
toPatch :: NgramsStatePatch -> [(NodeId, [NgramsStatePatch'])]
toPatch p =
List.concat $
map (\(nt, nTable)
-> map (\(nid, table)
-> (nid, [fst $ Patch.singleton nt table])
) $ Patch.toList nTable
) $ Patch.toList p
-}
{-|
Module : Gargantext.Core.Text.Convert
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Format Converter.
-}
{-# LANGUAGE PackageImports #-}
module Gargantext.Core.Text.Convert (risPress2tsvWrite)
where
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat(..), FileType(..))
import Gargantext.Core.Text.Corpus.Parsers.TSV (writeDocs2Tsv)
import Gargantext.Prelude
risPress2tsvWrite :: FilePath -> IO ()
risPress2tsvWrite f = do
eContents <- parseFile RisPresse Plain (f <> ".ris")
case eContents of
Right contents -> writeDocs2Tsv (f <> ".csv") contents
Left e -> panicTrace $ "Error: " <> e
......@@ -9,7 +9,7 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Text.Corpus.API.Arxiv
......
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Date
Description : Some utils to parse dates
Copyright : (c) CNRS 2017-present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
where
import Data.Fixed (Fixed (MkFixed))
import Data.String (String)
import Data.Text qualified as T
import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
import Data.Time.Calendar (Day, fromGregorian)
import Gargantext.Prelude
import Text.Parsec.Error (ParseError)
import Text.Parsec.Prim (Stream, ParsecT)
import Text.Parsec.String (Parser)
import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
import Text.ParserCombinators.Parsec qualified (parse)
import Text.XML.HXT.DOM.Util (decimalStringToInt)
-- | Permit to transform a String to an Int in a monadic context
wrapDST :: Monad m => String -> m Int
wrapDST = pure . decimalStringToInt
-- | Generic parser which take at least one element not given in argument
many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
many1NoneOf = (many1 . noneOf)
getMultiplicator :: Int -> Int
getMultiplicator a
| 0 >= a = 1
| otherwise = 10 * (getMultiplicator $ div a 10)
-- | Parser for date format y-m-d
parseGregorian :: Parser Day
parseGregorian = do
y <- wrapDST =<< many1NoneOf ['-']
_ <- char '-'
m <- wrapDST =<< many1NoneOf ['-']
_ <- char '-'
d <- wrapDST =<< many1NoneOf ['T']
_ <- char 'T'
pure $ fromGregorian (toInteger y) m d
---- | Parser for time format h:m:s
parseTimeOfDay :: Parser TimeOfDay
parseTimeOfDay = do
h <- wrapDST =<< many1NoneOf [':']
_ <- char ':'
m <- wrapDST =<< many1NoneOf [':']
_ <- char ':'
r <- many1NoneOf ['.']
_ <- char '.'
dec <- many1NoneOf ['+', '-']
let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
seconds = nb * 10^(12-l)
pure $ TimeOfDay h m (MkFixed . toInteger $ seconds)
-- | Parser for timezone format +hh:mm
parseTimeZone :: Parser TimeZone
parseTimeZone = do
sign <- oneOf ['+', '-']
h <- wrapDST =<< many1NoneOf [':']
_ <- char ':'
m <- wrapDST =<< (many1 $ anyChar)
let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
in pure $ TimeZone timeInMinute False "CET"
---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
parseZonedTime :: Parser ZonedTime
parseZonedTime= do
d <- parseGregorian
tod <- parseTimeOfDay
tz <- parseTimeZone
pure $ ZonedTime (LocalTime d (tod)) tz
---- | Opposite of toRFC3339
fromRFC3339 :: Text -> Either ParseError ZonedTime
fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
where input = T.unpack t
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Json2Csv
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Json parser to export towoard CSV GargV3 format.
(Export from the Patent Database.)
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.Corpus.Parsers.Json2Csv (json2tsv, readPatents)
where
import Data.Aeson ( decode )
import Data.ByteString.Lazy (readFile)
import Data.Text (unpack)
import Data.Vector (fromList)
import Gargantext.Core.Text.Corpus.Parsers.TSV (TsvDoc(..), writeFile, headerTsvGargV3)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (readFile, writeFile)
import Prelude (read)
data Patent = Patent { _patent_title :: Text
, _patent_abstract :: Text
, _patent_year :: Text
, _patent_id :: Text
} deriving (Show)
$(deriveJSON (unPrefix "_patent_") ''Patent)
readPatents :: FilePath -> IO (Maybe [Patent])
readPatents fp = decode <$> readFile fp
type FilePathIn = FilePath
type FilePathOut = FilePath
json2tsv :: FilePathIn -> FilePathOut -> IO ()
json2tsv fin fout = do
patents <- maybe (panicTrace "json2tsv error") identity <$> readPatents fin
writeFile fout (headerTsvGargV3, fromList $ map patent2tsvDoc patents)
patent2tsvDoc :: Patent -> TsvDoc
patent2tsvDoc (Patent { .. }) =
TsvDoc { tsv_title = _patent_title
, tsv_source = "Source"
, tsv_publication_year = Just $ read (unpack _patent_year)
, tsv_publication_month = Just $ Defaults.month
, tsv_publication_day = Just $ Defaults.day
, tsv_abstract = _patent_abstract
, tsv_authors = "Authors" }
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Telegram
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.Parsers.Telegram
where
import Data.Aeson
import Data.ByteString.Lazy qualified as DBL
import Gargantext.Prelude
readFile_Telegram :: FilePath -> IO [TelegramMsg]
readFile_Telegram fp = do
raw <- DBL.readFile fp
let mayIssues = decode raw
case mayIssues of
Just is -> pure is
Nothing -> pure []
data TelegramMsg = TelegramMsg { _action_entities :: !Text
, _broadcastg :: !Text
, _buttonsg :: !Text
, _buttons_countg :: !Text
, _buttons_flatg :: !Text
, _chatg :: !Text
, _chat_peerg :: !Text
, _fileg :: !Text
, _forwardg :: !Text
, _input_chatg :: !Text
, _input_senderg :: !Text
, _linked_chatg :: !Text
, _reply_messageg :: !Text
, _senderg :: !Text
, _sender_idg :: !Text
, _textg :: !Text
, _via_botg :: !Text
, _via_input_botg :: !Text
, actiong :: !Text
, dateg :: !Text
, edit_dateg :: !Text
, edit_hideg :: !Text
, entitiesg :: !Text
, forwardsg :: !Text
, from_idg :: !Text
, from_scheduledg :: !Text
, fwd_fromg :: !Text
, grouped_idg :: !Text
, idg :: !Text
, legacyg :: !Text
, mediag :: !Text
, media_unreadg :: !Text
, mentionedg :: !Text
, messageg :: !Text
, noforwardsg :: !Text
, outg :: !Text
, peer_idg :: !Text
, pinnedg :: !Text
, postg :: !Text
, post_authorg :: !Text
, reactionsg :: !Text
, repliesg :: !Text
, reply_markupg :: !Text
, reply_tog :: !Text
, restriction_reasong :: !Text
, silentg :: !Text
, ttl_periodg :: !Text
, via_bot_idg :: !Text
, views :: !Text
}
deriving (Show, Generic)
instance FromJSON TelegramMsg
{-
gitlabIssue2hyperdataDocument :: Issue -> HyperdataDocument
gitlabIssue2hyperdataDocument issue = HyperdataDocument
{ _hd_bdd = Nothing
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just (_issue_title issue)
, _hd_authors = Nothing
, _hd_institutes = Nothing
, _hd_source = Nothing
, _hd_abstract = Just (_issue_content issue)
, _hd_publication_date = Just $ DT.pack $ show date
, _hd_publication_year = Just $ fromIntegral year
, _hd_publication_month = Just month
, _hd_publication_day = Just day
, _hd_publication_hour = Just (todHour tod)
, _hd_publication_minute = Just (todMin tod)
, _hd_publication_second = Just (round $ todSec tod)
, _hd_language_iso2 = Just $ (DT.pack . show) lang
}
where lang = EN
date = _issue_created issue
(year, month, day) = toGregorian $ localDay date
tod = localTimeOfDay date
-}
{-
readFile_IssuesAsDocs :: FilePath -> IO [HyperdataDocument]
readFile_IssuesAsDocs = fmap (fmap gitlabIssue2hyperdataDocument) . readFile_Issues
-}
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Wikimedia
Description : Parser for Wikimedia dump
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
@Gargantext.Core.Text.Corpus.Parsers.Wikimedia@:
This module provide a parser for wikipedia dump.
This include an xml parser for wikipedia's xml
and an wikimedia to plaintext converter for the wikipedia text field
-}
module Gargantext.Core.Text.Corpus.Parsers.Wikimedia
where
import Control.Monad.Catch
import Data.Conduit
import Data.Either
import Data.Text as T
import Data.XML.Types (Event, Name)
import Gargantext.Prelude hiding (force)
import Text.Pandoc
import Text.XML.Stream.Parse
-- | Use case
-- :{
-- wikimediaFile <- BL.readFile "text.xml"
-- _ <- runConduit $ parseLBS def wikimediaFile
-- .| force "mediawiki required" parseMediawiki
-- .| CL.mapM mediawikiPageToPlain
-- .| CL.mapM_ print
-- :}
-- | A simple "Page" type.
-- For the moment it takes only text and title
-- (since there is no abstract) will see if other data are relevant.
data Page =
Page { _markupFormat :: MarkupFormat
, _title :: Maybe T.Text
, _text :: Maybe T.Text
}
deriving (Show)
data MarkupFormat = Mediawiki | Plaintext
deriving (Show)
parseRevision :: MonadThrow m => ConduitT Event o m (Maybe T.Text)
parseRevision = tagNoAttr "{http://www.mediawiki.org/xml/export-0.10/}revision" $ do
text <- force "text is missing" $ ignoreExcept "{http://www.mediawiki.org/xml/export-0.10/}text" content
many_ ignoreAnyTreeContent
pure text
-- | Utility function that matches everything but the tag given
tagUntil :: Name -> NameMatcher Name
tagUntil name = matching (/= name)
-- | Utility function that consumes everything but the tag given
-- usefull because we have to consume every data.
manyTagsUntil_ :: MonadThrow m => Name -> ConduitT Event o m ()
manyTagsUntil_ n = many_ (ignoreTree (tagUntil n) ignoreAttrs)
manyTagsUntil_' :: MonadThrow m => Name -> ConduitT Event o m ()
manyTagsUntil_' = many_ . ignoreEmptyTag . tagUntil
-- | Utility function that parses nothing but the tag given,
-- usefull because we have to consume every data.
ignoreExcept :: MonadThrow m => Name
-> ConduitT Event o m b
-> ConduitT Event o m (Maybe b)
ignoreExcept name f = do
_ <- manyTagsUntil_ name
tagIgnoreAttrs (matching (== name)) f
-- TODO: remove ignoreExcept to:
-- many ignoreAnyTreeContentUntil "Article"
manyTagsUntil :: MonadThrow m => Name
-> ConduitT Event o m b
-> ConduitT Event o m (Maybe b)
manyTagsUntil name f = do
_ <- manyTagsUntil_ name
tagIgnoreAttrs (matching (== name)) f
parsePage :: MonadThrow m => ConduitT Event o m (Maybe Page)
parsePage =
tagNoAttr "{http://www.mediawiki.org/xml/export-0.10/}page" $ do
title <-
tagNoAttr "{http://www.mediawiki.org/xml/export-0.10/}title" content
_ <- manyTagsUntil_ "{http://www.mediawiki.org/xml/export-0.10/}revision"
revision <-
parseRevision
many_ $ ignoreAnyTreeContent
pure $ Page { _markupFormat = Mediawiki
, _title = title
, _text = revision }
parseMediawiki :: MonadThrow m => ConduitT Event Page m (Maybe ())
parseMediawiki =
tagIgnoreAttrs "{http://www.mediawiki.org/xml/export-0.10/}mediawiki"
$ manyYield' parsePage
-- | Convert a Mediawiki Page to a Plaintext Page.
-- Need to wrap the result in IO to parse and to combine it.
mediawikiPageToPlain :: Page -> IO Page
mediawikiPageToPlain page = do
title <- mediaToPlain $ _title page
revision <- mediaToPlain $ _text page
pure $ Page { _markupFormat = Plaintext, _title = title, _text = revision }
where mediaToPlain media =
case media of
(Nothing) -> pure Nothing
(Just med) -> do
res <- runIO $ do
doc <- readMediaWiki def med
writePlain def doc
case res of
(Left _) -> pure Nothing
(Right r) -> pure $ Just r
{-|
Module : Gargantext.Core.Text.List.Learn
Description : Learn to make lists
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
CSV parser for Gargantext corpus files.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Core.Text.List.Learn
where
import Data.IntMap qualified as IntMap
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.SVM qualified as SVM
import Data.Vector qualified as Vec
import Gargantext.Core
import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.GargDB
import Gargantext.Prelude
------------------------------------------------------------------------
train :: Double -> Double -> SVM.Problem -> IO SVM.Model
train x y = (SVM.train (SVM.CSvc x) (SVM.RBF y))
predict :: SVM.Model -> [Vec.Vector Double] -> IO [Double]
predict m vs = mapM (predict' m) vs
where
predict' m' vs' = SVM.predict m' (IntMap.fromList $ (zip [1..]) $ Vec.toList vs')
------------------------------------------------------------------------
trainList :: Double -> Double -> Map ListType [Vec.Vector Double] -> IO SVM.Model
trainList x y = (train x y) . trainList'
where
trainList' :: Map ListType [Vec.Vector Double] -> SVM.Problem
trainList' = mapVec2problem . (Map.mapKeys (fromIntegral . toDBid))
mapVec2problem :: Map Double [Vec.Vector Double] -> SVM.Problem
mapVec2problem = List.concat . (map (\(a,as) -> zip (repeat a) as)) . Map.toList . (Map.map vecs2maps)
vecs2maps :: [Vec.Vector Double] -> [IntMap.IntMap Double]
vecs2maps = map (IntMap.fromList . (zip [1..]) . Vec.toList)
predictList :: HasCallStack => Model -> [Vec.Vector Double] -> IO [Maybe ListType]
predictList (ModelSVM m _ _) vs = map (Just . fromDBid . round) <$> predict m vs
------------------------------------------------------------------------
data Model = ModelSVM { modelSVM :: SVM.Model
, param1 :: Maybe Double
, param2 :: Maybe Double
}
--{-
instance SaveFile Model
where
saveFile' fp (ModelSVM m _ _) = SVM.saveModel m fp
instance ReadFile Model
where
readFile' fp = do
m <- SVM.loadModel fp
pure $ ModelSVM m Nothing Nothing
--}
------------------------------------------------------------------------
-- | TODO
-- shuffle list
-- split list : train / test
-- grid parameters on best result on test
type Train = Map ListType [Vec.Vector Double]
type Tests = Map ListType [Vec.Vector Double]
type Score = Double
type Param = Double
grid :: (MonadBase IO m)
=> Param -> Param -> Train -> [Tests] -> m (Maybe Model)
grid _ _ _ [] = panicTrace "Gargantext.Core.Text.List.Learn.grid : empty test data"
grid s e tr te = do
let
grid' :: (MonadBase IO m)
=> Double -> Double
-> Train
-> [Tests]
-> m (Score, Model)
grid' x y tr' te' = do
model'' <- liftBase $ trainList x y tr'
let
model' = ModelSVM model'' (Just x) (Just y)
score' :: [(ListType, Maybe ListType)] -> Map (Maybe Bool) Int
score' = occurrencesWith (\(a,b) -> (==) <$> Just a <*> b)
score'' :: Map (Maybe Bool) Int -> Double
score'' m'' = maybe 0 (\t -> (fromIntegral t)/total) (Map.lookup (Just True) m'')
where
total = fromIntegral $ foldl (+) 0 $ Map.elems m''
getScore m t = do
let (res, toGuess) = List.unzip
$ List.concat
$ map (\(k,vs) -> zip (repeat k) vs)
$ Map.toList t
res' <- liftBase $ predictList m toGuess
pure $ score'' $ score' $ List.zip res res'
score <- mapM (getScore model') te'
pure (mean score, model')
r <- head . List.reverse
. (List.sortOn fst)
<$> mapM (\(x,y) -> grid' x y tr te)
[(x,y) | x <- [s..e], y <- [s..e]]
-- printDebug "GRID SEARCH" (map fst r)
-- printDebug "file" fp
--fp <- saveFile (ModelSVM model')
--save best result
pure $ snd <$> r
{-|
Module : Gargantext.Core.Text.Ngrams.List.Management
Description : Tools to manage lists
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.List.Management
where
{-
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types (NgramsElement, NgramsTerm(..))
import Gargantext.Database.Action.Flow.Types
import Gargantext.API.Ngrams.Tools (getListNgrams)
import Gargantext.Core.NodeStory
import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Group
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.List.Social
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.Metrics (scored', Scored(..), scored_speExc, scored_genInc, normalizeGlobal, normalizeLocal, scored_terms)
import Gargantext.Core.Types (ListType(..), CorpusId, ListId)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsUser, getContextsByNgramsOnlyUser)
import Gargantext.Database.Action.Metrics.TFICF (getTficf_withSample)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Database.Prelude (CmdM)
import Gargantext.Database.Query.Table.Ngrams (text2ngrams)
import Gargantext.Database.Query.Table.NgramsPostag (selectLems)
import Gargantext.Database.Query.Table.Node (defaultList, getClosestParentIdByType)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
import Gargantext.Database.Schema.Ngrams (NgramsType(..), Ngrams(..))
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.HashSet as HashSet
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
restrictListSize
:: forall env err m.
(HasNodeStory env err m, FlowCmdM env err m)
=> CorpusId
-> ListId
-> NgramsType
-> ListType
-> Int -- ^ number of ngram pairs to keep
-> m ()
restrictListSize corpusId listId ngramsType listType size = do
ngrams <- getListNgrams [listId] ngramsType
-- corpus_id <- getClosestParentIdByType
occurrences <- getOccByNgramsOnlyFast' corpusId
listId
ngramsType
(HashMap.keys ngrams)
ngrams' <- filterWith listType size occurrences ngrams
_ <- setListNgrams listId ngramsType ngrams'
pure ()
where filterWith :: ListType -> Int -> HashMap NgramsTerm Int
-> HashMap NgramsTerm NgramsRepoElement
-> m (Map NgramsTerm NgramsRepoElement)
filterWith listType' size occs ngrams =
HashMap.filter with ngrams
where
with nre = case (&&) <$> Just (nre^.nre_list == listType)
<*> ( HashMap.lookup (nre^.nre_root) occs
&&
-}
{-|
Module : Gargantext.Core.Text.List.Merge
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.List.Merge
where
import Control.Lens (view)
import Data.Map.Strict.Patch hiding (PatchMap)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.Prelude hiding (diff)
type List = Map NgramsTerm NgramsRepoElement
type Patch = PatchMap NgramsTerm (Replace (Maybe NgramsRepoElement))
-- Question: which version of Patching increment is using the FrontEnd ?
diffList :: Versioned List -> Versioned List -> Versioned Patch
diffList l1 l2 = Versioned (1 + view v_version l1)
(diff (view v_data l1) (view v_data l2))
-- | TODO
{-
commit :: ListId -> NgramsType -> Versioned Patch -> List -> List
commit = undefined
-}
{-|
Module : Gargantext.
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Gargantext.Text.Hetero where
import Data.List.Split as S
import Data.Map as M
import Data.Set as S
import Database.PostgreSQL.Simple as PGS
import GHC.Real as R
import Gargantext.Database.Admin.Gargandb
import Gargantext.Database.Admin.Private
import Gargantext.Database.Simple
import Gargantext.Text.Count (occurrences)
import Gargantext.Text.Words (cleanText)
import Opaleye.Internal.Column (Column)
import Opaleye.PGTypes (PGInt4)
--main = do
-- t <- getTextquery
-- print (Prelude.map (heterogeinity . concat) $ S.chunksOf 3 t)
-- heterogeinity sur concat texts
heterogeinity' :: Int -> Int -> Int -> IO [Integer]
heterogeinity' corpus_id limit x = do
t <- getAbstract corpus_id limit
Prelude.mapM (dicoStruct . occurrences) $ (S.chunksOf x) . cleanText $ concat t
heterogeinity'' :: Int -> Int -> Int -> IO [Integer]
heterogeinity'' corpus_id limit size = do
t <- getAbstract corpus_id limit
Prelude.mapM (dicoStruct . occurrences) $ (S.chunksOf size) . cleanText $ concat t
dicoStruct :: (Integral r, Monad m) => M.Map t r -> m r
dicoStruct dict_occ = do
let keys_size = toInteger $ length $ M.keys dict_occ
let total_occ = sum $ Prelude.map (\(x, y) -> y) $ M.toList dict_occ
pure $ div total_occ (fromIntegral keys_size)
-- heterogeinity sur UCT (Unité de Context Textuel)
heterogeinity :: [Char] -> IO Integer
heterogeinity string = do
let dict_occ = occurrences $ cleanText string
let keys_size = toInteger $ length $ M.keys dict_occ
let total_occ = sum $ Prelude.map (\(x, y) -> y) $ M.toList dict_occ
pure $ div total_occ (fromIntegral keys_size)
--computeHeterogeinity
-- :: Fractional t =>
-- Opaleye.Internal.Column.Column Opaleye.PGTypes.PGInt4
-- -> IO (t, Integer, Integer)
computeHeterogeinity corpus_id = do
c <- PGS.connect infoGargandb
t <- getText c (nodeHyperdataText corpus_id)
heterogeinity $ Prelude.concat t
main2 = do
let corpus_ids = [
("ALL", 272927) -- 73
,("Histoire", 1387736) -- 28
,("Sciences Po", 1296892) -- 37
,("Phylosophie", 1170004) -- 20
,("Psychologie", 1345852) -- 37
,("Sociologie", 1246452) -- 42
]
r <- Prelude.map computeHeterogeinity $ Prelude.map (\(t,id) -> id) corpus_ids
pure r
......@@ -8,7 +8,6 @@ Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
......
{-|
Module : Gargantext.Core.Text.Search
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This search Engine is first made to clean TSV file according to a query.
Starting from this model, a specific Gargantext engine will be made
(using more metrics scores/features).
-}
module Gargantext.Core.Text.Search where
import Data.Ix
import Data.SearchEngine
import Gargantext.Core.Text.Corpus.Parsers.TSV
import Gargantext.Core.Text.Terms.Mono (monoTexts)
import Gargantext.Core.Text.Terms.Mono.Stem as ST
import Gargantext.Prelude
-- Usefull to use stopwords
-- import Data.Set (Set)
-- import qualified Data.Set as Set
type DocId = Int
type DocSearchEngine = SearchEngine
TsvGargV3
DocId
DocField
NoFeatures
data DocField = TitleField
| AbstractField
deriving (Eq, Ord, Enum, Bounded, Ix, Show)
initialDocSearchEngine :: DocSearchEngine
initialDocSearchEngine =
initSearchEngine docSearchConfig defaultSearchRankParameters
docSearchConfig :: SearchConfig TsvGargV3 DocId DocField NoFeatures
docSearchConfig =
SearchConfig {
documentKey = d_docId,
extractDocumentTerms = extractTerms,
transformQueryTerm = normaliseQueryToken,
documentFeatureValue = const noFeatures
}
where
extractTerms :: TsvGargV3 -> DocField -> [Text]
extractTerms doc TitleField = monoTexts (d_title doc)
extractTerms doc AbstractField = monoTexts (d_abstract doc)
normaliseQueryToken :: Text -> DocField -> Text
normaliseQueryToken tok =
let tokStem = ST.stem ST.EN ST.PorterAlgorithm
in \field -> case field of
TitleField -> tokStem tok
AbstractField -> tokStem tok
defaultSearchRankParameters :: SearchRankParameters DocField NoFeatures
defaultSearchRankParameters =
SearchRankParameters {
paramK1,
paramB,
paramFieldWeights,
paramFeatureWeights = noFeatures,
paramFeatureFunctions = noFeatures,
paramResultsetSoftLimit = 2000,
paramResultsetHardLimit = 4000,
paramAutosuggestPrefilterLimit = 500,
paramAutosuggestPostfilterLimit = 500
}
where
paramK1 :: Float
paramK1 = 1.5
paramB :: DocField -> Float
paramB TitleField = 0.9
paramB AbstractField = 0.5
paramFieldWeights :: DocField -> Float
paramFieldWeights TitleField = 20
paramFieldWeights AbstractField = 5
{-|
Module : Gargantext.Core.Text.Ngrams.Token
Description : Tokens and tokenizing a text
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
In computer science, lexical analysis, lexing or tokenization is the
process of converting a sequence of characters (such as in a computer
program or web page) into a sequence of tokens (strings with an assigned
and thus identified meaning).
Source: https://en.wikipedia.org/wiki/Tokenize
-}
module Gargantext.Core.Text.Terms.Mono.Token (tokenize)
where
import Data.Text (Text)
import qualified Gargantext.Core.Text.Terms.Mono.Token.En as En
-- | Contexts depend on the lang
--import Gargantext.Core (Lang(..))
type Token = Text
-- >>> tokenize "A rose is a rose is a rose."
-- ["A","rose","is","a","rose","is","a","rose", "."]
tokenize :: Text -> [Token]
tokenize = En.tokenize
--data Context = Letter | Word | Sentence | Line | Paragraph
--
--tokenize' :: Lang -> Context -> [Token]
--tokenize' = undefined
--
{-# LANGUAGE TypeOperators #-}
module Gargantext.Core.Text.Upload
( Host(..)
, DocId(..)
, Data(..)
, ContentType (..)
, ethercalc
, codimd
)
where
import qualified Data.Map.Strict as Map
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Gargantext.Utils.Servant (TSV, Markdown)
import Network.HTTP.Client (newManager, Request(..))
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Protolude
import Servant.API
import Servant.Client
newtype Host = Host { fromHost :: Text }
newtype DocId = DocId { fromDocId :: Text }
newtype Data = Data { fromData :: Text }
data ContentType a =
CTPlain a
| CTTSV a
-- TODO SocialCalc, Excel XML ?
instance MimeRender TSV Data where
mimeRender p (Data d) = mimeRender p d
instance MimeRender PlainText Data where
mimeRender p (Data d) = mimeRender p d
instance ToHttpApiData DocId where
toUrlPiece (DocId docId) = docId
-- https://github.com/audreyt/ethercalc/blob/master/API.md
type EthercalcAPI =
"_" :> (
-- plain text
ReqBody '[PlainText] Data
:> Post '[PlainText] Text
:<|>
Capture "docId" DocId
:> ReqBody '[PlainText] Data
:> Put '[PlainText] Text
-- tsv
:<|>
ReqBody '[TSV] Data
:> Post '[PlainText, TSV] Text
:<|>
Capture "docId" DocId
:> ReqBody '[TSV] Data
:> Put '[PlainText, TSV] Text
)
ethercalcAPI :: Proxy EthercalcAPI
ethercalcAPI = Proxy
ethercalcNewPlain :: Data -> ClientM Text
ethercalcUpdatePlain :: DocId -> Data -> ClientM Text
ethercalcNewTSV :: Data -> ClientM Text
ethercalcUpdateTSV :: DocId -> Data -> ClientM Text
ethercalcNewPlain :<|> ethercalcUpdatePlain
:<|> ethercalcNewTSV :<|> ethercalcUpdateTSV = client ethercalcAPI
------------------------------
-- | Create new or update existing Ethercalc document (depending on
-- `Maybe DocId` constructor). `Data` can be in various formats (TSV,
-- etc).
ethercalc :: Host -> Maybe DocId -> ContentType Data -> IO (Either ClientError Text)
ethercalc (Host host) mDocId ctD = do
manager' <- newManager tlsManagerSettings
let env = mkClientEnv manager' (BaseUrl Https (T.unpack host) 443 "")
case (mDocId, ctD) of
(Nothing, CTPlain d) -> runClientM (ethercalcNewPlain d) env
(Nothing, CTTSV d) -> runClientM (ethercalcNewTSV d) env
(Just docId, CTPlain d) -> runClientM (ethercalcUpdatePlain docId d) env
(Just docId, CTTSV d) -> runClientM (ethercalcUpdateTSV docId d) env
-----------------------------------
type CodiMDAPI =
"new" :> (
ReqBody '[Markdown] Data
:> Post '[Markdown] Text
)
instance MimeRender Markdown Data where
mimeRender p (Data d) = mimeRender p d
codimdAPI :: Proxy CodiMDAPI
codimdAPI = Proxy
codimdAPINew :: Data -> ClientM Text
codimdAPINew = client codimdAPI
-- | Create a new CodiMD document (with Markdown contents). Please
-- note that AFAIK CodiMD update is not supported, see
-- https://github.com/hackmdio/codimd/issues/1013
codimd :: Host -> Data -> IO (Either Text Text)
codimd (Host host) d = do
manager' <- newManager tlsManagerSettings
let env' = mkClientEnv manager' (BaseUrl Https (T.unpack host) 443 "")
let env = env' { makeClientRequest = \burl req -> (defaultMakeClientRequest burl req) { redirectCount = 0 } }
eRes <- runClientM (codimdAPINew d) env
pure $ case eRes of
-- NOTE We actually expect a redirect here (a 302 with the new
-- page's URL). Hence we expect a `Left FailureResponse` because
-- we have set `redirectCount = 0` above.
Left (FailureResponse _req (Response { responseHeaders })) ->
case Map.lookup "location" (Map.fromList $ toList responseHeaders) of
Nothing -> Left "Cannot find 'Location' header in response"
Just loc -> Right $ TE.decodeUtf8 loc
err -> Left $ "Error creating codimd document: " <> show err
{-|
Module : Gargantext.Core.Utils.Count
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
Inspired from Gabriel Gonzales, "beautiful folds" talk.
-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE BangPatterns #-}
module Gargantext.Core.Utils.Count (head, last, all, any, sum, product, length)
where
import Data.Functor
import Control.Applicative
import qualified Data.Foldable
import Data.Monoid
import Control.Lens (Getting, foldMapOf)
import Gargantext.Prelude hiding (head, sum, length)
data Fold i o = forall m . Monoid m => Fold (i -> m) (m -> o)
instance Functor (Fold i) where
fmap k (Fold tally summarize) = Fold tally (k . summarize)
instance Applicative (Fold i) where
pure o = Fold (\_ -> ()) (\_ -> o)
Fold tallyF summarizeF <*> Fold tallyX summarizeX = Fold tally summarize
where
tally i = (tallyF i, tallyX i)
summarize (nF, nX) = summarizeF nF (summarizeX nX)
focus :: (forall m . Monoid m => Getting m b a) -> Fold a o -> Fold b o
focus lens (Fold tally summarize) = Fold (foldMapOf lens tally) summarize
fold :: Fold i o -> [i] -> o
fold (Fold tally summarize) is = summarize (reduce (map tally is))
where
reduce = Data.Foldable.foldl' (<>) mempty
--
head :: Fold a (Maybe a)
head = Fold (First . Just) getFirst
last :: Fold a (Maybe a)
last = Fold (Last . Just) getLast
--
all :: (a -> Bool) -> Fold a Bool
all predicate = Fold (All . predicate) getAll
any :: (a -> Bool) -> Fold a Bool
any predicate = Fold (Any . predicate) getAny
--
sum :: Num n => Fold n n
sum = Fold Sum getSum
product :: Num n => Fold n n
product = Fold Product getProduct
length :: Num n => Fold i n
length = Fold (\_ -> Sum 1) getSum
-- | Average function optimized (/!\ need to test it)
data Average a = Average { numerator :: !a, denominator :: !Int }
instance Num a => Monoid (Average a) where
mempty = Average 0 0
mappend (Average xL nL) (Average xR nR) = Average (xL + xR) (nL + nR)
average :: Fractional a => Fold a a
average = Fold tally summarize
where
tally x = Average x 1
summarize (Average numerator denominator) =
numerator / fromIntegral denominator
{-|
Module : Gargantext.Core.Viz.Graph
Description : Graph utils
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Graph
where
import Data.Aeson qualified as DA
import Data.ByteString.Lazy as DBL (readFile, writeFile)
import Data.HashMap.Strict (HashMap, lookup)
import Data.Text qualified as Text
import Gargantext.API.Ngrams.Types (NgramsTerm(..), NgramsRepoElement(..), mSetToList)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Viz.Graph.Types
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Prelude
import Text.Read qualified as Text
-----------------------------------------------------------
graphV3ToGraph :: GraphV3 -> Graph
graphV3ToGraph (GraphV3 links nodes) = Graph { _graph_nodes = map nodeV32node nodes
, _graph_edges = zipWith linkV32edge [1..] links
, _graph_metadata = Nothing }
where
nodeV32node :: NodeV3 -> Node
nodeV32node (NodeV3 no_id' (AttributesV3 cl') no_s' no_lb')
= Node { node_size = no_s'
, node_type = NgramsTerms
, node_id = show no_id'
, node_label = no_lb'
, node_x_coord = 0
, node_y_coord = 0
, node_attributes = Attributes cl'
, node_children = []
}
linkV32edge :: Int -> EdgeV3 -> Edge
linkV32edge n (EdgeV3 eo_s' eo_t' eo_w') =
Edge { edge_source = show eo_s'
, edge_hidden = Just False
, edge_target = show eo_t'
, edge_weight = (Text.read $ Text.unpack eo_w') :: Double
, edge_confluence = 0.5
, edge_id = show n }
graphV3ToGraphWithFiles :: FilePath -> FilePath -> IO ()
graphV3ToGraphWithFiles g1 g2 = do
-- GraphV3 <- IO Fichier
graph <- DBL.readFile g1
let newGraph = case DA.decode graph :: Maybe GraphV3 of
Nothing -> panicTrace "no graph"
Just new -> new
DBL.writeFile g2 (DA.encode $ graphV3ToGraph newGraph)
readGraphFromJson :: MonadBase IO m => FilePath -> m (Maybe Graph)
readGraphFromJson fp = do
graph <- liftBase $ DBL.readFile fp
pure $ DA.decode graph
-----------------------------------------------------------
mergeGraphNgrams :: Graph -> Maybe (HashMap NgramsTerm NgramsRepoElement) -> Graph
mergeGraphNgrams g Nothing = g
mergeGraphNgrams graph@(Graph { _graph_nodes }) (Just listNgrams) = set graph_nodes newNodes graph
where
newNodes = insertChildren <$> _graph_nodes
insertChildren (Node { node_label, .. }) = Node { node_children = children', .. }
where
-- lookup (NgramsTerm node_label) in listNgrams, then fetch (NgramsRepoElement _nre_children)
children' = case (lookup (NgramsTerm node_label) listNgrams) of
Nothing -> []
Just (NgramsRepoElement { _nre_children }) -> unNgramsTerm <$> mSetToList _nre_children
......@@ -9,8 +9,6 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Database.Action.Flow.Utils
( docNgrams
, documentIdWithNgrams
......
{-|
Module : Gargantext.Database.Lists
Description : Main requests of Node to the database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Action.Metrics.Lists
where
-- import Gargantext.API.Ngrams.Types (TabType(..))
-- import Gargantext.Core.Text.Metrics (Scored(..))
-- import Gargantext.Core.Types -- (NodePoly(..), NodeCorpus, ListId)
-- import Gargantext.Core.Types.Query (Limit)
-- import Gargantext.Database.Action.Flow.Types (FlowCmdM)
-- import Gargantext.Prelude hiding (sum, head)
-- import Prelude hiding (null, id, map, sum)
-- import qualified Data.HashMap.Strict as HashMap
-- import qualified Data.Map.Strict as Map
-- import qualified Data.Vector as Vec
-- import qualified Gargantext.Database.Action.Metrics as Metrics
{-
trainModel :: FlowCmdM env ServantErr m
=> Username -> m Score
trainModel u = do
rootId <- _node_id <$> getRoot u
(id:ids) <- getCorporaWithParentId rootId
(s,_model) <- case length ids >0 of
True -> grid 100 150 (getMetrics
False -> panic "Gargantext.Database.Lists.trainModel : not enough corpora"
--}
-- getMetrics' :: FlowCmdM env err m
-- => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-- -> m (Map.Map ListType [Vec.Vector Double])
-- getMetrics' cId maybeListId tabType maybeLimit = do
-- (ngs', scores) <- Metrics.getMetrics cId maybeListId tabType maybeLimit
-- let
-- metrics = map (\(Scored t s1 s2) -> (listType t ngs', [Vec.fromList [s1,s2]])) scores
-- listType t m = maybe (panic errorMsg) fst $ HashMap.lookup t m
-- errorMsg = "API.Node.metrics: key absent"
-- {-
-- _ <- Learn.grid 100 110 metrics' metrics'
-- --}
-- pure $ Map.fromListWith (<>) $ Vec.toList metrics
......@@ -8,9 +8,6 @@ Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FunctionalDependencies #-}
......
{-|
Module : Gargantext.Database.Action.TSQuery
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Gargantext.Database.Action.TSQuery where
import Data.Aeson
import Data.Maybe
import Data.String (IsString(..))
import Database.PostgreSQL.Simple (Query)
import Database.PostgreSQL.Simple.ToField
import Gargantext.Core
import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Types
import Gargantext.Core.Types.Query (Limit, Offset)
import Gargantext.Database.Prelude (DBCmd, runPGSQuery)
import Gargantext.Prelude
newtype TSQuery = UnsafeTSQuery [Text]
-- | TODO [""] -> panic "error"
toTSQuery :: [Text] -> TSQuery
toTSQuery txt = UnsafeTSQuery $ map (stem EN GargPorterAlgorithm) txt
instance IsString TSQuery
where
fromString = UnsafeTSQuery . words . cs
instance ToField TSQuery
where
toField (UnsafeTSQuery xs)
= Many $ intersperse (Plain " && ")
$ map (\q -> Many [ Plain "plainto_tsquery("
, Escape (cs q)
, Plain ")"
]
) xs
data Order = Asc | Desc
instance ToField Order
where
toField Asc = Plain "ASC"
toField Desc = Plain "DESC"
-- TODO
-- FIX fav
-- ADD ngrams count
-- TESTS
textSearchQuery :: Query
textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
\ , n.hyperdata->'title' \
\ , n.hyperdata->'source' \
\ , n.hyperdata->'authors' \
\ , COALESCE(nn.score,null) \
\ FROM nodes n \
\ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
\ WHERE \
\ n.search @@ (?::tsquery) \
\ AND (n.parent_id = ? OR nn.node1_id = ?) \
\ AND n.typename = ? \
\ ORDER BY n.hyperdata -> 'publication_date' ? \
\ offset ? limit ?;"
-- | Text Search Function for Master Corpus
-- TODO : text search for user corpus
-- Example:
-- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
-- textSearchTest pId q = textSearch q pId 5 0 Asc
textSearch :: HasDBid NodeType
=> TSQuery -> ParentId
-> Limit -> Offset -> Order
-> DBCmd err [(Int,Value,Value,Value, Value, Maybe Int)]
textSearch q p l o ord' = runPGSQuery textSearchQuery (q,p,p,typeId,ord',o,l)
where
typeId = toDBid NodeDocument
{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
{-|
Module : Gargantext.Database.Bashql
Description : BASHQL to deal with Gargantext Database.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
* BASHQL is a Domain Specific Language to deal with the Database
* BASHQL = functional (Bash * SQL)
* Which language to chose when working with a database ? To make it
simple, instead of all common Object Relational Mapping (ORM) [1]
strategy used nowadays inspired more by object logic than functional
logic, the semantics of BASHQL with focus on the function first.
* BASHQL focus on the function, i.e. use bash language function name,
and make it with SQL behind the scene. Then BASHQL is inspired more
by Bash language [2] than SQL and then follows its main commands as
specification and documentation.
* Main arguments:
1. Theoritical: database and FileSystems are each thought as a single
category, assumption based on theoretical work on databases by David Spivak [0].
2. Practical argument: basic bash commands are a daily practice among
developper community.
* How to help ?
1. Choose a command you like in Bash
2. Implement it in Haskell-SQL according to Gargantext Shema (Tree like
filesystem)
3. Translate it in BASHQL (follow previous implementations)
4. Make a pull request (enjoy the community)
* Implementation strategy: Functional adapations are made to the
gargantext languages options and SQL optimization are done continuously
during the project. For the Haskellish part, you may be inspired by
Turtle implementation written by Gabriel Gonzales [3] which shows how to
write Haskell bash translations.
* Semantics
- FileSystem is now a NodeSystem where each File is a Node in a Directed Graph (DG).
* References
[0] MIT Press has published "Category theory for the sciences". The book
can also be purchased on Amazon. Here are reviews by the MAA, by the
AMS, and by SIAM.
[1] https://en.wikipedia.org/wiki/Object-relational_mapping
[2] https://en.wikipedia.org/wiki/Bash_(Unix_shell)
[3] https://github.com/Gabriel439/Haskell-Turtle-Library
TODO-ACCESS: should the checks be done here or before.
-}
module Gargantext.Database.Admin.Bashql () {-( get
, ls
, home
, post
, del
, mv
, put
, rename
, tree
-- , mkCorpus, mkAnnuaire
)-}
where
import Control.Monad.Reader -- (Reader, ask)
import Data.List (last)
import Gargantext.Core.Types
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataAny)
import Gargantext.Database.Prelude (runOpaQuery, Cmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Update qualified as U (Update(..), update)
import Gargantext.Database.Schema.Node
import Gargantext.Prelude hiding (get)
-- List of NodeId
-- type PWD a = PWD UserId [a]
type PWD = [NodeId]
--data PWD' a = a | PWD' [a]
rename :: NodeId -> Text -> Cmd err [Int]
rename n t = U.update $ U.Rename n t
mv :: NodeId -> ParentId -> Cmd err [Int]
mv n p = U.update $ U.Move n p
-- | TODO get Children or Node
get :: PWD -> Cmd err [Node HyperdataAny]
get [] = pure []
get pwd = runOpaQuery $ selectNodesWithParentID (last pwd)
-- | Home, need to filter with UserId
{-
home :: Cmd err PWD
home = map _node_id <$> getNodesWithParentId 0 Nothing
-}
-- | ls == get Children
ls :: PWD -> Cmd err [Node HyperdataAny]
ls = get
tree :: PWD -> Cmd err [Node HyperdataAny]
tree p = do
ns <- get p
children <- mapM (\n -> get [_node_id n]) ns
pure $ ns <> concat children
-- | TODO
post :: PWD -> [NodeWrite] -> Cmd err Int64
post [] _ = pure 0
post _ [] = pure 0
post pth ns = insertNodesWithParent (Just $ last pth) ns
--postR :: PWD -> [NodeWrite'] -> Cmd err [Int]
--postR [] _ _ = pure [0]
--postR _ [] _ = pure [0]
--postR pth ns c = mkNodeR (last pth) ns c
-- | WIP
-- rm : mv to trash
-- del : empty trash
--rm :: PWD -> [NodeId] -> IO Int
--rm = del
del :: [NodeId] -> Cmd err Int
del [] = pure 0
del ns = deleteNodes ns
-- | TODO
put :: U.Update -> Cmd err [Int]
put = U.update
-- | TODO
-- cd (Home UserId) | (Node NodeId)
-- cd Path
-- jump NodeId
-- touch Dir
-- type Name = Text
......@@ -8,10 +8,6 @@ Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Database.Query.Filter
where
......
......@@ -11,9 +11,6 @@ Multiple Join functions with Opaleye.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
......
......@@ -8,11 +8,9 @@ Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Database.Query.Table.Context
where
......
......@@ -9,9 +9,6 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Database.Query.Table.ContextNodeNgrams
( module Gargantext.Database.Schema.ContextNodeNgrams
, queryContextNodeNgramsTable
......
......@@ -9,8 +9,6 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Database.Query.Table.ContextNodeNgrams2
( module Gargantext.Database.Schema.ContextNodeNgrams2
, insertContextNodeNgrams2
......
......@@ -8,8 +8,6 @@ Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FunctionalDependencies #-}
......
......@@ -8,9 +8,7 @@ Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE Arrows #-}
module Gargantext.Database.Query.Table.Node.Children
where
......
......@@ -11,8 +11,6 @@ Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE LambdaCase #-}
......
......@@ -10,8 +10,6 @@ Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE LambdaCase #-}
......
{-|
Module : Gargantext.Database.Schema.NodeNodeNgrams
Description : TODO: remove this module and table in database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Database.Query.Table.NodeNodeNgrams
( module Gargantext.Database.Schema.NodeNodeNgrams
, queryNodeNodeNgramsTable
, insertNodeNodeNgrams
)
where
import Gargantext.Database.Admin.Types.Node (pgNodeId)
import Gargantext.Database.Prelude (DBCmd, mkCmd)
import Gargantext.Database.Schema.Ngrams (pgNgramsTypeId)
import Gargantext.Database.Schema.NodeNodeNgrams
import Gargantext.Database.Schema.Prelude
import Prelude
queryNodeNodeNgramsTable :: Query NodeNodeNgramsRead
queryNodeNodeNgramsTable = selectTable nodeNodeNgramsTable
-- | Insert utils
insertNodeNodeNgrams :: [NodeNodeNgrams] -> DBCmd err Int
insertNodeNodeNgrams = insertNodeNodeNgramsW
. map (\(NodeNodeNgrams n1 n2 ng nt w) ->
NodeNodeNgrams (pgNodeId n1)
(pgNodeId n2)
(sqlInt4 ng)
(pgNgramsTypeId nt)
(sqlDouble w)
)
insertNodeNodeNgramsW :: [NodeNodeNgramsWrite] -> DBCmd err Int
insertNodeNodeNgramsW nnnw =
mkCmd $ \c -> fromIntegral <$> runInsert_ c insertNothing
where
insertNothing = (Insert { iTable = nodeNodeNgramsTable
, iRows = nnnw
, iReturning = rCount
, iOnConflict = (Just DoNothing)
})
{-|
Module : Gargantext.Database.Schema.Node_NodeNgrams_NodeNgrams
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
lgrams: listed ngrams
Node_NodeNgrams_NodeNgrams table is used to group ngrams
- first NodeId :: Referential / space node (corpus)
- NodeNgrams where Node is List
- lgrams1_id, lgrams2_id where all lgrams2_id will be added to lgrams1_id
- weight: score the relation
Next Step benchmark:
- recursive queries of postgres
- group with: https://en.wikipedia.org/wiki/Nested_set_model
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
where
{-
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Prelude (Cmd, runOpaQuery, mkCmd)
import Gargantext.Database.Admin.Types.Node (pgNodeId)
import Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
import Gargantext.Database.Schema.Node()
import Gargantext.Prelude
queryNode_NodeNgrams_NodeNgrams_Table :: Query Node_NodeNgrams_NodeNgrams_Read
queryNode_NodeNgrams_NodeNgrams_Table = selectTable node_NodeNgrams_NodeNgrams_Table
-- | Select NodeNgramsNgrams
-- TODO not optimized (get all ngrams without filters)
_node_Node_NodeNgrams_NodeNgrams :: Cmd err [Node_NodeNgrams_NodeNgrams]
_node_Node_NodeNgrams_NodeNgrams = runOpaQuery queryNode_NodeNgrams_NodeNgrams_Table
-- TODO: Add option on conflict
insert_Node_NodeNgrams_NodeNgrams :: [Node_NodeNgrams_NodeNgrams] -> Cmd err Int64
insert_Node_NodeNgrams_NodeNgrams = insert_Node_NodeNgrams_NodeNgrams_W
. map (\(Node_NodeNgrams_NodeNgrams n ng1 ng2 maybeWeight) ->
Node_NodeNgrams_NodeNgrams (pgNodeId n )
(sqlInt4 <$> ng1)
(sqlInt4 ng2)
(sqlDouble <$> maybeWeight)
)
insert_Node_NodeNgrams_NodeNgrams_W :: [Node_NodeNgrams_NodeNgrams_Write] -> Cmd err Int64
insert_Node_NodeNgrams_NodeNgrams_W ns =
mkCmd $ \c -> runInsert_ c Insert { iTable = node_NodeNgrams_NodeNgrams_Table
, iRows = ns
, iReturning = rCount
, iOnConflict = (Just DoNothing)
}
-}
{-|
Module : Gargantext.Database.Schema.NodesNgramsRepo
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Database.Query.Table.NodesNgramsRepo
where
{-
import Gargantext.Database.Schema.Prelude
import Gargantext.API.Ngrams (NgramsStatePatch)
import Gargantext.Database.Schema.NodesNgramsRepo
import Gargantext.Database.Prelude (mkCmd, Cmd, runOpaQuery)
import Gargantext.Prelude
selectPatches :: Query RepoDbRead
selectPatches = proc () -> do
repos <- selectTable repoTable -< ()
returnA -< repos
_selectRepo :: Cmd err [RepoDbNgrams]
_selectRepo = runOpaQuery selectPatches
_insertRepos :: [NgramsStatePatch] -> Cmd err Int64
_insertRepos ns = mkCmd $ \conn -> runInsert_ conn $ Insert repoTable (toWrite ns) rCount Nothing
where
toWrite :: [NgramsStatePatch] -> [RepoDbWrite]
toWrite = undefined
--ns' = map (\(RepoDbNgrams v ps) -> RepoDbWrite (sqlInt4 v) (pgJSONB ps)) ns
-}
......@@ -8,8 +8,6 @@ Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FunctionalDependencies #-}
......
......@@ -9,8 +9,6 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
......
......@@ -9,8 +9,6 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
......
{-|
Module : Gargantext.Database.Schema.NgramsPostag
Description : Ngrams connection to the Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Each Ngrams has a pos-tagging version to ease the default groups of
ngrams in NgramsTerm Lists.
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NgramsPostag
where
import Database.PostgreSQL.Simple qualified as PGS
import Gargantext.Database.Schema.Prelude ( Column, SqlInt4, SqlText, ToField(toField), toRow )
import Gargantext.Prelude
data NgramsPostagPoly id
lang_id
algo_id
postag
ngrams_id
lemm_id
score
= NgramsPostagPoly { _ngramsPostag_id :: !id
, _ngramsPostag_lang_id :: !lang_id
, _ngramsPostag_algo_id :: !algo_id
, _ngramsPostag_postag :: !postag
, _ngramsPostag_ngrams_id :: !ngrams_id
, _ngramsPostag_lemm_id :: !lemm_id
, _ngramsPostag_score :: !score
} deriving (Show)
------------------------------------------------------------------------
data PosTag = PosTag { unPosTag :: !Text }
| NER { unNER :: !Text } -- TODO
------------------------------------------------------------------------
-- type NgramsPostag = NgramsPostagPoly (Maybe Int) Lang PostTagAlgo (Maybe PosTag) NgramsTerm NgramsTerm (Maybe Int)
type NgramsPostagDB = NgramsPostagPoly (Maybe Int) Int Int (Maybe Text) Int Int Int
------------------------------------------------------------------------
type NgramsPosTagWrite = NgramsPostagPoly (Maybe (Column SqlInt4))
(Column SqlInt4)
(Column SqlInt4)
(Maybe (Column SqlText))
(Column SqlInt4)
(Column SqlInt4)
(Maybe (Column SqlInt4))
type NgramsPosTagRead = NgramsPostagPoly (Column SqlInt4)
(Column SqlInt4)
(Column SqlInt4)
(Column SqlText)
(Column SqlInt4)
(Column SqlInt4)
(Column SqlInt4)
makeLenses ''NgramsPostagPoly
instance PGS.ToRow NgramsPostagDB where
toRow (NgramsPostagPoly f0 f1 f2 f3 f4 f5 f6) = [ toField f0
, toField f1
, toField f2
, toField f3
, toField f4
, toField f5
, toField f6
]
......@@ -8,9 +8,6 @@ Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FunctionalDependencies #-}
......
......@@ -11,8 +11,6 @@ Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
......
......@@ -9,8 +9,6 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
......
......@@ -11,8 +11,6 @@ NodeNgrams register Context of Ngrams (named Cgrams then)
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
......
{-|
Module : Gargantext.Database.Schema.NodeNodeNgrams
Description : TODO: remove this module and table in database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NodeNodeNgrams
where
import Prelude
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Schema.Ngrams (NgramsTypeId, NgramsId)
import Gargantext.Database.Admin.Types.Node
data NodeNodeNgramsPoly n1 n2 ngrams_id ngt w
= NodeNodeNgrams { _nnng_node1_id :: !n1
, _nnng_node2_id :: !n2
, _nnng_ngrams_id :: !ngrams_id
, _nnng_ngramsType :: !ngt
, _nnng_weight :: !w
} deriving (Show)
type NodeNodeNgramsWrite =
NodeNodeNgramsPoly (Column SqlInt4 )
(Column SqlInt4 )
(Column SqlInt4 )
(Column SqlInt4 )
(Column SqlFloat8)
type NodeNodeNgramsRead =
NodeNodeNgramsPoly (Column SqlInt4 )
(Column SqlInt4 )
(Column SqlInt4 )
(Column SqlInt4 )
(Column SqlFloat8)
type NodeNodeNgrams =
NodeNodeNgramsPoly CorpusId DocId NgramsId NgramsTypeId Double
$(makeAdaptorAndInstance "pNodeNodeNgrams" ''NodeNodeNgramsPoly)
makeLenses ''NodeNodeNgramsPoly
nodeNodeNgramsTable :: Table NodeNodeNgramsWrite NodeNodeNgramsRead
nodeNodeNgramsTable = Table "node_node_ngrams"
( pNodeNodeNgrams NodeNodeNgrams
{ _nnng_node1_id = requiredTableField "node1_id"
, _nnng_node2_id = requiredTableField "node2_id"
, _nnng_ngrams_id = requiredTableField "ngrams_id"
, _nnng_ngramsType = requiredTableField "ngrams_type"
, _nnng_weight = requiredTableField "weight"
}
)
{-|
Module : Gargantext.Database.Schema.NodeNodeNgrams
Description : TODO: remove this module and table in database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NodeNodeNgrams2
where
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Schema.NodeNgrams (NodeNgramsId)
import Gargantext.Database.Admin.Types.Node
import Prelude
data NodeNodeNgrams2Poly node_id nodengrams_id w
= NodeNodeNgrams2 { _nnng2_node_id :: !node_id
, _nnng2_nodengrams_id :: !nodengrams_id
, _nnng2_weight :: !w
} deriving (Show)
type NodeNodeNgrams2Write =
NodeNodeNgrams2Poly (Column SqlInt4 )
(Column SqlInt4 )
(Column SqlFloat8)
type NodeNodeNgrams2Read =
NodeNodeNgrams2Poly (Column SqlInt4 )
(Column SqlInt4 )
(Column SqlFloat8)
type NodeNodeNgrams2 =
NodeNodeNgrams2Poly DocId NodeNgramsId Double
$(makeAdaptorAndInstance "pNodeNodeNgrams2" ''NodeNodeNgrams2Poly)
makeLenses ''NodeNodeNgrams2Poly
nodeNodeNgrams2Table :: Table NodeNodeNgrams2Write NodeNodeNgrams2Read
nodeNodeNgrams2Table = Table "node_node_ngrams2"
( pNodeNodeNgrams2 NodeNodeNgrams2
{ _nnng2_node_id = requiredTableField "node_id"
, _nnng2_nodengrams_id = requiredTableField "nodengrams_id"
, _nnng2_weight = requiredTableField "weight"
}
)
{-|
Module : Gargantext.Database.Schema.Node_NodeNgrams_NodeNgrams
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
lgrams: listed ngrams
Node_NodeNgrams_NodeNgrams table is used to group ngrams
- first NodeId :: Referential / space node (corpus)
- NodeNgrams where Node is List
- lgrams1_id, lgrams2_id where all lgrams2_id will be added to lgrams1_id
- weight: score the relation
Next Step benchmark:
- recursive queries of postgres
- group with: https://en.wikipedia.org/wiki/Nested_set_model
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
where
{-
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Schema.Node()
import Gargantext.Prelude
data Node_NodeNgrams_NodeNgrams_Poly node_id nng1_id nng2_id weight =
Node_NodeNgrams_NodeNgrams { _nnn_node_id :: !node_id
, _nnn_nng1_id :: !nng1_id
, _nnn_nng2_id :: !nng2_id
, _nnn_weight :: !weight
} deriving (Show)
type Node_NodeNgrams_NodeNgrams_Write =
Node_NodeNgrams_NodeNgrams_Poly
(Column SqlInt4 )
(Maybe (Column SqlInt4 ))
(Column SqlInt4 )
(Maybe (Column SqlFloat8))
type Node_NodeNgrams_NodeNgrams_Read =
Node_NodeNgrams_NodeNgrams_Poly
(Column SqlInt4 )
(Column SqlInt4 )
(Column SqlInt4 )
(Column SqlFloat8)
type ListNgramsId = Int
type Node_NodeNgrams_NodeNgrams =
Node_NodeNgrams_NodeNgrams_Poly CorpusId (Maybe ListNgramsId) ListNgramsId (Maybe Double)
$(makeAdaptorAndInstance "pNode_NodeNgrams_NodeNgrams"
''Node_NodeNgrams_NodeNgrams_Poly)
$(makeLensesWith abbreviatedFields
''Node_NodeNgrams_NodeNgrams_Poly)
node_NodeNgrams_NodeNgrams_Table :: Table Node_NodeNgrams_NodeNgrams_Write Node_NodeNgrams_NodeNgrams_Read
node_NodeNgrams_NodeNgrams_Table =
Table "node_nodengrams_nodengrams"
( pNode_NodeNgrams_NodeNgrams Node_NodeNgrams_NodeNgrams
{ _nnn_node_id = requiredTableField "node_id"
, _nnn_nng1_id = optionalTableField "node_ngrams1_id"
, _nnn_nng2_id = requiredTableField "node_ngrams2_id"
, _nnn_weight = optionalTableField "weight"
}
)
instance DefaultFromField SqlInt4 (Maybe Int) where
defaultFromField = fromPGSFromField
instance DefaultFromField SqlFloat8 (Maybe Double) where
defaultFromField = fromPGSFromField
-}
{-|
Module : Gargantext.Database.Schema.NodesNgramsRepo
Description : NodeNgram for Ngram indexation or Lists
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NodesNgramsRepo
where
{-
import Data.Map.Strict.Patch (PatchMap)
import Gargantext.Database.Schema.Prelude
import Gargantext.API.Ngrams.Types (NgramsTablePatch)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Prelude
data RepoDbPoly version patches
= RepoDbNgrams { _rdp_version :: !version
, _rdp_patches :: !patches
} deriving (Show)
type RepoDbWrite
= RepoDbPoly (Column SqlInt4)
(Column SqlJsonb)
type RepoDbRead
= RepoDbPoly (Column SqlInt4)
(Column SqlJsonb)
type RepoDbNgrams = RepoDbPoly Int NgramsStatePatch
$(makeAdaptorAndInstance "pRepoDbNgrams" ''RepoDbPoly)
makeLenses ''RepoDbPoly
instance DefaultFromField SqlJsonb
(PatchMap NgramsType
(PatchMap NodeId NgramsTablePatch))
where
defaultFromField = fromPGSFromField
repoTable :: Table RepoDbWrite RepoDbRead
repoTable = Table "nodes_ngrams_repo"
(pRepoDbNgrams RepoDbNgrams
{ _rdp_version = requiredTableField "version"
, _rdp_patches = requiredTableField "patches"
}
)
-}
......@@ -10,8 +10,6 @@ Portability : POSIX
Functions to deal with users, database side.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE DeriveAnyClass #-}
......
{-|
Module : Gargantext.Utils.JohnSnow
Description : John Snow NLP API connexion
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Utils.JohnSnowNLP where
import Control.Lens ( FunctorWithIndex(imap) )
import Data.Aeson (encode, Value(..), (.:), (.:?))
import Data.Aeson.Types (prependFailure, typeMismatch)
import Data.List.Safe qualified as LS
import Data.Map.Strict qualified as Map
import Data.Text (unpack)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import Gargantext.Core.Types (POS(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude hiding (All)
import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
import Prelude (userError)
data JSSpell = JSPOS Lang | JSLemma Lang
deriving (Show)
instance ToJSON JSSpell where
toJSON (JSPOS DE) = "de.pos"
toJSON (JSPOS EL) = "el.pos"
toJSON (JSPOS EN) = "en.pos"
toJSON (JSPOS ES) = "es.pos"
toJSON (JSPOS FR) = "fr.pos"
toJSON (JSPOS IT) = "it.pos"
toJSON (JSPOS PL) = "pl.pos"
toJSON (JSPOS PT) = "pt.pos"
toJSON (JSPOS RU) = "ru.pos"
toJSON (JSPOS UK) = "uk.pos"
toJSON (JSPOS ZH) = "zh.pos"
toJSON (JSLemma DE) = "de.lemma"
toJSON (JSLemma EL) = "el.lemma"
toJSON (JSLemma EN) = "en.lemma"
toJSON (JSLemma ES) = "es.lemma"
toJSON (JSLemma FR) = "fr.lemma"
toJSON (JSLemma IT) = "it.lemma"
toJSON (JSLemma PL) = "pl.lemma"
toJSON (JSLemma PT) = "pt.lemma"
toJSON (JSLemma RU) = "ru.lemma"
toJSON (JSLemma UK) = "uk.lemma"
toJSON (JSLemma ZH) = "zh.lemma"
instance FromJSON JSSpell where
parseJSON (String "de.pos") = pure $ JSPOS DE
parseJSON (String "en.pos") = pure $ JSPOS EN
parseJSON (String "el.pos") = pure $ JSPOS EL
parseJSON (String "es.pos") = pure $ JSPOS ES
parseJSON (String "fr.pos") = pure $ JSPOS FR
parseJSON (String "it.pos") = pure $ JSPOS IT
parseJSON (String "pl.pos") = pure $ JSPOS PL
parseJSON (String "pt.pos") = pure $ JSPOS PT
parseJSON (String "ru.pos") = pure $ JSPOS RU
parseJSON (String "uk.pos") = pure $ JSPOS UK
parseJSON (String "zh.pos") = pure $ JSPOS ZH
parseJSON (String "de.lemma") = pure $ JSLemma DE
parseJSON (String "en.lemma") = pure $ JSLemma EN
parseJSON (String "el.lemma") = pure $ JSLemma EL
parseJSON (String "es.lemma") = pure $ JSLemma ES
parseJSON (String "fr.lemma") = pure $ JSLemma FR
parseJSON (String "it.lemma") = pure $ JSLemma IT
parseJSON (String "pl.lemma") = pure $ JSLemma PL
parseJSON (String "pt.lemma") = pure $ JSLemma PT
parseJSON (String "ru.lemma") = pure $ JSLemma RU
parseJSON (String "uk.lemma") = pure $ JSLemma UK
parseJSON (String "zh.lemma") = pure $ JSLemma ZH
parseJSON s =
prependFailure "parsing spell failed, "
(typeMismatch "Spell" s)
data JSRequest =
JSRequest { _jsRequest_data :: !Text
, _jsRequest_format :: !Text
, _jsRequest_grouping :: !(Maybe Text)
, _jsRequest_spell :: !JSSpell }
deriving (Show)
-- "spell" options:
-- https://nlu.johnsnowlabs.com/docs/en/spellbook
deriveJSON (unPrefix "_jsRequest_") ''JSRequest
-- | JohnSnow NLP works via asynchronous tasks: send a query and get a
-- task in response. One must poll for task status and then get it's
-- result.
data JSAsyncTask =
JSAsyncTask { _jsAsyncTask_uuid :: !Text }
deriving (Show)
deriveJSON (unPrefix "_jsAsyncTask_") ''JSAsyncTask
-- | Task status.
data JSAsyncTaskStatus =
JSAsyncTaskStatus { _jsAsyncTaskStatus_code :: !Text
, _jsAsyncTaskStatus_message :: !(Maybe Text) }
deriving (Show)
taskReady :: JSAsyncTaskStatus -> Bool
taskReady (JSAsyncTaskStatus { .. }) = _jsAsyncTaskStatus_code == "success"
--deriveJSON (unPrefix "_jsAsyncTaskStatus_") ''JSAsyncTaskStatus
instance FromJSON JSAsyncTaskStatus where
parseJSON (Object v) = do
status <- v .: "status"
code <- status .: "code"
message <- status .:? "message"
pure $ JSAsyncTaskStatus { _jsAsyncTaskStatus_code = code
, _jsAsyncTaskStatus_message = message }
parseJSON s =
prependFailure "parsing status failed"
(typeMismatch "status" s)
-- | Response for our query. The `Maybe` types are here because we
-- combine 2 types of responses into one: `pos` and `lemma`.
data JSAsyncTaskResponse =
JSAsyncTaskResponse { _jsAsyncTaskResponse_index :: Map Text Int
, _jsAsyncTaskResponse_document :: Map Text Text
, _jsAsyncTaskResponse_sentence :: Map Text [Text]
, _jsAsyncTaskResponse_lem :: Maybe (Map Text [Text])
, _jsAsyncTaskResponse_pos :: Maybe (Map Text [POS])
, _jsAsyncTaskResponse_token :: Map Text [Text] }
deriving (Show)
deriveJSON (unPrefix "_jsAsyncTaskResponse_") ''JSAsyncTaskResponse
makeLenses ''JSAsyncTaskResponse
-- | We need to combine 2 responses: `pos` and `lemma` spells.
jsAsyncTaskResponseToSentences :: JSAsyncTaskResponse -> JSAsyncTaskResponse -> PosSentences
jsAsyncTaskResponseToSentences jsPos jsLemma =
PosSentences { _sentences }
where
_sentences = Map.elems $ Map.mapWithKey mapSentence (jsPos ^. jsAsyncTaskResponse_sentence)
mapSentence idx sentence = Sentence { _sentenceIndex = sIndex
, _sentenceTokens = sTokens }
where
sIndex = Map.findWithDefault (-1) idx (jsPos ^. jsAsyncTaskResponse_index)
lemmas = fromMaybe [] $
if Just sentence == Map.lookup idx (jsLemma ^. jsAsyncTaskResponse_sentence) then
Map.lookup idx $ fromMaybe Map.empty (jsLemma ^. jsAsyncTaskResponse_lem)
else
Nothing
sTokens = imap mapPosToken $ zip (Map.findWithDefault [] idx $ fromMaybe Map.empty (jsPos ^. jsAsyncTaskResponse_pos))
(Map.findWithDefault [] idx (jsPos ^. jsAsyncTaskResponse_token))
mapPosToken idx' (pos, token) = Token { _tokenIndex = -1
, _tokenWord = token
, _tokenOriginalText = ""
, _tokenLemma = fromMaybe "" $ (LS.!!) lemmas idx'
, _tokenCharacterOffsetBegin = -1
, _tokenCharacterOffsetEnd = -1
, _tokenPos = Just pos
, _tokenNer = Nothing
, _tokenBefore = Nothing
, _tokenAfter = Nothing }
-----------------------------------------------------
jsRequest :: Text -> JSSpell -> IO JSAsyncTask
jsRequest t s = do
url <- parseRequest $ "POST http://localhost:5000/api/results"
let jsReq = JSRequest { _jsRequest_data = t
, _jsRequest_format = "text"
, _jsRequest_grouping = Nothing
, _jsRequest_spell = s }
let request = setRequestBodyLBS (encode jsReq) url
task <- httpJSON request :: IO (Response JSAsyncTask)
pure $ getResponseBody task
jsTaskStatus :: JSAsyncTask -> IO JSAsyncTaskStatus
jsTaskStatus (JSAsyncTask uuid) = do
url <- parseRequest $ unpack $ "GET http://localhost:5000/api/results/" <> uuid <> "/status"
status <- httpJSON url
pure $ getResponseBody status
jsTaskResponse :: JSAsyncTask -> IO JSAsyncTaskResponse
jsTaskResponse (JSAsyncTask uuid) = do
url <- parseRequest $ unpack $ "GET http://localhost:5000/api/results/" <> uuid
result <- httpJSON url
pure $ getResponseBody result
waitForJsTask :: HasCallStack => JSAsyncTask -> IO JSAsyncTaskResponse
waitForJsTask jsTask = wait' 0
where
wait' :: Int -> IO JSAsyncTaskResponse
wait' counter = do
status <- jsTaskStatus jsTask
if taskReady status then
jsTaskResponse jsTask
else
if counter > 60 then
throwIO $ withStacktrace $ userError "waited for 1 minute and still no answer from JohnSnow NLP"
else do
-- printDebug "[waitForJsTask] task not ready, waiting" counter
_ <- threadDelay $ 1000000*1
wait' $ counter + 1
getPosTagAndLems :: Lang -> Text -> IO PosSentences
getPosTagAndLems l t = do
jsPosTask <- jsRequest t (JSPOS l)
jsLemmaTask <- jsRequest t (JSLemma l)
-- wait for both tasks
jsPos <- waitForJsTask jsPosTask
jsLemma <- waitForJsTask jsLemmaTask
pure $ jsAsyncTaskResponseToSentences jsPos jsLemma
nlp :: Lang -> Text -> IO PosSentences
nlp = getPosTagAndLems
......@@ -3,12 +3,9 @@
- "HSvm-0.1.1.3.22"
- "KMP-0.2.0.0"
- "MissingH-1.4.3.0"
- "Unique-0.4.7.8"
- "deferred-folds-0.9.18.6"
- "fclabels-2.0.5.1"
- "full-text-search-0.2.1.4"
- "fullstop-0.1.4"
- "ghc-parser-0.2.6.0"
- "hgal-2.0.0.2"
- "hsparql-0.3.8"
- "hspec-2.11.1"
......@@ -16,10 +13,6 @@
- "hspec-discover-2.11.1"
- "hspec-expectations-0.8.3"
- "http-accept-0.2"
- "ihaskell-0.11.0.0"
- "ipython-kernel-0.11.0.0"
- "located-base-0.1.1.1"
- "logging-effect-1.3.12"
- "morpheus-graphql-0.24.3"
- "morpheus-graphql-app-0.24.3"
- "morpheus-graphql-client-0.24.3"
......@@ -28,7 +21,6 @@
- "morpheus-graphql-core-0.24.3"
- "morpheus-graphql-server-0.24.3"
- "morpheus-graphql-subscriptions-0.24.3"
- "morpheus-graphql-tests-0.24.3"
- "opaleye-0.9.6.1"
- "primitive-unlifted-2.1.0.0"
- "rake-0.0.1"
......@@ -146,7 +138,7 @@
git: "https://gitlab.iscpif.fr/gargantext/crawlers/istex.git"
subdirs:
- .
- commit: c673924bce3283c4fb2d1b3a2f95eb3f1a9cd0b0
- commit: d2df0130575cfd32d6863d77e2ce34c48a1c32fa
git: "https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git"
subdirs:
- .
......@@ -276,8 +268,6 @@ flags:
semigroups: true
statevar: true
tagged: true
cpphs:
"old-locale": false
criterion:
"embed-data-files": false
fast: false
......@@ -330,13 +320,6 @@ flags:
gargantext:
"no-phylo-debug-logs": false
"test-crypto": false
"ghc-lib-parser":
"threaded-rts": true
"ghc-lib-parser-ex":
auto: false
"no-ghc-lib": false
"ghc-syntax-highlighter":
dev: false
graphviz:
"test-parsing": false
hashable:
......@@ -349,11 +332,6 @@ flags:
portable: false
sse42: false
"unsafe-tricks": true
hlint:
"ghc-lib": true
gpl: true
hsyaml: false
threaded: true
hmatrix:
"disable-default-paths": false
"no-random_r": false
......@@ -380,13 +358,9 @@ flags:
profile: false
"hxt-regex-xmlschema":
profile: false
ihaskell:
"use-hlint": true
"integer-logarithms":
"check-bounds": false
"integer-gmp": true
"ipython-kernel":
examples: false
jose:
demos: false
"json-stream":
......@@ -453,10 +427,6 @@ flags:
bcrypt: true
pbkdf2: true
scrypt: true
path:
dev: false
"path-io":
dev: false
"postgresql-libpq":
"use-pkg-config": false
"pretty-simple":
......@@ -465,10 +435,6 @@ flags:
prettyprinter:
buildreadme: false
text: true
probability:
splitbase: true
pureMD5:
test: false
"quickcheck-instances":
"bytestring-builder": false
reflection:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment