From f5f72268c9d2830d07863f72d61e280e76d667ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Kaminski?= <pk@intrepidus.pl> Date: Tue, 18 Apr 2023 20:05:57 +0200 Subject: [PATCH] [nlp] export languages (graphql) based on ini file --- src/Gargantext/API/GraphQL.hs | 7 ++--- src/Gargantext/API/GraphQL/NLP.hs | 27 ++++++++++++++++--- src/Gargantext/Core.hs | 4 +-- .../Core/Text/Terms/Multi/PosTagging.hs | 10 +++---- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/src/Gargantext/API/GraphQL.hs b/src/Gargantext/API/GraphQL.hs index 4ab8b29b..44a65bb7 100644 --- a/src/Gargantext/API/GraphQL.hs +++ b/src/Gargantext/API/GraphQL.hs @@ -46,6 +46,7 @@ import qualified Gargantext.API.GraphQL.TreeFirstLevel as GQLTree import qualified Gargantext.API.GraphQL.Team as GQLTeam import Gargantext.API.Prelude (GargM, GargError) import Gargantext.API.Types +import Gargantext.Core.NLP (HasNLPServer) import Gargantext.Database.Prelude (CmdCommon) import Gargantext.Prelude import GHC.Generics (Generic) @@ -71,7 +72,7 @@ data Query m , contexts_for_ngrams :: GQLCTX.ContextsForNgramsArgs -> m [GQLCTX.ContextGQL] , imt_schools :: GQLIMT.SchoolsArgs -> m [GQLIMT.School] , job_logs :: GQLAT.JobLogArgs -> m (Map Int JobLog) - , languages :: GQLNLP.LanguagesArgs -> m [GQLNLP.Lang] + , languages :: GQLNLP.LanguagesArgs -> m GQLNLP.LanguagesMap , nodes :: GQLNode.NodeArgs -> m [GQLNode.Node] , node_parent :: GQLNode.NodeParentArgs -> m [GQLNode.Node] , user_infos :: GQLUserInfo.UserInfoArgs -> m [GQLUserInfo.UserInfo] @@ -105,7 +106,7 @@ data Contet m -- | The main GraphQL resolver: how queries, mutations and -- subscriptions are handled. rootResolver - :: (CmdCommon env, HasJobEnv' env, HasSettings env) + :: (CmdCommon env, HasNLPServer env, HasJobEnv' env, HasSettings env) => RootResolver (GargM env GargError) e Query Mutation Undefined rootResolver = RootResolver @@ -128,7 +129,7 @@ rootResolver = -- | Main GraphQL "app". app - :: (Typeable env, CmdCommon env, HasJobEnv' env, HasSettings env) + :: (Typeable env, CmdCommon env, HasJobEnv' env, HasNLPServer env, HasSettings env) => App (EVENT (GargM env GargError)) (GargM env GargError) app = deriveApp rootResolver diff --git a/src/Gargantext/API/GraphQL/NLP.hs b/src/Gargantext/API/GraphQL/NLP.hs index 34119c7c..60ed792b 100644 --- a/src/Gargantext/API/GraphQL/NLP.hs +++ b/src/Gargantext/API/GraphQL/NLP.hs @@ -4,19 +4,23 @@ module Gargantext.API.GraphQL.NLP ( Lang(..) , LanguagesArgs(..) + , LanguagesMap , resolveLanguages ) where +import Control.Lens (view) +import qualified Data.Map.Strict as Map import Data.Morpheus.Types ( GQLType , Resolver , QUERY ) import Gargantext.API.Prelude (GargM, GargError) -import Gargantext.Core (Lang(..), allLangs) +import Gargantext.Core (Lang(..), NLPServerConfig(..), PosTagAlgo) -- , allLangs) +import Gargantext.Core.NLP (HasNLPServer(..)) import Gargantext.Prelude -import GHC.Generics (Generic) +import Protolude data LanguagesArgs = LanguagesArgs @@ -24,6 +28,21 @@ data LanguagesArgs type GqlM e env = Resolver QUERY e (GargM env GargError) +type LanguagesMap = Map.Map Lang NLPServer + +data NLPServer = NLPServer + { + server :: !PosTagAlgo + , url :: !Text + } + deriving (Show, Eq, Generic, GQLType) + resolveLanguages - :: LanguagesArgs -> GqlM e env [Lang] -resolveLanguages LanguagesArgs { } = pure $ allLangs + :: HasNLPServer env => LanguagesArgs -> GqlM e env LanguagesMap +resolveLanguages LanguagesArgs { } = do + -- pure $ allLangs + lift $ do + ns <- view nlpServer + printDebug "[resolveLanguages] nlpServer" ns + pure $ Map.map (\(NLPServerConfig { .. }) -> NLPServer { server + , url = Protolude.show url }) ns diff --git a/src/Gargantext/Core.hs b/src/Gargantext/Core.hs index 61838b85..2d0e71eb 100644 --- a/src/Gargantext/Core.hs +++ b/src/Gargantext/Core.hs @@ -97,13 +97,13 @@ instance HasDBid Lang where data NLPServerConfig = NLPServerConfig { server :: !PosTagAlgo , url :: !URI } - deriving (Show, Eq) + deriving (Show, Eq, Generic) ------------------------------------------------------------------------ type Form = Text type Lem = Text ------------------------------------------------------------------------ data PosTagAlgo = CoreNLP | JohnSnowServer | Spacy - deriving (Show, Read, Eq, Ord, Generic) + deriving (Show, Read, Eq, Ord, Generic, GQLType) instance Hashable PosTagAlgo diff --git a/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs b/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs index e1c2102f..ef79bc5c 100644 --- a/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs +++ b/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs @@ -92,15 +92,15 @@ corenlp' uri lang txt = do EN -> [ ("annotators", "tokenize,ssplit,pos,ner" ) ] FR -> [ ("annotators", "tokenize,ssplit,pos,lemma,ner") -- , ("parse.model", "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz") - , ("pos.model", "edu/stanford/nlp/models/pos-tagger/french/french.tagger") + , ("pos.model", "edu/stanford/nlp/models/pos-tagger/models/french.tagger") , ("tokenize.language", "fr") ] DE -> [ ("annotators", "tokenize,ssplit,pos,lemma,ner") -- , ("parse.model", "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz") - , ("pos.model", "edu/stanford/nlp/models/pos-tagger/french/german-hgc.tagger") + , ("pos.model", "edu/stanford/nlp/models/pos-tagger/models/german-hgc.tagger") , ("tokenize.language", "de") ] ES -> [ ("annotators", "tokenize,ssplit,pos,lemma,ner") -- , ("parse.model", "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz") - , ("pos.model", "edu/stanford/nlp/models/pos-tagger/french/spanish.tagger") + , ("pos.model", "edu/stanford/nlp/models/pos-tagger/models/spanish.tagger") , ("tokenize.language", "es") ] IT -> [ ("annotators", "tokenize,ssplit,pos,lemma,ner") -- , ("parse.model", "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz") @@ -110,9 +110,9 @@ corenlp' uri lang txt = do -- , ("parse.model", "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz") -- , ("pos.model", "edu/stanford/nlp/models/pos-tagger/french/french.tagger") , ("tokenize.language", "pl") ] - CN -> [ ("annotators", "tokenize,ssplit,pos,lemma,ner") + CN -> [ ("annotators", "tokenize,pos,lemma,ner") -- , ("parse.model", "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz") - , ("pos.model", "edu/stanford/nlp/models/pos-tagger/french/chinese-distsim.tagger") + , ("pos.model", "edu/stanford/nlp/models/pos-tagger/models/chinese-distsim.tagger") , ("tokenize.language", "zh") ] l -> panic $ pack $ "corenlp for language " <> show l <> " is not implemented yet" -- 2.21.0