From f5f72268c9d2830d07863f72d61e280e76d667ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Przemys=C5=82aw=20Kaminski?= <pk@intrepidus.pl>
Date: Tue, 18 Apr 2023 20:05:57 +0200
Subject: [PATCH] [nlp] export languages (graphql) based on ini file

---
 src/Gargantext/API/GraphQL.hs                 |  7 ++---
 src/Gargantext/API/GraphQL/NLP.hs             | 27 ++++++++++++++++---
 src/Gargantext/Core.hs                        |  4 +--
 .../Core/Text/Terms/Multi/PosTagging.hs       | 10 +++----
 4 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/src/Gargantext/API/GraphQL.hs b/src/Gargantext/API/GraphQL.hs
index 4ab8b29b..44a65bb7 100644
--- a/src/Gargantext/API/GraphQL.hs
+++ b/src/Gargantext/API/GraphQL.hs
@@ -46,6 +46,7 @@ import qualified Gargantext.API.GraphQL.TreeFirstLevel as GQLTree
 import qualified Gargantext.API.GraphQL.Team as GQLTeam
 import Gargantext.API.Prelude (GargM, GargError)
 import Gargantext.API.Types
+import Gargantext.Core.NLP (HasNLPServer)
 import Gargantext.Database.Prelude (CmdCommon)
 import Gargantext.Prelude
 import GHC.Generics (Generic)
@@ -71,7 +72,7 @@ data Query m
     , contexts_for_ngrams :: GQLCTX.ContextsForNgramsArgs -> m [GQLCTX.ContextGQL]
     , imt_schools         :: GQLIMT.SchoolsArgs -> m [GQLIMT.School]
     , job_logs            :: GQLAT.JobLogArgs -> m (Map Int JobLog)
-    , languages           :: GQLNLP.LanguagesArgs -> m [GQLNLP.Lang]
+    , languages           :: GQLNLP.LanguagesArgs -> m GQLNLP.LanguagesMap
     , nodes               :: GQLNode.NodeArgs -> m [GQLNode.Node]
     , node_parent         :: GQLNode.NodeParentArgs -> m [GQLNode.Node]
     , user_infos          :: GQLUserInfo.UserInfoArgs -> m [GQLUserInfo.UserInfo]
@@ -105,7 +106,7 @@ data Contet m
 -- | The main GraphQL resolver: how queries, mutations and
 -- subscriptions are handled.
 rootResolver
-  :: (CmdCommon env, HasJobEnv' env, HasSettings env)
+  :: (CmdCommon env, HasNLPServer env, HasJobEnv' env, HasSettings env)
   => RootResolver (GargM env GargError) e Query Mutation Undefined
 rootResolver =
   RootResolver
@@ -128,7 +129,7 @@ rootResolver =
 
 -- | Main GraphQL "app".
 app
-  :: (Typeable env, CmdCommon env, HasJobEnv' env, HasSettings env)
+  :: (Typeable env, CmdCommon env, HasJobEnv' env, HasNLPServer env, HasSettings env)
   => App (EVENT (GargM env GargError)) (GargM env GargError)
 app = deriveApp rootResolver
 
diff --git a/src/Gargantext/API/GraphQL/NLP.hs b/src/Gargantext/API/GraphQL/NLP.hs
index 34119c7c..60ed792b 100644
--- a/src/Gargantext/API/GraphQL/NLP.hs
+++ b/src/Gargantext/API/GraphQL/NLP.hs
@@ -4,19 +4,23 @@
 module Gargantext.API.GraphQL.NLP
   ( Lang(..)
   , LanguagesArgs(..)
+  , LanguagesMap
   , resolveLanguages
   )
   where
 
+import Control.Lens (view)
+import qualified Data.Map.Strict as Map
 import Data.Morpheus.Types
   ( GQLType
   , Resolver
   , QUERY
   )
 import Gargantext.API.Prelude (GargM, GargError)
-import Gargantext.Core (Lang(..), allLangs)
+import Gargantext.Core (Lang(..), NLPServerConfig(..), PosTagAlgo)  -- , allLangs)
+import Gargantext.Core.NLP (HasNLPServer(..))
 import Gargantext.Prelude
-import GHC.Generics (Generic)
+import Protolude
 
 data LanguagesArgs
   = LanguagesArgs
@@ -24,6 +28,21 @@ data LanguagesArgs
 
 type GqlM e env = Resolver QUERY e (GargM env GargError)
 
+type LanguagesMap = Map.Map Lang NLPServer
+
+data NLPServer = NLPServer
+  {
+    server :: !PosTagAlgo
+  , url    :: !Text
+  }
+  deriving (Show, Eq, Generic, GQLType)
+
 resolveLanguages
-  :: LanguagesArgs -> GqlM e env [Lang]
-resolveLanguages LanguagesArgs { } = pure $ allLangs
+  :: HasNLPServer env => LanguagesArgs -> GqlM e env LanguagesMap
+resolveLanguages LanguagesArgs { } = do
+  -- pure $ allLangs
+  lift $ do
+    ns <- view nlpServer
+    printDebug "[resolveLanguages] nlpServer" ns
+    pure $ Map.map (\(NLPServerConfig { .. }) -> NLPServer { server
+                                                           , url = Protolude.show url }) ns
diff --git a/src/Gargantext/Core.hs b/src/Gargantext/Core.hs
index 61838b85..2d0e71eb 100644
--- a/src/Gargantext/Core.hs
+++ b/src/Gargantext/Core.hs
@@ -97,13 +97,13 @@ instance HasDBid Lang where
 data NLPServerConfig = NLPServerConfig
   { server :: !PosTagAlgo
   , url    :: !URI }
-  deriving (Show, Eq)
+  deriving (Show, Eq, Generic)
 ------------------------------------------------------------------------
 type Form = Text
 type Lem  = Text
 ------------------------------------------------------------------------
 data PosTagAlgo = CoreNLP | JohnSnowServer | Spacy
-  deriving (Show, Read, Eq, Ord, Generic)
+  deriving (Show, Read, Eq, Ord, Generic, GQLType)
 
 instance Hashable PosTagAlgo
 
diff --git a/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs b/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
index e1c2102f..ef79bc5c 100644
--- a/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
@@ -92,15 +92,15 @@ corenlp' uri lang txt = do
             EN -> [ ("annotators", "tokenize,ssplit,pos,ner" ) ]
             FR -> [ ("annotators", "tokenize,ssplit,pos,lemma,ner")
                   -- , ("parse.model", "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz")
-                  , ("pos.model", "edu/stanford/nlp/models/pos-tagger/french/french.tagger")
+                  , ("pos.model", "edu/stanford/nlp/models/pos-tagger/models/french.tagger")
                   , ("tokenize.language", "fr") ]
             DE -> [ ("annotators", "tokenize,ssplit,pos,lemma,ner")
                   -- , ("parse.model", "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz")
-                  , ("pos.model", "edu/stanford/nlp/models/pos-tagger/french/german-hgc.tagger")
+                  , ("pos.model", "edu/stanford/nlp/models/pos-tagger/models/german-hgc.tagger")
                   , ("tokenize.language", "de") ]
             ES -> [ ("annotators", "tokenize,ssplit,pos,lemma,ner")
                   -- , ("parse.model", "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz")
-                  , ("pos.model", "edu/stanford/nlp/models/pos-tagger/french/spanish.tagger")
+                  , ("pos.model", "edu/stanford/nlp/models/pos-tagger/models/spanish.tagger")
                   , ("tokenize.language", "es") ]
             IT -> [ ("annotators", "tokenize,ssplit,pos,lemma,ner")
                   -- , ("parse.model", "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz")
@@ -110,9 +110,9 @@ corenlp' uri lang txt = do
                   -- , ("parse.model", "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz")
                   -- , ("pos.model", "edu/stanford/nlp/models/pos-tagger/french/french.tagger")
                   , ("tokenize.language", "pl") ]
-            CN -> [ ("annotators", "tokenize,ssplit,pos,lemma,ner")
+            CN -> [ ("annotators", "tokenize,pos,lemma,ner")
                   -- , ("parse.model", "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz")
-                  , ("pos.model", "edu/stanford/nlp/models/pos-tagger/french/chinese-distsim.tagger")
+                  , ("pos.model", "edu/stanford/nlp/models/pos-tagger/models/chinese-distsim.tagger")
                   , ("tokenize.language", "zh") ]
             l  -> panic $ pack $ "corenlp for language " <> show l <> " is not implemented yet"
 
-- 
2.21.0