Commit ee1b45b3 authored by Alexandre Delanoë

[FEAT] Backend NLP French tested

parent 91a63511
@@ -3,7 +3,7 @@
 # 0 3 * * * pg_dump --dbname=$MYDB | gzip > ~/backup/db/$(date +%Y-%m-%d).psql.gz
 if [[ $1 == "" || $2 == "" ]]
-then echo "USAGE : ./psql gargantext.ini backup_directory"
+then echo "USAGE : ./bin/backup gargantext.ini backup_directory"
 else
 INIFILE=$1
...
 WITH repeated AS
-( select nn.node2_id AS id, count(*) AS c
-FROM nodes_nodes nn
-GROUP BY nn.node2_id
+(
+select nn.context_id AS id, count(*) AS c
+FROM nodes_contexts nn
+GROUP BY nn.context_id
 )
-DELETE FROM nodes n
+DELETE FROM contexts c
 USING repeated r
 WHERE
-n.id = r.id
-AND r.c <= 1
-AND n.typename = 4
+c.id = r.id
+AND r.c = 1
+AND c.typename = 4
 ;
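The rewritten query deletes contexts of typename 4 that occur in exactly one nodes_contexts row. A minimal sketch of running it from Haskell with postgresql-simple (not part of this commit; the connection string and helper name are placeholders):

{-# LANGUAGE OverloadedStrings #-}
import Database.PostgreSQL.Simple (close, connectPostgreSQL, execute_)

-- Hypothetical helper: run the cleanup above and report how many rows were removed.
cleanSingleLinkContexts :: IO ()
cleanSingleLinkContexts = do
  conn <- connectPostgreSQL "dbname=gargandb"   -- placeholder connection string
  n    <- execute_ conn
            "WITH repeated AS (SELECT nn.context_id AS id, count(*) AS c \
            \ FROM nodes_contexts nn GROUP BY nn.context_id) \
            \ DELETE FROM contexts c USING repeated r \
            \ WHERE c.id = r.id AND r.c = 1 AND c.typename = 4"
  putStrLn ("Deleted contexts: " <> show n)
  close conn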
@@ -77,15 +77,17 @@ instance HasDBid Lang where
 type Form = Text
 type Lem = Text
 ------------------------------------------------------------------------
-data PosTagAlgo = CoreNLP | JohnSnowServer
+data PosTagAlgo = CoreNLP | JohnSnowServer | Spacy
   deriving (Show, Read, Eq, Ord, Generic)
 
 instance Hashable PosTagAlgo
 
 instance HasDBid PosTagAlgo where
   toDBid CoreNLP = 1
   toDBid JohnSnowServer = 2
+  toDBid Spacy = 3
   fromDBid 1 = CoreNLP
   fromDBid 2 = JohnSnowServer
+  fromDBid 3 = Spacy
   fromDBid _ = panic "HasDBid posTagAlgo : Not implemented"
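Sanity sketch (not in this commit): with Spacy mapped to 3, the two directions of the HasDBid instance should stay in sync.

-- Round-trip check over all constructors, including the new Spacy entry.
prop_posTagAlgoDBidRoundTrip :: Bool
prop_posTagAlgoDBidRoundTrip =
  all (\a -> fromDBid (toDBid a) == a) [CoreNLP, JohnSnowServer, Spacy]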
@@ -254,9 +254,7 @@ makeLenses ''NodeStoryEnv
 makeLenses ''NodeStory
 makeLenses ''Archive
-----------------------------------------
+----------------------------------------------------------------------
 data NodeStoryPoly nid v ngtid ngid nre =
   NodeStoryDB { node_id :: nid
               , version :: v
@@ -614,14 +612,20 @@ readNodeStoryEnv pool = do
   pure $ NodeStoryEnv { _nse_var = mvar
                       , _nse_saver = saver
                       , _nse_saver_immediate = saver_immediate
-                      , _nse_getter = nodeStoryVar pool (Just mvar) }
+                      , _nse_getter = nodeStoryVar pool (Just mvar)
+                      }
 
-nodeStoryVar :: Pool PGS.Connection -> Maybe (MVar NodeListStory) -> [NodeId] -> IO (MVar NodeListStory)
+nodeStoryVar :: Pool PGS.Connection
+             -> Maybe (MVar NodeListStory)
+             -> [NodeId]
+             -> IO (MVar NodeListStory)
 nodeStoryVar pool Nothing nIds = do
   state <- withResource pool $ \c -> nodeStoryIncs c Nothing nIds
   newMVar state
 nodeStoryVar pool (Just mv) nIds = do
-  _ <- withResource pool $ \c -> modifyMVar_ mv $ \nsl -> (nodeStoryIncs c (Just nsl) nIds)
+  _ <- withResource pool
+         $ \c -> modifyMVar_ mv
+         $ \nsl -> (nodeStoryIncs c (Just nsl) nIds)
   pure mv
 
 -- Debounce is useful since it could delay the saving to some later
...
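For orientation on the hunk above (hypothetical, not part of this commit): callers are expected to reach the story through _nse_getter, which is nodeStoryVar partially applied to the pool and the shared MVar. A usage sketch, assuming the nse_getter lens generated by makeLenses ''NodeStoryEnv:

import Control.Concurrent.MVar (readMVar)
import Control.Lens ((^.))

-- Hypothetical helper: fetch the shared MVar for some nodes and read it.
currentStory :: NodeStoryEnv -> [NodeId] -> IO NodeListStory
currentStory env nIds = do
  mv <- (env ^. nse_getter) nIds
  readMVar mv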
@@ -29,6 +29,7 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.Fr as Fr
 import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake)
 -- import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
 import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
@@ -36,15 +37,14 @@ import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
 type NLP_API = Lang -> Text -> IO PosSentences
 -------------------------------------------------------------------
+-- To be removed
 multiterms :: Lang -> Text -> IO [Terms]
 multiterms = multiterms' tokenTag2terms
-  where
 multiterms' :: (TokenTag -> a) -> Lang -> Text -> IO [a]
 multiterms' f lang txt = concat
   <$> map (map f)
   <$> map (filter (\t -> _my_token_pos t == Just NP))
   <$> tokenTags lang txt
 -------------------------------------------------------------------
 tokenTag2terms :: TokenTag -> Terms
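A usage sketch for the entry point above (illustrative only; multiterms is flagged "To be removed" but is still the helper exposed here; assumes OverloadedStrings for the Text literal):

-- Keeps only multi-word tags whose POS is NP, as the filter above shows.
demoMultiterms :: IO [Terms]
demoMultiterms = multiterms FR "Le changement climatique est un défi majeur."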
@@ -53,7 +53,7 @@ tokenTag2terms (TokenTag ws t _ _) = Terms ws t
 tokenTags :: Lang -> Text -> IO [[TokenTag]]
 tokenTags EN txt = tokenTagsWith EN txt corenlp
 tokenTags FR txt = tokenTagsWith FR txt SpacyNLP.nlp
-tokenTags _ _ = panic "[G.C.T.T.Multi] NLP API not implemented yet"
+tokenTags l _ = panic $ "[G.C.T.T.Multi] Lang NLP API not implemented yet " <> (cs $ show l)
 
 tokenTagsWith :: Lang -> Text -> NLP_API -> IO [[TokenTag]]
 tokenTagsWith lang txt nlp = map (groupTokens lang)
...
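With FR now served by the Spacy backend, a minimal usage sketch (illustrative only; assumes OverloadedStrings and a running Spacy server):

-- Tags a French sentence and prints how many grouped tags each sentence yields.
demoTokenTagsFr :: IO ()
demoTokenTagsFr = do
  tags <- tokenTags FR "La ministre a présenté le projet de loi."
  print (map length tags)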
@@ -35,6 +35,10 @@ import Gargantext.Core.Types
 import Gargantext.Prelude
 import Network.HTTP.Simple
+-- import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
 ------------------------------------------------------------------------
 ------------------------------------------------------------------------
 tokens2tokensTags :: [Token] -> [TokenTag]
@@ -73,18 +77,12 @@ corenlp' :: ( FromJSON a
 corenlp' lang txt = do
   let properties = case lang of
         EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
-        -- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
         FR -> "{\"annotators\": \"tokenize,ssplit,pos,lemma,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
         _ -> panic $ pack "not implemented yet"
   url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
   let request = setRequestBodyLBS (cs txt) url
   httpJSON request
 
-corenlpRaw :: Lang -> Text -> IO Value
-corenlpRaw lang txt = do
-  response <- corenlp' lang txt
-  pure (getResponseBody response)
 
 corenlp :: Lang -> Text -> IO PosSentences
 corenlp lang txt = do
...
@@ -318,9 +318,9 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
     <- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype
   --let gp = (GroupParams l 2 3 (StopSize 3))
-  -- Here the PosTagAlgo should be chosen according the Lang
+  -- Here the PosTagAlgo should be chosen according to the Lang
   let gp = GroupWithPosTag l CoreNLP HashMap.empty
   ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw gp
   -- printDebug "flowCorpusUser:ngs" ngs

@@ -329,8 +329,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
   -- _ <- insertOccsUpdates userCorpusId mastListId
   -- printDebug "userListId" userListId
   -- User Graph Flow
-  _ <- insertDefaultNodeIfNotExists NodeDashboard userCorpusId userId
   _ <- insertDefaultNodeIfNotExists NodeGraph userCorpusId userId
+  _ <- insertDefaultNodeIfNotExists NodeDashboard userCorpusId userId
   --_ <- mkPhylo userCorpusId userId
   -- Annuaire Flow
   -- _ <- mkAnnuaire rootUserId userId
...
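The comment above leaves the tagger hard-wired to CoreNLP. One possible follow-up shape (hypothetical, not in this commit) is a per-language selector:

-- Hypothetical selector; the fallback to CoreNLP is an assumption.
posTagAlgoFor :: Lang -> PosTagAlgo
posTagAlgoFor EN = CoreNLP
posTagAlgoFor FR = Spacy
posTagAlgoFor _  = CoreNLP

-- which would let flowCorpusUser do:
--   let gp = GroupWithPosTag l (posTagAlgoFor l) HashMap.empty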