Commit 14246fa5 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] NLP API + group revert

parent 996fd394
......@@ -12,7 +12,7 @@ Multi-terms are ngrams where n > 1.
-}
module Gargantext.Core.Text.Terms.Multi (multiterms, multiterms_rake)
module Gargantext.Core.Text.Terms.Multi (multiterms, multiterms_rake, tokenTagsWith)
where
import Data.Text hiding (map, group, filter, concat)
......@@ -28,6 +28,11 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En
import qualified Gargantext.Core.Text.Terms.Multi.Lang.Fr as Fr
import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake)
import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
-------------------------------------------------------------------
type NLP_API = Lang -> Text -> IO PosSentences
-------------------------------------------------------------------
-- To be removed
......@@ -37,21 +42,25 @@ multiterms = multiterms' tokenTag2terms
multiterms' :: (TokenTag -> a) -> Lang -> Text -> IO [a]
multiterms' f lang txt = concat
<$> map (map f)
<$> map (filter (\t -> _my_token_pos t == Just NP))
<$> map (filter (\t -> _my_token_pos t == Just NP))
<$> tokenTags lang txt
-------------------------------------------------------------------
tokenTag2terms :: TokenTag -> Terms
tokenTag2terms (TokenTag ws t _ _) = Terms ws t
tokenTags :: Lang -> Text -> IO [[TokenTag]]
tokenTags lang s = map (groupTokens lang) <$> tokenTags' lang s
tokenTags EN txt = tokenTagsWith EN txt corenlp
tokenTags FR txt = tokenTagsWith FR txt JohnSnow.nlp
tokenTags _ _ = panic "[G.C.T.T.Multi] NLP API not implemented yet"
tokenTagsWith :: Lang -> Text -> NLP_API -> IO [[TokenTag]]
tokenTagsWith lang txt nlp = map (groupTokens lang)
<$> map tokens2tokensTags
<$> map _sentenceTokens
<$> _sentences
<$> nlp lang txt
tokenTags' :: Lang -> Text -> IO [[TokenTag]]
tokenTags' lang t = map tokens2tokensTags
<$> map _sentenceTokens
<$> _sentences
<$> corenlp lang t
---- | This function analyses and groups (or not) ngrams according to
---- specific grammars of each language.
......
......@@ -23,7 +23,8 @@ import Gargantext.Prelude
group2 :: POS -> POS -> [TokenTag] -> [TokenTag]
group2 p1 p2 (x@(TokenTag _ _ (Just p1') _):y@(TokenTag _ _ (Just p2') _):z) =
if (p1 == p1') && (p2 == p2')
then (x : y : group2 p1 p2 (x<>y : z))
then group2 p1 p2 (x<>y : z)
-- then (x : y : group2 p1 p2 (x<>y : z))
else (x : group2 p1 p2 (y:z))
group2 p1 p2 (x@(TokenTag _ _ Nothing _):y) = (x: group2 p1 p2 y)
group2 _ _ [x@(TokenTag _ _ (Just _) _)] = [x]
......
......@@ -72,7 +72,7 @@ data POS = NP
| JJ | VB
| CC | IN | DT
| ADV
| NoPos
| NotFound { not_found :: [Char] }
deriving (Show, Generic, Eq, Ord)
------------------------------------------------------------------------
-- https://pythonprogramming.net/part-of-speech-tagging-nltk-tutorial/
......@@ -80,32 +80,38 @@ instance FromJSON POS where
parseJSON = withText "String" (\x -> pure (pos $ unpack x))
where
pos :: [Char] -> POS
pos "ADJ" = JJ
pos "CC" = CC
pos "DT" = DT
pos "IN" = IN
pos "JJ" = JJ
pos "JJR" = JJ
pos "JJS" = JJ
pos "NC" = NP
pos "NN" = NP
pos "NNS" = NP
pos "NNP" = NP
pos "ADJ" = JJ
pos "CC" = CC
pos "CCONJ"= CC
pos "DT" = DT
pos "DET" = DT
pos "IN" = IN
pos "JJ" = JJ
pos "JJR" = JJ
pos "JJS" = JJ
pos "NC" = NP
pos "NN" = NP
pos "NOUN" = NP
pos "NNS" = NP
pos "NNP" = NP
pos "NNPS" = NP
pos "NP" = NP
pos "VB" = VB
pos "VBD" = VB
pos "VBG" = VB
pos "VBN" = VB
pos "VBP" = VB
pos "VBZ" = VB
pos "RB" = ADV
pos "RBR" = ADV
pos "RBS" = ADV
pos "WRB" = ADV
pos "NP" = NP
pos "VB" = VB
pos "VERB" = VB
pos "VBD" = VB
pos "VBG" = VB
pos "VBN" = VB
pos "VBP" = VB
pos "VBZ" = VB
pos "RB" = ADV
pos "ADV" = ADV
pos "RBR" = ADV
pos "RBS" = ADV
pos "WRB" = ADV
-- French specific
pos "P" = IN
pos _ = NoPos
pos "P" = IN
pos "PUNCT" = IN
pos x = NotFound x
instance ToJSON POS
instance Hashable POS
......
{-|
Module : Gargantext.Utils.JohnSnowNLP
Description : PosTagging module using Stanford java REST API
Module : Gargantext.Utils.JohnSnow
Description : John Snow NLP API connexion
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
......@@ -181,14 +181,14 @@ waitForJsTask jsTask = wait' 0
getPosTagAndLems :: Lang -> Text -> IO PosSentences
getPosTagAndLems l t = do
jsPosTask <- jsRequest t (JSPOS l)
jsPosTask <- jsRequest t (JSPOS l)
jsLemmaTask <- jsRequest t (JSLemma l)
-- wait for both tasks
jsPos <- waitForJsTask jsPosTask
jsLemma <- waitForJsTask jsLemmaTask
printDebug "[getPosTagAndLems] sentences" $ jsAsyncTaskResponseToSentences jsPos jsLemma
pure $ PosSentences []
pure $ jsAsyncTaskResponseToSentences jsPos jsLemma
nlp :: Lang -> Text -> IO PosSentences
nlp = getPosTagAndLems
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment