Commit 14246fa5 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] NLP API + group revert

parent 996fd394
...@@ -12,7 +12,7 @@ Multi-terms are ngrams where n > 1. ...@@ -12,7 +12,7 @@ Multi-terms are ngrams where n > 1.
-} -}
module Gargantext.Core.Text.Terms.Multi (multiterms, multiterms_rake) module Gargantext.Core.Text.Terms.Multi (multiterms, multiterms_rake, tokenTagsWith)
where where
import Data.Text hiding (map, group, filter, concat) import Data.Text hiding (map, group, filter, concat)
...@@ -28,6 +28,11 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En ...@@ -28,6 +28,11 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En
import qualified Gargantext.Core.Text.Terms.Multi.Lang.Fr as Fr import qualified Gargantext.Core.Text.Terms.Multi.Lang.Fr as Fr
import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake) import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake)
import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
-------------------------------------------------------------------
type NLP_API = Lang -> Text -> IO PosSentences
------------------------------------------------------------------- -------------------------------------------------------------------
-- To be removed -- To be removed
...@@ -37,21 +42,25 @@ multiterms = multiterms' tokenTag2terms ...@@ -37,21 +42,25 @@ multiterms = multiterms' tokenTag2terms
multiterms' :: (TokenTag -> a) -> Lang -> Text -> IO [a] multiterms' :: (TokenTag -> a) -> Lang -> Text -> IO [a]
multiterms' f lang txt = concat multiterms' f lang txt = concat
<$> map (map f) <$> map (map f)
<$> map (filter (\t -> _my_token_pos t == Just NP)) <$> map (filter (\t -> _my_token_pos t == Just NP))
<$> tokenTags lang txt <$> tokenTags lang txt
------------------------------------------------------------------- -------------------------------------------------------------------
tokenTag2terms :: TokenTag -> Terms tokenTag2terms :: TokenTag -> Terms
tokenTag2terms (TokenTag ws t _ _) = Terms ws t tokenTag2terms (TokenTag ws t _ _) = Terms ws t
tokenTags :: Lang -> Text -> IO [[TokenTag]] tokenTags :: Lang -> Text -> IO [[TokenTag]]
tokenTags lang s = map (groupTokens lang) <$> tokenTags' lang s tokenTags EN txt = tokenTagsWith EN txt corenlp
tokenTags FR txt = tokenTagsWith FR txt JohnSnow.nlp
tokenTags _ _ = panic "[G.C.T.T.Multi] NLP API not implemented yet"
tokenTagsWith :: Lang -> Text -> NLP_API -> IO [[TokenTag]]
tokenTagsWith lang txt nlp = map (groupTokens lang)
<$> map tokens2tokensTags
<$> map _sentenceTokens
<$> _sentences
<$> nlp lang txt
tokenTags' :: Lang -> Text -> IO [[TokenTag]]
tokenTags' lang t = map tokens2tokensTags
<$> map _sentenceTokens
<$> _sentences
<$> corenlp lang t
---- | This function analyses and groups (or not) ngrams according to ---- | This function analyses and groups (or not) ngrams according to
---- specific grammars of each language. ---- specific grammars of each language.
......
...@@ -23,7 +23,8 @@ import Gargantext.Prelude ...@@ -23,7 +23,8 @@ import Gargantext.Prelude
group2 :: POS -> POS -> [TokenTag] -> [TokenTag] group2 :: POS -> POS -> [TokenTag] -> [TokenTag]
group2 p1 p2 (x@(TokenTag _ _ (Just p1') _):y@(TokenTag _ _ (Just p2') _):z) = group2 p1 p2 (x@(TokenTag _ _ (Just p1') _):y@(TokenTag _ _ (Just p2') _):z) =
if (p1 == p1') && (p2 == p2') if (p1 == p1') && (p2 == p2')
then (x : y : group2 p1 p2 (x<>y : z)) then group2 p1 p2 (x<>y : z)
-- then (x : y : group2 p1 p2 (x<>y : z))
else (x : group2 p1 p2 (y:z)) else (x : group2 p1 p2 (y:z))
group2 p1 p2 (x@(TokenTag _ _ Nothing _):y) = (x: group2 p1 p2 y) group2 p1 p2 (x@(TokenTag _ _ Nothing _):y) = (x: group2 p1 p2 y)
group2 _ _ [x@(TokenTag _ _ (Just _) _)] = [x] group2 _ _ [x@(TokenTag _ _ (Just _) _)] = [x]
......
...@@ -72,7 +72,7 @@ data POS = NP ...@@ -72,7 +72,7 @@ data POS = NP
| JJ | VB | JJ | VB
| CC | IN | DT | CC | IN | DT
| ADV | ADV
| NoPos | NotFound { not_found :: [Char] }
deriving (Show, Generic, Eq, Ord) deriving (Show, Generic, Eq, Ord)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- https://pythonprogramming.net/part-of-speech-tagging-nltk-tutorial/ -- https://pythonprogramming.net/part-of-speech-tagging-nltk-tutorial/
...@@ -80,32 +80,38 @@ instance FromJSON POS where ...@@ -80,32 +80,38 @@ instance FromJSON POS where
parseJSON = withText "String" (\x -> pure (pos $ unpack x)) parseJSON = withText "String" (\x -> pure (pos $ unpack x))
where where
pos :: [Char] -> POS pos :: [Char] -> POS
pos "ADJ" = JJ pos "ADJ" = JJ
pos "CC" = CC pos "CC" = CC
pos "DT" = DT pos "CCONJ"= CC
pos "IN" = IN pos "DT" = DT
pos "JJ" = JJ pos "DET" = DT
pos "JJR" = JJ pos "IN" = IN
pos "JJS" = JJ pos "JJ" = JJ
pos "NC" = NP pos "JJR" = JJ
pos "NN" = NP pos "JJS" = JJ
pos "NNS" = NP pos "NC" = NP
pos "NNP" = NP pos "NN" = NP
pos "NOUN" = NP
pos "NNS" = NP
pos "NNP" = NP
pos "NNPS" = NP pos "NNPS" = NP
pos "NP" = NP pos "NP" = NP
pos "VB" = VB pos "VB" = VB
pos "VBD" = VB pos "VERB" = VB
pos "VBG" = VB pos "VBD" = VB
pos "VBN" = VB pos "VBG" = VB
pos "VBP" = VB pos "VBN" = VB
pos "VBZ" = VB pos "VBP" = VB
pos "RB" = ADV pos "VBZ" = VB
pos "RBR" = ADV pos "RB" = ADV
pos "RBS" = ADV pos "ADV" = ADV
pos "WRB" = ADV pos "RBR" = ADV
pos "RBS" = ADV
pos "WRB" = ADV
-- French specific -- French specific
pos "P" = IN pos "P" = IN
pos _ = NoPos pos "PUNCT" = IN
pos x = NotFound x
instance ToJSON POS instance ToJSON POS
instance Hashable POS instance Hashable POS
......
{-| {-|
Module : Gargantext.Utils.JohnSnowNLP Module : Gargantext.Utils.JohnSnow
Description : PosTagging module using Stanford java REST API Description : John Snow NLP API connexion
Copyright : (c) CNRS, 2017 Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
...@@ -181,14 +181,14 @@ waitForJsTask jsTask = wait' 0 ...@@ -181,14 +181,14 @@ waitForJsTask jsTask = wait' 0
getPosTagAndLems :: Lang -> Text -> IO PosSentences getPosTagAndLems :: Lang -> Text -> IO PosSentences
getPosTagAndLems l t = do getPosTagAndLems l t = do
jsPosTask <- jsRequest t (JSPOS l) jsPosTask <- jsRequest t (JSPOS l)
jsLemmaTask <- jsRequest t (JSLemma l) jsLemmaTask <- jsRequest t (JSLemma l)
-- wait for both tasks -- wait for both tasks
jsPos <- waitForJsTask jsPosTask jsPos <- waitForJsTask jsPosTask
jsLemma <- waitForJsTask jsLemmaTask jsLemma <- waitForJsTask jsLemmaTask
printDebug "[getPosTagAndLems] sentences" $ jsAsyncTaskResponseToSentences jsPos jsLemma pure $ jsAsyncTaskResponseToSentences jsPos jsLemma
pure $ PosSentences []
nlp :: Lang -> Text -> IO PosSentences
nlp = getPosTagAndLems
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment