Commit 28461a06 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] Ngrams extractor in English with tests : ok. Need to factor pattern...

[FEAT] Ngrams extractor in English with tests : ok. Need to factor pattern matching with FR examples.
parent b38cd1e3
......@@ -33,6 +33,7 @@ library
, lens
, opaleye
, path
, parsec
, postgresql-simple
, pretty
, product-profunctors
......@@ -65,9 +66,6 @@ library
exposed-modules: Data.Gargantext
, Data.Gargantext.Analysis
, Data.Gargantext.DSL
, Data.Gargantext.Ngrams.Occurrences
, Data.Gargantext.NLP
, Data.Gargantext.NLP.CoreNLP
, Data.Gargantext.Database
, Data.Gargantext.Database.Instances
, Data.Gargantext.Database.Ngram
......@@ -78,13 +76,22 @@ library
, Data.Gargantext.Database.NodeNodeNgram
, Data.Gargantext.Database.Private
, Data.Gargantext.Database.User
, Data.Gargantext.NLP
, Data.Gargantext.NLP.CoreNLP
, Data.Gargantext.Ngrams
, Data.Gargantext.Ngrams.Count
, Data.Gargantext.Ngrams.Parser
, Data.Gargantext.Ngrams.Lang.En
, Data.Gargantext.Ngrams.Lang.Fr
, Data.Gargantext.Ngrams.TextMining
, Data.Gargantext.Ngrams.Occurrences
, Data.Gargantext.Parsers
, Data.Gargantext.Parsers.WOS
, Data.Gargantext.Prelude
, Data.Gargantext.Server
, Data.Gargantext.Types
, Data.Gargantext.Types.Node
, Data.Gargantext.Types.Main
, Data.Gargantext.Types.Node
, Data.Gargantext.Utils.DateUtils
, Data.Gargantext.Utils.Prefix
default-language: Haskell2010
......@@ -100,10 +107,10 @@ library
-- , hastext
-- default-language: Haskell2010
test-suite garg-test-parsers
test-suite garg-test-ngrams
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Parsers.hs
main-is: Ngrams.hs
build-depends: base
, extra
, text
......
......@@ -26,11 +26,15 @@ data Token = Token { _tokenIndex :: Int
, _tokenCharacterOffsetEnd :: Int
, _tokenPos :: Text
, _tokenNer :: Text
, _tokenBefore :: Text
, _tokenAfter :: Text
, _tokenBefore :: Maybe Text
, _tokenAfter :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_token") ''Token)
token2text :: Token -> (Text, Text, Text)
token2text (Token _ w _ _ _ _ p n _ _) = (w,p,n)
data Sentence = Sentence { _sentenceIndex :: Int
, _sentenceTokens :: [Token]
} deriving (Show, Generic)
......@@ -49,6 +53,19 @@ instance ToJSON Sentences
instance FromJSON Sentences
-- request =
-- "fr" : {
-- "tokenize.language" : "fr",
-- "pos.model" : "edu/stanford/nlp/models/pos-tagger/french/french.tagger",
-- "parse.model" : "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz",
-- // dependency parser
-- "depparse.model" : "edu/stanford/nlp/models/parser/nndep/UD_French.gz",
-- "depparse.language" : "french",
-- "ner.model": DATA_ROOT+"/eunews.fr.crf.gz",
-- "ssplit.newlineIsSentenceBreak": "always"
-- },
--
corenlpPretty :: String -> IO ()
corenlpPretty txt = do
......
module Data.Gargantext.Ngrams (
module Data.Gargantext.Ngrams.TextMining,
module Data.Gargantext.Ngrams.Words,
module Data.Gargantext.Ngrams.Hetero,
module Data.Gargantext.Ngrams.Count
module Data.Gargantext.Ngrams.Count,
--module Data.Gargantext.Ngrams.Hetero,
module Data.Gargantext.Ngrams.Parser,
module Data.Gargantext.Ngrams.Occurrences,
module Data.Gargantext.Ngrams.TextMining
--module Data.Gargantext.Ngrams.Words
) where
import Data.Gargantext.Ngrams.TextMining
import Data.Gargantext.Ngrams.Words
import Data.Gargantext.Ngrams.Hetero
import Data.Gargantext.Ngrams.Count
--import Data.Gargantext.Ngrams.Hetero
import Data.Gargantext.Ngrams.Parser
import Data.Gargantext.Ngrams.Occurrences
import Data.Gargantext.Ngrams.TextMining
--import Data.Gargantext.Ngrams.Words
......@@ -4,7 +4,6 @@ module Data.Gargantext.Ngrams.Count where
import System.Environment (getArgs)
import Data.List (foldl', take)
import Data.Foldable as F
import Data.Map (Map)
......@@ -37,9 +36,9 @@ occurrences xs = foldl' (\x y -> M.insertWith' (+) y 1 x) M.empty xs
--occurrences' :: Ord a => [a] -> Map a Integer
--occurrences' xs = DTL.foldl (\x y -> M.insertWith' (+) y 1 x) M.empty xs
countMain :: IO ()
countMain = do
(fichier:rest) <- getArgs
(fichier:_) <- getArgs
c <- DTLIO.readFile fichier
--print $ occurrences $ DTL.chunksOf 1 c
print $ occurrences $ letters'' c
......
......@@ -10,7 +10,7 @@ import Opaleye.Internal.Column (Column)
import Data.Gargantext.Database.Gargandb
import Data.Gargantext.Database.Private
import Data.Gargantext.Utils.Chronos
--import Data.Gargantext.Utils.Chronos
import Data.Gargantext.Ngrams.Words (cleanText)
import Data.Gargantext.Ngrams.Count (occurrences)
......
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Ngrams.Lang.En (selectNgrams, groupNgrams) where
import Data.Gargantext.Prelude
import Data.Text (Text)
import Data.Monoid ((<>))
selectNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
selectNgrams xs = pf selectNgrams' xs
where
selectNgrams' (_,"NN",_) = True
selectNgrams' (_,"NNS",_) = True
selectNgrams' (_,"NNP",_) = True
selectNgrams' (_,"NN+CC",_) = True
selectNgrams' (_,_,"PERSON") = True
selectNgrams' (_,_,"ORGANIZATION") = True
selectNgrams' (_,_,"LOCATION") = True
selectNgrams' (_,_,_) = False
groupNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
groupNgrams [] = []
groupNgrams ((j1,"JJ",j1'):(c1,"CC",c1'):(j2,"JJ",j2'):(j3,"JJ",_):xs) = groupNgrams (jn1:cc:jn2:xs)
where
jn j' j'' jn' = (j' <> " " <> j'', "JJ", jn')
cc = (c1,"CC",c1')
jn1 = (j1, "JJ", j1')
jn2 = jn j2 j3 j2'
groupNgrams ((j1,"JJ",_):(_,"CC",_):(j2,"JJ",_):(n,"NN",nn):xs) = groupNgrams (jn1:jn2:xs)
where
jn j m mm p = (j <> " " <> m, p, mm)
jn1 = jn j1 n nn ("NN+CC" :: Text)
jn2 = jn j2 n nn ("NN+CC" :: Text)
groupNgrams ((j1,"JJ",_):(_,"CC",_):(j2,"JJ",_):(n,"NNS",nn):xs) = groupNgrams (jn1:jn2:xs)
where
jn j m mm p = (j <> " " <> m, p, mm)
jn1 = jn j1 n nn ("NN+CC" :: Text)
jn2 = jn j2 n nn ("NN+CC" :: Text)
groupNgrams ((x,"JJ",_):(y,"JJ",yy):xs) = groupNgrams ((x <> " " <> y, "JJ", yy):xs)
groupNgrams ((x,"JJ",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"JJ",_):(y,"NNS",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NNP",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NN",_):(y,"NP",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NN",_):(y,"NNS",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NP",_):(y,"NP",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NN",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
-- extractNgrams "Test the antiinflammatory or analgesic activity?"
-- [[("``","``","O"),("Test","VB","O"),("the","DT","O"),("antiinflammatory activity analgesic activity","NN","O"),("?",".","O"),("''","''","O")]]
-- > should be (antiinflammatory activity) <> (analgesic activity)
groupNgrams ((x,"NN",_):("of","IN",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> "of" <> " " <> y, "NN", yy):xs)
groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
groupNgrams (x:xs) = (x:(groupNgrams xs))
--
--textTest :: [String]
--textTest = [ "Alcoholic extract of Kaempferia galanga was tested for analgesic and antiinflammatory activities in animal models. "
-- , "Three doses, 300 mg/kg, 600 mg/kg and 1200 mg/kg of the plant extract prepared as a suspension in 2 ml of 2% gum acacia were used. "
-- , " Acute and sub acute inflammatory activities were studied in rats by carrageenan induced paw edema and cotton pellet induced granuloma models respectively. "
-- , "In both models, the standard drug used was aspirin 100 mg/kg. "
-- , "Two doses 600 mg/kg and 1200 mg/kg of plant extract exhibited significant (P<0.001) antiinflammatory activity in carrageenan model and cotton pellet granuloma model in comparison to control. "
-- , "Analgesic activity was studied in rats using hot plate and tail-flick models. "
-- , "Codeine 5 mg/kg and vehicle served as standard and control respectively. "
-- , "The two doses of plant extract exhibited significant analgesic activity in tail flick model (P<0.001) and hot plate model (P<0.001) in comparison to control. "
-- , "In conclusion K. galanga possesses antiinflammatory and analgesic activities. "]
--
--
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Ngrams.Lang.Fr (selectNgrams, groupNgrams)
where
import Data.Gargantext.Prelude
import Data.Text (Text)
import Data.Monoid ((<>))
selectNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
selectNgrams xs = pf selectNgrams' xs
where
selectNgrams' (_,"NN",_) = True
selectNgrams' (_,"NNS",_) = True
selectNgrams' (_,"NNP",_) = True
selectNgrams' (_,"NN+CC",_) = True
selectNgrams' (_,_,"PERSON") = True
selectNgrams' (_,_,"ORGANIZATION") = True
selectNgrams' (_,_,"LOCATION") = True
selectNgrams' (_,_,_) = False
groupNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
groupNgrams [] = []
groupNgrams ((j1,"JJ",j1'):(c1,"CC",c1'):(j2,"JJ",j2'):(j3,"JJ",_):xs) = groupNgrams (jn1:cc:jn2:xs)
where
jn j' j'' jn' = (j' <> " " <> j'', "JJ", jn')
cc = (c1,"CC",c1')
jn1 = (j1, "JJ", j1')
jn2 = jn j2 j3 j2'
groupNgrams ((j1,"JJ",_):(_,"CC",_):(j2,"JJ",_):(n,"NN",nn):xs) = groupNgrams (jn1:jn2:xs)
where
jn j m mm p = (j <> " " <> m, p, mm)
jn1 = jn j1 n nn ("NN+CC" :: Text)
jn2 = jn j2 n nn ("NN+CC" :: Text)
groupNgrams ((j1,"JJ",_):(_,"CC",_):(j2,"JJ",_):(n,"NNS",nn):xs) = groupNgrams (jn1:jn2:xs)
where
jn j m mm p = (j <> " " <> m, p, mm)
jn1 = jn j1 n nn ("NN+CC" :: Text)
jn2 = jn j2 n nn ("NN+CC" :: Text)
groupNgrams ((x,"JJ",_):(y,"JJ",yy):xs) = groupNgrams ((x <> " " <> y, "JJ", yy):xs)
groupNgrams ((x,"JJ",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"JJ",_):(y,"NNS",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NNP",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NN",_):(y,"NP",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NN",_):(y,"NNS",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NP",_):(y,"NP",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NN",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
-- extractNgrams "Test the antiinflammatory or analgesic activity?"
-- [[("``","``","O"),("Test","VB","O"),("the","DT","O"),("antiinflammatory activity analgesic activity","NN","O"),("?",".","O"),("''","''","O")]]
-- > should be (antiinflammatory activity) <> (analgesic activity)
groupNgrams ((x,"NN",_):("of","IN",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> "of" <> " " <> y, "NN", yy):xs)
groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
groupNgrams (x:xs) = (x:(groupNgrams xs))
--textTest :: [String]
--textTest = [ "Alcoholic extract of Kaempferia galanga was tested for analgesic and antiinflammatory activities in animal models. "
-- , "Three doses, 300 mg/kg, 600 mg/kg and 1200 mg/kg of the plant extract prepared as a suspension in 2 ml of 2% gum acacia were used. "
-- , " Acute and sub acute inflammatory activities were studied in rats by carrageenan induced paw edema and cotton pellet induced granuloma models respectively. "
-- , "In both models, the standard drug used was aspirin 100 mg/kg. "
-- , "Two doses 600 mg/kg and 1200 mg/kg of plant extract exhibited significant (P<0.001) antiinflammatory activity in carrageenan model and cotton pellet granuloma model in comparison to control. "
-- , "Analgesic activity was studied in rats using hot plate and tail-flick models. "
-- , "Codeine 5 mg/kg and vehicle served as standard and control respectively. "
-- , "The two doses of plant extract exhibited significant analgesic activity in tail flick model (P<0.001) and hot plate model (P<0.001) in comparison to control. "
-- , "In conclusion K. galanga possesses antiinflammatory and analgesic activities. "]
--
--
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Gargantext.Ngrams.Parser where
import Data.Gargantext.Prelude
import Data.Gargantext.NLP.CoreNLP
import Data.Gargantext.Types.Main (Language(..), Ngrams)
import qualified Data.Gargantext.Ngrams.Lang.En as En
import qualified Data.Gargantext.Ngrams.Lang.Fr as Fr
-- TODO for scientific papers: add maesures
-- TODO add the p score regex
extractNgrams :: String -> IO [[Ngrams]]
extractNgrams t = pm (groupNgrams EN) <$> extractNgrams' t
extractNgrams' :: String -> IO [[Ngrams]]
extractNgrams' t = pm (pm token2text)
<$> pm _sentenceTokens
<$> sentences
<$> corenlp t
-- | This function selects ngrams according to grammars specific
-- of each language.
-- In english, JJ is ADJectiv in french.
selectNgrams :: Language -> [Ngrams] -> [Ngrams]
selectNgrams EN = En.selectNgrams
selectNgrams FR = Fr.selectNgrams
-- | This function analyze and groups (or not) ngrams according to
-- grammars specific of each language.
groupNgrams :: Language -> [Ngrams] -> [Ngrams]
groupNgrams EN = En.groupNgrams
groupNgrams FR = Fr.groupNgrams
module Data.Gargantext.Ngrams.TFICF where
data TFICF = TFICF { _tficfTerms :: Text
, _tficfContext1 :: Context
, _tficfContext2 :: Context
, _tficfScore :: Maybe Double
} deriving (Read, Show, Generics)
tfidf :: Text -> TFICF
tfidf txt = TFICF txt Document Corpus score
where
score = Nothing
......@@ -4,11 +4,12 @@ import Data.Map (empty, Map, insertWith, toList)
import Data.List (foldl, foldl')
import qualified Data.List as L
sortGT :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering
sortGT (a1, b1) (a2, b2)
| a1 < a2 = GT
| a1 > a2 = LT
| a1 == a2 = compare b1 b2
| a1 < a2 = GT
| a1 > a2 = LT
| a1 == a2 = compare b1 b2
sortGT (_, _) (_, _) = error "What is this case ?"
--histogram :: Ord a => [a] -> [(a, Int)]
......@@ -21,7 +22,7 @@ countElem m e = Data.Map.insertWith (\n o -> n + o) e 1 m
freqList :: (Ord k) => [k] -> Data.Map.Map k Int
freqList = foldl countElem Data.Map.empty
--getMaxFromMap :: Data.Map.Map -> Maybe -> [a] -> [a]
getMaxFromMap :: Ord a => Map a1 a -> [a1]
getMaxFromMap m = go [] Nothing (toList m)
where
go ks _ [] = ks
......@@ -53,4 +54,4 @@ countYear' (xs) = foldl' (\x y -> insertWith (+) y 1 x) empty xs
textMiningMain :: IO ()
textMiningMain = do
print $ merge ["abc"] ["bcd"]
print $ merge ["abc"::String] ["bcd" :: String]
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Parsers.WOS where
import Prelude hiding (takeWhile, take, concat, readFile)
......@@ -6,6 +8,7 @@ import Data.Map as DM
import Data.Attoparsec.ByteString
import Data.Attoparsec.ByteString.Char8 (anyChar, isEndOfLine)
import Data.ByteString (ByteString)
import Data.ByteString.Char8 (pack)
import Data.Either.Extra(Either(..))
import Control.Applicative
......@@ -47,8 +50,9 @@ data ParserType = WOS | CSV
wosParser :: Parser [Maybe [ByteString]]
wosParser = do
-- TODO Warning if version /= 1.0
_ <- manyTill anyChar (string "\nVR 1.0")
ns <- many1 wosNotice <* "\nEF"
-- FIXME anyChar (string ..) /= exact string "\nVR 1.0" ?
_ <- manyTill anyChar (string $ pack "\nVR 1.0")
ns <- many1 wosNotice <* (string $ pack "\nEF")
return ns
startNotice :: Parser ByteString
......@@ -56,7 +60,7 @@ startNotice = "\nPT " *> takeTill isEndOfLine
wosNotice :: Parser (Maybe [ByteString])
wosNotice = do
n <- startNotice *> wosFields <* manyTill anyChar (string "\nER\n")
n <- startNotice *> wosFields <* manyTill anyChar (string $ pack "\nER\n")
return n
field' :: Parser (ByteString, [ByteString])
......
......@@ -16,6 +16,19 @@ import Data.Gargantext.Types.Node ( NodePoly
, HyperdataNotebook
)
data Language = EN | FR -- | DE | IT | SP
-- > EN == english
-- > FR == french
-- > DE == deutch (not implemented yet)
-- > IT == italian (not implemented yet)
-- > SP == spanish (not implemented yet)
-- > ... add your language and help us to implement it (:
type Ngrams = (Text, Text, Text)
-- | TODO add Symbolic Node / Document
-- TODO make instances of Nodes
......
......@@ -4,4 +4,5 @@ packages:
- .
- /home/alexandre/local/logiciels/haskell/servant/servant-multipart
extra-deps:
- utc-0.2.0.1
resolver: lts-9.2
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
import Test.Hspec
import Test.QuickCheck
import Control.Exception (evaluate)
import Data.Text (Text)
import Data.Gargantext.Prelude
import Data.Gargantext.Types.Main (Language(..))
import Data.Gargantext.Ngrams
import Data.Gargantext.Ngrams.Occurrences (parseOccurrences)
import Data.Gargantext.Ngrams.Parser (extractNgrams, selectNgrams)
ngramsExtractionTest EN = hspec $ do
describe "Ngrams extraction in English Language" $ do
let textTest = [ "Alcoholic extract of Kaempferia galanga was tested for analgesic and antiinflammatory activities in animal models. ", "Three doses, 300 mg/kg, 600 mg/kg and 1200 mg/kg of the plant extract prepared as a suspension in 2 ml of 2% gum acacia were used. ", " Acute and sub acute inflammatory activities were studied in rats by carrageenan induced paw edema and cotton pellet induced granuloma models respectively. ", "In both models, the standard drug used was aspirin 100 mg/kg. ", "Two doses 600 mg/kg and 1200 mg/kg of plant extract exhibited significant (P<0.001) antiinflammatory activity in carrageenan model and cotton pellet granuloma model in comparison to control. ", "Analgesic activity was studied in rats using hot plate and tail-flick models. ", "Codeine 5 mg/kg and vehicle served as standard and control respectively. ", "The two doses of plant extract exhibited significant analgesic activity in tail flick model (P<0.001) and hot plate model (P<0.001) in comparison to control. ", "In conclusion K. galanga possesses antiinflammatory and analgesic activities. "] :: [String]
it "\"Of\" seperates two ngrams" $ do
t1 <- pm (selectNgrams EN) <$> extractNgrams (textTest !! 0)
t1 `shouldBe` [[("Alcoholic extract","NN","O"),("Kaempferia galanga","NN","O"),("analgesic activities","NN+CC","O"),("antiinflammatory activities","NN+CC","O"),("animal models","NN","O")]]
it "Tests the conjunction of coordination in two ngrams with its adjectives" $ do
t2 <- pm (selectNgrams EN) <$> extractNgrams (textTest !! 2)
t2 `shouldBe` [[("Acute activities","NN+CC","O"),("sub acute inflammatory activities","NN+CC","O"),("rats","NNS","O"),("carrageenan","NN","O"),("paw edema","NN","O"),("cotton pellet","NN","O"),("granuloma models","NN","O")]]
ngramsExtractionTest FR = hspec $ do
describe "Ngrams extraction in English Language" $ do
let textTest = [ "Alcoholic extract of Kaempferia galanga was tested for analgesic and antiinflammatory activities in animal models. ", "Three doses, 300 mg/kg, 600 mg/kg and 1200 mg/kg of the plant extract prepared as a suspension in 2 ml of 2% gum acacia were used. ", " Acute and sub acute inflammatory activities were studied in rats by carrageenan induced paw edema and cotton pellet induced granuloma models respectively. ", "In both models, the standard drug used was aspirin 100 mg/kg. ", "Two doses 600 mg/kg and 1200 mg/kg of plant extract exhibited significant (P<0.001) antiinflammatory activity in carrageenan model and cotton pellet granuloma model in comparison to control. ", "Analgesic activity was studied in rats using hot plate and tail-flick models. ", "Codeine 5 mg/kg and vehicle served as standard and control respectively. ", "The two doses of plant extract exhibited significant analgesic activity in tail flick model (P<0.001) and hot plate model (P<0.001) in comparison to control. ", "In conclusion K. galanga possesses antiinflammatory and analgesic activities. "] :: [String]
it "\"Of\" seperates two ngrams" $ do
t1 <- pm (selectNgrams EN) <$> extractNgrams (textTest !! 0)
t1 `shouldBe` [[("Alcoholic extract","NN","O"),("Kaempferia galanga","NN","O"),("analgesic activities","NN+CC","O"),("antiinflammatory activities","NN+CC","O"),("animal models","NN","O")]]
it "Tests the conjunction of coordination in two ngrams with its adjectives" $ do
t2 <- pm (selectNgrams EN) <$> extractNgrams (textTest !! 2)
t2 `shouldBe` [[("Acute activities","NN+CC","O"),("sub acute inflammatory activities","NN+CC","O"),("rats","NNS","O"),("carrageenan","NN","O"),("paw edema","NN","O"),("cotton pellet","NN","O"),("granuloma models","NN","O")]]
parsersTest = hspec $ do
describe "Parser for occurrences" $ do
let txt = "internet"
it "returns the result of one parsing" $ do
parseOccurrences "internet" "internet" `shouldBe` Right 1
-- | Context of Text should be toLower
it "returns the result of one parsing not case sensitive" $ do
let txtCase = "Internet"
parseOccurrences txtCase "internet" `shouldBe` Right 1
it "returns the result of one parsing after space" $ do
parseOccurrences txt " internet"
`shouldBe` Right 1
it "returns the result of one parsing after chars" $ do
parseOccurrences txt "l'internet"
`shouldBe` Right 1
it "returns the result of multiple parsing" $ do
parseOccurrences txt "internet internet of things"
`shouldBe` Right 2
it "returns the result of multiple parsing separated by text" $ do
parseOccurrences txt "internet in the internet of things"
`shouldBe` Right 2
it "returns the result of multiple parsing separated by punctuation" $ do
parseOccurrences txt "internet. In the internet of things, internet like; internet?"
`shouldBe` Right 4
-- describe "Parser for nodes" $ do
-- it "returns the result of one parsing after space" $ do
-- occOfCorpus 249509 "sciences" `shouldReturn` 7
main :: IO ()
main = do
-- parsersTest
-- ngramsExtractionTest EN
ngramsExtractionTest FR
{-# LANGUAGE OverloadedStrings #-}
import Test.Hspec
import Test.QuickCheck
import Control.Exception (evaluate)
import Data.Text (Text)
import Data.Gargantext.Parsers.Occurrences (parseOccurrences)
-- import Data.Gargantext.Analysis (occOfCorpus)
parsersTest = hspec $ do
describe "Parser for occurrences" $ do
let txt = "internet"
it "returns the result of one parsing" $ do
parseOccurrences "internet" "internet" `shouldBe` Right 1
-- | Context of Text should be toLower
it "returns the result of one parsing not case sensitive" $ do
let txtCase = "Internet"
parseOccurrences txtCase "internet" `shouldBe` Right 1
it "returns the result of one parsing after space" $ do
parseOccurrences txt " internet"
`shouldBe` Right 1
it "returns the result of one parsing after chars" $ do
parseOccurrences txt "l'internet"
`shouldBe` Right 1
it "returns the result of multiple parsing" $ do
parseOccurrences txt "internet internet of things"
`shouldBe` Right 2
it "returns the result of multiple parsing separated by text" $ do
parseOccurrences txt "internet in the internet of things"
`shouldBe` Right 2
it "returns the result of multiple parsing separated by punctuation" $ do
parseOccurrences txt "internet. In the internet of things, internet like; internet?"
`shouldBe` Right 4
-- describe "Parser for nodes" $ do
-- it "returns the result of one parsing after space" $ do
-- occOfCorpus 249509 "sciences" `shouldReturn` 7
main :: IO ()
main = do
parsersTest
......@@ -8,7 +8,8 @@ import Control.Exception (evaluate)
import Data.Text (Text)
import Data.Hastext.Parsers.Occurrences (parse)
main = print "hspec $ do
main :: IO ()
main = print "hspec $ do"
describe "Parser for occurrences" $ do
let txt = "internet"
......@@ -43,5 +44,4 @@ main = print "hspec $ do
main :: IO ()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment