[ngrams] annotate types of ngrams algorithms

parent 08c7c91c
Pipeline #7170 passed with stages
in 52 minutes and 15 seconds
...@@ -392,10 +392,15 @@ For tasty, if you want to run specific test (via patterns), use: ...@@ -392,10 +392,15 @@ For tasty, if you want to run specific test (via patterns), use:
cabal v2-run garg-test-tasty -- -p '/Ngrams/ cabal v2-run garg-test-tasty -- -p '/Ngrams/
``` ```
For integration tests, do: For integration tests, do:
```shel ```shell
cabal v2-test garg-test-hspec --test-show-details=streaming --test-option=--match='/some pattern/' cabal v2-test garg-test-hspec --test-show-details=streaming --test-option=--match='/some pattern/'
``` ```
You could also use [ghciwatch](https://mercurytechnologies.github.io/ghciwatch/) for testing:
```shell
ghciwatch --command "cabal v2-repl garg-test-tasty" --after-startup-ghci ':set args "--pattern" "/Ngrams/"' --after-startup-ghci "Main.main" --after-reload-ghci "Main.main" --watch src --watch test
```
### Modifying a golden test to accept a new (expected) output ### Modifying a golden test to accept a new (expected) output
Some tests, like the Phylo one, use golden testing to ensure that the JSON Phylo we generate is Some tests, like the Phylo one, use golden testing to ensure that the JSON Phylo we generate is
......
...@@ -264,6 +264,7 @@ library ...@@ -264,6 +264,7 @@ library
Gargantext.Core.Worker.Types Gargantext.Core.Worker.Types
Gargantext.Database.Action.Flow Gargantext.Database.Action.Flow
Gargantext.Database.Action.Flow.Types Gargantext.Database.Action.Flow.Types
Gargantext.Database.Action.Flow.Utils
Gargantext.Database.Action.Metrics.TFICF Gargantext.Database.Action.Metrics.TFICF
Gargantext.Database.Action.Search Gargantext.Database.Action.Search
Gargantext.Database.Action.User Gargantext.Database.Action.User
...@@ -286,6 +287,7 @@ library ...@@ -286,6 +287,7 @@ library
Gargantext.Database.Query.Table.Node.User Gargantext.Database.Query.Table.Node.User
Gargantext.Database.Query.Table.User Gargantext.Database.Query.Table.User
Gargantext.Database.Query.Tree.Root Gargantext.Database.Query.Tree.Root
Gargantext.Database.Schema.Context
Gargantext.Database.Schema.Ngrams Gargantext.Database.Schema.Ngrams
Gargantext.Database.Schema.Node Gargantext.Database.Schema.Node
Gargantext.Database.Schema.User Gargantext.Database.Schema.User
...@@ -413,7 +415,6 @@ library ...@@ -413,7 +415,6 @@ library
Gargantext.Database.Action.Flow.Extract Gargantext.Database.Action.Flow.Extract
Gargantext.Database.Action.Flow.List Gargantext.Database.Action.Flow.List
Gargantext.Database.Action.Flow.Pairing Gargantext.Database.Action.Flow.Pairing
Gargantext.Database.Action.Flow.Utils
Gargantext.Database.Action.Index Gargantext.Database.Action.Index
Gargantext.Database.Action.Learn Gargantext.Database.Action.Learn
Gargantext.Database.Action.Mail Gargantext.Database.Action.Mail
...@@ -458,7 +459,6 @@ library ...@@ -458,7 +459,6 @@ library
Gargantext.Database.Query.Table.NodeNgrams Gargantext.Database.Query.Table.NodeNgrams
Gargantext.Database.Query.Tree Gargantext.Database.Query.Tree
Gargantext.Database.Query.Tree.Error Gargantext.Database.Query.Tree.Error
Gargantext.Database.Schema.Context
Gargantext.Database.Schema.ContextNodeNgrams Gargantext.Database.Schema.ContextNodeNgrams
Gargantext.Database.Schema.ContextNodeNgrams2 Gargantext.Database.Schema.ContextNodeNgrams2
Gargantext.Database.Schema.NodeContext Gargantext.Database.Schema.NodeContext
......
...@@ -102,7 +102,7 @@ import Gargantext.Database.Query.Table.Node ( MkCorpus, insertDefaultNodeIfNotEx ...@@ -102,7 +102,7 @@ import Gargantext.Database.Query.Table.Node ( MkCorpus, insertDefaultNodeIfNotEx
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add) import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert ( ToNode(toNode) ) -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..)) import Gargantext.Database.Query.Table.Node.Document.Insert ( ToNode(toNode) ) -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..)) import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes) import Gargantext.Database.Query.Table.NodeContext (selectDocNodesOnlyId)
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId) import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(..), getOrMkRoot, getOrMkRootWithCorpus, userFromMkCorpusUser) import Gargantext.Database.Query.Tree.Root (MkCorpusUser(..), getOrMkRoot, getOrMkRootWithCorpus, userFromMkCorpusUser)
import Gargantext.Database.Schema.Ngrams ( indexNgrams, text2ngrams ) import Gargantext.Database.Schema.Ngrams ( indexNgrams, text2ngrams )
...@@ -502,7 +502,7 @@ reIndexWith cId lId nt lts = do ...@@ -502,7 +502,7 @@ reIndexWith cId lId nt lts = do
<$> getTermsWith identity [lId] nt lts <$> getTermsWith identity [lId] nt lts
-- Get all documents of the corpus -- Get all documents of the corpus
(docs :: [Context HyperdataDocument]) <- selectDocNodes cId (docs :: [ContextOnlyId HyperdataDocument]) <- selectDocNodesOnlyId cId
let let
-- fromListWith (<>) -- fromListWith (<>)
......
...@@ -38,7 +38,7 @@ import Gargantext.Database.Query.Table.ContextNodeNgrams ( ContextNodeNgramsPoly ...@@ -38,7 +38,7 @@ import Gargantext.Database.Query.Table.ContextNodeNgrams ( ContextNodeNgramsPoly
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add) import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert (ReturnId, addUniqId, insertDb, reId, reInserted, reUniqId) import Gargantext.Database.Query.Table.Node.Document.Insert (ReturnId, addUniqId, insertDb, reId, reInserted, reUniqId)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..)) import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Schema.Context (context_hyperdata, context_id) import Gargantext.Database.Schema.Context (context_oid_hyperdata, context_oid_id)
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTypeId(..)) import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTypeId(..))
import Gargantext.Database.Types ( Indexed(..), index ) import Gargantext.Database.Types ( Indexed(..), index )
import Gargantext.Prelude import Gargantext.Prelude
...@@ -75,18 +75,18 @@ insertDocNgrams lId m = do ...@@ -75,18 +75,18 @@ insertDocNgrams lId m = do
docNgrams :: Lang docNgrams :: Lang
-> NgramsType -> NgramsType
-> [NT.NgramsTerm] -> [NT.NgramsTerm]
-> Gargantext.Database.Admin.Types.Node.Context HyperdataDocument -> ContextOnlyId HyperdataDocument
-> [((MatchedText, TermsCount), -> [((MatchedText, TermsCount),
Map NgramsType (Map NodeId Int))] Map NgramsType (Map NodeId Int))]
docNgrams lang nt ts doc = docNgrams lang nt ts doc =
List.zip List.zip
(termsInText lang (buildPatternsWith lang ts) (termsInText lang (buildPatternsWith lang ts)
$ T.unlines $ catMaybes $ T.unlines $ catMaybes
[ doc ^. context_hyperdata . hd_title [ doc ^. context_oid_hyperdata . hd_title
, doc ^. context_hyperdata . hd_abstract , doc ^. context_oid_hyperdata . hd_abstract
] ]
) )
(List.cycle [DM.fromList $ [(nt, DM.singleton (doc ^. context_id) 1 )]]) (List.cycle [DM.fromList $ [(nt, DM.singleton (doc ^. context_oid_id) 1 )]])
documentIdWithNgrams :: HasNodeError err documentIdWithNgrams :: HasNodeError err
......
...@@ -47,15 +47,18 @@ instance HasText HyperdataDocument ...@@ -47,15 +47,18 @@ instance HasText HyperdataDocument
, _hd_abstract h , _hd_abstract h
] ]
defaultHyperdataDocument :: HyperdataDocument emptyHyperdataDocument :: HyperdataDocument
defaultHyperdataDocument = case decode docExample of emptyHyperdataDocument = HyperdataDocument Nothing Nothing
Just hp -> hp
Nothing -> HyperdataDocument Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
defaultHyperdataDocument :: HyperdataDocument
defaultHyperdataDocument = case decode docExample of
Just hp -> hp
Nothing -> emptyHyperdataDocument
where where
docExample :: ByteString docExample :: ByteString
docExample = "{\"doi\":\"sdfds\",\"publication_day\":6,\"language_iso2\":\"en\",\"publication_minute\":0,\"publication_month\":7,\"language_iso3\":\"eng\",\"publication_second\":0,\"authors\":\"Nils Hovdenak, Kjell Haram\",\"publication_year\":2012,\"publication_date\":\"2012-07-06 00:00:00+00:00\",\"language_name\":\"English\",\"realdate_full_\":\"2012 01 12\",\"source\":\"European journal of obstetrics, gynecology, and reproductive biology\",\"abstract\":\"The literature was searched for publications on minerals and vitamins during pregnancy and the possible influence of supplements on pregnancy outcome.\",\"title\":\"Influence of mineral and vitamin supplements on pregnancy outcome.\",\"publication_hour\":0}" docExample = "{\"doi\":\"sdfds\",\"publication_day\":6,\"language_iso2\":\"en\",\"publication_minute\":0,\"publication_month\":7,\"language_iso3\":\"eng\",\"publication_second\":0,\"authors\":\"Nils Hovdenak, Kjell Haram\",\"publication_year\":2012,\"publication_date\":\"2012-07-06 00:00:00+00:00\",\"language_name\":\"English\",\"realdate_full_\":\"2012 01 12\",\"source\":\"European journal of obstetrics, gynecology, and reproductive biology\",\"abstract\":\"The literature was searched for publications on minerals and vitamins during pregnancy and the possible influence of supplements on pregnancy outcome.\",\"title\":\"Influence of mineral and vitamin supplements on pregnancy outcome.\",\"publication_hour\":0}"
......
...@@ -103,6 +103,7 @@ type ContextTitle = Text ...@@ -103,6 +103,7 @@ type ContextTitle = Text
-- | NodePoly indicates that Node has a Polymorphism Type -- | NodePoly indicates that Node has a Polymorphism Type
type Node json = NodePoly NodeId (Maybe Hash) NodeTypeId UserId (Maybe ParentId) NodeName UTCTime json type Node json = NodePoly NodeId (Maybe Hash) NodeTypeId UserId (Maybe ParentId) NodeName UTCTime json
type Context json = ContextPoly NodeId (Maybe Hash) NodeTypeId UserId (Maybe ParentId) ContextTitle UTCTime json type Context json = ContextPoly NodeId (Maybe Hash) NodeTypeId UserId (Maybe ParentId) ContextTitle UTCTime json
-- | A 'ContextPolyOnlyId' whose identifier is fixed to 'NodeId'; the payload type @json@ stays a parameter.
type ContextOnlyId json = ContextPolyOnlyId NodeId json
-- | NodeSearch (queries) -- | NodeSearch (queries)
-- type NodeSearch json = NodePolySearch NodeId NodeTypeId UserId (Maybe ParentId) NodeName UTCTime json (Maybe TSVector) -- type NodeSearch json = NodePolySearch NodeId NodeTypeId UserId (Maybe ParentId) NodeName UTCTime json (Maybe TSVector)
......
...@@ -22,6 +22,7 @@ module Gargantext.Database.Query.Table.NodeContext ...@@ -22,6 +22,7 @@ module Gargantext.Database.Query.Table.NodeContext
, queryNodeContextTable , queryNodeContextTable
, selectDocsDates , selectDocsDates
, selectDocNodes , selectDocNodes
, selectDocNodesOnlyId
, selectDocs , selectDocs
, nodeContextsCategory , nodeContextsCategory
, nodeContextsScore , nodeContextsScore
...@@ -413,6 +414,15 @@ queryDocNodes cId = proc () -> do ...@@ -413,6 +414,15 @@ queryDocNodes cId = proc () -> do
restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument) restrict -< (c ^. context_typename) .== sqlInt4 (toDBid NodeDocument)
returnA -< c returnA -< c
-- | Run 'queryDocNodesOnlyId' for the given corpus and return the resulting
-- rows as 'ContextOnlyId' values carrying a 'HyperdataDocument'.
selectDocNodesOnlyId
  :: HasDBid NodeType
  => CorpusId
  -> DBCmd err [ContextOnlyId HyperdataDocument]
selectDocNodesOnlyId cId = runOpaQuery $ queryDocNodesOnlyId cId
-- | Query built on top of 'queryDocNodes': every row it yields is projected
-- down to a 'ContextOnlyId' holding just the context id and the hyperdata.
queryDocNodesOnlyId :: HasDBid NodeType => CorpusId -> O.Select ContextOnlyIdRead
queryDocNodesOnlyId cId = keepIdAndHyperdata <$> queryDocNodes cId
  where
    -- Drop every column except the two that 'ContextOnlyId' stores.
    keepIdAndHyperdata ctx =
      ContextOnlyId (ctx ^. context_id) (ctx ^. context_hyperdata)
joinInCorpus :: O.Select (ContextRead, MaybeFields NodeContextRead) joinInCorpus :: O.Select (ContextRead, MaybeFields NodeContextRead)
joinInCorpus = proc () -> do joinInCorpus = proc () -> do
c <- queryContextTable -< () c <- queryContextTable -< ()
......
...@@ -52,6 +52,23 @@ $(makeLenses ''ContextPoly) ...@@ -52,6 +52,23 @@ $(makeLenses ''ContextPoly)
$(makeAdaptorAndInstance "pContext" ''ContextPoly) $(makeAdaptorAndInstance "pContext" ''ContextPoly)
$(makeLensesWith abbreviatedFields ''ContextPoly) $(makeLensesWith abbreviatedFields ''ContextPoly)
------------------------------------------------------------------------
-- | Two-field record pairing an identifier with a hyperdata payload.
-- Both fields are strict, and the record is polymorphic in both types.
data ContextPolyOnlyId id hyperdata = ContextOnlyId
  { _context_oid_id        :: !id
  , _context_oid_hyperdata :: !hyperdata
  }
  deriving (Show, Generic)
------------------------------------------------------------------------
-- Automatic instances derivation
-- JSON instances; the "_context_oid_" prefix is handed to 'unPrefix' for field naming.
$(deriveJSON (unPrefix "_context_oid_") ''ContextPolyOnlyId)
-- Lenses for the record fields (used elsewhere as 'context_oid_id' / 'context_oid_hyperdata').
$(makeLenses ''ContextPolyOnlyId)
-- Generates the adaptor named 'pContextOnlyId' together with its accompanying instance.
$(makeAdaptorAndInstance "pContextOnlyId" ''ContextPolyOnlyId)
-- Second set of lenses, named according to the 'abbreviatedFields' rules.
$(makeLensesWith abbreviatedFields ''ContextPolyOnlyId)
------------------------------------------------------------------------
contextTable :: Table ContextWrite ContextRead contextTable :: Table ContextWrite ContextRead
contextTable = Table "contexts" (pContext Context { _context_id = optionalTableField "id" contextTable = Table "contexts" (pContext Context { _context_id = optionalTableField "id"
, _context_hash_id = optionalTableField "hash_id" , _context_hash_id = optionalTableField "hash_id"
...@@ -87,6 +104,10 @@ type ContextRead = ContextPoly (Field SqlInt4 ) ...@@ -87,6 +104,10 @@ type ContextRead = ContextPoly (Field SqlInt4 )
(Field SqlText ) (Field SqlText )
(Field SqlTimestamptz ) (Field SqlTimestamptz )
(Field SqlJsonb ) (Field SqlJsonb )
-- | Read-side (SQL field) shape of 'ContextPolyOnlyId': an int4 id column
-- and a jsonb hyperdata column.
type ContextOnlyIdRead = ContextPolyOnlyId (Field SqlInt4) (Field SqlJsonb)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Context(Read|Write)Search is slower than Context(Write|Read) use it -- | Context(Read|Write)Search is slower than Context(Write|Read) use it
-- for full text search only -- for full text search only
......
...@@ -6,7 +6,11 @@ module Test.Ngrams.Count (tests) where ...@@ -6,7 +6,11 @@ module Test.Ngrams.Count (tests) where
import Gargantext.API.Ngrams import Gargantext.API.Ngrams
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (buildPatternsWith, termsInText, Pattern(..)) import Gargantext.Core.Text.Terms.WithList (buildPatternsWith, termsInText, Pattern(..))
import Gargantext.Database.Action.Flow.Utils (docNgrams)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..), emptyHyperdataDocument )
import Gargantext.Database.Schema.Context ( ContextPolyOnlyId(..) )
import Gargantext.Prelude import Gargantext.Prelude
import Test.Tasty import Test.Tasty
import Test.Tasty.HUnit import Test.Tasty.HUnit
...@@ -20,10 +24,13 @@ unitTests = testGroup "Count tests" ...@@ -20,10 +24,13 @@ unitTests = testGroup "Count tests"
[ -- Sorting [ -- Sorting
testCase "Build patterns works 01" testBuildPatterns01 testCase "Build patterns works 01" testBuildPatterns01
, testCase "Build patterns works 02" testBuildPatterns02 , testCase "Build patterns works 02" testBuildPatterns02
, testCase "termsInText works 01" testTermsInText01 , testCase "termsInText works 01" testTermsInText01
, testCase "termsInText works 02" testTermsInText02 , testCase "termsInText works 02" testTermsInText02
, testCase "termsInText works 03" testTermsInText03 , testCase "termsInText works 03" testTermsInText03
, testCase "termsInText works 04 (related to issue #221)" testTermsInText04 , testCase "termsInText works 04 (related to issue #221)" testTermsInText04
, testCase "docNgrams works 01" testDocNgrams01
] ]
-- | Let's document how the `buildPatternsWith` function works. -- | Let's document how the `buildPatternsWith` function works.
...@@ -91,3 +98,12 @@ testTermsInText04 = do ...@@ -91,3 +98,12 @@ testTermsInText04 = do
length tit @?= 1 length tit @?= 1
let [tit1] = tit let [tit1] = tit
tit1 @?= ("feuilles de basilic", 2) tit1 @?= ("feuilles de basilic", 2)
-- | 'docNgrams' on a document whose title is "hello world" (and which has no
-- abstract), searched for the terms "hello" and "world", yields two entries.
testDocNgrams01 :: Assertion
testDocNgrams01 = length matches @?= 2
  where
    wanted :: [NgramsTerm]
    wanted = ["hello", "world"]

    -- Start from the empty document and fill in only the title.
    document = emptyHyperdataDocument
      { _hd_title    = Just "hello world"
      , _hd_abstract = Nothing
      }

    matches = docNgrams EN NgramsTerms wanted (ContextOnlyId 1 document)
...@@ -31,9 +31,18 @@ import qualified Test.Parsers.Date as PD ...@@ -31,9 +31,18 @@ import qualified Test.Parsers.Date as PD
import qualified Test.Utils.Crypto as Crypto import qualified Test.Utils.Crypto as Crypto
import qualified Test.Utils.Jobs as Jobs import qualified Test.Utils.Jobs as Jobs
import System.IO (hGetBuffering, hSetBuffering)
import Test.Tasty import Test.Tasty
import Test.Tasty.Hspec import Test.Tasty.Hspec
-- | Run an 'IO' action and afterwards put the buffering mode of 'stdout'
-- back to what it was before the action started, even if the action throws.
--
-- See https://mercurytechnologies.github.io/ghciwatch/integration/tasty.html
protectStdoutBuffering :: IO a -> IO a
protectStdoutBuffering action =
  bracket saveMode restoreMode (\_ -> action)
  where
    -- Remember the buffering mode that is active before the action runs.
    saveMode = hGetBuffering stdout
    -- Re-apply the remembered mode once the action is done.
    restoreMode = hSetBuffering stdout
main :: IO () main :: IO ()
main = do main = do
utilSpec <- testSpec "Utils" Utils.test utilSpec <- testSpec "Utils" Utils.test
...@@ -46,7 +55,7 @@ main = do ...@@ -46,7 +55,7 @@ main = do
asyncUpdatesSpec <- testSpec "Notifications" Notifications.test asyncUpdatesSpec <- testSpec "Notifications" Notifications.test
occurrencesSpec <- testSpec "Occurrences" Occurrences.test occurrencesSpec <- testSpec "Occurrences" Occurrences.test
defaultMain $ testGroup "Gargantext" protectStdoutBuffering $ defaultMain $ testGroup "Gargantext"
[ utilSpec [ utilSpec
, clusteringSpec , clusteringSpec
, dateSplitSpec , dateSplitSpec
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment