Commit f5bb11c9 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Initial support for the Bool API query in PG searches

parent 772987e7
......@@ -78,9 +78,10 @@ test:
chown -R test:test /root/.cache/cabal/logs/
chown -R test:test /root/.cache/cabal/packages/hackage.haskell.org/
ls /builds/gargantext/devops/coreNLP/stanford-corenlp-current
cp -R /builds/gargantext/devops/coreNLP/stanford-corenlp-current /build/gargantext/haskell-gargantext/devops/coreNLP/
chown -R test:test /build/gargantext/haskell-gargantext/devops/coreNLP/stanford-corenlp-current
# This is not optimal because it downloads the coreNLP binary every time
# and it's not resistant in case of network outages.
cd /builds/gargantext/haskell-gargantext/devops/coreNLP; ./build.sh
cd /builds/gargantext/haskell-gargantext
nix-shell --run "chown -R test:test /root/.config/ && su -m test -c \"export PATH=$PATH:$TEST_NIX_PATH && cd /builds/gargantext/haskell-gargantext; $CABAL --store-dir=$CABAL_STORE_DIR v2-test --test-show-details=streaming --flags test-crypto --ghc-options='-O0 -fclear-plugins'\""
chown -R root:root dist-newstyle/
......
......@@ -124,6 +124,7 @@ library
Gargantext.Database.Admin.Types.Hyperdata.Document
Gargantext.Database.Admin.Types.Node
Gargantext.Database.Prelude
Gargantext.Database.Query.Facet
Gargantext.Database.Query.Table.NgramsPostag
Gargantext.Database.Query.Table.Node
Gargantext.Database.Query.Table.Node.Error
......@@ -316,7 +317,6 @@ library
Gargantext.Database.Admin.Types.Metrics
Gargantext.Database.GargDB
Gargantext.Database.Query
Gargantext.Database.Query.Facet
Gargantext.Database.Query.Facet.Types
Gargantext.Database.Query.Filter
Gargantext.Database.Query.Join
......
......@@ -17,12 +17,13 @@ Count API part of Gargantext.
module Gargantext.API.Search
where
import Data.Aeson hiding (defaultTaggedObject)
-- import Data.List (concat)
import Data.Aeson hiding (defaultTaggedObject)
import Data.Swagger hiding (fieldLabelModifier, Contact)
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.Text.Corpus.Query (RawQuery (..), parseQuery)
import Gargantext.Core.Types.Query (Limit, Offset)
import Gargantext.Core.Types.Search
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
......@@ -37,6 +38,7 @@ import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import qualified Data.Text as T
import Data.Either
-----------------------------------------------------------------------
-- TODO-ACCESS: CanSearch? or is it part of CanGetNode
......@@ -50,23 +52,28 @@ type API results = Summary "Search endpoint"
-----------------------------------------------------------------------
-- | Api search function
api :: NodeId -> GargServer (API SearchResult)
api nId (SearchQuery q SearchDoc) o l order = do
$(logLocM) DEBUG $ T.pack "New search started with query = " <> T.pack (show q)
SearchResult <$> SearchResultDoc
<$> map (toRow nId)
<$> searchInCorpus nId False q o l order
-- <$> searchInCorpus nId False (concat q) o l order
api nId (SearchQuery q SearchContact) o l order = do
-- printDebug "isPairedWith" nId
aIds <- isPairedWith nId NodeAnnuaire
-- TODO if paired with several corpus
case head aIds of
Nothing -> pure $ SearchResult
$ SearchNoResult "[G.A.Search] pair corpus with an Annuaire"
Just aId -> SearchResult
<$> SearchResultContact
<$> map (toRow aId)
<$> searchInCorpusWithContacts nId aId q o l order
api nId (SearchQuery rawQuery SearchDoc) o l order = do
case parseQuery rawQuery of
Left err -> pure $ SearchResult $ SearchNoResult (T.pack err)
Right q -> do
$(logLocM) DEBUG $ T.pack "New search started with query = " <> (getRawQuery rawQuery)
SearchResult <$> SearchResultDoc
<$> map (toRow nId)
<$> searchInCorpus nId False q o l order
api nId (SearchQuery rawQuery SearchContact) o l order = do
case parseQuery rawQuery of
Left err -> pure $ SearchResult $ SearchNoResult (T.pack err)
Right q -> do
-- printDebug "isPairedWith" nId
aIds <- isPairedWith nId NodeAnnuaire
-- TODO if paired with several corpus
case head aIds of
Nothing -> pure $ SearchResult
$ SearchNoResult "[G.A.Search] pair corpus with an Annuaire"
Just aId -> SearchResult
<$> SearchResultContact
<$> map (toRow aId)
<$> searchInCorpusWithContacts nId aId q o l order
api _nId (SearchQuery _q SearchDocWithNgrams) _o _l _order = undefined
-----------------------------------------------------------------------
......@@ -85,7 +92,7 @@ instance Arbitrary SearchType where
-----------------------------------------------------------------------
data SearchQuery =
SearchQuery { query :: ![Text]
SearchQuery { query :: !RawQuery
, expected :: !SearchType
}
deriving (Generic)
......@@ -100,7 +107,7 @@ instance ToSchema SearchQuery
-}
instance Arbitrary SearchQuery where
arbitrary = elements [SearchQuery ["electrodes"] SearchDoc]
arbitrary = elements [SearchQuery (RawQuery "electrodes") SearchDoc]
-- arbitrary = elements [SearchQuery "electrodes" 1 ] --SearchDoc]
-----------------------------------------------------------------------
data SearchResult =
......
......@@ -36,6 +36,7 @@ import Data.Maybe
import Data.Swagger
import Data.Text (Text())
import GHC.Generics (Generic)
import Prelude
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
......@@ -43,15 +44,16 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Gargantext.API.HashedResponse
import Gargantext.API.Ngrams.Types (TabType(..))
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.Text.Corpus.Query (RawQuery, parseQuery)
import Gargantext.Core.Types (TableResult(..))
import Gargantext.Core.Types.Query (Offset, Limit)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Action.Learn (FavOrTrash(..), moreLike)
import Gargantext.Database.Action.Search
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Facet (FacetDoc , runViewDocuments, runCountDocuments, OrderBy(..), runViewAuthorsDoc)
import Gargantext.Database.Admin.Types.Node hiding (ERROR, DEBUG)
import Gargantext.Database.Prelude -- (Cmd, CmdM)
import Gargantext.Database.Query.Facet (FacetDoc , runViewDocuments, runCountDocuments, OrderBy(..), runViewAuthorsDoc)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Prelude
------------------------------------------------------------------------
......@@ -77,7 +79,7 @@ data TableQuery = TableQuery
, tq_limit :: Limit
, tq_orderBy :: OrderBy
, tq_view :: TabType
, tq_query :: Text
, tq_query :: RawQuery
} deriving (Generic)
type FacetTableResult = TableResult FacetDoc
......@@ -116,13 +118,13 @@ getTableApi cId tabType mLimit mOffset mOrderBy mQuery mYear = do
t <- getTable cId tabType mOffset mLimit mOrderBy mQuery mYear
pure $ constructHashedResponse t
postTableApi :: HasNodeError err
=> NodeId -> TableQuery -> Cmd err FacetTableResult
postTableApi cId (TableQuery o l order ft "") = getTable cId (Just ft) (Just o) (Just l) (Just order) Nothing Nothing
postTableApi cId (TableQuery o l order ft q) = case ft of
Docs -> searchInCorpus' cId False [q] (Just o) (Just l) (Just order)
Trash -> searchInCorpus' cId True [q] (Just o) (Just l) (Just order)
x -> panic $ "not implemented in tableApi " <> (cs $ show x)
postTableApi :: HasNodeError err => NodeId -> TableQuery -> Cmd err FacetTableResult
postTableApi cId tq = case tq of
TableQuery o l order ft "" -> getTable cId (Just ft) (Just o) (Just l) (Just order) Nothing Nothing
TableQuery o l order ft q -> case ft of
Docs -> searchInCorpus' cId False q (Just o) (Just l) (Just order)
Trash -> searchInCorpus' cId True q (Just o) (Just l) (Just order)
x -> panic $ "not implemented in tableApi " <> (cs $ show x)
getTableHashApi :: HasNodeError err
=> NodeId -> Maybe TabType -> Cmd err Text
......@@ -132,15 +134,21 @@ getTableHashApi cId tabType = do
searchInCorpus' :: CorpusId
-> Bool
-> [Text]
-> RawQuery
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Cmd err FacetTableResult
searchInCorpus' cId t q o l order = do
docs <- searchInCorpus cId t q o l order
countAllDocs <- searchCountInCorpus cId t q
pure $ TableResult { tr_docs = docs, tr_count = countAllDocs }
case parseQuery q of
-- FIXME(adn) The error handling needs to be monomorphic over GargErr.
Left _noParseErr -> do
-- - $(logLocM) ERROR $ "Invalid input query " <> (getRawQuery q) <> " , error = " <> (T.pack noParseErr)
pure $ TableResult 0 []
Right boolQuery -> do
docs <- searchInCorpus cId t boolQuery o l order
countAllDocs <- searchCountInCorpus cId t boolQuery
pure $ TableResult { tr_docs = docs, tr_count = countAllDocs }
getTable :: HasNodeError err
......
......@@ -6,6 +6,7 @@ module Gargantext.Core.Text.Corpus.Query (
, Limit(..)
, getQuery
, parseQuery
, mapQuery
, renderQuery
, interpretQuery
, ExternalAPIs(..)
......@@ -93,3 +94,6 @@ parseQuery (RawQuery txt) = bimap show (Query . BoolExpr.boolTreeToCNF) $
renderQuery :: Query -> RawQuery
renderQuery (Query cnf) = RawQuery . T.pack $ BoolExpr.boolExprPrinter (showsPrec 0) (BoolExpr.fromCNF cnf) ""
mapQuery :: (Term -> Term) -> Query -> Query
mapQuery f = Query . fmap f . getQuery
......@@ -21,34 +21,67 @@ module Gargantext.Database.Action.Search (
import Control.Arrow (returnA)
import Control.Lens ((^.), view)
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import Data.Maybe
import qualified Data.Set as Set
import Data.Text (Text, unpack, intercalate)
import Data.Profunctor.Product (p4)
import Data.Text (Text, unpack)
import Data.Time (UTCTime)
import Gargantext.Core
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types
import Gargantext.Core.Types.Query (IsTrash, Limit, Offset)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
import Gargantext.Database.Prelude (runOpaQuery, runCountOpaQuery, DBCmd)
import Gargantext.Database.Query.Facet
import Gargantext.Database.Query.Filter
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Database.Query.Table.Context
import Gargantext.Database.Query.Table.ContextNodeNgrams (queryContextNodeNgramsTable)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Database.Query.Table.NodeContext
import Gargantext.Database.Query.Table.NodeContext_NodeContext
import Gargantext.Database.Schema.Context
import Gargantext.Database.Schema.ContextNodeNgrams (ContextNodeNgramsPoly(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Context
import Gargantext.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Opaleye hiding (Order)
import Data.Profunctor.Product (p4)
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Gargantext.Core.Text.Corpus.Query as API
import qualified Opaleye as O hiding (Order)
import Data.BoolExpr
import qualified Data.Text as T
--
-- Interpreting a query into a Postgres' TSQuery
--
queryToTsSearch :: API.Query -> Field SqlTSQuery
queryToTsSearch q = sqlToTSQuery $ T.unpack $ (API.interpretQuery q transformAST)
where
transformAST :: BoolExpr Term -> T.Text
transformAST ast = case ast of
BAnd sub1 sub2
-> " (" <> transformAST sub1 <> " & " <> transformAST sub2 <> ") "
BOr sub1 sub2
-> " (" <> transformAST sub1 <> " | " <> transformAST sub2 <> ") "
BNot (BConst (Negative term))
-> transformAST (BConst (Positive term)) -- double negation
BNot sub
-> "!" <> transformAST sub
-- BTrue cannot happen is the query parser doesn't support parsing 'TRUE' alone.
BTrue
-> T.empty
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse
-> T.empty
BConst (Positive (Term term))
-> T.intercalate " & " $ T.words term
-- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term))
-> "!" <> term
------------------------------------------------------------------------
searchDocInDatabase :: HasDBid NodeType
......@@ -139,7 +172,7 @@ _queryListWithNgrams lId ngramIds = proc () -> do
searchInCorpus :: HasDBid NodeType
=> CorpusId
-> IsTrash
-> [Text]
-> API.Query
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
......@@ -147,23 +180,21 @@ searchInCorpus :: HasDBid NodeType
searchInCorpus cId t q o l order = runOpaQuery
$ filterWith o l order
$ queryInCorpus cId t
$ intercalate " | "
$ map stemIt q
$ API.mapQuery (Term . stemIt . getTerm) q
searchCountInCorpus :: HasDBid NodeType
=> CorpusId
-> IsTrash
-> [Text]
-> API.Query
-> DBCmd err Int
searchCountInCorpus cId t q = runCountOpaQuery
$ queryInCorpus cId t
$ intercalate " | "
$ map stemIt q
$ API.mapQuery (Term . stemIt . getTerm) q
queryInCorpus :: HasDBid NodeType
=> CorpusId
-> IsTrash
-> Text
-> API.Query
-> O.Select FacetDocRead
queryInCorpus cId t q = proc () -> do
c <- queryContextSearchTable -< ()
......@@ -175,7 +206,7 @@ queryInCorpus cId t q = proc () -> do
else matchMaybe (view nc_category <$> nc) $ \case
Nothing -> toFields False
Just c' -> c' .>= sqlInt4 1
restrict -< (c ^. cs_search) @@ sqlToTSQuery (unpack q)
restrict -< (c ^. cs_search) @@ queryToTsSearch q
restrict -< (c ^. cs_typename ) .== sqlInt4 (toDBid NodeDocument)
returnA -< FacetDoc { facetDoc_id = c^.cs_id
, facetDoc_created = c^.cs_date
......@@ -191,7 +222,7 @@ searchInCorpusWithContacts
:: HasDBid NodeType
=> CorpusId
-> AnnuaireId
-> [Text]
-> API.Query
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
......@@ -201,13 +232,12 @@ searchInCorpusWithContacts cId aId q o l _order =
$ offset' o
$ orderBy (desc _fp_score)
$ selectGroup cId aId
$ intercalate " | "
$ map stemIt q
$ API.mapQuery (Term . stemIt . getTerm) q
selectGroup :: HasDBid NodeType
=> CorpusId
-> AnnuaireId
-> Text
-> API.Query
-> Select FacetPairedRead
selectGroup cId aId q = proc () -> do
(a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
......@@ -219,7 +249,7 @@ selectContactViaDoc
:: HasDBid NodeType
=> CorpusId
-> AnnuaireId
-> Text
-> API.Query
-> SelectArr ()
( Field SqlInt4
, Field SqlTimestamptz
......@@ -231,7 +261,7 @@ selectContactViaDoc cId aId query = proc () -> do
(contact, annuaire, _, corpus, doc) <- queryContactViaDoc -< ()
restrict -< matchMaybe (view cs_search <$> doc) $ \case
Nothing -> toFields False
Just s -> s @@ sqlToTSQuery (unpack query)
Just s -> s @@ queryToTsSearch query
restrict -< (view cs_typename <$> doc) .=== justFields (sqlInt4 (toDBid NodeDocument))
restrict -< (view nc_node_id <$> corpus) .=== justFields (pgNodeId cId)
restrict -< (view nc_node_id <$> annuaire) .=== justFields (pgNodeId aId)
......
......@@ -37,6 +37,7 @@ data NodeError = NoListFound { listId :: ListId }
| DoesNotExist NodeId
| NeedsConfiguration
| NodeError Text
| QueryNoParse Text
instance Show NodeError
where
......@@ -55,6 +56,7 @@ instance Show NodeError
show (DoesNotExist n) = "Node does not exist (" <> show n <> ")"
show NeedsConfiguration = "Needs configuration"
show (NodeError e) = "NodeError: " <> cs e
show (QueryNoParse err) = "QueryNoParse: " <> T.unpack err
instance ToJSON NodeError where
toJSON (NoListFound { listId = NodeId listId }) =
......
......@@ -128,6 +128,7 @@ tests = sequential $ aroundAll withTestDB $ describe "Database" $ do
it "Can stem query terms" stemmingTest
it "Can perform a simple search inside documents" corpusSearch01
it "Can perform search by author in documents" corpusSearch02
it "Can perform more complex searches using the boolean API" corpusSearch03
data ExpectedActual a =
Expected a
......
......@@ -23,6 +23,9 @@ import Test.Hspec.Expectations
import Test.Tasty.HUnit
import Gargantext.Core.Text.Terms.Mono.Stem.En
import Gargantext.Database.Admin.Config (userMaster)
import qualified Data.Text as T
import qualified Gargantext.Core.Text.Corpus.Query as API
import Gargantext.Database.Query.Facet
exampleDocument_01 :: HyperdataDocument
......@@ -48,14 +51,13 @@ exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ|
exampleDocument_02 :: HyperdataDocument
exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ|
{ "doi":"02"
{ "doi":""
, "uniqId": "1405.3072v3"
, "bdd": "Arxiv"
, "publication_day":6
, "language_iso2":"EN"
, "publication_minute":0
, "publication_month":7
, "language_iso3":"eng"
, "publication_second":0
, "authors":"Ajeje Brazorf and Manuel Agnelli"
, "authors":"Ajeje Brazorf, Manuel Agnelli"
, "publication_year":2012
, "publication_date":"2012-07-06 00:00:00+00:00"
, "language_name":"English"
......@@ -76,7 +78,7 @@ exampleDocument_03 = either error id $ parseEither parseJSON $ [aesonQQ|
, "title": "Haskell for OCaml programmers"
, "source": ""
, "uniqId": "1405.3072v2"
, "authors": "Raphael Poss"
, "authors": "Raphael Poss, Herbert Ballerina"
, "abstract": " This introduction to Haskell is written to optimize learning by programmers who already know OCaml. "
, "institutes": ""
, "language_iso2": "EN"
......@@ -85,6 +87,23 @@ exampleDocument_03 = either error id $ parseEither parseJSON $ [aesonQQ|
}
|]
exampleDocument_04 :: HyperdataDocument
exampleDocument_04 = either error id $ parseEither parseJSON $ [aesonQQ|
{
"bdd": "Arxiv"
, "doi": ""
, "url": "http://arxiv.org/pdf/1407.5670v1"
, "title": "Rust for functional programmers"
, "source": ""
, "uniqId": "1407.5670v1"
, "authors": "Raphael Poss"
, "abstract": " This article provides an introduction to Rust , a systems language by Mozilla , to programmers already familiar with Haskell , OCaml or other functional languages. " , "institutes": ""
, "language_iso2": "EN"
, "publication_date": "2014-07-21T21:20:31Z"
, "publication_year": 2014
}
|]
nlpServerConfig :: NLPServerConfig
nlpServerConfig =
let uri = parseURI "http://localhost:9000"
......@@ -103,14 +122,17 @@ corpusAddDocuments env = do
(Just $ _node_hyperdata $ corpus)
(Multi EN)
corpusId
[exampleDocument_01, exampleDocument_02, exampleDocument_03]
liftIO $ length ids `shouldBe` 3
[exampleDocument_01, exampleDocument_02, exampleDocument_03, exampleDocument_04]
liftIO $ length ids `shouldBe` 4
stemmingTest :: TestEnv -> Assertion
stemmingTest _env = do
stemIt "Ajeje" `shouldBe` "Ajeje"
stemIt "PyPlasm:" `shouldBe` "PyPlasm:"
mkQ :: T.Text -> API.Query
mkQ txt = either (\e -> error $ "(query) = " <> T.unpack txt <> ": " <> e) id . API.parseQuery . API.RawQuery $ txt
corpusSearch01 :: TestEnv -> Assertion
corpusSearch01 env = do
flip runReaderT env $ runTestMonad $ do
......@@ -118,8 +140,8 @@ corpusSearch01 env = do
parentId <- getRootId (UserName userMaster)
[corpus] <- getCorporaWithParentId parentId
results1 <- searchInCorpus (_node_id corpus) False ["mineral"] Nothing Nothing Nothing
results2 <- searchInCorpus (_node_id corpus) False ["computational"] Nothing Nothing Nothing
results1 <- searchInCorpus (_node_id corpus) False (mkQ "mineral") Nothing Nothing Nothing
results2 <- searchInCorpus (_node_id corpus) False (mkQ "computational") Nothing Nothing Nothing
liftIO $ length results1 `shouldBe` 1
liftIO $ length results2 `shouldBe` 1
......@@ -132,7 +154,26 @@ corpusSearch02 env = do
parentId <- getRootId (UserName userMaster)
[corpus] <- getCorporaWithParentId parentId
results1 <- searchInCorpus (_node_id corpus) False ["Raphael"] Nothing Nothing Nothing
results1 <- searchInCorpus (_node_id corpus) False (mkQ "Raphael") Nothing Nothing Nothing
results2 <- searchInCorpus (_node_id corpus) False (mkQ "Raphael Poss") Nothing Nothing Nothing
liftIO $ do
length results1 `shouldBe` 2 -- Haskell & Rust
map facetDoc_title results2 `shouldBe` ["Haskell for OCaml programmers", "Rust for functional programmers"]
-- | Check that we support more complex queries via the bool API
corpusSearch03 :: TestEnv -> Assertion
corpusSearch03 env = do
flip runReaderT env $ runTestMonad $ do
parentId <- getRootId (UserName userMaster)
[corpus] <- getCorporaWithParentId parentId
results1 <- searchInCorpus (_node_id corpus) False (mkQ "\"Manuel Agnelli\"") Nothing Nothing Nothing
results2 <- searchInCorpus (_node_id corpus) False (mkQ "Raphael AND -Rust") Nothing Nothing Nothing
results3 <- searchInCorpus (_node_id corpus) False (mkQ "(Raphael AND (NOT Rust)) OR PyPlasm") Nothing Nothing Nothing
liftIO $ do
length results1 `shouldBe` 1
map facetDoc_title results2 `shouldBe` ["Haskell for OCaml programmers"]
map facetDoc_title results3 `shouldBe` ["PyPlasm: computational geometry made easy", "Haskell for OCaml programmers"]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment