Commit a854f24e authored by Alexandre Delanoë

Merge branch 'master' into stable

parents 6116a39e 38f940bf
# Thanks to:
# https://vadosware.io/post/zero-to-continuous-integrated-testing-a-haskell-project-with-gitlab/
#
image: haskell:8

variables:
  STACK_ROOT: "${CI_PROJECT_DIR}/.stack"
  STACK_OPTS: "--system-ghc"

cache:
  paths:
    - .stack
    - .stack-work
    - target

#before_script:
  #- apt-get update
  #- apt-get install make xz-utils

stages:
  - build
  - test

build:
  stage: build
  script:
    - make setup
    - make build

#unit-test:
#  stage: test
#  script:
#    - make test-unit
#
#int-test:
#  stage: test
#  script:
#    - make test-int
#
#e2e-test:
#  stage: test
#  script:
#    - make test-e2e
#
# If you find yourself with a non-sensical build error when you know your project should be building just fine, this fragment should help:
#
#build:
#  stage: build
#  script:
#    # Clear out cache files
#    - rm -rf .stack
#    - rm -rf .stack-work
#    - stack setup --system-ghc
#    - stack install --local-bin-path target --system-ghc
#!/bin/bash
# sudo su postgres
# postgresql://$USER:$PW@localhost/$DB
PW="password"
DB="gargandbV5"
USER="gargantua"

psql -c "CREATE USER \"${USER}\""
psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
psql -c "DROP DATABASE IF EXISTS \"${DB}\""
createdb "${DB}"
psql "${DB}" < schema.sql
psql -c "ALTER DATABASE \"${DB}\" OWNER to \"${USER}\""
......
@@ -80,6 +80,7 @@ ALTER TABLE public.ngrams OWNER TO gargantua;
 --ALTER TABLE public.nodes_ngrams_ngrams OWNER TO gargantua;
 ---------------------------------------------------------------
+-- TODO nodes_nodes(node1_id int, node2_id int, edge_type int , weight real)
 CREATE TABLE public.nodes_nodes (
     node1_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
     node2_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
......
@@ -130,7 +130,7 @@ import GHC.Generics (Generic)
 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
 -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
 import Gargantext.Database.Config (userMaster)
-import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast)
+import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
 import Gargantext.Database.Schema.Ngrams (NgramsType)
 import Gargantext.Database.Types.Node (NodeType(..))
 import Gargantext.Database.Utils (fromField', HasConnection)
@@ -1019,7 +1019,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
     setScores True table = do
       let ngrams_terms = (table ^.. each . ne_ngrams)
       t1 <- getTime'
-      occurrences <- getOccByNgramsOnlyFast nId
+      occurrences <- getOccByNgramsOnlyFast' nId
+                       listId
                        ngramsType
                        ngrams_terms
       t2 <- getTime'
@@ -1153,8 +1154,6 @@ getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orde
 apiNgramsTableCorpus :: ( RepoCmdM env err m
                         , HasNodeError err
                         , HasInvalidError err
......
@@ -55,6 +55,7 @@ import Gargantext.API.Ngrams.NTree (MyTree)
 import Gargantext.API.Search (SearchDocsAPI, searchDocs)
 import Gargantext.API.Table
 import Gargantext.API.Types
+import Gargantext.Core.Types (NodeTableResult)
 import Gargantext.Core.Types.Main (Tree, NodeTree, ListType)
 import Gargantext.Database.Config (nodeTypeId)
 import Gargantext.Database.Facet (FacetDoc, OrderBy(..))
@@ -158,7 +159,8 @@ type ChildrenApi a = Summary " Summary children"
                      :> QueryParam "type"   NodeType
                      :> QueryParam "offset" Int
                      :> QueryParam "limit"  Int
-                     :> Get '[JSON] [Node a]
+                     -- :> Get '[JSON] [Node a]
+                     :> Get '[JSON] (NodeTableResult a)
 ------------------------------------------------------------------------
 type NodeNodeAPI a = Get '[JSON] (Node a)
......
@@ -44,7 +44,7 @@ import Data.Swagger
 import Data.Text (Text())
 import GHC.Generics (Generic)
 import Gargantext.API.Ngrams (TabType(..))
-import Gargantext.Core.Types (Offset, Limit)
+import Gargantext.Core.Types (Offset, Limit, TableResult(..))
 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
 import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..))
 import Gargantext.Database.Learn (FavOrTrash(..), moreLike)
@@ -60,7 +60,7 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
 type TableApi = Summary " Table API"
              :> ReqBody '[JSON] TableQuery
-            :> Post '[JSON] TableResult
+            :> Post '[JSON] FacetTableResult

 data TableQuery = TableQuery
   { tq_offset :: Int
@@ -70,17 +70,7 @@ data TableQuery = TableQuery
   , tq_query  :: Text
   } deriving (Generic)

-data TableResult = TableResult { tr_count :: Int
-                               , tr_docs  :: [FacetDoc]
-                               } deriving (Generic)
-
-$(deriveJSON (unPrefix "tr_") ''TableResult)
-
-instance ToSchema TableResult where
-  declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "tr_")
-
-instance Arbitrary TableResult where
-  arbitrary = TableResult <$> arbitrary <*> arbitrary
+type FacetTableResult = TableResult FacetDoc

 $(deriveJSON (unPrefix "tq_") ''TableQuery)
@@ -91,7 +81,7 @@ instance Arbitrary TableQuery where
   arbitrary = elements [TableQuery 0 10 DateAsc Docs "electrodes"]

-tableApi :: NodeId -> TableQuery -> Cmd err TableResult
+tableApi :: NodeId -> TableQuery -> Cmd err FacetTableResult
 tableApi cId (TableQuery o l order ft "") = getTable cId (Just ft) (Just o) (Just l) (Just order)
 tableApi cId (TableQuery o l order ft q) = case ft of
   Docs -> searchInCorpus' cId False [q] (Just o) (Just l) (Just order)
@@ -104,20 +94,21 @@ searchInCorpus' :: CorpusId
                 -> Maybe Offset
                 -> Maybe Limit
                 -> Maybe OrderBy
-                -> Cmd err TableResult
+                -> Cmd err FacetTableResult
 searchInCorpus' cId t q o l order = do
   docs <- searchInCorpus cId t q o l order
-  allDocs <- searchInCorpus cId t q Nothing Nothing Nothing
-  pure (TableResult (length allDocs) docs)
+  countAllDocs <- searchCountInCorpus cId t q
+  pure $ TableResult { tr_docs = docs, tr_count = countAllDocs }

 getTable :: NodeId -> Maybe TabType
          -> Maybe Offset -> Maybe Limit
-         -> Maybe OrderBy -> Cmd err TableResult
+         -> Maybe OrderBy -> Cmd err FacetTableResult
 getTable cId ft o l order = do
   docs <- getTable' cId ft o l order
+  -- TODO: Rewrite to use runCountOpaQuery and avoid (length allDocs)
   allDocs <- getTable' cId ft Nothing Nothing Nothing
-  pure (TableResult (length allDocs) docs)
+  pure $ TableResult { tr_docs = docs, tr_count = length allDocs }

 getTable' :: NodeId -> Maybe TabType
           -> Maybe Offset -> Maybe Limit
......
@@ -14,6 +14,7 @@ commentary with @some markup@.
 ------------------------------------------------------------------------
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE DeriveGeneric     #-}
+{-# LANGUAGE TemplateHaskell   #-}

 module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
                              , module Gargantext.Database.Types.Node
@@ -22,21 +23,28 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
                              , Label, Stems
                              , HasInvalidError(..), assertValid
                              , Name
+                             , TableResult(..)
+                             , NodeTableResult
                              ) where

 import Control.Lens (Prism', (#))
 import Control.Monad.Error.Class (MonadError, throwError)
 import Data.Aeson
-import Data.Semigroup
+import Data.Aeson.TH (deriveJSON)
 import Data.Monoid
+import Data.Semigroup
 import Data.Set (Set, empty)
+import Data.Swagger (ToSchema(..), genericDeclareNamedSchema)
 --import qualified Data.Set as S
 import Data.Text (Text, unpack)
 import Data.Validity
+import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
 import Gargantext.Core.Types.Main
+import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
 import Gargantext.Database.Types.Node
 import Gargantext.Prelude
@@ -135,3 +143,18 @@ assertValid :: (MonadError e m, HasInvalidError e) => Validation -> m ()
 assertValid v = when (not $ validationIsValid v) $ throwError $ _InvalidError # v
 -- assertValid :: MonadIO m => Validation -> m ()
 -- assertValid v = when (not $ validationIsValid v) $ fail $ show v
+
+data TableResult a = TableResult { tr_count :: Int
+                                 , tr_docs  :: [a]
+                                 } deriving (Generic)
+
+$(deriveJSON (unPrefix "tr_") ''TableResult)
+
+instance ToSchema a => ToSchema (TableResult a) where
+  declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "tr_")
+
+instance Arbitrary a => Arbitrary (TableResult a) where
+  arbitrary = TableResult <$> arbitrary <*> arbitrary
+
+type NodeTableResult a = TableResult (Node a)
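As a quick illustration of the new polymorphic TableResult (a standalone sketch, not code from this merge; the TextTableResult alias and the mapTableResult helper are hypothetical), the row count travels with the page of results while the payload type varies:

{-# LANGUAGE DeriveGeneric #-}
module TableResultSketch where

import GHC.Generics (Generic)

-- Mirrors the TableResult defined above.
data TableResult a = TableResult { tr_count :: Int
                                 , tr_docs  :: [a]
                                 } deriving (Show, Generic)

-- Specialisations play the same role as FacetTableResult / NodeTableResult.
type TextTableResult = TableResult String

-- The paging metadata is preserved while the payload is transformed.
mapTableResult :: (a -> b) -> TableResult a -> TableResult b
mapTableResult f (TableResult c ds) = TableResult c (map f ds)

main :: IO ()
main = print (mapTableResult length (TableResult 2 ["electrodes", "ngrams"]))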
@@ -94,4 +94,9 @@ nodeTypes = [ (n, nodeTypeId n) | n <- allNodeTypes ]
 fromNodeTypeId :: NodeTypeId -> NodeType
 fromNodeTypeId tId = fromMaybe (panic $ pack $ "Type Id " <> show tId <> " does not exist")
                                (lookup tId nodeTypeInv)
@@ -67,6 +67,7 @@ import Gargantext.Database.Schema.User (getUser, UserLight(..))
 import Gargantext.Database.TextSearch (searchInDatabase)
 import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
 import Gargantext.Database.Utils (Cmd, CmdM)
+import Gargantext.Database.Triggers
 import Gargantext.Ext.IMT (toSchoolName)
 import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
 import Gargantext.Prelude
@@ -223,14 +224,16 @@ flowCorpusUser l userName corpusName ctype ids = do
   -- TODO: check if present already, ignore
   _ <- Doc.add userCorpusId ids
   tId <- mkNode NodeTexts userCorpusId userId
   printDebug "Node Text Id" tId

   -- User List Flow
   --{-
-  (_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster (Left "") ctype
+  (masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster (Left "") ctype
   ngs        <- buildNgramsLists l 2 3 (StopSize 3) userCorpusId masterCorpusId
   userListId <- flowList userId userCorpusId ngs
+  mastListId <- getOrMkList masterCorpusId masterUserId
+  _          <- insertOccsUpdates userCorpusId mastListId
   printDebug "userListId" userListId
   -- User Graph Flow
   _ <- mkDashboard userCorpusId userId
......
@@ -32,16 +32,25 @@ import qualified Data.Map as DM
 import Data.Text (Text, toLower)
 import qualified Data.Text as DT
 import Gargantext.Prelude hiding (sum)
+import Gargantext.Core.Types (TableResult(..))
 import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
 import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
 import Gargantext.Database.Flow.Utils
 import Gargantext.Database.Utils (Cmd, runPGSQuery)
-import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId)
+import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
 import Gargantext.Database.Node.Children (getAllContacts)

 -- TODO mv this type in Types Main
 type Terms = Text

+{-
+pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
+pairing'' = undefined
+
+pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
+pairing' = undefined
+-}
+
 -- | TODO : add paring policy as parameter
 pairing :: AnnuaireId
         -> CorpusId
@@ -50,7 +59,7 @@ pairing :: AnnuaireId
 pairing aId cId lId = do
   contacts' <- getAllContacts aId
   let contactsMap = pairingPolicyToMap toLower
-                  $ toMaps extractNgramsT contacts'
+                  $ toMaps extractNgramsT (tr_docs contacts')

   ngramsMap' <- getNgramsTindexed cId Authors
   let ngramsMap = pairingPolicyToMap lastName ngramsMap'
......
@@ -25,12 +25,17 @@ import Gargantext.Database.Utils (Cmd)
 import Gargantext.Database.Schema.NodeNodeNgrams
 import Gargantext.Database.Types.Node

-toMaps :: Hyperdata a => (a -> Map (NgramsT Ngrams) Int) -> [Node a] -> Map (NgramsT Ngrams) (Map NodeId Int)
+toMaps :: Hyperdata a
+       => (a -> Map (NgramsT Ngrams) Int)
+       -> [Node a]
+       -> Map (NgramsT Ngrams) (Map NodeId Int)
 toMaps fun ns = mapNodeIdNgrams $ documentIdWithNgrams fun ns'
   where
     ns' = map (\(Node nId _ _ _ _ _ json) -> DocumentWithId nId json) ns

-mapNodeIdNgrams :: Hyperdata a => [DocumentIdWithNgrams a] -> Map (NgramsT Ngrams) (Map NodeId Int)
+mapNodeIdNgrams :: Hyperdata a
+                => [DocumentIdWithNgrams a]
+                -> Map (NgramsT Ngrams) (Map NodeId Int)
 mapNodeIdNgrams ds = DM.map (DM.fromListWith (+)) $ DM.fromListWith (<>) xs
   where
     xs = [(ng, [(nId, i)]) | (nId, n2i') <- n2i ds, (ng, i) <- DM.toList n2i']
@@ -72,14 +77,17 @@ data DocNgrams = DocNgrams { dn_doc_id :: DocId
 insertDocNgramsOn :: CorpusId
                   -> [DocNgrams]
                   -> Cmd err Int
-insertDocNgramsOn cId dn = insertNodeNodeNgrams $ (map (docNgrams2nodeNodeNgrams cId) dn)
+insertDocNgramsOn cId dn =
+  insertNodeNodeNgrams
+    $ (map (docNgrams2nodeNodeNgrams cId) dn)

 insertDocNgrams :: CorpusId
                 -> Map NgramsIndexed (Map NgramsType (Map NodeId Int))
                 -> Cmd err Int
-insertDocNgrams cId m = insertDocNgramsOn cId [ DocNgrams n (_ngramsId ng) (ngramsTypeId t) (fromIntegral i)
-                                              | (ng, t2n2i) <- DM.toList m
-                                              , (t, n2i)    <- DM.toList t2n2i
-                                              , (n, i)      <- DM.toList n2i
-                                              ]
+insertDocNgrams cId m =
+  insertDocNgramsOn cId [ DocNgrams n (_ngramsId ng) (ngramsTypeId t) (fromIntegral i)
+                        | (ng, t2n2i) <- DM.toList m
+                        , (t, n2i)    <- DM.toList t2n2i
+                        , (n, i)      <- DM.toList n2i
+                        ]
@@ -19,6 +19,7 @@ Ngrams by node enable contextual metrics.
 module Gargantext.Database.Metrics.NgramsByNode
   where

+import Debug.Trace (trace)
 import Data.Map.Strict (Map, fromListWith, elems, toList, fromList)
 import Data.Map.Strict.Patch (PatchMap, Replace, diff)
 import Data.Set (Set)
@@ -68,21 +69,19 @@ getTficf' u m nt f = do
   pure $ toTficfData (countNodesByNgramsWith f u')
                      (countNodesByNgramsWith f m')

---{-
 getTficfWith :: UserCorpusId -> MasterCorpusId -> [ListId]
              -> NgramsType -> Map Text (Maybe Text)
              -> Cmd err (Map Text (Double, Set Text))
 getTficfWith u m ls nt mtxt = do
   u' <- getNodesByNgramsOnlyUser u ls nt (Map.keys mtxt)
   m' <- getNodesByNgramsMaster u m

   let f x = case Map.lookup x mtxt of
         Nothing -> x
         Just x' -> maybe x identity x'

   pure $ toTficfData (countNodesByNgramsWith f u')
                      (countNodesByNgramsWith f m')
---}

 type Context = (Double, Map Text (Double, Set Text))
@@ -121,7 +120,8 @@ groupNodesByNgramsWith f m =
       $ toList m

 ------------------------------------------------------------------------
-getNodesByNgramsUser :: CorpusId -> NgramsType
+getNodesByNgramsUser :: CorpusId
+                     -> NgramsType
                      -> Cmd err (Map Text (Set NodeId))
 getNodesByNgramsUser cId nt =
   fromListWith (<>) <$> map (\(n,t) -> (t, Set.singleton n))
@@ -141,7 +141,6 @@ getNodesByNgramsUser cId nt =
     queryNgramsByNodeUser :: DPS.Query
     queryNgramsByNodeUser = [sql|
       SELECT nng.node2_id, ng.terms FROM node_node_ngrams nng
        JOIN ngrams ng      ON nng.ngrams_id = ng.id
        JOIN nodes_nodes nn ON nn.node2_id   = nng.node2_id
@@ -157,13 +156,59 @@ getNodesByNgramsUser cId nt =
       |]

 ------------------------------------------------------------------------
 -- TODO add groups
-getOccByNgramsOnlyFast :: CorpusId -> NgramsType -> [Text]
+getOccByNgramsOnlyFast :: CorpusId
+                       -> NgramsType
+                       -> [Text]
                        -> Cmd err (Map Text Int)
 getOccByNgramsOnlyFast cId nt ngs =
   fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs

+getOccByNgramsOnlyFast' :: CorpusId
+                        -> ListId
+                        -> NgramsType
+                        -> [Text]
+                        -> Cmd err (Map Text Int)
+getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
+  fromListWith (+) <$> map (second round) <$> run cId lId nt tms
+    where
+      fields = [QualifiedIdentifier Nothing "text"]
+
+      run :: CorpusId
+          -> ListId
+          -> NgramsType
+          -> [Text]
+          -> Cmd err [(Text, Double)]
+      run cId' lId' _nt' tms' = runPGSQuery query
+                ( Values fields (DPS.Only <$> tms')
+                , cId'
+                , lId'
+                -- , ngramsTypeId nt'
+                )
+
+      query :: DPS.Query
+      query = [sql|
+        WITH input_rows(terms) AS (?)
+        SELECT ng.terms, nng.weight FROM node_node_ngrams nng
+          JOIN ngrams ng     ON nng.ngrams_id = ng.id
+          JOIN input_rows ir ON ir.terms      = ng.terms
+          WHERE nng.node1_id = ?     -- CorpusId
+            AND nng.node2_id = ?
+            -- AND nng.ngrams_type = ? -- NgramsTypeId
+            -- AND nn.category > 0
+            GROUP BY ng.terms, nng.weight
+        |]
+
 -- just slower than getOccByNgramsOnlyFast
-getOccByNgramsOnlySlow :: NodeType -> CorpusId -> [ListId] -> NgramsType -> [Text]
+getOccByNgramsOnlySlow :: NodeType
+                       -> CorpusId
+                       -> [ListId]
+                       -> NgramsType
+                       -> [Text]
                        -> Cmd err (Map Text Int)
 getOccByNgramsOnlySlow t cId ls nt ngs =
   Map.map Set.size <$> getScore' t cId ls nt ngs
@@ -172,7 +217,10 @@ getOccByNgramsOnlySlow t cId ls nt ngs =
     getScore' NodeDocument = getNgramsByDocOnlyUser
     getScore' _            = getNodesByNgramsOnlyUser

-getOccByNgramsOnlySafe :: CorpusId -> [ListId] -> NgramsType -> [Text]
+getOccByNgramsOnlySafe :: CorpusId
+                       -> [ListId]
+                       -> NgramsType
+                       -> [Text]
                        -> Cmd err (Map Text Int)
 getOccByNgramsOnlySafe cId ls nt ngs = do
   printDebug "getOccByNgramsOnlySafe" (cId, nt, length ngs)
@@ -200,7 +248,23 @@ selectNgramsOccurrencesOnlyByNodeUser cId nt tms =
 -- equivalent ngrams intersections are not empty)
 queryNgramsOccurrencesOnlyByNodeUser :: DPS.Query
 queryNgramsOccurrencesOnlyByNodeUser = [sql|
+  WITH input_rows(terms) AS (?)
+  SELECT ng.terms, COUNT(nng.node2_id) FROM node_node_ngrams nng
+    JOIN ngrams ng      ON nng.ngrams_id = ng.id
+    JOIN input_rows  ir ON ir.terms      = ng.terms
+    JOIN nodes_nodes nn ON nn.node2_id   = nng.node2_id
+    JOIN nodes n        ON nn.node2_id   = n.id
+    WHERE nn.node1_id     = ? -- CorpusId
+      AND n.typename      = ? -- NodeTypeId
+      AND nng.ngrams_type = ? -- NgramsTypeId
+      AND nn.category     > 0
+      GROUP BY nng.node2_id, ng.terms
+  |]
+
+queryNgramsOccurrencesOnlyByNodeUser' :: DPS.Query
+queryNgramsOccurrencesOnlyByNodeUser' = [sql|
   WITH input_rows(terms) AS (?)
   SELECT ng.terms, COUNT(nng.node2_id) FROM node_node_ngrams nng
     JOIN ngrams ng ON nng.ngrams_id = ng.id
@@ -214,11 +278,16 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
     GROUP BY nng.node2_id, ng.terms
   |]

 getNodesByNgramsOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
                          -> Cmd err (Map Text (Set NodeId))
-getNodesByNgramsOnlyUser cId ls nt ngs = Map.unionsWith (<>)
-  . map (fromListWith (<>) . map (second Set.singleton))
-  <$> mapM (selectNgramsOnlyByNodeUser cId ls nt) (splitEvery 1000 ngs)
+getNodesByNgramsOnlyUser cId ls nt ngs =
+  Map.unionsWith (<>)
+    . map (fromListWith (<>) . map (second Set.singleton))
+    <$> mapM (selectNgramsOnlyByNodeUser cId ls nt)
+             (splitEvery 1000 ngs)

 selectNgramsOnlyByNodeUser :: CorpusId -> [ListId] -> NgramsType -> [Text]
                            -> Cmd err [(Text, NodeId)]
@@ -235,7 +304,6 @@ selectNgramsOnlyByNodeUser cId ls nt tms =
 queryNgramsOnlyByNodeUser :: DPS.Query
 queryNgramsOnlyByNodeUser = [sql|
   WITH input_rows(terms) AS (?),
        input_list(id)    AS (?)
   SELECT ng.terms, nng.node2_id FROM node_node_ngrams nng
@@ -253,12 +321,43 @@ queryNgramsOnlyByNodeUser = [sql|
   |]

+selectNgramsOnlyByNodeUser' :: CorpusId -> [ListId] -> NgramsType -> [Text]
+                            -> Cmd err [(Text, Int)]
+selectNgramsOnlyByNodeUser' cId ls nt tms =
+  runPGSQuery queryNgramsOnlyByNodeUser
+    ( Values fields (DPS.Only <$> tms)
+    , Values [QualifiedIdentifier Nothing "int4"] (DPS.Only <$> (map (\(NodeId n) -> n) ls))
+    , cId
+    , nodeTypeId NodeDocument
+    , ngramsTypeId nt
+    )
+  where
+    fields = [QualifiedIdentifier Nothing "text"]
+
+queryNgramsOnlyByNodeUser' :: DPS.Query
+queryNgramsOnlyByNodeUser' = [sql|
+  WITH input_rows(terms) AS (?),
+       input_list(id)    AS (?)
+  SELECT ng.terms, nng.weight FROM node_node_ngrams nng
+    JOIN ngrams ng     ON nng.ngrams_id = ng.id
+    JOIN input_rows ir ON ir.terms      = ng.terms
+    JOIN input_list il ON il.id         = nng.node2_id
+    WHERE nng.node1_id    = ? -- CorpusId
+      AND nng.ngrams_type = ? -- NgramsTypeId
+      -- AND nn.category > 0
+      GROUP BY ng.terms, nng.weight
+  |]
+
 getNgramsByDocOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
                        -> Cmd err (Map Text (Set NodeId))
-getNgramsByDocOnlyUser cId ls nt ngs = Map.unionsWith (<>)
-  . map (fromListWith (<>) . map (second Set.singleton))
-  <$> mapM (selectNgramsOnlyByDocUser cId ls nt) (splitEvery 1000 ngs)
+getNgramsByDocOnlyUser cId ls nt ngs =
+  Map.unionsWith (<>)
+    . map (fromListWith (<>) . map (second Set.singleton))
+    <$> mapM (selectNgramsOnlyByDocUser cId ls nt) (splitEvery 1000 ngs)

 selectNgramsOnlyByDocUser :: DocId -> [ListId] -> NgramsType -> [Text]
@@ -275,7 +374,6 @@ selectNgramsOnlyByDocUser dId ls nt tms =
 queryNgramsOnlyByDocUser :: DPS.Query
 queryNgramsOnlyByDocUser = [sql|
   WITH input_rows(terms) AS (?),
        input_list(id)    AS (?)
   SELECT ng.terms, nng.node2_id FROM node_node_ngrams nng
@@ -287,7 +385,6 @@ queryNgramsOnlyByDocUser = [sql|
     GROUP BY ng.terms, nng.node2_id
   |]

 ------------------------------------------------------------------------
 -- | TODO filter by language, database, any social field
 getNodesByNgramsMaster :: UserCorpusId -> MasterCorpusId -> Cmd err (Map Text (Set NodeId))
@@ -316,37 +413,36 @@ selectNgramsByNodeMaster n ucId mcId p = runPGSQuery
 -- | TODO fix node_node_ngrams relation
 queryNgramsByNodeMaster' :: DPS.Query
 queryNgramsByNodeMaster' = [sql|
   WITH nodesByNgramsUser AS (
     SELECT n.id, ng.terms FROM nodes n
       JOIN nodes_nodes      nn  ON n.id          = nn.node2_id
       JOIN node_node_ngrams nng ON nng.node2_id  = n.id
       JOIN ngrams           ng  ON nng.ngrams_id = ng.id
       WHERE nn.node1_id     = ?   -- UserCorpusId
       -- AND n.typename     = ?   -- NodeTypeId
         AND nng.ngrams_type = ?   -- NgramsTypeId
         AND nn.category > 0
         AND node_pos(n.id,?) >= ?
         AND node_pos(n.id,?) <  ?
       GROUP BY n.id, ng.terms
   ),

   nodesByNgramsMaster AS (
     SELECT n.id, ng.terms FROM nodes n TABLESAMPLE SYSTEM_ROWS(?)
       JOIN node_node_ngrams nng ON n.id  = nng.node2_id
       JOIN ngrams           ng  ON ng.id = nng.ngrams_id
       WHERE n.parent_id     = ?  -- Master Corpus NodeTypeId
         AND n.typename      = ?  -- NodeTypeId
         AND nng.ngrams_type = ?  -- NgramsTypeId
       GROUP BY n.id, ng.terms
   )

   SELECT m.id, m.terms FROM nodesByNgramsMaster m
     RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
   |]
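The heart of getOccByNgramsOnlyFast' above is the fold fromListWith (+) over the (term, weight) rows returned by the SQL query. A self-contained sketch of that aggregation step (the module name and sample rows are made up for illustration):

{-# LANGUAGE OverloadedStrings #-}
module OccAggregationSketch where

import Data.Map.Strict (Map, fromListWith, toList)
import Data.Text (Text)

-- Rows as they would come back from the occurrence query: (term, weight).
sampleRows :: [(Text, Double)]
sampleRows = [("graph", 2), ("ngrams", 1), ("graph", 3)]

-- Round each weight and sum per term, as getOccByNgramsOnlyFast' does.
occurrences :: [(Text, Double)] -> Map Text Int
occurrences rows = fromListWith (+) (map (fmap round) rows)

main :: IO ()
main = print (toList (occurrences sampleRows))  -- [("graph",5),("ngrams",1)]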
@@ -28,10 +28,10 @@ import Gargantext.Prelude
 import Opaleye
 import Control.Arrow (returnA)

-selectNgramsByDoc :: [CorpusId] -> DocId -> NgramsType -> Cmd err [Text]
-selectNgramsByDoc cIds dId nt = runOpaQuery (query cIds dId nt)
+selectNgramsByDoc :: [ListId] -> DocId -> NgramsType -> Cmd err [Text]
+selectNgramsByDoc lIds dId nt = runOpaQuery (query lIds dId nt)
   where
     join :: Query (NgramsRead, NodeNodeNgramsReadNull)
     join = leftJoin queryNgramsTable queryNodeNodeNgramsTable on1
       where
@@ -42,7 +42,7 @@ selectNgramsByDoc cIds dId nt = runOpaQuery (query cIds dId nt)
       restrict -< foldl (\b cId -> ((toNullable $ pgNodeId cId) .== nnng^.nnng_node1_id) .|| b) (pgBool True) cIds'
       restrict -< (toNullable $ pgNodeId dId')    .== nnng^.nnng_node2_id
       restrict -< (toNullable $ pgNgramsType nt') .== nnng^.nnng_ngramsType
       returnA  -< ng^.ngrams_terms

 postNgrams :: CorpusId -> DocId -> [Text] -> Cmd err Int
......
@@ -30,12 +30,11 @@ import Gargantext.Database.Node.Contact (HyperdataContact)
 import Gargantext.Database.Schema.Node (pgNodeId)
 import Control.Arrow (returnA)

-getAllDocuments :: ParentId -> Cmd err [Node HyperdataDocument]
+getAllDocuments :: ParentId -> Cmd err (TableResult (Node HyperdataDocument))
 getAllDocuments pId = getAllChildren pId (Proxy :: Proxy HyperdataDocument)
                                          (Just NodeDocument)

-getAllContacts :: ParentId -> Cmd err [Node HyperdataContact]
+getAllContacts :: ParentId -> Cmd err (TableResult (Node HyperdataContact))
 getAllContacts pId = getAllChildren pId (Proxy :: Proxy HyperdataContact)
                                         (Just NodeContact)
@@ -43,7 +42,7 @@ getAllChildren :: JSONB a
                => ParentId
                -> proxy a
                -> Maybe NodeType
-               -> Cmd err [Node a]
+               -> Cmd err (NodeTableResult a)
 getAllChildren pId p maybeNodeType = getChildren pId p maybeNodeType Nothing Nothing

 getChildren :: JSONB a
@@ -52,11 +51,19 @@ getChildren :: JSONB a
             -> Maybe NodeType
             -> Maybe Offset
             -> Maybe Limit
-            -> Cmd err [Node a]
-getChildren pId _ maybeNodeType maybeOffset maybeLimit = runOpaQuery
-  $ limit' maybeLimit $ offset' maybeOffset
-  $ orderBy (asc _node_id)
-  $ selectChildren pId maybeNodeType
+            -> Cmd err (NodeTableResult a)
+getChildren pId _ maybeNodeType maybeOffset maybeLimit = do
+  docs <- runOpaQuery
+        $ limit' maybeLimit $ offset' maybeOffset
+        $ orderBy (asc _node_id)
+        $ query
+  docCount <- runCountOpaQuery query
+  pure $ TableResult { tr_docs = docs, tr_count = docCount }
+  where
+    query = selectChildren pId maybeNodeType

 selectChildren :: ParentId
                -> Maybe NodeType
......
@@ -64,7 +64,9 @@ leftJoin3
   -> ((fieldsL2, fieldsR) -> Column PGBool)
   -> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
   -> Opaleye.Select (fieldsL1, nullableFieldsR2)
-leftJoin3 q1 q2 q3 cond12 cond23 = leftJoin q3 (leftJoin q2 q1 cond12) cond23
+leftJoin3 q1 q2 q3
+          cond12 cond23 =
+  leftJoin q3 ( leftJoin q2 q1 cond12) cond23

 leftJoin4
@@ -85,7 +87,13 @@ leftJoin4
   -> ((fieldsL2, (fieldsL3, nullableFieldsR2)) -> Column PGBool)
   -> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
   -> Opaleye.Select (fieldsL1, nullableFieldsR3)
-leftJoin4 q1 q2 q3 q4 cond12 cond23 cond34 = leftJoin q4 (leftJoin q3 (leftJoin q2 q1 cond12) cond23) cond34
+leftJoin4 q1 q2 q3 q4
+          cond12 cond23 cond34 =
+  leftJoin q4 ( leftJoin q3
+                ( leftJoin q2 q1
+                  cond12
+                ) cond23
+              ) cond34

 leftJoin5 :: ( Default Unpackspec fieldsL1 fieldsL1,
@@ -110,7 +118,15 @@ leftJoin5 :: ( Default Unpackspec fieldsL1 fieldsL1,
   -> ((fieldsL2, (fieldsL3, nullableFieldsR2)) -> Column PGBool)
   -> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
   -> Query (fieldsL1, nullableFieldsR4)
-leftJoin5 q1 q2 q3 q4 q5 cond12 cond23 cond34 cond45 = leftJoin q5 (leftJoin q4 (leftJoin q3 (leftJoin q2 q1 cond12) cond23) cond34) cond45
+leftJoin5 q1 q2 q3 q4 q5
+          cond12 cond23 cond34 cond45 =
+  leftJoin q5 ( leftJoin q4
+                ( leftJoin q3
+                  ( leftJoin q2 q1
+                    cond12
+                  ) cond23
+                ) cond34
+              ) cond45

 leftJoin6 :: ( Default Unpackspec fieldsL1 fieldsL1,
@@ -139,7 +155,17 @@ leftJoin6 :: ( Default Unpackspec fieldsL1 fieldsL1,
   -> ((fieldsL2, (fieldsL3, nullableFieldsR2)) -> Column PGBool)
   -> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
   -> Query (fieldsL1, nullableFieldsR5)
-leftJoin6 q1 q2 q3 q4 q5 q6 cond12 cond23 cond34 cond45 cond56 = leftJoin q6 (leftJoin q5 (leftJoin q4 (leftJoin q3 (leftJoin q2 q1 cond12) cond23) cond34) cond45) cond56
+leftJoin6 q1 q2 q3 q4 q5 q6
+          cond12 cond23 cond34 cond45 cond56 =
+  leftJoin q6 ( leftJoin q5
+                ( leftJoin q4
+                  ( leftJoin q3
+                    ( leftJoin q2 q1
+                      cond12
+                    ) cond23
+                  ) cond34
+                ) cond45
+              ) cond56

 leftJoin7
@@ -175,7 +201,19 @@ leftJoin7
   -> ((fieldsL2, (fieldsL3, nullableFieldsR2)) -> Column PGBool)
   -> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
   -> Opaleye.Select (fieldsL1, nullableFieldsR6)
-leftJoin7 q1 q2 q3 q4 q5 q6 q7 cond12 cond23 cond34 cond45 cond56 cond67 = leftJoin q7 (leftJoin q6 (leftJoin q5 (leftJoin q4 (leftJoin q3 (leftJoin q2 q1 cond12) cond23) cond34) cond45) cond56) cond67
+leftJoin7 q1 q2 q3 q4 q5 q6 q7
+          cond12 cond23 cond34 cond45 cond56 cond67 =
+  leftJoin q7 ( leftJoin q6
+                ( leftJoin q5
+                  ( leftJoin q4
+                    ( leftJoin q3
+                      ( leftJoin q2 q1
+                        cond12
+                      ) cond23
+                    ) cond34
+                  ) cond45
+                ) cond56
+              ) cond67

 leftJoin8
@@ -216,7 +254,21 @@ leftJoin8
   -> ((fieldsL2, (fieldsL3, nullableFieldsR2)) -> Column PGBool)
   -> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
   -> Opaleye.Select (fieldsL1, nullableFieldsR7)
-leftJoin8 q1 q2 q3 q4 q5 q6 q7 q8 cond12 cond23 cond34 cond45 cond56 cond67 cond78 = leftJoin q8 (leftJoin q7 (leftJoin q6 (leftJoin q5 (leftJoin q4 (leftJoin q3 (leftJoin q2 q1 cond12) cond23) cond34) cond45) cond56) cond67) cond78
+leftJoin8 q1 q2 q3 q4 q5 q6 q7 q8
+          cond12 cond23 cond34 cond45 cond56 cond67 cond78 =
+  leftJoin q8 ( leftJoin q7
+                ( leftJoin q6
+                  ( leftJoin q5
+                    ( leftJoin q4
+                      ( leftJoin q3
+                        ( leftJoin q2 q1
+                          cond12
+                        ) cond23
+                      ) cond34
+                    ) cond45
+                  ) cond56
+                ) cond67
+              ) cond78

 leftJoin9
@@ -262,5 +314,21 @@ leftJoin9
   -> ((fieldsL2, (fieldsL3, nullableFieldsR2)) -> Column PGBool)
   -> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
   -> Opaleye.Select (fieldsL1, nullableFieldsR8)
-leftJoin9 q1 q2 q3 q4 q5 q6 q7 q8 q9 cond12 cond23 cond34 cond45 cond56 cond67 cond78 cond89 = leftJoin q9 (leftJoin q8 (leftJoin q7 (leftJoin q6 (leftJoin q5 (leftJoin q4 (leftJoin q3 (leftJoin q2 q1 cond12) cond23) cond34) cond45) cond56) cond67) cond78) cond89
+leftJoin9 q1 q2 q3 q4 q5 q6 q7 q8 q9
+          cond12 cond23 cond34 cond45 cond56 cond67 cond78 cond89 =
+  leftJoin q9 ( leftJoin q8
+                ( leftJoin q7
+                  ( leftJoin q6
+                    ( leftJoin q5
+                      ( leftJoin q4
+                        ( leftJoin q3
+                          ( leftJoin q2 q1
+                            cond12
+                          ) cond23
+                        ) cond34
+                      ) cond45
+                    ) cond56
+                  ) cond67
+                ) cond78
+              ) cond89
@@ -373,17 +373,17 @@ type JSONB = QueryRunnerColumnDefault PGJsonb
 getNode :: JSONB a => NodeId -> proxy a -> Cmd err (Node a)
 getNode nId _ = do
-    fromMaybe (error $ "Node does node exist: " <> show nId) . headMay
+    fromMaybe (error $ "Node does not exist: " <> show nId) . headMay
       <$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))

 getNodePhylo :: NodeId -> Cmd err (Node HyperdataPhylo)
 getNodePhylo nId = do
-    fromMaybe (error $ "Node does node exist: " <> show nId) . headMay
+    fromMaybe (error $ "Node Phylo does not exist: " <> show nId) . headMay
      <$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))

 getNode' :: NodeId -> Cmd err (Node Value)
-getNode' nId = fromMaybe (error $ "Node does node exist: " <> show nId) . headMay
+getNode' nId = fromMaybe (error $ "Node does not exist: " <> show nId) . headMay
   <$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))
......
@@ -35,7 +35,7 @@ import Gargantext.Database.Schema.Ngrams
 import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
 import Gargantext.Database.Schema.NodeNodeNgrams
 import Gargantext.Database.Queries.Join (leftJoin6)
-import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery)
+import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
 import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
 import Gargantext.Core.Types
 import Control.Arrow (returnA)
@@ -72,6 +72,15 @@ searchInCorpus cId t q o l order = runOpaQuery
                      $ intercalate " | "
                      $ map stemIt q

+searchCountInCorpus :: CorpusId
+                    -> IsTrash
+                    -> [Text]
+                    -> Cmd err Int
+searchCountInCorpus cId t q = runCountOpaQuery
+                            $ queryInCorpus cId t
+                            $ intercalate " | "
+                            $ map stemIt q
+
 queryInCorpus :: CorpusId
               -> IsTrash
               -> Text
......
@@ -108,7 +108,7 @@ dbTree rootId = map (\(nId, tId, pId, n) -> DbTreeNode nId tId pId n)
       FROM nodes AS c
       INNER JOIN tree AS s ON c.parent_id = s.id
-      -- WHERE c.typename IN (2,20,21,22,3,5,30,31,40,7,9,90)
+      WHERE c.typename IN (2,20,21,22,3,5,30,31,40,7,9,90)
     )
     SELECT * from tree;
   |] (Only rootId)
......
{-|
Module      : Gargantext.Database.Triggers
Description : Triggers configuration
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Ngrams by node enable contextual metrics.

-}

{-# LANGUAGE QuasiQuotes       #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes        #-}

module Gargantext.Database.Triggers
  where

import Database.PostgreSQL.Simple.SqlQQ (sql)
-- import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as DPS

------------------------------------------------------------------------
type MasterListId = ListId

insertOccsUpdates :: UserCorpusId -> MasterListId -> Cmd err [DPS.Only Int]
insertOccsUpdates cId lId = runPGSQuery query (cId, lId, nodeTypeId NodeList, nodeTypeId NodeDocument)
  where
    query :: DPS.Query
    query = [sql|
      INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
        SELECT nn.node1_id, lists.id, nnn.ngrams_id, 1, count(*) as c -- type of score
          FROM node_node_ngrams nnn
            INNER JOIN nodes_nodes nn    ON nn.node1_id      = nnn.node2_id
            INNER JOIN nodes       docs  ON docs.id          = nnn.node2_id
            INNER JOIN nodes       lists ON lists.parent_id  = nn.node1_id
            -- WHERE nn.node1_id = NEW.node1_id -- .node1_id -- corpus_id
            WHERE nn.node1_id  = ?             -- .node1_id -- corpus_id
              AND nnn.node1_id in (?, lists.id) -- (masterList_id, userLists)
              AND lists.typename = ?
              AND docs.typename  = ?
          GROUP BY nn.node1_id, lists.id, nnn.ngrams_id
      ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
        DO UPDATE SET weight = 3 -- c -- excluded.weight
      RETURNING 1
      -- TOCHECK
      |]

triggerOccsUpdates :: CorpusId -> ListId -> Cmd err [DPS.Only Int]
triggerOccsUpdates cId lId = runPGSQuery query (cId, lId, nodeTypeId NodeList, nodeTypeId NodeDocument)
  where
    query :: DPS.Query
    query = [sql|
      drop trigger trigger_occs on nodes_nodes;

      CREATE OR REPLACE FUNCTION occs_update() RETURNS trigger AS
      $$
      BEGIN
        IF TG_OP = 'UPDATE' THEN
          INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
            -- TODO edge_type instead of ngrams_type
            SELECT nn.node1_id, lists.id, nnn.ngrams_id, count(*), 1 -- type of score
              FROM node_node_ngrams nnn
                INNER JOIN nodes_nodes nn    ON nn.node2_id     = nnn.node2_id
                INNER JOIN nodes       docs  ON docs.id         = nnn.node2_id
                INNER JOIN nodes       lists ON lists.parent_id = nn.node1_id
                -- WHERE nn.node1_id = NEW.node1_id -- .node1_id -- corpus_id
                WHERE nn.node1_id  = ?             -- .node1_id -- corpus_id
                  AND nnn.node1_id in (?, lists.id) -- (masterList_id, userLists)
                  AND lists.typename = ?
                  AND docs.typename  = ?
              GROUP BY nn.node1_id, lists.id, nnn.ngrams_id
          ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
            DO UPDATE SET weight = excluded.weight;
        END IF;

        RETURN NULL;
      END $$
      LANGUAGE plpgsql;

      CREATE TRIGGER trigger_occs
        AFTER UPDATE ON nodes_nodes
        REFERENCING NEW TABLE AS NEW
        FOR EACH STATEMENT
        EXECUTE PROCEDURE occs_update();

      update nodes_nodes SET node1_id = node1_id;
      |]
@@ -30,6 +30,7 @@ import Control.Monad.Except
 import Data.Aeson (Result(Error,Success), fromJSON, FromJSON)
 import Data.Either.Extra (Either(Left, Right))
 import Data.Ini (readIniFile, lookupValue)
+import qualified Data.List as DL
 import Data.Maybe (maybe)
 import Data.Monoid ((<>))
 import Data.Profunctor.Product.Default (Default)
@@ -41,6 +42,7 @@ import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(Conversion
 import Database.PostgreSQL.Simple.Internal  (Field)
 import Gargantext.Prelude
 import Opaleye (Query, Unpackspec, showSqlForPostgres, FromFields, Select, runQuery)
+import Opaleye.Aggregate (countRows)
 import System.IO (FilePath)
 import Text.Read (read)
 import qualified Data.ByteString as DB
@@ -67,6 +69,9 @@ type Cmd' env err a = forall m. CmdM' env err m => m a

 type Cmd err a = forall m env. CmdM env err m => m a

+fromInt64ToInt :: Int64 -> Int
+fromInt64ToInt = fromIntegral
+
 -- TODO: ideally there should be very few calls to this functions.
 mkCmd :: (Connection -> IO a) -> Cmd err a
 mkCmd k = do
@@ -82,6 +87,12 @@ runOpaQuery :: Default FromFields fields haskells
             => Select fields -> Cmd err [haskells]
 runOpaQuery q = mkCmd $ \c -> runQuery c q

+runCountOpaQuery :: Select a -> Cmd err Int
+runCountOpaQuery q = do
+  counts <- mkCmd $ \c -> runQuery c $ countRows q
+  -- countRows is guaranteed to return a list with exactly one row so DL.head is safe here
+  pure $ fromInt64ToInt $ DL.head counts
+
 formatPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err DB.ByteString
 formatPGSQuery q a = mkCmd $ \conn -> PGS.formatQuery conn q a
......
@@ -15,6 +15,7 @@ Portability : POSIX
 module Gargantext.Viz.Graph.Tools
   where

+import Debug.Trace (trace)
 import Data.Graph.Clustering.Louvain.CplusPlus (LouvainNode(..))
 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
 import Data.Map (Map)
@@ -47,16 +48,39 @@ cooc2graph threshold myCooc = do
       distanceMat = measureConditional matCooc
      distanceMap = Map.filter (> threshold) $ mat2map distanceMat

+  let nodesApprox :: Int
+      nodesApprox = n'
+        where
+          (as, bs) = List.unzip $ Map.keys distanceMap
+          n' = Set.size $ Set.fromList $ as <> bs
+      ClustersParams rivers level = trace ("nodesApprox: " <> show nodesApprox) $ clustersParams nodesApprox
+
   partitions <- case Map.size distanceMap > 0 of
-    True  -> cLouvain distanceMap
+    True  -> trace ("level" <> show level) $ cLouvain level distanceMap
     False -> panic "Text.Flow: DistanceMap is empty"

-  let bridgeness' = bridgeness 300 partitions distanceMap
+  let bridgeness' = trace ("rivers: " <> show rivers) $ bridgeness rivers partitions distanceMap
   let confluence' = confluence (Map.keys bridgeness') 3 True False

   data2graph (Map.toList ti) myCooc' bridgeness' confluence' partitions

+data ClustersParams = ClustersParams { bridgness :: Double
+                                     , louvain   :: Text
+                                     } deriving (Show)
+
+clustersParams :: Int -> ClustersParams
+clustersParams x = ClustersParams (fromIntegral x) y
+  where
+    y | x < 100   = "0.0001"
+      | x < 350   = "0.001"
+      | x < 500   = "0.01"
+      | x < 1000  = "0.1"
+      | otherwise = "1"
+
 ----------------------------------------------------------
 -- | From data to Graph
 data2graph :: [(Text, Int)]
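To make the clustersParams heuristic above concrete, here is a throwaway, self-contained sketch (String stands in for Text so it compiles outside the project; the module name is made up) that prints the bridgeness and Louvain resolution chosen for a few approximate node counts:

module ClustersParamsSketch where

-- Same shape as the ClustersParams introduced above, with Text replaced by String.
data ClustersParams = ClustersParams { bridgness :: Double
                                     , louvain   :: String
                                     } deriving (Show)

clustersParams :: Int -> ClustersParams
clustersParams x = ClustersParams (fromIntegral x) y
  where
    y | x < 100   = "0.0001"
      | x < 350   = "0.001"
      | x < 500   = "0.01"
      | x < 1000  = "0.1"
      | otherwise = "1"

main :: IO ()
main = mapM_ (print . clustersParams) [50, 400, 5000]
-- ClustersParams {bridgness = 50.0, louvain = "0.0001"}
-- ClustersParams {bridgness = 400.0, louvain = "0.01"}
-- ClustersParams {bridgness = 5000.0, louvain = "1"}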
......
@@ -53,7 +53,7 @@ relatedComp graphs = foldl' (\mem groups ->
 louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
 louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
                      <$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
-                     <$> (cLouvain $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
+                     <$> (cLouvain "0.0001" $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
   where
     --------------------------------------
     idx :: PhyloGroup -> Int
......
@@ -4,6 +4,7 @@ extra-package-dbs: []
 packages:
 - .

 docker:
   enable: false
   repo: 'fpco/stack-build:lts-14.6-garg'
@@ -39,7 +40,7 @@ extra-deps:
 - git: https://github.com/np/servant-job.git
   commit: 8557bfc9472a1b2be0b7bc632c23701ba5f44bf8
 - git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git
-  commit: 1c636112b151110408e7c5a28cec39e46657358e
+  commit: b29040ce741629d61cc63e8ba97e75bf0944979e
 - git: https://github.com/np/patches-map
   commit: 8c6f38c4844ead53e664cf9c82ba461715dbe445
 - git: https://github.com/delanoe/haskell-opaleye.git #- opaleye-0.6.7002.0
......