Commit a854f24e authored by Alexandre Delanoë

Merge branch 'master' into stable

parents 6116a39e 38f940bf
# This file is a template, and might need editing before it works on your project.
# see https://docs.gitlab.com/ce/ci/yaml/README.html for all available options
# Thanks to:
# https://vadosware.io/post/zero-to-continuous-integrated-testing-a-haskell-project-with-gitlab/
#
#
image: haskell:8
variables:
  STACK_ROOT: "${CI_PROJECT_DIR}/.stack"
  STACK_OPTS: "--system-ghc"
cache:
  paths:
    - .stack
    - .stack-work
    - target
before_script:
  - echo "Before script section"
  - echo "For example you might run an update here or install a build dependency"
  - echo "Or perhaps you might print out some debugging details"
#before_script:
#- apt-get update
#- apt-get install make xz-utils
after_script:
  - echo "After script section"
  - echo "For example you might do some cleanup here"
stages:
  - build
  - test
build:
  stage: build
  script:
    - ./install
    - make setup
    - make build
#test1:
# TODO
#unit-test:
# stage: test
# script:
# - make test-unit
#
#int-test:
# stage: test
# script:
# - echo "Do a test here"
# - echo "For example run a test suite"
# - make test-int
#
#test2:
#e2e-test:
# stage: test
# script:
# - echo "Do another parallel test here"
# - echo "For example run a lint test"
# - make test-e2e
#
#deploy1:
#  stage: deploy
#  script:
#    - echo "Do your deploy here"
#
# If you find yourself with a nonsensical build error when you know your
# project should be building just fine, this fragment should help:
#
#build:
#  stage: build
#  script:
#    # Clear out cache files
#    - rm -rf .stack
#    - rm -rf .stack-work
#    - stack setup --system-ghc
#    - stack install --local-bin-path target --system-ghc
#!/bin/bash
# Run this as the postgres user (e.g. via sudo -u postgres); a bare
# `sudo su postgres` inside the script would only open an interactive
# shell and the remaining commands would run as the original user.
# Connection string: postgresql://$USER:$PW@localhost/$DB
PW="password"
DB="gargandbV5"
USER="gargantua"
psql -c "CREATE USER \"${USER}\""
# NB: the password must be a single-quoted SQL string literal;
# double quotes would be parsed as an identifier.
psql -c "ALTER USER \"${USER}\" WITH PASSWORD '${PW}'"
psql -c "DROP DATABASE IF EXISTS \"${DB}\""
createdb "${DB}"
psql "${DB}" < schema.sql
psql -c "ALTER DATABASE \"${DB}\" OWNER TO \"${USER}\""
......
......@@ -80,6 +80,7 @@ ALTER TABLE public.ngrams OWNER TO gargantua;
--ALTER TABLE public.nodes_ngrams_ngrams OWNER TO gargantua;
---------------------------------------------------------------
-- TODO nodes_nodes(node1_id int, node2_id int, edge_type int, weight real)
CREATE TABLE public.nodes_nodes (
node1_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
node2_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
......
......@@ -130,7 +130,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
-- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Types.Node (NodeType(..))
import Gargantext.Database.Utils (fromField', HasConnection)
......@@ -1019,7 +1019,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
setScores True table = do
let ngrams_terms = (table ^.. each . ne_ngrams)
t1 <- getTime'
occurrences <- getOccByNgramsOnlyFast nId
occurrences <- getOccByNgramsOnlyFast' nId
listId
ngramsType
ngrams_terms
t2 <- getTime'
......@@ -1153,8 +1154,6 @@ getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orde
apiNgramsTableCorpus :: ( RepoCmdM env err m
, HasNodeError err
, HasInvalidError err
......
......@@ -55,6 +55,7 @@ import Gargantext.API.Ngrams.NTree (MyTree)
import Gargantext.API.Search (SearchDocsAPI, searchDocs)
import Gargantext.API.Table
import Gargantext.API.Types
import Gargantext.Core.Types (NodeTableResult)
import Gargantext.Core.Types.Main (Tree, NodeTree, ListType)
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Facet (FacetDoc, OrderBy(..))
......@@ -158,7 +159,8 @@ type ChildrenApi a = Summary " Summary children"
:> QueryParam "type" NodeType
:> QueryParam "offset" Int
:> QueryParam "limit" Int
:> Get '[JSON] [Node a]
-- :> Get '[JSON] [Node a]
:> Get '[JSON] (NodeTableResult a)
------------------------------------------------------------------------
type NodeNodeAPI a = Get '[JSON] (Node a)
......
......@@ -44,7 +44,7 @@ import Data.Swagger
import Data.Text (Text())
import GHC.Generics (Generic)
import Gargantext.API.Ngrams (TabType(..))
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Types (Offset, Limit, TableResult(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..))
import Gargantext.Database.Learn (FavOrTrash(..), moreLike)
......@@ -60,7 +60,7 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
type TableApi = Summary " Table API"
:> ReqBody '[JSON] TableQuery
:> Post '[JSON] TableResult
:> Post '[JSON] FacetTableResult
data TableQuery = TableQuery
{ tq_offset :: Int
......@@ -70,17 +70,7 @@ data TableQuery = TableQuery
, tq_query :: Text
} deriving (Generic)
data TableResult = TableResult { tr_count :: Int
, tr_docs :: [FacetDoc]
} deriving (Generic)
$(deriveJSON (unPrefix "tr_") ''TableResult)
instance ToSchema TableResult where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "tr_")
instance Arbitrary TableResult where
arbitrary = TableResult <$> arbitrary <*> arbitrary
type FacetTableResult = TableResult FacetDoc
$(deriveJSON (unPrefix "tq_") ''TableQuery)
......@@ -91,7 +81,7 @@ instance Arbitrary TableQuery where
arbitrary = elements [TableQuery 0 10 DateAsc Docs "electrodes"]
tableApi :: NodeId -> TableQuery -> Cmd err TableResult
tableApi :: NodeId -> TableQuery -> Cmd err FacetTableResult
tableApi cId (TableQuery o l order ft "") = getTable cId (Just ft) (Just o) (Just l) (Just order)
tableApi cId (TableQuery o l order ft q) = case ft of
Docs -> searchInCorpus' cId False [q] (Just o) (Just l) (Just order)
......@@ -104,20 +94,21 @@ searchInCorpus' :: CorpusId
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Cmd err TableResult
-> Cmd err FacetTableResult
searchInCorpus' cId t q o l order = do
docs <- searchInCorpus cId t q o l order
allDocs <- searchInCorpus cId t q Nothing Nothing Nothing
pure (TableResult (length allDocs) docs)
countAllDocs <- searchCountInCorpus cId t q
pure $ TableResult { tr_docs = docs, tr_count = countAllDocs }
getTable :: NodeId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
-> Maybe OrderBy -> Cmd err TableResult
-> Maybe OrderBy -> Cmd err FacetTableResult
getTable cId ft o l order = do
docs <- getTable' cId ft o l order
-- TODO: Rewrite to use runCountOpaQuery and avoid (length allDocs)
allDocs <- getTable' cId ft Nothing Nothing Nothing
pure (TableResult (length allDocs) docs)
pure $ TableResult { tr_docs = docs, tr_count = length allDocs }
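One possible shape for that TODO (a hedged sketch: `viewDocumentsQuery` is a hypothetical name for the Opaleye Select underlying getTable', which this diff does not expose):

getTable :: NodeId -> Maybe TabType
         -> Maybe Offset -> Maybe Limit
         -> Maybe OrderBy -> Cmd err FacetTableResult
getTable cId ft o l order = do
  docs     <- getTable' cId ft o l order
  -- Count in SQL instead of materializing every row:
  docCount <- runCountOpaQuery (viewDocumentsQuery cId ft)
  pure $ TableResult { tr_docs = docs, tr_count = docCount }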
getTable' :: NodeId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
......
......@@ -14,6 +14,7 @@ commentary with @some markup@.
------------------------------------------------------------------------
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, module Gargantext.Database.Types.Node
......@@ -22,21 +23,28 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, Label, Stems
, HasInvalidError(..), assertValid
, Name
, TableResult(..)
, NodeTableResult
) where
import Control.Lens (Prism', (#))
import Control.Monad.Error.Class (MonadError, throwError)
import Data.Aeson
import Data.Semigroup
import Data.Aeson.TH (deriveJSON)
import Data.Monoid
import Data.Semigroup
import Data.Set (Set, empty)
import Data.Swagger (ToSchema(..), genericDeclareNamedSchema)
--import qualified Data.Set as S
import Data.Text (Text, unpack)
import Data.Validity
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Gargantext.Core.Types.Main
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Types.Node
import Gargantext.Prelude
......@@ -135,3 +143,18 @@ assertValid :: (MonadError e m, HasInvalidError e) => Validation -> m ()
assertValid v = when (not $ validationIsValid v) $ throwError $ _InvalidError # v
-- assertValid :: MonadIO m => Validation -> m ()
-- assertValid v = when (not $ validationIsValid v) $ fail $ show v
data TableResult a = TableResult { tr_count :: Int
, tr_docs :: [a]
} deriving (Generic)
$(deriveJSON (unPrefix "tr_") ''TableResult)
instance ToSchema a => ToSchema (TableResult a) where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "tr_")
instance Arbitrary a => Arbitrary (TableResult a) where
arbitrary = TableResult <$> arbitrary <*> arbitrary
type NodeTableResult a = TableResult (Node a)
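For reference, `unPrefix "tr_"` drops the record prefix, so the derived JSON looks roughly like this (a sketch; key order in the encoded object may differ):

-- ghci> encode (TableResult 2 ["a", "b"] :: TableResult Text)
-- "{\"count\":2,\"docs\":[\"a\",\"b\"]}"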
......@@ -95,3 +95,8 @@ nodeTypes = [ (n, nodeTypeId n) | n <- allNodeTypes ]
fromNodeTypeId :: NodeTypeId -> NodeType
fromNodeTypeId tId = fromMaybe (panic $ pack $ "Type Id " <> show tId <> " does not exist")
(lookup tId nodeTypeInv)
......@@ -67,6 +67,7 @@ import Gargantext.Database.Schema.User (getUser, UserLight(..))
import Gargantext.Database.TextSearch (searchInDatabase)
import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Utils (Cmd, CmdM)
import Gargantext.Database.Triggers
import Gargantext.Ext.IMT (toSchoolName)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Prelude
......@@ -228,9 +229,11 @@ flowCorpusUser l userName corpusName ctype ids = do
-- User List Flow
--{-
(_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster (Left "") ctype
(masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster (Left "") ctype
ngs <- buildNgramsLists l 2 3 (StopSize 3) userCorpusId masterCorpusId
userListId <- flowList userId userCorpusId ngs
mastListId <- getOrMkList masterCorpusId masterUserId
_ <- insertOccsUpdates userCorpusId mastListId
printDebug "userListId" userListId
-- User Graph Flow
_ <- mkDashboard userCorpusId userId
......
......@@ -32,16 +32,25 @@ import qualified Data.Map as DM
import Data.Text (Text, toLower)
import qualified Data.Text as DT
import Gargantext.Prelude hiding (sum)
import Gargantext.Core.Types (TableResult(..))
import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
import Gargantext.Database.Flow.Utils
import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId)
import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ListId{-, DocId, ContactId-})
import Gargantext.Database.Node.Children (getAllContacts)
-- TODO: move this type into Types.Main
type Terms = Text
{-
pairing'' :: (CorpusId, CorpusId) -> (DocId -> DocId)
pairing'' = undefined
pairing' :: (CorpusId, AnnuaireId) -> (DocId -> ContactId)
pairing' = undefined
-}
-- | TODO: add pairing policy as parameter
pairing :: AnnuaireId
-> CorpusId
......@@ -50,7 +59,7 @@ pairing :: AnnuaireId
pairing aId cId lId = do
contacts' <- getAllContacts aId
let contactsMap = pairingPolicyToMap toLower
$ toMaps extractNgramsT contacts'
$ toMaps extractNgramsT (tr_docs contacts')
ngramsMap' <- getNgramsTindexed cId Authors
let ngramsMap = pairingPolicyToMap lastName ngramsMap'
......
......@@ -25,12 +25,17 @@ import Gargantext.Database.Utils (Cmd)
import Gargantext.Database.Schema.NodeNodeNgrams
import Gargantext.Database.Types.Node
toMaps :: Hyperdata a => (a -> Map (NgramsT Ngrams) Int) -> [Node a] -> Map (NgramsT Ngrams) (Map NodeId Int)
toMaps :: Hyperdata a
=> (a -> Map (NgramsT Ngrams) Int)
-> [Node a]
-> Map (NgramsT Ngrams) (Map NodeId Int)
toMaps fun ns = mapNodeIdNgrams $ documentIdWithNgrams fun ns'
where
ns' = map (\(Node nId _ _ _ _ _ json) -> DocumentWithId nId json) ns
mapNodeIdNgrams :: Hyperdata a => [DocumentIdWithNgrams a] -> Map (NgramsT Ngrams) (Map NodeId Int)
mapNodeIdNgrams :: Hyperdata a
=> [DocumentIdWithNgrams a]
-> Map (NgramsT Ngrams) (Map NodeId Int)
mapNodeIdNgrams ds = DM.map (DM.fromListWith (+)) $ DM.fromListWith (<>) xs
where
xs = [(ng, [(nId, i)]) | (nId, n2i') <- n2i ds, (ng, i) <- DM.toList n2i']
......@@ -72,12 +77,15 @@ data DocNgrams = DocNgrams { dn_doc_id :: DocId
insertDocNgramsOn :: CorpusId
-> [DocNgrams]
-> Cmd err Int
insertDocNgramsOn cId dn = insertNodeNodeNgrams $ (map (docNgrams2nodeNodeNgrams cId) dn)
insertDocNgramsOn cId dn =
insertNodeNodeNgrams
$ (map (docNgrams2nodeNodeNgrams cId) dn)
insertDocNgrams :: CorpusId
-> Map NgramsIndexed (Map NgramsType (Map NodeId Int))
-> Cmd err Int
insertDocNgrams cId m = insertDocNgramsOn cId [ DocNgrams n (_ngramsId ng) (ngramsTypeId t) (fromIntegral i)
insertDocNgrams cId m =
insertDocNgramsOn cId [ DocNgrams n (_ngramsId ng) (ngramsTypeId t) (fromIntegral i)
| (ng, t2n2i) <- DM.toList m
, (t, n2i) <- DM.toList t2n2i
, (n, i) <- DM.toList n2i
......
......@@ -19,6 +19,7 @@ Ngrams by node enable contextual metrics.
module Gargantext.Database.Metrics.NgramsByNode
where
import Debug.Trace (trace)
import Data.Map.Strict (Map, fromListWith, elems, toList, fromList)
import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.Set (Set)
......@@ -68,7 +69,6 @@ getTficf' u m nt f = do
pure $ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
--{-
getTficfWith :: UserCorpusId -> MasterCorpusId -> [ListId]
-> NgramsType -> Map Text (Maybe Text)
-> Cmd err (Map Text (Double, Set Text))
......@@ -82,7 +82,6 @@ getTficfWith u m ls nt mtxt = do
pure $ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
--}
type Context = (Double, Map Text (Double, Set Text))
......@@ -121,7 +120,8 @@ groupNodesByNgramsWith f m =
$ toList m
------------------------------------------------------------------------
getNodesByNgramsUser :: CorpusId -> NgramsType
getNodesByNgramsUser :: CorpusId
-> NgramsType
-> Cmd err (Map Text (Set NodeId))
getNodesByNgramsUser cId nt =
fromListWith (<>) <$> map (\(n,t) -> (t, Set.singleton n))
......@@ -141,7 +141,6 @@ getNodesByNgramsUser cId nt =
queryNgramsByNodeUser :: DPS.Query
queryNgramsByNodeUser = [sql|
SELECT nng.node2_id, ng.terms FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
......@@ -157,13 +156,59 @@ getNodesByNgramsUser cId nt =
|]
------------------------------------------------------------------------
-- TODO add groups
getOccByNgramsOnlyFast :: CorpusId -> NgramsType -> [Text]
getOccByNgramsOnlyFast :: CorpusId
-> NgramsType
-> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnlyFast cId nt ngs =
fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
getOccByNgramsOnlyFast' :: CorpusId
-> ListId
-> NgramsType
-> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
fromListWith (+) <$> map (second round) <$> run cId lId nt tms
where
fields = [QualifiedIdentifier Nothing "text"]
run :: CorpusId
-> ListId
-> NgramsType
-> [Text]
-> Cmd err [(Text, Double)]
run cId' lId' _nt' tms' = runPGSQuery query
( Values fields (DPS.Only <$> tms')
, cId'
, lId'
-- , ngramsTypeId nt'
)
query :: DPS.Query
query = [sql|
WITH input_rows(terms) AS (?)
SELECT ng.terms, nng.weight FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
WHERE nng.node1_id = ? -- CorpusId
AND nng.node2_id = ?
-- AND nng.ngrams_type = ? -- NgramsTypeId
-- AND nn.category > 0
GROUP BY ng.terms, nng.weight
|]
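-- Note on the parameter plumbing above (per postgresql-simple's
-- documented behaviour): `Values [QualifiedIdentifier Nothing "text"]
-- (DPS.Only <$> tms')` renders roughly as
-- `VALUES ('t1' :: text), ('t2'), ...` at the first `?`, which is what
-- feeds the input_rows CTE its list of terms.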
-- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow :: NodeType -> CorpusId -> [ListId] -> NgramsType -> [Text]
getOccByNgramsOnlySlow :: NodeType
-> CorpusId
-> [ListId]
-> NgramsType
-> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnlySlow t cId ls nt ngs =
Map.map Set.size <$> getScore' t cId ls nt ngs
......@@ -172,7 +217,10 @@ getOccByNgramsOnlySlow t cId ls nt ngs =
getScore' NodeDocument = getNgramsByDocOnlyUser
getScore' _ = getNodesByNgramsOnlyUser
getOccByNgramsOnlySafe :: CorpusId -> [ListId] -> NgramsType -> [Text]
getOccByNgramsOnlySafe :: CorpusId
-> [ListId]
-> NgramsType
-> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnlySafe cId ls nt ngs = do
printDebug "getOccByNgramsOnlySafe" (cId, nt, length ngs)
......@@ -200,7 +248,23 @@ selectNgramsOccurrencesOnlyByNodeUser cId nt tms =
-- equivalent ngrams intersections are not empty)
queryNgramsOccurrencesOnlyByNodeUser :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser = [sql|
WITH input_rows(terms) AS (?)
SELECT ng.terms, COUNT(nng.node2_id) FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY nng.node2_id, ng.terms
|]
queryNgramsOccurrencesOnlyByNodeUser' :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser' = [sql|
WITH input_rows(terms) AS (?)
SELECT ng.terms, COUNT(nng.node2_id) FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
......@@ -214,11 +278,16 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
GROUP BY nng.node2_id, ng.terms
|]
getNodesByNgramsOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId))
getNodesByNgramsOnlyUser cId ls nt ngs = Map.unionsWith (<>)
getNodesByNgramsOnlyUser cId ls nt ngs =
Map.unionsWith (<>)
. map (fromListWith (<>) . map (second Set.singleton))
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt) (splitEvery 1000 ngs)
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt)
(splitEvery 1000 ngs)
selectNgramsOnlyByNodeUser :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err [(Text, NodeId)]
......@@ -235,7 +304,6 @@ selectNgramsOnlyByNodeUser cId ls nt tms =
queryNgramsOnlyByNodeUser :: DPS.Query
queryNgramsOnlyByNodeUser = [sql|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.node2_id FROM node_node_ngrams nng
......@@ -253,14 +321,45 @@ queryNgramsOnlyByNodeUser = [sql|
selectNgramsOnlyByNodeUser' :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err [(Text, Int)]
selectNgramsOnlyByNodeUser' cId ls nt tms =
runPGSQuery queryNgramsOnlyByNodeUser
( Values fields (DPS.Only <$> tms)
, Values [QualifiedIdentifier Nothing "int4"] (DPS.Only <$> (map (\(NodeId n) -> n) ls))
, cId
, nodeTypeId NodeDocument
, ngramsTypeId nt
)
where
fields = [QualifiedIdentifier Nothing "text"]
queryNgramsOnlyByNodeUser' :: DPS.Query
queryNgramsOnlyByNodeUser' = [sql|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.weight FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = nng.node2_id
WHERE nng.node1_id = ? -- CorpusId
AND nng.ngrams_type = ? -- NgramsTypeId
-- AND nn.category > 0
GROUP BY ng.terms, nng.weight
|]
getNgramsByDocOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId))
getNgramsByDocOnlyUser cId ls nt ngs = Map.unionsWith (<>)
getNgramsByDocOnlyUser cId ls nt ngs =
Map.unionsWith (<>)
. map (fromListWith (<>) . map (second Set.singleton))
<$> mapM (selectNgramsOnlyByDocUser cId ls nt) (splitEvery 1000 ngs)
selectNgramsOnlyByDocUser :: DocId -> [ListId] -> NgramsType -> [Text]
-> Cmd err [(Text, NodeId)]
selectNgramsOnlyByDocUser dId ls nt tms =
......@@ -275,7 +374,6 @@ selectNgramsOnlyByDocUser dId ls nt tms =
queryNgramsOnlyByDocUser :: DPS.Query
queryNgramsOnlyByDocUser = [sql|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.node2_id FROM node_node_ngrams nng
......@@ -287,7 +385,6 @@ queryNgramsOnlyByDocUser = [sql|
GROUP BY ng.terms, nng.node2_id
|]
------------------------------------------------------------------------
-- | TODO filter by language, database, any social field
getNodesByNgramsMaster :: UserCorpusId -> MasterCorpusId -> Cmd err (Map Text (Set NodeId))
......@@ -316,10 +413,9 @@ selectNgramsByNodeMaster n ucId mcId p = runPGSQuery
-- | TODO fix node_node_ngrams relation
queryNgramsByNodeMaster' :: DPS.Query
queryNgramsByNodeMaster' = [sql|
WITH nodesByNgramsUser AS (
WITH nodesByNgramsUser AS (
SELECT n.id, ng.terms FROM nodes n
SELECT n.id, ng.terms FROM nodes n
JOIN nodes_nodes nn ON n.id = nn.node2_id
JOIN node_node_ngrams nng ON nng.node2_id = n.id
JOIN ngrams ng ON nng.ngrams_id = ng.id
......@@ -333,9 +429,9 @@ SELECT n.id, ng.terms FROM nodes n
),
nodesByNgramsMaster AS (
nodesByNgramsMaster AS (
SELECT n.id, ng.terms FROM nodes n TABLESAMPLE SYSTEM_ROWS(?)
SELECT n.id, ng.terms FROM nodes n TABLESAMPLE SYSTEM_ROWS(?)
JOIN node_node_ngrams nng ON n.id = nng.node2_id
JOIN ngrams ng ON ng.id = nng.ngrams_id
......@@ -345,7 +441,7 @@ SELECT n.id, ng.terms FROM nodes n TABLESAMPLE SYSTEM_ROWS(?)
GROUP BY n.id, ng.terms
)
SELECT m.id, m.terms FROM nodesByNgramsMaster m
SELECT m.id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|]
......
......@@ -28,8 +28,8 @@ import Gargantext.Prelude
import Opaleye
import Control.Arrow (returnA)
selectNgramsByDoc :: [CorpusId] -> DocId -> NgramsType -> Cmd err [Text]
selectNgramsByDoc cIds dId nt = runOpaQuery (query cIds dId nt)
selectNgramsByDoc :: [ListId] -> DocId -> NgramsType -> Cmd err [Text]
selectNgramsByDoc lIds dId nt = runOpaQuery (query lIds dId nt)
where
join :: Query (NgramsRead, NodeNodeNgramsReadNull)
......
......@@ -30,12 +30,11 @@ import Gargantext.Database.Node.Contact (HyperdataContact)
import Gargantext.Database.Schema.Node (pgNodeId)
import Control.Arrow (returnA)
getAllDocuments :: ParentId -> Cmd err [Node HyperdataDocument]
getAllDocuments :: ParentId -> Cmd err (TableResult (Node HyperdataDocument))
getAllDocuments pId = getAllChildren pId (Proxy :: Proxy HyperdataDocument)
(Just NodeDocument)
getAllContacts :: ParentId -> Cmd err [Node HyperdataContact]
getAllContacts :: ParentId -> Cmd err (TableResult (Node HyperdataContact))
getAllContacts pId = getAllChildren pId (Proxy :: Proxy HyperdataContact)
(Just NodeContact)
......@@ -43,7 +42,7 @@ getAllChildren :: JSONB a
=> ParentId
-> proxy a
-> Maybe NodeType
-> Cmd err [Node a]
-> Cmd err (NodeTableResult a)
getAllChildren pId p maybeNodeType = getChildren pId p maybeNodeType Nothing Nothing
getChildren :: JSONB a
......@@ -52,11 +51,19 @@ getChildren :: JSONB a
-> Maybe NodeType
-> Maybe Offset
-> Maybe Limit
-> Cmd err [Node a]
getChildren pId _ maybeNodeType maybeOffset maybeLimit = runOpaQuery
-> Cmd err (NodeTableResult a)
getChildren pId _ maybeNodeType maybeOffset maybeLimit = do
docs <- runOpaQuery
$ limit' maybeLimit $ offset' maybeOffset
$ orderBy (asc _node_id)
$ selectChildren pId maybeNodeType
$ query
docCount <- runCountOpaQuery query
pure $ TableResult { tr_docs = docs, tr_count = docCount }
where
query = selectChildren pId maybeNodeType
selectChildren :: ParentId
-> Maybe NodeType
......
......@@ -64,7 +64,9 @@ leftJoin3
-> ((fieldsL2, fieldsR) -> Column PGBool)
-> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
-> Opaleye.Select (fieldsL1, nullableFieldsR2)
leftJoin3 q1 q2 q3 cond12 cond23 = leftJoin q3 (leftJoin q2 q1 cond12) cond23
leftJoin3 q1 q2 q3
cond12 cond23 =
leftJoin q3 ( leftJoin q2 q1 cond12) cond23
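-- Reading aid for the whole leftJoinN family in this module: the calls
-- nest right to left. The innermost `leftJoin q2 q1 cond12` runs first,
-- and each further query is left-joined against that accumulated
-- result, so each later condition sees the nested tuple on one side.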
leftJoin4
......@@ -85,7 +87,13 @@ leftJoin4
-> ((fieldsL2, (fieldsL3, nullableFieldsR2)) -> Column PGBool)
-> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
-> Opaleye.Select (fieldsL1, nullableFieldsR3)
leftJoin4 q1 q2 q3 q4 cond12 cond23 cond34 = leftJoin q4 (leftJoin q3 (leftJoin q2 q1 cond12) cond23) cond34
leftJoin4 q1 q2 q3 q4
cond12 cond23 cond34 =
leftJoin q4 ( leftJoin q3
( leftJoin q2 q1
cond12
) cond23
) cond34
leftJoin5 :: ( Default Unpackspec fieldsL1 fieldsL1,
......@@ -110,7 +118,15 @@ leftJoin5 :: ( Default Unpackspec fieldsL1 fieldsL1,
-> ((fieldsL2, (fieldsL3, nullableFieldsR2)) -> Column PGBool)
-> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
-> Query (fieldsL1, nullableFieldsR4)
leftJoin5 q1 q2 q3 q4 q5 cond12 cond23 cond34 cond45 = leftJoin q5 (leftJoin q4 (leftJoin q3 (leftJoin q2 q1 cond12) cond23) cond34) cond45
leftJoin5 q1 q2 q3 q4 q5
cond12 cond23 cond34 cond45 =
leftJoin q5 ( leftJoin q4
( leftJoin q3
( leftJoin q2 q1
cond12
) cond23
) cond34
) cond45
leftJoin6 :: ( Default Unpackspec fieldsL1 fieldsL1,
......@@ -139,7 +155,17 @@ leftJoin6 :: ( Default Unpackspec fieldsL1 fieldsL1,
-> ((fieldsL2, (fieldsL3, nullableFieldsR2)) -> Column PGBool)
-> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
-> Query (fieldsL1, nullableFieldsR5)
leftJoin6 q1 q2 q3 q4 q5 q6 cond12 cond23 cond34 cond45 cond56 = leftJoin q6 (leftJoin q5 (leftJoin q4 (leftJoin q3 (leftJoin q2 q1 cond12) cond23) cond34) cond45) cond56
leftJoin6 q1 q2 q3 q4 q5 q6
cond12 cond23 cond34 cond45 cond56 =
leftJoin q6 ( leftJoin q5
( leftJoin q4
( leftJoin q3
( leftJoin q2 q1
cond12
) cond23
) cond34
) cond45
) cond56
leftJoin7
......@@ -175,7 +201,19 @@ leftJoin7
-> ((fieldsL2, (fieldsL3, nullableFieldsR2)) -> Column PGBool)
-> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
-> Opaleye.Select (fieldsL1, nullableFieldsR6)
leftJoin7 q1 q2 q3 q4 q5 q6 q7 cond12 cond23 cond34 cond45 cond56 cond67 = leftJoin q7 (leftJoin q6 (leftJoin q5 (leftJoin q4 (leftJoin q3 (leftJoin q2 q1 cond12) cond23) cond34) cond45) cond56) cond67
leftJoin7 q1 q2 q3 q4 q5 q6 q7
cond12 cond23 cond34 cond45 cond56 cond67 =
leftJoin q7 ( leftJoin q6
( leftJoin q5
( leftJoin q4
( leftJoin q3
( leftJoin q2 q1
cond12
) cond23
) cond34
) cond45
) cond56
) cond67
leftJoin8
......@@ -216,7 +254,21 @@ leftJoin8
-> ((fieldsL2, (fieldsL3, nullableFieldsR2)) -> Column PGBool)
-> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
-> Opaleye.Select (fieldsL1, nullableFieldsR7)
leftJoin8 q1 q2 q3 q4 q5 q6 q7 q8 cond12 cond23 cond34 cond45 cond56 cond67 cond78 = leftJoin q8 (leftJoin q7 (leftJoin q6 (leftJoin q5 (leftJoin q4 (leftJoin q3 (leftJoin q2 q1 cond12) cond23) cond34) cond45) cond56) cond67) cond78
leftJoin8 q1 q2 q3 q4 q5 q6 q7 q8
cond12 cond23 cond34 cond45 cond56 cond67 cond78 =
leftJoin q8 ( leftJoin q7
( leftJoin q6
( leftJoin q5
( leftJoin q4
( leftJoin q3
( leftJoin q2 q1
cond12
) cond23
) cond34
) cond45
) cond56
) cond67
) cond78
leftJoin9
......@@ -262,5 +314,21 @@ leftJoin9
-> ((fieldsL2, (fieldsL3, nullableFieldsR2)) -> Column PGBool)
-> ((fieldsL1, (fieldsL2, nullableFieldsR1)) -> Column PGBool)
-> Opaleye.Select (fieldsL1, nullableFieldsR8)
leftJoin9 q1 q2 q3 q4 q5 q6 q7 q8 q9 cond12 cond23 cond34 cond45 cond56 cond67 cond78 cond89 = leftJoin q9 (leftJoin q8 (leftJoin q7 (leftJoin q6 (leftJoin q5 (leftJoin q4 (leftJoin q3 (leftJoin q2 q1 cond12) cond23) cond34) cond45) cond56) cond67) cond78) cond89
leftJoin9 q1 q2 q3 q4 q5 q6 q7 q8 q9
cond12 cond23 cond34 cond45 cond56 cond67 cond78 cond89 =
leftJoin q9 ( leftJoin q8
( leftJoin q7
( leftJoin q6
( leftJoin q5
( leftJoin q4
( leftJoin q3
( leftJoin q2 q1
cond12
) cond23
) cond34
) cond45
) cond56
) cond67
) cond78
) cond89
......@@ -373,17 +373,17 @@ type JSONB = QueryRunnerColumnDefault PGJsonb
getNode :: JSONB a => NodeId -> proxy a -> Cmd err (Node a)
getNode nId _ = do
fromMaybe (error $ "Node does node exist: " <> show nId) . headMay
fromMaybe (error $ "Node does not exist: " <> show nId) . headMay
<$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))
getNodePhylo :: NodeId -> Cmd err (Node HyperdataPhylo)
getNodePhylo nId = do
fromMaybe (error $ "Node does node exist: " <> show nId) . headMay
fromMaybe (error $ "Node Phylo does not exist: " <> show nId) . headMay
<$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))
getNode' :: NodeId -> Cmd err (Node Value)
getNode' nId = fromMaybe (error $ "Node does node exist: " <> show nId) . headMay
getNode' nId = fromMaybe (error $ "Node does not exist: " <> show nId) . headMay
<$> runOpaQuery (limit 1 $ selectNode (pgNodeId nId))
......
......@@ -35,7 +35,7 @@ import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
import Gargantext.Database.Schema.NodeNodeNgrams
import Gargantext.Database.Queries.Join (leftJoin6)
import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery)
import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types
import Control.Arrow (returnA)
......@@ -72,6 +72,15 @@ searchInCorpus cId t q o l order = runOpaQuery
$ intercalate " | "
$ map stemIt q
searchCountInCorpus :: CorpusId
-> IsTrash
-> [Text]
-> Cmd err Int
searchCountInCorpus cId t q = runCountOpaQuery
$ queryInCorpus cId t
$ intercalate " | "
$ map stemIt q
queryInCorpus :: CorpusId
-> IsTrash
-> Text
......
......@@ -108,7 +108,7 @@ dbTree rootId = map (\(nId, tId, pId, n) -> DbTreeNode nId tId pId n)
FROM nodes AS c
INNER JOIN tree AS s ON c.parent_id = s.id
-- WHERE c.typename IN (2,20,21,22,3,5,30,31,40,7,9,90)
WHERE c.typename IN (2,20,21,22,3,5,30,31,40,7,9,90)
)
SELECT * from tree;
|] (Only rootId)
......
{-|
Module : Gargantext.Database.Triggers
Description : Triggers configuration
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Database triggers to keep ngrams occurrence counts up to date.
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.Database.Triggers
where
import Database.PostgreSQL.Simple.SqlQQ (sql)
-- import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as DPS
------------------------------------------------------------------------
type MasterListId = ListId
insertOccsUpdates :: UserCorpusId -> MasterListId -> Cmd err [DPS.Only Int]
insertOccsUpdates cId lId = runPGSQuery query (cId, lId, nodeTypeId NodeList, nodeTypeId NodeDocument)
where
query :: DPS.Query
query = [sql|
INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
SELECT nn.node1_id, lists.id, nnn.ngrams_id, 1, count(*) as c -- type of score
FROM node_node_ngrams nnn
INNER JOIN nodes_nodes nn ON nn.node2_id = nnn.node2_id
INNER JOIN nodes docs ON docs.id = nnn.node2_id
INNER JOIN nodes lists ON lists.parent_id = nn.node1_id
-- WHERE nn.node1_id = NEW.node1_id -- .node1_id -- corpus_id
WHERE nn.node1_id = ? -- .node1_id -- corpus_id
AND nnn.node1_id in (?, lists.id) -- (masterList_id, userLists)
AND lists.typename = ?
AND docs.typename = ?
GROUP BY nn.node1_id, lists.id, nnn.ngrams_id
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE SET weight = 3 -- c -- excluded.weight
RETURNING 1
-- TOCHECK
|]
triggerOccsUpdates :: CorpusId -> ListId -> Cmd err [DPS.Only Int]
triggerOccsUpdates cId lId = runPGSQuery query (cId, lId, nodeTypeId NodeList, nodeTypeId NodeDocument)
where
query :: DPS.Query
query = [sql|
DROP TRIGGER IF EXISTS trigger_occs ON nodes_nodes;
CREATE OR REPLACE FUNCTION occs_update() RETURNS trigger AS
$$
BEGIN
IF TG_OP = 'UPDATE' THEN
INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
-- TODO edge_type instead of ngrams_type
SELECT nn.node1_id, lists.id, nnn.ngrams_id, 1, count(*) -- ngrams_type, then count(*) as the weight
FROM node_node_ngrams nnn
INNER JOIN nodes_nodes nn ON nn.node2_id = nnn.node2_id
INNER JOIN nodes docs ON docs.id = nnn.node2_id
INNER JOIN nodes lists ON lists.parent_id = nn.node1_id
-- WHERE nn.node1_id = NEW.node1_id -- .node1_id -- corpus_id
WHERE nn.node1_id = ? -- .node1_id -- corpus_id
AND nnn.node1_id in (?, lists.id) -- (masterList_id, userLists)
AND lists.typename = ?
AND docs.typename = ?
GROUP BY nn.node1_id, lists.id, nnn.ngrams_id
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE SET weight = excluded.weight;
END IF;
RETURN NULL;
END $$
LANGUAGE plpgsql;
CREATE TRIGGER trigger_occs
AFTER UPDATE ON nodes_nodes
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE occs_update();
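-- The self-assignment below is a deliberate no-op touch of nodes_nodes:
-- it fires the statement-level trigger once so existing rows are
-- backfilled immediately.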
update nodes_nodes SET node1_id = node1_id;
|]
......@@ -30,6 +30,7 @@ import Control.Monad.Except
import Data.Aeson (Result(Error,Success), fromJSON, FromJSON)
import Data.Either.Extra (Either(Left, Right))
import Data.Ini (readIniFile, lookupValue)
import qualified Data.List as DL
import Data.Maybe (maybe)
import Data.Monoid ((<>))
import Data.Profunctor.Product.Default (Default)
......@@ -41,6 +42,7 @@ import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(Conversion
import Database.PostgreSQL.Simple.Internal (Field)
import Gargantext.Prelude
import Opaleye (Query, Unpackspec, showSqlForPostgres, FromFields, Select, runQuery)
import Opaleye.Aggregate (countRows)
import System.IO (FilePath)
import Text.Read (read)
import qualified Data.ByteString as DB
......@@ -67,6 +69,9 @@ type Cmd' env err a = forall m. CmdM' env err m => m a
type Cmd err a = forall m env. CmdM env err m => m a
fromInt64ToInt :: Int64 -> Int
fromInt64ToInt = fromIntegral
-- TODO: ideally there should be very few calls to this function.
mkCmd :: (Connection -> IO a) -> Cmd err a
mkCmd k = do
......@@ -82,6 +87,12 @@ runOpaQuery :: Default FromFields fields haskells
=> Select fields -> Cmd err [haskells]
runOpaQuery q = mkCmd $ \c -> runQuery c q
runCountOpaQuery :: Select a -> Cmd err Int
runCountOpaQuery q = do
counts <- mkCmd $ \c -> runQuery c $ countRows q
-- countRows is guaranteed to return a list with exactly one row so DL.head is safe here
pure $ fromInt64ToInt $ DL.head counts
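A usage sketch (mirroring how getChildren in Gargantext.Database.Node.Children pairs a paged query with this counter; `selectChildren` is the Select defined there):

-- Hedged example: count matching rows without fetching them.
countChildren :: ParentId -> Cmd err Int
countChildren pId = runCountOpaQuery (selectChildren pId Nothing)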
formatPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err DB.ByteString
formatPGSQuery q a = mkCmd $ \conn -> PGS.formatQuery conn q a
......
......@@ -15,6 +15,7 @@ Portability : POSIX
module Gargantext.Viz.Graph.Tools
where
import Debug.Trace (trace)
import Data.Graph.Clustering.Louvain.CplusPlus (LouvainNode(..))
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
import Data.Map (Map)
......@@ -47,16 +48,39 @@ cooc2graph threshold myCooc = do
distanceMat = measureConditional matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat
let nodesApprox :: Int
nodesApprox = n'
where
(as, bs) = List.unzip $ Map.keys distanceMap
n' = Set.size $ Set.fromList $ as <> bs
ClustersParams rivers level = trace ("nodesApprox: " <> show nodesApprox) $ clustersParams nodesApprox
partitions <- case Map.size distanceMap > 0 of
True -> cLouvain distanceMap
True -> trace ("level" <> show level) $ cLouvain level distanceMap
False -> panic "Text.Flow: DistanceMap is empty"
let bridgeness' = bridgeness 300 partitions distanceMap
let bridgeness' = trace ("rivers: " <> show rivers) $ bridgeness rivers partitions distanceMap
let confluence' = confluence (Map.keys bridgeness') 3 True False
data2graph (Map.toList ti) myCooc' bridgeness' confluence' partitions
data ClustersParams = ClustersParams { bridgness :: Double
, louvain :: Text
} deriving (Show)
clustersParams :: Int -> ClustersParams
clustersParams x = ClustersParams (fromIntegral x) y
where
y | x < 100 = "0.0001"
| x < 350 = "0.001"
| x < 500 = "0.01"
| x < 1000 = "0.1"
| otherwise = "1"
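Concretely, the thresholds above give, for example:

-- clustersParams 80   == ClustersParams 80.0   "0.0001"
-- clustersParams 200  == ClustersParams 200.0  "0.001"
-- clustersParams 2000 == ClustersParams 2000.0 "1"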
----------------------------------------------------------
-- | From data to Graph
data2graph :: [(Text, Int)]
......
......@@ -53,7 +53,7 @@ relatedComp graphs = foldl' (\mem groups ->
louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
<$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
<$> (cLouvain $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
<$> (cLouvain "0.0001" $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
where
--------------------------------------
idx :: PhyloGroup -> Int
......
......@@ -4,6 +4,7 @@ extra-package-dbs: []
packages:
- .
docker:
  enable: false
  repo: 'fpco/stack-build:lts-14.6-garg'
......@@ -39,7 +40,7 @@ extra-deps:
- git: https://github.com/np/servant-job.git
  commit: 8557bfc9472a1b2be0b7bc632c23701ba5f44bf8
- git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git
  commit: 1c636112b151110408e7c5a28cec39e46657358e
  commit: b29040ce741629d61cc63e8ba97e75bf0944979e
- git: https://github.com/np/patches-map
  commit: 8c6f38c4844ead53e664cf9c82ba461715dbe445
- git: https://github.com/delanoe/haskell-opaleye.git #- opaleye-0.6.7002.0
......