Commit e7e89297 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[WIP][FLOW] get ngrams by docs.

parent 6a247110
...@@ -816,3 +816,5 @@ getTableNgrams _cId maybeTabType listIds mlimit moffset = do ...@@ -816,3 +816,5 @@ getTableNgrams _cId maybeTabType listIds mlimit moffset = do
getListNgrams ({-lists <>-} listIds) ngramsType getListNgrams ({-lists <>-} listIds) ngramsType
& mapped . v_data . _NgramsTable %~ (take limit_ . drop offset_) & mapped . v_data . _NgramsTable %~ (take limit_ . drop offset_)
...@@ -19,7 +19,7 @@ Portability : POSIX ...@@ -19,7 +19,7 @@ Portability : POSIX
module Gargantext.Database.Flow -- (flowDatabase, ngrams2list) module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
where where
--import Debug.Trace (trace) import Debug.Trace (trace)
--import Control.Lens (view) --import Control.Lens (view)
import Control.Monad (mapM_) import Control.Monad (mapM_)
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
...@@ -41,6 +41,7 @@ import Gargantext.Database.Flow.Utils (insertToNodeNgrams) ...@@ -41,6 +41,7 @@ import Gargantext.Database.Flow.Utils (insertToNodeNgrams)
import Gargantext.Database.Metrics.TFICF (getTficf) import Gargantext.Database.Metrics.TFICF (getTficf)
import Gargantext.Text.Terms (extractTerms) import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Metrics.TFICF (Tficf(..)) import Gargantext.Text.Metrics.TFICF (Tficf(..))
--import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId)
import Gargantext.Database.Node.Document.Add (add) import Gargantext.Database.Node.Document.Add (add)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..)) import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Root (getRoot) import Gargantext.Database.Root (getRoot)
...@@ -121,7 +122,10 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user ...@@ -121,7 +122,10 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user
_ <- insertToNodeNgrams indexedNgrams _ <- insertToNodeNgrams indexedNgrams
-- List Ngrams Flow -- List Ngrams Flow
-- get elements
-- filter by TFICF
let ngs = ngrams2list' indexedNgrams let ngs = ngrams2list' indexedNgrams
--let ngs = getNgramsElementsWithParentNodeId masterCorpusId
_masterListId <- flowList masterUserId masterCorpusId ngs _masterListId <- flowList masterUserId masterCorpusId ngs
_userListId <- flowListUser userId userCorpusId ngs 100 _userListId <- flowListUser userId userCorpusId ngs 100
-------------------------------------------------- --------------------------------------------------
...@@ -256,7 +260,8 @@ flowList uId cId ngs = do ...@@ -256,7 +260,8 @@ flowList uId cId ngs = do
lId <- getOrMkList cId uId lId <- getOrMkList cId uId
printDebug "listId flowList" lId printDebug "listId flowList" lId
--printDebug "ngs" (DM.keys ngs) --printDebug "ngs" (DM.keys ngs)
-- TODO add stemming equivalence of 2 ngrams
-- TODO grouping
-- TODO needs rework -- TODO needs rework
-- let groupEd = groupNgramsBy (\(NgramsT t1 n1) (NgramsT t2 n2) -> if (((==) t1 t2) && ((==) n1 n2)) then (Just (n1,n2)) else Nothing) ngs -- let groupEd = groupNgramsBy (\(NgramsT t1 n1) (NgramsT t2 n2) -> if (((==) t1 t2) && ((==) n1 n2)) then (Just (n1,n2)) else Nothing) ngs
-- _ <- insertGroups lId groupEd -- _ <- insertGroups lId groupEd
...@@ -273,7 +278,8 @@ flowListUser uId cId ngsM n = do ...@@ -273,7 +278,8 @@ flowListUser uId cId ngsM n = do
ngs <- take n <$> sortWith tficf_score ngs <- take n <$> sortWith tficf_score
<$> getTficf userMaster cId lId NgramsTerms <$> getTficf userMaster cId lId NgramsTerms
flowListBase lId ngsM trace ("flowListBase" <> show lId) flowListBase lId ngsM
putListNgrams lId NgramsTerms $ putListNgrams lId NgramsTerms $
[ NgramsElement (tficf_ngramsTerms ng) GraphList 1 Nothing mempty [ NgramsElement (tficf_ngramsTerms ng) GraphList 1 Nothing mempty
| ng <- ngs | ng <- ngs
...@@ -322,7 +328,6 @@ ngrams2list' m = fromListWith (<>) ...@@ -322,7 +328,6 @@ ngrams2list' m = fromListWith (<>)
-- | TODO: weight of the list could be a probability -- | TODO: weight of the list could be a probability
insertLists :: HasNodeError err => ListId -> [(ListType, (NgramsType, NgramsIndexed))] -> Cmd err Int insertLists :: HasNodeError err => ListId -> [(ListType, (NgramsType, NgramsIndexed))] -> Cmd err Int
insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing (ngramsTypeId ngt) (fromIntegral $ listTypeId l) 1 insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing (ngramsTypeId ngt) (fromIntegral $ listTypeId l) 1
......
...@@ -19,15 +19,18 @@ Count Ngrams by Context ...@@ -19,15 +19,18 @@ Count Ngrams by Context
module Gargantext.Database.Metrics.Count where module Gargantext.Database.Metrics.Count where
import Data.Monoid (mempty)
import Control.Arrow (returnA) import Control.Arrow (returnA)
import Control.Lens (view) import Control.Lens (view)
import Data.Map.Strict (Map, fromListWith, elems) import Data.Map.Strict (Map, fromListWith, elems)
import Data.Text (Text) import Data.Text (Text)
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.API.Ngrams (NgramsElement(..))
import Gargantext.Core.Types.Main (listTypeId, ListType(..)) import Gargantext.Core.Types.Main (listTypeId, ListType(..))
import Gargantext.Database.Queries.Join (leftJoin4, leftJoin5) import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Queries.Join (leftJoin4, leftJoin5, leftJoin3)
import Gargantext.Database.Schema.Ngrams import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsType(..), ngramsTypeId, Ngrams(..), NgramsIndexed(..), ngrams, ngramsTerms) import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsType(..), ngramsTypeId, Ngrams(..), NgramsIndexed(..), ngrams, ngramsTerms, fromNgramsTypeId)
import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Node (HasNodeError(..)) import Gargantext.Database.Schema.Node (HasNodeError(..))
import Gargantext.Database.Schema.NodeNgram import Gargantext.Database.Schema.NodeNgram
...@@ -80,27 +83,45 @@ getNgramsByNode :: NodeId -> NgramsType -> Cmd err [[Text]] ...@@ -80,27 +83,45 @@ getNgramsByNode :: NodeId -> NgramsType -> Cmd err [[Text]]
getNgramsByNode nId nt = elems getNgramsByNode nId nt = elems
<$> fromListWith (<>) <$> fromListWith (<>)
<$> map (\(i,t) -> (i,[t])) <$> map (\(i,t) -> (i,[t]))
<$> getNgramsByNodeIndexed nId nt <$> getNgramsByNodeNodeIndexed nId nt
-- | TODO add join with nodeNodeNgram (if it exists) -- | TODO add join with nodeNodeNgram (if it exists)
getNgramsByNodeIndexed :: NodeId -> NgramsType -> Cmd err [(NodeId, Text)] getNgramsByNodeNodeIndexed :: NodeId -> NgramsType -> Cmd err [(NodeId, Text)]
getNgramsByNodeIndexed nId nt = runOpaQuery (select' nId) getNgramsByNodeNodeIndexed nId nt = runOpaQuery (select' nId)
where where
select' nId' = proc () -> do select' nId' = proc () -> do
(ng,(nng,(_,n))) <- getNgramsByNodeIndexedJoin -< () (ng,(nng,(_,n))) <- getNgramsByNodeNodeIndexedJoin -< ()
restrict -< _node_id n .== toNullable (pgNodeId nId') restrict -< _node_id n .== toNullable (pgNodeId nId')
restrict -< nng_ngramsType nng .== toNullable (pgNgramsTypeId $ ngramsTypeId nt) restrict -< nng_ngramsType nng .== toNullable (pgNgramsTypeId $ ngramsTypeId nt)
returnA -< (nng_node_id nng, ngrams_terms ng) returnA -< (nng_node_id nng, ngrams_terms ng)
{-
getNgramsByNodeIndexed' :: NodeId -> NgramsType -> Cmd err [(NodeId, Maybe Text)]
getNgramsByNodeIndexed' nId nt = runOpaQuery (select' nId)
where
select' nId' = proc () -> do
(nnng,(ng,(nng,(_,n)))) <- getNgramsByNodeIndexedJoin5 -< ()
restrict -< _node_id n .== toNullable (pgNodeId nId')
restrict -< nng_ngramsType nng .== toNullable (pgNgramsTypeId $ ngramsTypeId nt)
let node_id' = ifThenElse (isNull $ toNullable $ nnng_node1_id nnng)
(nng_node_id nng)
(nnng_node2_id nng)
let t1 = ifThenElse (isNull $ toNullable $ nnng_node1_id nnng)
(ngrams_terms ng)
(nnng_terms nng)
returnA -< (n1, t1)
--} --}
getNgramsByNodeIndexedJoin :: Query ( NgramsRead
getNgramsByNodeNodeIndexedJoin :: Query ( NgramsRead
, (NodeNgramReadNull , (NodeNgramReadNull
, (NodeNodeReadNull , (NodeNodeReadNull
, NodeReadNull , NodeReadNull
) )
) )
) )
getNgramsByNodeIndexedJoin = leftJoin4 queryNodeTable getNgramsByNodeNodeIndexedJoin = leftJoin4 queryNodeTable
queryNodeNodeTable queryNodeNodeTable
queryNodeNgramTable queryNodeNgramTable
queryNgramsTable queryNgramsTable
...@@ -126,7 +147,7 @@ getNgramsByNodeIndexedJoin = leftJoin4 queryNodeTable ...@@ -126,7 +147,7 @@ getNgramsByNodeIndexedJoin = leftJoin4 queryNodeTable
c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng' c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng'
getNgramsByNodeIndexedJoin' :: Query ( NodeNodeNgramsRead getNgramsByNodeNodeIndexedJoin5 :: Query ( NodeNodeNgramsRead
, (NgramsReadNull , (NgramsReadNull
, (NodeNgramReadNull , (NodeNgramReadNull
, (NodeNodeReadNull , (NodeNodeReadNull
...@@ -135,7 +156,7 @@ getNgramsByNodeIndexedJoin' :: Query ( NodeNodeNgramsRead ...@@ -135,7 +156,7 @@ getNgramsByNodeIndexedJoin' :: Query ( NodeNodeNgramsRead
) )
) )
) )
getNgramsByNodeIndexedJoin' = leftJoin5 queryNodeTable getNgramsByNodeNodeIndexedJoin5 = leftJoin5 queryNodeTable
queryNodeNodeTable queryNodeNodeTable
queryNodeNgramTable queryNodeNgramTable
queryNgramsTable queryNgramsTable
...@@ -161,7 +182,6 @@ getNgramsByNodeIndexedJoin' = leftJoin5 queryNodeTable ...@@ -161,7 +182,6 @@ getNgramsByNodeIndexedJoin' = leftJoin5 queryNodeTable
) -> Column PGBool ) -> Column PGBool
c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng' c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng'
c4 :: ( NodeNodeNgramsRead c4 :: ( NodeNodeNgramsRead
, (NgramsRead , (NgramsRead
, ( NodeNgramReadNull , ( NodeNgramReadNull
...@@ -174,9 +194,41 @@ getNgramsByNodeIndexedJoin' = leftJoin5 queryNodeTable ...@@ -174,9 +194,41 @@ getNgramsByNodeIndexedJoin' = leftJoin5 queryNodeTable
c4 (nnng,(_,(_,(nn,_)))) = (toNullable $ nnng_node1_id nnng) .== (nn_node1_id nn) c4 (nnng,(_,(_,(nn,_)))) = (toNullable $ nnng_node1_id nnng) .== (nn_node1_id nn)
.&& (toNullable $ nnng_node2_id nnng) .== (nn_node2_id nn) .&& (toNullable $ nnng_node2_id nnng) .== (nn_node2_id nn)
--}
--{-
getNgramsElementsWithParentNodeId :: NodeId -> Cmd err (Map NgramsType [NgramsElement])
getNgramsElementsWithParentNodeId nId = do
ns <- getNgramsWithParentNodeId nId
pure $ fromListWith (<>) [ (maybe (panic "error") identity $ fromNgramsTypeId nt, [NgramsElement ng CandidateList 1 Nothing mempty])
| (_,(nt,ng)) <- ns
]
-------------------------------------------------------------------------
getNgramsWithParentNodeId :: NodeId -> Cmd err [(NodeId, (NgramsTypeId, Text))]
getNgramsWithParentNodeId nId = runOpaQuery (select nId)
where
select nId' = proc () -> do
(ng,(nng,n)) <- getNgramsWithParentNodeIdJoin -< ()
restrict -< _node_parentId n .== (toNullable $ pgNodeId nId')
restrict -< _node_typename n .== (toNullable $ pgInt4 $ nodeTypeId NodeDocument)
returnA -< (nng_node_id nng, (nng_ngramsType nng, ngrams_terms ng))
--} --}
getNgramsWithParentNodeIdJoin :: Query ( NgramsRead
, ( NodeNgramReadNull
, NodeReadNull
)
)
getNgramsWithParentNodeIdJoin = leftJoin3 queryNodeTable queryNodeNgramTable queryNgramsTable on1 on2
where
on1 :: (NodeNgramRead, NodeRead) -> Column PGBool
on1 (nng,n) = nng_node_id nng .== _node_id n
on2 :: (NgramsRead, (NodeNgramRead, NodeReadNull))-> Column PGBool
on2 (ng, (nng,_)) = ngrams_id ng .== nng_ngrams_id nng
...@@ -123,6 +123,10 @@ instance FromField NgramsTypeId where ...@@ -123,6 +123,10 @@ instance FromField NgramsTypeId where
if (n :: Int) > 0 then return $ NgramsTypeId n if (n :: Int) > 0 then return $ NgramsTypeId n
else mzero else mzero
instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
pgNgramsType :: NgramsType -> Column PGInt4 pgNgramsType :: NgramsType -> Column PGInt4
pgNgramsType = pgNgramsTypeId . ngramsTypeId pgNgramsType = pgNgramsTypeId . ngramsTypeId
......
...@@ -156,7 +156,6 @@ instance QueryRunnerColumnDefault (Nullable PGInt4) NodeId ...@@ -156,7 +156,6 @@ instance QueryRunnerColumnDefault (Nullable PGInt4) NodeId
queryRunnerColumnDefault = fieldQueryRunnerColumn queryRunnerColumnDefault = fieldQueryRunnerColumn
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- WIP -- WIP
-- TODO Classe HasDefault where -- TODO Classe HasDefault where
......
...@@ -114,14 +114,22 @@ gargantuaUser = User (Nothing) (pgStrictText "password") ...@@ -114,14 +114,22 @@ gargantuaUser = User (Nothing) (pgStrictText "password")
(pgStrictText "e@mail") (pgStrictText "e@mail")
(pgBool True) (pgBool True) (Nothing) (pgBool True) (pgBool True) (Nothing)
simpleUser :: UserWrite simpleUser1 :: UserWrite
simpleUser = User (Nothing) (pgStrictText "password") simpleUser1 = User (Nothing) (pgStrictText "password")
(Nothing) (pgBool False) (pgStrictText "user1") (Nothing) (pgBool False) (pgStrictText "user1")
(pgStrictText "first_name") (pgStrictText "first_name")
(pgStrictText "last_name") (pgStrictText "last_name")
(pgStrictText "e@mail") (pgStrictText "e@mail")
(pgBool False) (pgBool True) (Nothing) (pgBool False) (pgBool True) (Nothing)
simpleUser2 :: UserWrite
simpleUser2 = User (Nothing) (pgStrictText "password")
(Nothing) (pgBool False) (pgStrictText "user2")
(pgStrictText "first_name")
(pgStrictText "last_name")
(pgStrictText "e@mail")
(pgBool False) (pgBool True) (Nothing)
------------------------------------------------------------------ ------------------------------------------------------------------
queryUserTable :: Query UserRead queryUserTable :: Query UserRead
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment