Commit e7e89297 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[WIP][FLOW] get ngrams by docs.

parent 6a247110
......@@ -816,3 +816,5 @@ getTableNgrams _cId maybeTabType listIds mlimit moffset = do
getListNgrams ({-lists <>-} listIds) ngramsType
& mapped . v_data . _NgramsTable %~ (take limit_ . drop offset_)
......@@ -19,7 +19,7 @@ Portability : POSIX
module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
where
--import Debug.Trace (trace)
import Debug.Trace (trace)
--import Control.Lens (view)
import Control.Monad (mapM_)
import Control.Monad.IO.Class (liftIO)
......@@ -41,6 +41,7 @@ import Gargantext.Database.Flow.Utils (insertToNodeNgrams)
import Gargantext.Database.Metrics.TFICF (getTficf)
import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Metrics.TFICF (Tficf(..))
--import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId)
import Gargantext.Database.Node.Document.Add (add)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Root (getRoot)
......@@ -121,7 +122,10 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user
_ <- insertToNodeNgrams indexedNgrams
-- List Ngrams Flow
-- get elements
-- filter by TFICF
let ngs = ngrams2list' indexedNgrams
--let ngs = getNgramsElementsWithParentNodeId masterCorpusId
_masterListId <- flowList masterUserId masterCorpusId ngs
_userListId <- flowListUser userId userCorpusId ngs 100
--------------------------------------------------
......@@ -256,7 +260,8 @@ flowList uId cId ngs = do
lId <- getOrMkList cId uId
printDebug "listId flowList" lId
--printDebug "ngs" (DM.keys ngs)
-- TODO add stemming equivalence of 2 ngrams
-- TODO grouping
-- TODO needs rework
-- let groupEd = groupNgramsBy (\(NgramsT t1 n1) (NgramsT t2 n2) -> if (((==) t1 t2) && ((==) n1 n2)) then (Just (n1,n2)) else Nothing) ngs
-- _ <- insertGroups lId groupEd
......@@ -273,7 +278,8 @@ flowListUser uId cId ngsM n = do
ngs <- take n <$> sortWith tficf_score
<$> getTficf userMaster cId lId NgramsTerms
flowListBase lId ngsM
trace ("flowListBase" <> show lId) flowListBase lId ngsM
putListNgrams lId NgramsTerms $
[ NgramsElement (tficf_ngramsTerms ng) GraphList 1 Nothing mempty
| ng <- ngs
......@@ -322,7 +328,6 @@ ngrams2list' m = fromListWith (<>)
-- | TODO: weight of the list could be a probability
insertLists :: HasNodeError err => ListId -> [(ListType, (NgramsType, NgramsIndexed))] -> Cmd err Int
insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing (ngramsTypeId ngt) (fromIntegral $ listTypeId l) 1
......
......@@ -19,15 +19,18 @@ Count Ngrams by Context
module Gargantext.Database.Metrics.Count where
import Data.Monoid (mempty)
import Control.Arrow (returnA)
import Control.Lens (view)
import Data.Map.Strict (Map, fromListWith, elems)
import Data.Text (Text)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.API.Ngrams (NgramsElement(..))
import Gargantext.Core.Types.Main (listTypeId, ListType(..))
import Gargantext.Database.Queries.Join (leftJoin4, leftJoin5)
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Queries.Join (leftJoin4, leftJoin5, leftJoin3)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsType(..), ngramsTypeId, Ngrams(..), NgramsIndexed(..), ngrams, ngramsTerms)
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsType(..), ngramsTypeId, Ngrams(..), NgramsIndexed(..), ngrams, ngramsTerms, fromNgramsTypeId)
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Node (HasNodeError(..))
import Gargantext.Database.Schema.NodeNgram
......@@ -80,27 +83,45 @@ getNgramsByNode :: NodeId -> NgramsType -> Cmd err [[Text]]
getNgramsByNode nId nt = elems
<$> fromListWith (<>)
<$> map (\(i,t) -> (i,[t]))
<$> getNgramsByNodeIndexed nId nt
<$> getNgramsByNodeNodeIndexed nId nt
-- | TODO add join with nodeNodeNgram (if it exists)
getNgramsByNodeIndexed :: NodeId -> NgramsType -> Cmd err [(NodeId, Text)]
getNgramsByNodeIndexed nId nt = runOpaQuery (select' nId)
getNgramsByNodeNodeIndexed :: NodeId -> NgramsType -> Cmd err [(NodeId, Text)]
getNgramsByNodeNodeIndexed nId nt = runOpaQuery (select' nId)
where
select' nId' = proc () -> do
(ng,(nng,(_,n))) <- getNgramsByNodeIndexedJoin -< ()
(ng,(nng,(_,n))) <- getNgramsByNodeNodeIndexedJoin -< ()
restrict -< _node_id n .== toNullable (pgNodeId nId')
restrict -< nng_ngramsType nng .== toNullable (pgNgramsTypeId $ ngramsTypeId nt)
returnA -< (nng_node_id nng, ngrams_terms ng)
{-
getNgramsByNodeIndexed' :: NodeId -> NgramsType -> Cmd err [(NodeId, Maybe Text)]
getNgramsByNodeIndexed' nId nt = runOpaQuery (select' nId)
where
select' nId' = proc () -> do
(nnng,(ng,(nng,(_,n)))) <- getNgramsByNodeIndexedJoin5 -< ()
restrict -< _node_id n .== toNullable (pgNodeId nId')
restrict -< nng_ngramsType nng .== toNullable (pgNgramsTypeId $ ngramsTypeId nt)
let node_id' = ifThenElse (isNull $ toNullable $ nnng_node1_id nnng)
(nng_node_id nng)
(nnng_node2_id nng)
let t1 = ifThenElse (isNull $ toNullable $ nnng_node1_id nnng)
(ngrams_terms ng)
(nnng_terms nng)
returnA -< (n1, t1)
--}
getNgramsByNodeIndexedJoin :: Query ( NgramsRead
getNgramsByNodeNodeIndexedJoin :: Query ( NgramsRead
, (NodeNgramReadNull
, (NodeNodeReadNull
, NodeReadNull
)
)
)
getNgramsByNodeIndexedJoin = leftJoin4 queryNodeTable
getNgramsByNodeNodeIndexedJoin = leftJoin4 queryNodeTable
queryNodeNodeTable
queryNodeNgramTable
queryNgramsTable
......@@ -126,7 +147,7 @@ getNgramsByNodeIndexedJoin = leftJoin4 queryNodeTable
c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng'
getNgramsByNodeIndexedJoin' :: Query ( NodeNodeNgramsRead
getNgramsByNodeNodeIndexedJoin5 :: Query ( NodeNodeNgramsRead
, (NgramsReadNull
, (NodeNgramReadNull
, (NodeNodeReadNull
......@@ -135,7 +156,7 @@ getNgramsByNodeIndexedJoin' :: Query ( NodeNodeNgramsRead
)
)
)
getNgramsByNodeIndexedJoin' = leftJoin5 queryNodeTable
getNgramsByNodeNodeIndexedJoin5 = leftJoin5 queryNodeTable
queryNodeNodeTable
queryNodeNgramTable
queryNgramsTable
......@@ -161,7 +182,6 @@ getNgramsByNodeIndexedJoin' = leftJoin5 queryNodeTable
) -> Column PGBool
c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng'
c4 :: ( NodeNodeNgramsRead
, (NgramsRead
, ( NodeNgramReadNull
......@@ -174,9 +194,41 @@ getNgramsByNodeIndexedJoin' = leftJoin5 queryNodeTable
c4 (nnng,(_,(_,(nn,_)))) = (toNullable $ nnng_node1_id nnng) .== (nn_node1_id nn)
.&& (toNullable $ nnng_node2_id nnng) .== (nn_node2_id nn)
--}
--{-
getNgramsElementsWithParentNodeId :: NodeId -> Cmd err (Map NgramsType [NgramsElement])
getNgramsElementsWithParentNodeId nId = do
ns <- getNgramsWithParentNodeId nId
pure $ fromListWith (<>) [ (maybe (panic "error") identity $ fromNgramsTypeId nt, [NgramsElement ng CandidateList 1 Nothing mempty])
| (_,(nt,ng)) <- ns
]
-------------------------------------------------------------------------
getNgramsWithParentNodeId :: NodeId -> Cmd err [(NodeId, (NgramsTypeId, Text))]
getNgramsWithParentNodeId nId = runOpaQuery (select nId)
where
select nId' = proc () -> do
(ng,(nng,n)) <- getNgramsWithParentNodeIdJoin -< ()
restrict -< _node_parentId n .== (toNullable $ pgNodeId nId')
restrict -< _node_typename n .== (toNullable $ pgInt4 $ nodeTypeId NodeDocument)
returnA -< (nng_node_id nng, (nng_ngramsType nng, ngrams_terms ng))
--}
getNgramsWithParentNodeIdJoin :: Query ( NgramsRead
, ( NodeNgramReadNull
, NodeReadNull
)
)
getNgramsWithParentNodeIdJoin = leftJoin3 queryNodeTable queryNodeNgramTable queryNgramsTable on1 on2
where
on1 :: (NodeNgramRead, NodeRead) -> Column PGBool
on1 (nng,n) = nng_node_id nng .== _node_id n
on2 :: (NgramsRead, (NodeNgramRead, NodeReadNull))-> Column PGBool
on2 (ng, (nng,_)) = ngrams_id ng .== nng_ngrams_id nng
......@@ -123,6 +123,10 @@ instance FromField NgramsTypeId where
if (n :: Int) > 0 then return $ NgramsTypeId n
else mzero
instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
pgNgramsType :: NgramsType -> Column PGInt4
pgNgramsType = pgNgramsTypeId . ngramsTypeId
......
......@@ -156,7 +156,6 @@ instance QueryRunnerColumnDefault (Nullable PGInt4) NodeId
queryRunnerColumnDefault = fieldQueryRunnerColumn
------------------------------------------------------------------------
-- WIP
-- TODO Classe HasDefault where
......
......@@ -114,14 +114,22 @@ gargantuaUser = User (Nothing) (pgStrictText "password")
(pgStrictText "e@mail")
(pgBool True) (pgBool True) (Nothing)
simpleUser :: UserWrite
simpleUser = User (Nothing) (pgStrictText "password")
simpleUser1 :: UserWrite
simpleUser1 = User (Nothing) (pgStrictText "password")
(Nothing) (pgBool False) (pgStrictText "user1")
(pgStrictText "first_name")
(pgStrictText "last_name")
(pgStrictText "e@mail")
(pgBool False) (pgBool True) (Nothing)
simpleUser2 :: UserWrite
simpleUser2 = User (Nothing) (pgStrictText "password")
(Nothing) (pgBool False) (pgStrictText "user2")
(pgStrictText "first_name")
(pgStrictText "last_name")
(pgStrictText "e@mail")
(pgBool False) (pgBool True) (Nothing)
------------------------------------------------------------------
queryUserTable :: Query UserRead
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment