Commit 4264b3ae authored by Alexandre Delanoë's avatar Alexandre Delanoë

[TEXT FLOW] checking all

parent 670bc6c8
...@@ -26,22 +26,76 @@ module Gargantext.Database.Action.Flow.List ...@@ -26,22 +26,76 @@ module Gargantext.Database.Action.Flow.List
import Control.Monad (mapM_) import Control.Monad (mapM_)
import Data.Map (Map, toList) import Data.Map (Map, toList)
import Data.Either
import Data.Maybe (Maybe(..), catMaybes) import Data.Maybe (Maybe(..), catMaybes)
import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Gargantext.API.Ngrams (NgramsElement(..), putListNgrams) import Gargantext.API.Ngrams (NgramsElement(..), putListNgrams)
import Gargantext.Core.Types.Main (ListType(CandidateTerm)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Types.Individu
import Gargantext.Core.Flow.Types import Gargantext.Core.Flow.Types
import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Admin.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId) import Gargantext.Database.Admin.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Admin.Utils (Cmd)
import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId) import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import Gargantext.Database.Schema.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb, getCgramsId) import Gargantext.Database.Schema.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb, getCgramsId)
import Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams -- (insert_Node_NodeNgrams_NodeNgrams, Node_NodeNgrams_NodeNgrams(..)) import Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams -- (insert_Node_NodeNgrams_NodeNgrams, Node_NodeNgrams_NodeNgrams(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.List
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set
import Gargantext.Database.Action.Metrics.NgramsByNode
import Gargantext.Database.Action.Query.Tree.Root (getOrMk_RootWithCorpus)
-- FLOW LIST -- FLOW LIST
-- 1. select specific terms of the corpus when compared with others langs
-- (for now, suppose english)
-- 2. select specific terms of the corpus when compared with others corpora (same database)
-- 3. select clusters of terms (generic and specific)
{-
data FlowList = FlowListLang
| FlowListTficf
| FlowListSpeGen
flowList_Tficf :: UserCorpusId
-> MasterCorpusId
-> NgramsType
-> (Text -> Text)
-> Cmd err (Map Text (Double, Set Text))
flowList_Tficf u m nt f = do
u' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsUser u nt
m' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsMaster u m
pure $ sortTficf Down
$ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
flowList_Tficf' :: UserCorpusId
-> MasterCorpusId
-> NgramsType
-> Cmd err (Map Text (Double, Set Text))
flowList_Tficf' u m nt f = do
u' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsUser u nt
m' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsMaster u m
pure $ sortTficf Down
$ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
-}
-- | TODO check optimization -- | TODO check optimization
mapNodeIdNgrams :: [DocumentIdWithNgrams a] mapNodeIdNgrams :: [DocumentIdWithNgrams a]
-> Map Ngrams (Map NgramsType (Map NodeId Int)) -> Map Ngrams (Map NgramsType (Map NodeId Int))
......
...@@ -71,8 +71,8 @@ lastName texte = DT.toLower ...@@ -71,8 +71,8 @@ lastName texte = DT.toLower
where where
lastName' = lastMay . DT.splitOn " " lastName' = lastMay . DT.splitOn " "
-- TODO: this method is dangerous (maybe equalities of the result are not taken into account -- TODO: this method is dangerous (maybe equalities of the result are
-- emergency demo plan...) -- not taken into account emergency demo plan...)
pairingPolicyToMap :: (Terms -> Terms) pairingPolicyToMap :: (Terms -> Terms)
-> Map (NgramsT Ngrams) a -> Map (NgramsT Ngrams) a
-> Map (NgramsT Ngrams) a -> Map (NgramsT Ngrams) a
......
...@@ -212,7 +212,7 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $ ...@@ -212,7 +212,7 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
JOIN input_rows ir ON ir.terms = ng.terms JOIN input_rows ir ON ir.terms = ng.terms
WHERE nng.node1_id = ? -- CorpusId WHERE nng.node1_id = ? -- CorpusId
AND nng.node2_id = ? -- ListId AND nng.node2_id = ? -- ListId
AND nng.ngrams_type = ? -- NgramsTypeId AND nng.ngrams_type = ? -- NgramsTypeId
-- AND nn.category > 0 -- TODO -- AND nn.category > 0 -- TODO
GROUP BY ng.terms, nng.weight GROUP BY ng.terms, nng.weight
|] |]
......
...@@ -65,7 +65,10 @@ triggerCountInsert = execPGSQuery query (nodeTypeId NodeDocument, nodeTypeId Nod ...@@ -65,7 +65,10 @@ triggerCountInsert = execPGSQuery query (nodeTypeId NodeDocument, nodeTypeId Nod
|] |]
triggerCountInsert2 :: Cmd err Int64 triggerCountInsert2 :: Cmd err Int64
triggerCountInsert2 = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDocument, nodeTypeId NodeList) triggerCountInsert2 = execPGSQuery query ( nodeTypeId NodeCorpus
, nodeTypeId NodeDocument
, nodeTypeId NodeList
)
where where
query :: DPS.Query query :: DPS.Query
query = [sql| query = [sql|
......
...@@ -39,6 +39,112 @@ import Opaleye.Internal.QueryArr (Query) ...@@ -39,6 +39,112 @@ import Opaleye.Internal.QueryArr (Query)
import Prelude hiding (null, id, map, sum) import Prelude hiding (null, id, map, sum)
------------------------------------------------------------------------ ------------------------------------------------------------------------
$(makeAdaptorAndInstance "pNode" ''NodePoly)
$(makeLensesWith abbreviatedFields ''NodePoly)
$(makeAdaptorAndInstance "pNodeSearch" ''NodePolySearch)
$(makeLensesWith abbreviatedFields ''NodePolySearch)
------------------------------------------------------------------------
nodeTable :: Table NodeWrite NodeRead
nodeTable = Table "nodes" (pNode Node { _node_id = optional "id"
, _node_typename = required "typename"
, _node_userId = required "user_id"
, _node_parentId = optional "parent_id"
, _node_name = required "name"
, _node_date = optional "date"
, _node_hyperdata = required "hyperdata"
-- ignoring ts_vector field here
}
)
queryNodeTable :: Query NodeRead
queryNodeTable = queryTable nodeTable
------------------------------------------------------------------------
type NodeWrite = NodePoly (Maybe (Column PGInt4) )
(Column PGInt4)
(Column PGInt4)
(Maybe (Column PGInt4) )
(Column PGText)
(Maybe (Column PGTimestamptz))
(Column PGJsonb)
type NodeRead = NodePoly (Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGText )
(Column PGTimestamptz )
(Column PGJsonb )
type NodeReadNull = NodePoly (Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGText))
(Column (Nullable PGTimestamptz))
(Column (Nullable PGJsonb))
------------------------------------------------------------------------
-- | Node(Read|Write)Search is slower than Node(Write|Read) use it
-- for full text search only
nodeTableSearch :: Table NodeSearchWrite NodeSearchRead
nodeTableSearch = Table "nodes" (pNodeSearch NodeSearch { _ns_id = optional "id"
, _ns_typename = required "typename"
, _ns_userId = required "user_id"
, _ns_parentId = required "parent_id"
, _ns_name = required "name"
, _ns_date = optional "date"
, _ns_hyperdata = required "hyperdata"
, _ns_search = optional "search"
}
)
type NodeSearchWrite =
NodePolySearch
(Maybe (Column PGInt4) )
(Column PGInt4 )
(Column PGInt4 )
(Column (Nullable PGInt4) )
(Column PGText )
(Maybe (Column PGTimestamptz))
(Column PGJsonb )
(Maybe (Column PGTSVector) )
type NodeSearchRead =
NodePolySearch
(Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column (Nullable PGInt4 ))
(Column PGText )
(Column PGTimestamptz )
(Column PGJsonb )
(Column PGTSVector )
type NodeSearchReadNull =
NodePolySearch
(Column (Nullable PGInt4) )
(Column (Nullable PGInt4) )
(Column (Nullable PGInt4) )
(Column (Nullable PGInt4) )
(Column (Nullable PGText) )
(Column (Nullable PGTimestamptz))
(Column (Nullable PGJsonb) )
(Column (Nullable PGTSVector) )
------------------------------------------------------------------------
------------------------------------------------------------------------
-- Instances
------------------------------------------------------------------------
instance FromField HyperdataAny where instance FromField HyperdataAny where
fromField = fromField' fromField = fromField'
...@@ -140,101 +246,4 @@ instance QueryRunnerColumnDefault (Nullable PGInt4) NodeId ...@@ -140,101 +246,4 @@ instance QueryRunnerColumnDefault (Nullable PGInt4) NodeId
queryRunnerColumnDefault = fieldQueryRunnerColumn queryRunnerColumnDefault = fieldQueryRunnerColumn
------------------------------------------------------------------------
$(makeAdaptorAndInstance "pNode" ''NodePoly)
$(makeLensesWith abbreviatedFields ''NodePoly)
$(makeAdaptorAndInstance "pNodeSearch" ''NodePolySearch)
$(makeLensesWith abbreviatedFields ''NodePolySearch)
type NodeWrite = NodePoly (Maybe (Column PGInt4) )
(Column PGInt4)
(Column PGInt4)
(Maybe (Column PGInt4) )
(Column PGText)
(Maybe (Column PGTimestamptz))
(Column PGJsonb)
type NodeRead = NodePoly (Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGText )
(Column PGTimestamptz )
(Column PGJsonb )
type NodeReadNull = NodePoly (Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGText))
(Column (Nullable PGTimestamptz))
(Column (Nullable PGJsonb))
nodeTable :: Table NodeWrite NodeRead
nodeTable = Table "nodes" (pNode Node { _node_id = optional "id"
, _node_typename = required "typename"
, _node_userId = required "user_id"
, _node_parentId = optional "parent_id"
, _node_name = required "name"
, _node_date = optional "date"
, _node_hyperdata = required "hyperdata"
}
)
queryNodeTable :: Query NodeRead
queryNodeTable = queryTable nodeTable
------------------------------------------------------------------------
-- | Node(Read|Write)Search is slower than Node(Write|Read) use it
-- for full text search only
type NodeSearchWrite =
NodePolySearch
(Maybe (Column PGInt4) )
(Column PGInt4 )
(Column PGInt4 )
(Column (Nullable PGInt4) )
(Column PGText )
(Maybe (Column PGTimestamptz))
(Column PGJsonb )
(Maybe (Column PGTSVector) )
type NodeSearchRead =
NodePolySearch
(Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column (Nullable PGInt4 ))
(Column PGText )
(Column PGTimestamptz )
(Column PGJsonb )
(Column PGTSVector )
type NodeSearchReadNull =
NodePolySearch
(Column (Nullable PGInt4) )
(Column (Nullable PGInt4) )
(Column (Nullable PGInt4) )
(Column (Nullable PGInt4) )
(Column (Nullable PGText) )
(Column (Nullable PGTimestamptz))
(Column (Nullable PGJsonb) )
(Column (Nullable PGTSVector) )
nodeTableSearch :: Table NodeSearchWrite NodeSearchRead
nodeTableSearch = Table "nodes" (pNodeSearch NodeSearch { _ns_id = optional "id"
, _ns_typename = required "typename"
, _ns_userId = required "user_id"
, _ns_parentId = required "parent_id"
, _ns_name = required "name"
, _ns_date = optional "date"
, _ns_hyperdata = required "hyperdata"
, _ns_search = optional "search"
}
)
...@@ -33,15 +33,15 @@ import Opaleye ...@@ -33,15 +33,15 @@ import Opaleye
import Prelude import Prelude
data NodeNodeNgrams2Poly node_id nodengrams_id w data NodeNodeNgrams2Poly node_id nodengrams_id w
= NodeNodeNgrams2 { _nnng2_node_id :: node_id = NodeNodeNgrams2 { _nnng2_node_id :: node_id
, _nnng2_nodengrams_id :: nodengrams_id , _nnng2_nodengrams_id :: nodengrams_id
, _nnng2_weight :: w , _nnng2_weight :: w
} deriving (Show) } deriving (Show)
type NodeNodeNgrams2Write = type NodeNodeNgrams2Write =
NodeNodeNgrams2Poly (Column PGInt4 ) NodeNodeNgrams2Poly (Column PGInt4 )
(Column PGInt4 ) (Column PGInt4 )
(Column PGFloat8) (Column PGFloat8)
type NodeNodeNgrams2Read = type NodeNodeNgrams2Read =
NodeNodeNgrams2Poly (Column PGInt4 ) NodeNodeNgrams2Poly (Column PGInt4 )
...@@ -50,8 +50,8 @@ type NodeNodeNgrams2Read = ...@@ -50,8 +50,8 @@ type NodeNodeNgrams2Read =
type NodeNodeNgrams2ReadNull = type NodeNodeNgrams2ReadNull =
NodeNodeNgrams2Poly (Column (Nullable PGInt4 )) NodeNodeNgrams2Poly (Column (Nullable PGInt4 ))
(Column (Nullable PGInt4 )) (Column (Nullable PGInt4 ))
(Column (Nullable PGFloat8)) (Column (Nullable PGFloat8))
type NodeNodeNgrams2 = type NodeNodeNgrams2 =
NodeNodeNgrams2Poly DocId NodeNgramsId Double NodeNodeNgrams2Poly DocId NodeNgramsId Double
...@@ -63,8 +63,8 @@ makeLenses ''NodeNodeNgrams2Poly ...@@ -63,8 +63,8 @@ makeLenses ''NodeNodeNgrams2Poly
nodeNodeNgrams2Table :: Table NodeNodeNgrams2Write NodeNodeNgrams2Read nodeNodeNgrams2Table :: Table NodeNodeNgrams2Write NodeNodeNgrams2Read
nodeNodeNgrams2Table = Table "node_node_ngrams2" nodeNodeNgrams2Table = Table "node_node_ngrams2"
( pNodeNodeNgrams2 NodeNodeNgrams2 ( pNodeNodeNgrams2 NodeNodeNgrams2
{ _nnng2_node_id = required "node_id" { _nnng2_node_id = required "node_id"
, _nnng2_nodengrams_id = required "nodengrams_id" , _nnng2_nodengrams_id = required "nodengrams_id"
, _nnng2_weight = required "weight" , _nnng2_weight = required "weight"
} }
) )
...@@ -77,16 +77,16 @@ insertNodeNodeNgrams2 :: [NodeNodeNgrams2] -> Cmd err Int ...@@ -77,16 +77,16 @@ insertNodeNodeNgrams2 :: [NodeNodeNgrams2] -> Cmd err Int
insertNodeNodeNgrams2 = insertNodeNodeNgrams2W insertNodeNodeNgrams2 = insertNodeNodeNgrams2W
. map (\(NodeNodeNgrams2 n1 n2 w) -> . map (\(NodeNodeNgrams2 n1 n2 w) ->
NodeNodeNgrams2 (pgNodeId n1) NodeNodeNgrams2 (pgNodeId n1)
(pgInt4 n2) (pgInt4 n2)
(pgDouble w) (pgDouble w)
) )
insertNodeNodeNgrams2W :: [NodeNodeNgrams2Write] -> Cmd err Int insertNodeNodeNgrams2W :: [NodeNodeNgrams2Write] -> Cmd err Int
insertNodeNodeNgrams2W nnnw = insertNodeNodeNgrams2W nnnw =
mkCmd $ \c -> fromIntegral <$> runInsert_ c insertNothing mkCmd $ \c -> fromIntegral <$> runInsert_ c insertNothing
where where
insertNothing = (Insert { iTable = nodeNodeNgrams2Table insertNothing = Insert { iTable = nodeNodeNgrams2Table
, iRows = nnnw , iRows = nnnw
, iReturning = rCount , iReturning = rCount
, iOnConflict = (Just DoNothing) , iOnConflict = (Just DoNothing)
}) }
...@@ -47,20 +47,21 @@ toUserLight (UserDB id _ _ _ u _ _ e _ _ _ ) = UserLight id u e ...@@ -47,20 +47,21 @@ toUserLight (UserDB id _ _ _ u _ _ e _ _ _ ) = UserLight id u e
data UserPoly id pass llogin suser data UserPoly id pass llogin suser
uname fname lname uname fname lname
mail staff active djoined = UserDB { user_id :: id mail staff active djoined =
, user_password :: pass UserDB { user_id :: id
, user_lastLogin :: llogin , user_password :: pass
, user_isSuperUser :: suser , user_lastLogin :: llogin
, user_isSuperUser :: suser
, user_username :: uname
, user_firstName :: fname , user_username :: uname
, user_lastName :: lname , user_firstName :: fname
, user_email :: mail , user_lastName :: lname
, user_email :: mail
, user_isStaff :: staff
, user_isActive :: active , user_isStaff :: staff
, user_dateJoined :: djoined , user_isActive :: active
} deriving (Show) , user_dateJoined :: djoined
} deriving (Show)
type UserWrite = UserPoly (Maybe (Column PGInt4)) (Column PGText) type UserWrite = UserPoly (Maybe (Column PGInt4)) (Column PGText)
(Maybe (Column PGTimestamptz)) (Column PGBool) (Maybe (Column PGTimestamptz)) (Column PGBool)
......
...@@ -131,6 +131,9 @@ buildNgramsTermsList' uCid groupIt stop gls is = do ...@@ -131,6 +131,9 @@ buildNgramsTermsList' uCid groupIt stop gls is = do
pure $ Map.fromList [(NgramsTerms, ngs')] pure $ Map.fromList [(NgramsTerms, ngs')]
-} -}
buildNgramsTermsList :: Lang buildNgramsTermsList :: Lang
-> Int -> Int
-> Int -> Int
...@@ -152,8 +155,8 @@ buildNgramsTermsList l n m s uCid mCid = do ...@@ -152,8 +155,8 @@ buildNgramsTermsList l n m s uCid mCid = do
termList = termList =
-- (toTermList a b ((isStopTerm s) . fst) candidatesHead) -- (toTermList a b ((isStopTerm s) . fst) candidatesHead)
(map (toList ((isStopTerm s) .fst) GraphTerm) candidatesHead) (map (toGargList ((isStopTerm s) .fst) GraphTerm) candidatesHead)
<> (map (toList ((isStopTerm s) .fst) CandidateTerm) candidatesTail) <> (map (toGargList ((isStopTerm s) .fst) CandidateTerm) candidatesTail)
ngs = List.concat $ map toNgramsElement termList ngs = List.concat $ map toNgramsElement termList
...@@ -167,8 +170,8 @@ toTermList :: Int ...@@ -167,8 +170,8 @@ toTermList :: Int
-> [(ListType, a)] -> [(ListType, a)]
toTermList _ _ _ [] = [] toTermList _ _ _ [] = []
toTermList a b stop ns = -- trace ("computing toTermList") $ toTermList a b stop ns = -- trace ("computing toTermList") $
map (toList stop CandidateTerm) xs map (toGargList stop CandidateTerm) xs
<> map (toList stop GraphTerm) ys <> map (toGargList stop GraphTerm) ys
<> toTermList a b stop zs <> toTermList a b stop zs
where where
xs = take a ns xs = take a ns
...@@ -194,8 +197,8 @@ toNgramsElement (listType, (_stem, (_score, setNgrams))) = ...@@ -194,8 +197,8 @@ toNgramsElement (listType, (_stem, (_score, setNgrams))) =
) children ) children
toList :: (b -> Bool) -> ListType -> b -> (ListType, b) toGargList :: (b -> Bool) -> ListType -> b -> (ListType, b)
toList stop l n = case stop n of toGargList stop l n = case stop n of
True -> (StopTerm, n) True -> (StopTerm, n)
False -> (l, n) False -> (l, n)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment