diff --git a/src/Gargantext/Database/Flow.hs b/src/Gargantext/Database/Flow.hs index d3dc26673df9bea0a17778d142039eef9c61b0cd..657821762e30facdaa6731465389e6bcaef09302 100644 --- a/src/Gargantext/Database/Flow.hs +++ b/src/Gargantext/Database/Flow.hs @@ -312,9 +312,13 @@ flowListUser :: FlowCmdM env err m -> m ListId flowListUser uId cId ngsM n = do lId <- getOrMkList cId uId - + + {- ngs <- take n <$> sortWith tficf_score <$> getTficf userMaster cId lId NgramsTerms + -} + + let ngs = [] trace ("flowListBase" <> show lId) flowListBase lId ngsM diff --git a/src/Gargantext/Database/Metrics/TFICF.hs b/src/Gargantext/Database/Metrics/TFICF.hs index 85a7464147571859c203beb98502b27e35bfb854..d917f9db46e20e45e01a2a47d7e9c77ebb5179cf 100644 --- a/src/Gargantext/Database/Metrics/TFICF.hs +++ b/src/Gargantext/Database/Metrics/TFICF.hs @@ -18,9 +18,9 @@ TFICF, generalization of TFIDF module Gargantext.Database.Metrics.TFICF where +import Data.Text (Text) import Database.PostgreSQL.Simple.SqlQQ (sql) import qualified Database.PostgreSQL.Simple as DPS - import Safe (headMay) import Gargantext.Text.Metrics.TFICF -- (tficf) import Gargantext.Prelude @@ -33,23 +33,31 @@ import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms, NgramsType, ngr type OccGlobal = Double type OccCorpus = Double +data TficfTerms = TficfTerms + { tt_terms :: !Text + , tt_global :: !Double + , tt_corpus :: !Double + } deriving (Show) + +data TficfData = TficfData + { td_global :: !Double + , td_corpus :: !Double + , td_terms :: ![TficfTerms] + } deriving (Show) + +getTficf :: UsernameMaster -> CorpusId -> NgramsType + -> Cmd err TficfData +getTficf u cId ngType = do + g <- countDocsInDatabase u + c <- countDocsInCorpus cId + ngs <- getOccByNgrams u cId ngType -getTficf :: UsernameMaster -> CorpusId -> ListId -> NgramsType - -> Cmd err [Tficf] -getTficf u cId lId ngType = do - g <- getTficfGlobal u - c <- getTficfCorpus cId - ngs <- getTficfNgrams u cId lId ngType + pure $ TficfData (fromIntegral g) (fromIntegral c) ngs - pure $ map (\(nId, nTerms, wm, wn) - -> Tficf nId nTerms - (tficf (TficfCorpus wn (fromIntegral c)) - (TficfLanguage wm (fromIntegral g)) - ) - ) ngs -getTficfGlobal :: UsernameMaster -> Cmd err Int -getTficfGlobal u = maybe 0 identity <$> headMay +-- | TODO add filters with LANG and Database type +countDocsInDatabase :: UsernameMaster -> Cmd err Int +countDocsInDatabase u = maybe 0 identity <$> headMay <$> map (\(DPS.Only n) -> n ) <$> runPGSQuery q p where @@ -61,8 +69,8 @@ getTficfGlobal u = maybe 0 identity <$> headMay AND n.typename = ? |] -getTficfCorpus :: CorpusId -> Cmd err Int -getTficfCorpus cId = maybe 0 identity <$> headMay +countDocsInCorpus :: CorpusId -> Cmd err Int +countDocsInCorpus cId = maybe 0 identity <$> headMay <$> map (\(DPS.Only n) -> n ) <$> runPGSQuery q p where @@ -76,31 +84,21 @@ getTficfCorpus cId = maybe 0 identity <$> headMay -getTficfNgrams :: UsernameMaster -> CorpusId -> ListId -> NgramsType - -> Cmd err [(NgramsId, NgramsTerms, OccGlobal, OccCorpus)] -getTficfNgrams u cId lId ngType = runPGSQuery queryTficf p +getOccByNgrams :: UsernameMaster -> CorpusId -> NgramsType + -> Cmd err [TficfTerms] +getOccByNgrams u cId ngType = map (\(t,g,c) -> TficfTerms t g c) + <$> runPGSQuery queryTficf p where - p = (u, nodeTypeId NodeList, nodeTypeId NodeDocument, ngramsTypeId ngType, cId, lId) + p = (u, nodeTypeId NodeDocument, ngramsTypeId ngType, cId) queryTficf :: DPS.Query queryTficf = [sql| --- TODO add CTE for groups -WITH input(masterUsername,typenameList,typenameDoc,ngramsTypeId,corpusId,listId) - AS ((VALUES(?::"text", ? :: "int4", ?::"int4", ?::"int4",?::"int4",?::"int4"))), - -- AS ((VALUES('gargantua'::"text", 5 :: "int4", 4::"int4", 4::"int4",1018::"int4",1019::"int4"))), - -list_master AS ( -SELECT n.id,n.name,n.user_id from nodes n -JOIN input ON n.typename = input.typenameList -JOIN auth_user a ON a.id = n.user_id -WHERE -a.username = input.masterUsername -), +WITH input(masterUsername,typenameDoc,ngramsTypeId,corpusId) + AS ((VALUES(?::"text", ?::"int4",?::"int4",?::"int4"))), ngrams_master AS ( SELECT ng.id, ng.terms, SUM(nng2.weight) AS weight FROM nodes_ngrams nng -JOIN list_master ON list_master.id = nng.node_id JOIN nodes_ngrams nng2 ON nng2.ngrams_id = nng.ngrams_id JOIN nodes n ON n.id = nng2.node_id JOIN input ON input.typenameDoc = n.typename @@ -114,7 +112,6 @@ GROUP BY ng.id,ng.terms ngrams_user AS ( SELECT ng.id, ng.terms, SUM(nng2.weight) AS weight FROM nodes_ngrams nng -JOIN list_master ON list_master.id = nng.node_id JOIN nodes_ngrams nng2 ON nng2.ngrams_id = nng.ngrams_id JOIN nodes_nodes nn ON nn.node2_id = nng2.node_id @@ -129,7 +126,7 @@ GROUP BY ng.id,ng.terms ) -SELECT nu.id,nu.terms,SUM(nm.weight) wm,SUM(nu.weight) wu +SELECT nu.terms,SUM(nm.weight) wm,SUM(nu.weight) wu FROM ngrams_user nu JOIN ngrams_master nm ON nm.id = nu.id WHERE diff --git a/src/Gargantext/Text/Metrics/TFICF.hs b/src/Gargantext/Text/Metrics/TFICF.hs index e000ded2a68a2192e7e87f384c695cb221adf0b7..594cd74dde82452e2a1e6998d2008df56495256b 100644 --- a/src/Gargantext/Text/Metrics/TFICF.hs +++ b/src/Gargantext/Text/Metrics/TFICF.hs @@ -28,6 +28,10 @@ data Tficf = Tficf { tficf_ngramsId :: NgramsId , tficf_score :: Double } deriving (Show) +data Tficf' = Tficf' { tficf'_terms :: NgramsTerms + , tficf'_score :: Double + } deriving (Show) + type SupraContext = TficfContext type InfraContext = TficfContext