Commit b34b8baf authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FLOW][TFICF] preparing for repo filtering/grouping.

parent d218880c
Pipeline #221 failed with stage
...@@ -312,9 +312,13 @@ flowListUser :: FlowCmdM env err m ...@@ -312,9 +312,13 @@ flowListUser :: FlowCmdM env err m
-> m ListId -> m ListId
flowListUser uId cId ngsM n = do flowListUser uId cId ngsM n = do
lId <- getOrMkList cId uId lId <- getOrMkList cId uId
{-
ngs <- take n <$> sortWith tficf_score ngs <- take n <$> sortWith tficf_score
<$> getTficf userMaster cId lId NgramsTerms <$> getTficf userMaster cId lId NgramsTerms
-}
let ngs = []
trace ("flowListBase" <> show lId) flowListBase lId ngsM trace ("flowListBase" <> show lId) flowListBase lId ngsM
......
...@@ -18,9 +18,9 @@ TFICF, generalization of TFIDF ...@@ -18,9 +18,9 @@ TFICF, generalization of TFIDF
module Gargantext.Database.Metrics.TFICF where module Gargantext.Database.Metrics.TFICF where
import Data.Text (Text)
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import qualified Database.PostgreSQL.Simple as DPS import qualified Database.PostgreSQL.Simple as DPS
import Safe (headMay) import Safe (headMay)
import Gargantext.Text.Metrics.TFICF -- (tficf) import Gargantext.Text.Metrics.TFICF -- (tficf)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -33,23 +33,31 @@ import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms, NgramsType, ngr ...@@ -33,23 +33,31 @@ import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms, NgramsType, ngr
type OccGlobal = Double type OccGlobal = Double
type OccCorpus = Double type OccCorpus = Double
data TficfTerms = TficfTerms
{ tt_terms :: !Text
, tt_global :: !Double
, tt_corpus :: !Double
} deriving (Show)
data TficfData = TficfData
{ td_global :: !Double
, td_corpus :: !Double
, td_terms :: ![TficfTerms]
} deriving (Show)
getTficf :: UsernameMaster -> CorpusId -> NgramsType
-> Cmd err TficfData
getTficf u cId ngType = do
g <- countDocsInDatabase u
c <- countDocsInCorpus cId
ngs <- getOccByNgrams u cId ngType
getTficf :: UsernameMaster -> CorpusId -> ListId -> NgramsType pure $ TficfData (fromIntegral g) (fromIntegral c) ngs
-> Cmd err [Tficf]
getTficf u cId lId ngType = do
g <- getTficfGlobal u
c <- getTficfCorpus cId
ngs <- getTficfNgrams u cId lId ngType
pure $ map (\(nId, nTerms, wm, wn)
-> Tficf nId nTerms
(tficf (TficfCorpus wn (fromIntegral c))
(TficfLanguage wm (fromIntegral g))
)
) ngs
getTficfGlobal :: UsernameMaster -> Cmd err Int -- | TODO add filters with LANG and Database type
getTficfGlobal u = maybe 0 identity <$> headMay countDocsInDatabase :: UsernameMaster -> Cmd err Int
countDocsInDatabase u = maybe 0 identity <$> headMay
<$> map (\(DPS.Only n) -> n ) <$> map (\(DPS.Only n) -> n )
<$> runPGSQuery q p <$> runPGSQuery q p
where where
...@@ -61,8 +69,8 @@ getTficfGlobal u = maybe 0 identity <$> headMay ...@@ -61,8 +69,8 @@ getTficfGlobal u = maybe 0 identity <$> headMay
AND n.typename = ? AND n.typename = ?
|] |]
getTficfCorpus :: CorpusId -> Cmd err Int countDocsInCorpus :: CorpusId -> Cmd err Int
getTficfCorpus cId = maybe 0 identity <$> headMay countDocsInCorpus cId = maybe 0 identity <$> headMay
<$> map (\(DPS.Only n) -> n ) <$> map (\(DPS.Only n) -> n )
<$> runPGSQuery q p <$> runPGSQuery q p
where where
...@@ -76,31 +84,21 @@ getTficfCorpus cId = maybe 0 identity <$> headMay ...@@ -76,31 +84,21 @@ getTficfCorpus cId = maybe 0 identity <$> headMay
getTficfNgrams :: UsernameMaster -> CorpusId -> ListId -> NgramsType getOccByNgrams :: UsernameMaster -> CorpusId -> NgramsType
-> Cmd err [(NgramsId, NgramsTerms, OccGlobal, OccCorpus)] -> Cmd err [TficfTerms]
getTficfNgrams u cId lId ngType = runPGSQuery queryTficf p getOccByNgrams u cId ngType = map (\(t,g,c) -> TficfTerms t g c)
<$> runPGSQuery queryTficf p
where where
p = (u, nodeTypeId NodeList, nodeTypeId NodeDocument, ngramsTypeId ngType, cId, lId) p = (u, nodeTypeId NodeDocument, ngramsTypeId ngType, cId)
queryTficf :: DPS.Query queryTficf :: DPS.Query
queryTficf = [sql| queryTficf = [sql|
-- TODO add CTE for groups WITH input(masterUsername,typenameDoc,ngramsTypeId,corpusId)
WITH input(masterUsername,typenameList,typenameDoc,ngramsTypeId,corpusId,listId) AS ((VALUES(?::"text", ?::"int4",?::"int4",?::"int4"))),
AS ((VALUES(?::"text", ? :: "int4", ?::"int4", ?::"int4",?::"int4",?::"int4"))),
-- AS ((VALUES('gargantua'::"text", 5 :: "int4", 4::"int4", 4::"int4",1018::"int4",1019::"int4"))),
list_master AS (
SELECT n.id,n.name,n.user_id from nodes n
JOIN input ON n.typename = input.typenameList
JOIN auth_user a ON a.id = n.user_id
WHERE
a.username = input.masterUsername
),
ngrams_master AS ( ngrams_master AS (
SELECT ng.id, ng.terms, SUM(nng2.weight) AS weight FROM nodes_ngrams nng SELECT ng.id, ng.terms, SUM(nng2.weight) AS weight FROM nodes_ngrams nng
JOIN list_master ON list_master.id = nng.node_id
JOIN nodes_ngrams nng2 ON nng2.ngrams_id = nng.ngrams_id JOIN nodes_ngrams nng2 ON nng2.ngrams_id = nng.ngrams_id
JOIN nodes n ON n.id = nng2.node_id JOIN nodes n ON n.id = nng2.node_id
JOIN input ON input.typenameDoc = n.typename JOIN input ON input.typenameDoc = n.typename
...@@ -114,7 +112,6 @@ GROUP BY ng.id,ng.terms ...@@ -114,7 +112,6 @@ GROUP BY ng.id,ng.terms
ngrams_user AS ( ngrams_user AS (
SELECT ng.id, ng.terms, SUM(nng2.weight) AS weight SELECT ng.id, ng.terms, SUM(nng2.weight) AS weight
FROM nodes_ngrams nng FROM nodes_ngrams nng
JOIN list_master ON list_master.id = nng.node_id
JOIN nodes_ngrams nng2 ON nng2.ngrams_id = nng.ngrams_id JOIN nodes_ngrams nng2 ON nng2.ngrams_id = nng.ngrams_id
JOIN nodes_nodes nn ON nn.node2_id = nng2.node_id JOIN nodes_nodes nn ON nn.node2_id = nng2.node_id
...@@ -129,7 +126,7 @@ GROUP BY ng.id,ng.terms ...@@ -129,7 +126,7 @@ GROUP BY ng.id,ng.terms
) )
SELECT nu.id,nu.terms,SUM(nm.weight) wm,SUM(nu.weight) wu SELECT nu.terms,SUM(nm.weight) wm,SUM(nu.weight) wu
FROM ngrams_user nu FROM ngrams_user nu
JOIN ngrams_master nm ON nm.id = nu.id JOIN ngrams_master nm ON nm.id = nu.id
WHERE WHERE
......
...@@ -28,6 +28,10 @@ data Tficf = Tficf { tficf_ngramsId :: NgramsId ...@@ -28,6 +28,10 @@ data Tficf = Tficf { tficf_ngramsId :: NgramsId
, tficf_score :: Double , tficf_score :: Double
} deriving (Show) } deriving (Show)
data Tficf' = Tficf' { tficf'_terms :: NgramsTerms
, tficf'_score :: Double
} deriving (Show)
type SupraContext = TficfContext type SupraContext = TficfContext
type InfraContext = TficfContext type InfraContext = TficfContext
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment