Commit dec1cb78 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FLOW/TEXT/DB] TFICF Done. Finally, need to connect all the components of the Flow now.

parent 42cba88f
...@@ -24,22 +24,41 @@ import Data.Set (Set) ...@@ -24,22 +24,41 @@ import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple.Extra (second, swap) import Data.Tuple.Extra (second, swap)
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Config (nodeTypeId) import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..)) import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId) import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Utils (Cmd, runPGSQuery) import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Metrics.TFICF -- (tficf) import Gargantext.Text.Metrics.TFICF -- (tficf)
import Gargantext.Text.Terms.Mono.Stem (stem)
import qualified Data.List as List
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Database.PostgreSQL.Simple as DPS import qualified Database.PostgreSQL.Simple as DPS
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
ngramsGroup :: Lang -> Int -> Text -> Text
ngramsGroup l n = Text.intercalate " "
. map (stem l)
. take n
. List.sort
. Text.splitOn " "
. Text.replace "-" " "
sortTficf :: (Map Text (Double, Set Text))
-> [(Double, Set Text)]
sortTficf = List.reverse . List.sortOn fst . elems
getTficf :: UserCorpusId -> MasterCorpusId -> (Text -> Text)
getTficf' :: UserCorpusId -> MasterCorpusId -> (Text -> Text)
-> Cmd err (Map Text (Double, Set Text)) -> Cmd err (Map Text (Double, Set Text))
getTficf u m f = do getTficf' u m f = do
u' <- getNodesByNgramsUser u u' <- getNodesByNgramsUser u
m' <- getNodesByNgramsMaster u m m' <- getNodesByNgramsMaster u m
...@@ -54,8 +73,8 @@ type Infra = Context ...@@ -54,8 +73,8 @@ type Infra = Context
toTficfData :: Infra -> Supra toTficfData :: Infra -> Supra
-> Map Text (Double, Set Text) -> Map Text (Double, Set Text)
toTficfData (ti, mi) (ts, ms) = toTficfData (ti, mi) (ts, ms) =
fromList [ (t, ( tficf (TficfInfra ti n) fromList [ (t, ( tficf (TficfInfra n ti)
(TficfSupra ts $ maybe 0 fst $ Map.lookup t ms) (TficfSupra (maybe 0 fst $ Map.lookup t ms) ts)
, ns , ns
) )
) )
...@@ -63,7 +82,6 @@ toTficfData (ti, mi) (ts, ms) = ...@@ -63,7 +82,6 @@ toTficfData (ti, mi) (ts, ms) =
] ]
-- | fst is size of Supra Corpus -- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs) -- snd is Texts and size of Occurrences (different docs)
countNodesByNgramsWith :: (Text -> Text) countNodesByNgramsWith :: (Text -> Text)
...@@ -72,7 +90,8 @@ countNodesByNgramsWith :: (Text -> Text) ...@@ -72,7 +90,8 @@ countNodesByNgramsWith :: (Text -> Text)
countNodesByNgramsWith f m = (total, m') countNodesByNgramsWith f m = (total, m')
where where
total = fromIntegral $ Set.size $ Set.unions $ elems m total = fromIntegral $ Set.size $ Set.unions $ elems m
m' = Map.map (swap . second (fromIntegral . Set.size)) $ groupNodesByNgramsWith f m m' = Map.map ( swap . second (fromIntegral . Set.size))
$ groupNodesByNgramsWith f m
groupNodesByNgramsWith :: (Text -> Text) groupNodesByNgramsWith :: (Text -> Text)
...@@ -157,10 +176,4 @@ SELECT nng.node_id, ng.id, ng.terms FROM nodes_ngrams nng ...@@ -157,10 +176,4 @@ SELECT nng.node_id, ng.id, ng.terms FROM nodes_ngrams nng
SELECT m.node_id, m.terms FROM nodesByNgramsMaster m SELECT m.node_id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|] |]
...@@ -173,13 +173,14 @@ data Action = Del | Add ...@@ -173,13 +173,14 @@ data Action = Del | Add
type NgramsParent = Text type NgramsParent = Text
type NgramsChild = Text type NgramsChild = Text
{-
ngramsGroup :: Action -> ListId -> [(NgramsTypeId, NgramsParent, NgramsChild)] -> Cmd err () ngramsGroup :: Action -> ListId -> [(NgramsTypeId, NgramsParent, NgramsChild)] -> Cmd err ()
ngramsGroup _ _ [] = pure () ngramsGroup _ _ [] = pure ()
ngramsGroup a lid input = void $ trace (show input) $ execPGSQuery (ngramsGroupQuery a) (DPS.Only $ Values fields input') ngramsGroup a lid input = void $ trace (show input) $ execPGSQuery (ngramsGroupQuery a) (DPS.Only $ Values fields input')
where where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4","text","text"] fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4","text","text"]
input' = map (\(ntpid,p,c) -> (lid, nodeTypeId NodeList, userMaster, ntpid, p,c)) input input' = map (\(ntpid,p,c) -> (lid, nodeTypeId NodeList, userMaster, ntpid, p,c)) input
-}
ngramsGroupQuery :: Action -> DPS.Query ngramsGroupQuery :: Action -> DPS.Query
ngramsGroupQuery a = case a of ngramsGroupQuery a = case a of
...@@ -290,6 +291,8 @@ data NodeNgramsUpdate = NodeNgramsUpdate ...@@ -290,6 +291,8 @@ data NodeNgramsUpdate = NodeNgramsUpdate
-- TODO wrap these updates in a transaction. -- TODO wrap these updates in a transaction.
-- TODO-ACCESS: -- TODO-ACCESS:
-- * check userId CanUpdateNgrams userListId -- * check userId CanUpdateNgrams userListId
{-
updateNodeNgrams :: NodeNgramsUpdate -> Cmd err () updateNodeNgrams :: NodeNgramsUpdate -> Cmd err ()
updateNodeNgrams nnu = do updateNodeNgrams nnu = do
updateNodeNgrams' userListId $ _nnu_lists_update nnu updateNodeNgrams' userListId $ _nnu_lists_update nnu
...@@ -299,3 +302,4 @@ updateNodeNgrams nnu = do ...@@ -299,3 +302,4 @@ updateNodeNgrams nnu = do
ngramsGroup Add userListId $ _nnu_add_children nnu ngramsGroup Add userListId $ _nnu_add_children nnu
where where
userListId = _nnu_user_list_id nnu userListId = _nnu_user_list_id nnu
-}
...@@ -48,6 +48,7 @@ type InfraContext = TficfContext ...@@ -48,6 +48,7 @@ type InfraContext = TficfContext
tficf :: InfraContext Double Double -> SupraContext Double Double -> Double tficf :: InfraContext Double Double -> SupraContext Double Double -> Double
tficf (TficfCorpus c c') (TficfLanguage l l') = tficf' c c' l l' tficf (TficfCorpus c c') (TficfLanguage l l') = tficf' c c' l l'
tficf (TficfDocument d d')(TficfCorpus c c') = tficf' d d' c c' tficf (TficfDocument d d')(TficfCorpus c c') = tficf' d d' c c'
tficf (TficfInfra d d')(TficfSupra c c') = tficf' d d' c c'
tficf _ _ = panic "Not in definition" tficf _ _ = panic "Not in definition"
tficf' :: Double -> Double -> Double -> Double -> Double tficf' :: Double -> Double -> Double -> Double -> Double
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment