Commit dec1cb78 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FLOW/TEXT/DB] TFICF Done. Finally, need to connect all the components of the Flow now.

parent 42cba88f
......@@ -24,22 +24,41 @@ import Data.Set (Set)
import Data.Text (Text)
import Data.Tuple.Extra (second, swap)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Prelude
import Gargantext.Text.Metrics.TFICF -- (tficf)
import Gargantext.Text.Terms.Mono.Stem (stem)
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Database.PostgreSQL.Simple as DPS
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
ngramsGroup :: Lang -> Int -> Text -> Text
ngramsGroup l n = Text.intercalate " "
. map (stem l)
. take n
. List.sort
. Text.splitOn " "
. Text.replace "-" " "
sortTficf :: (Map Text (Double, Set Text))
-> [(Double, Set Text)]
sortTficf = List.reverse . List.sortOn fst . elems
getTficf :: UserCorpusId -> MasterCorpusId -> (Text -> Text)
getTficf' :: UserCorpusId -> MasterCorpusId -> (Text -> Text)
-> Cmd err (Map Text (Double, Set Text))
getTficf u m f = do
getTficf' u m f = do
u' <- getNodesByNgramsUser u
m' <- getNodesByNgramsMaster u m
......@@ -54,8 +73,8 @@ type Infra = Context
toTficfData :: Infra -> Supra
-> Map Text (Double, Set Text)
toTficfData (ti, mi) (ts, ms) =
fromList [ (t, ( tficf (TficfInfra ti n)
(TficfSupra ts $ maybe 0 fst $ Map.lookup t ms)
fromList [ (t, ( tficf (TficfInfra n ti)
(TficfSupra (maybe 0 fst $ Map.lookup t ms) ts)
, ns
)
)
......@@ -63,7 +82,6 @@ toTficfData (ti, mi) (ts, ms) =
]
-- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs)
countNodesByNgramsWith :: (Text -> Text)
......@@ -72,7 +90,8 @@ countNodesByNgramsWith :: (Text -> Text)
countNodesByNgramsWith f m = (total, m')
where
total = fromIntegral $ Set.size $ Set.unions $ elems m
m' = Map.map (swap . second (fromIntegral . Set.size)) $ groupNodesByNgramsWith f m
m' = Map.map ( swap . second (fromIntegral . Set.size))
$ groupNodesByNgramsWith f m
groupNodesByNgramsWith :: (Text -> Text)
......@@ -157,10 +176,4 @@ SELECT nng.node_id, ng.id, ng.terms FROM nodes_ngrams nng
SELECT m.node_id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|]
......@@ -173,13 +173,14 @@ data Action = Del | Add
type NgramsParent = Text
type NgramsChild = Text
{-
ngramsGroup :: Action -> ListId -> [(NgramsTypeId, NgramsParent, NgramsChild)] -> Cmd err ()
ngramsGroup _ _ [] = pure ()
ngramsGroup a lid input = void $ trace (show input) $ execPGSQuery (ngramsGroupQuery a) (DPS.Only $ Values fields input')
where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4","text","text"]
input' = map (\(ntpid,p,c) -> (lid, nodeTypeId NodeList, userMaster, ntpid, p,c)) input
-}
ngramsGroupQuery :: Action -> DPS.Query
ngramsGroupQuery a = case a of
......@@ -290,6 +291,8 @@ data NodeNgramsUpdate = NodeNgramsUpdate
-- TODO wrap these updates in a transaction.
-- TODO-ACCESS:
-- * check userId CanUpdateNgrams userListId
{-
updateNodeNgrams :: NodeNgramsUpdate -> Cmd err ()
updateNodeNgrams nnu = do
updateNodeNgrams' userListId $ _nnu_lists_update nnu
......@@ -299,3 +302,4 @@ updateNodeNgrams nnu = do
ngramsGroup Add userListId $ _nnu_add_children nnu
where
userListId = _nnu_user_list_id nnu
-}
......@@ -48,6 +48,7 @@ type InfraContext = TficfContext
tficf :: InfraContext Double Double -> SupraContext Double Double -> Double
tficf (TficfCorpus c c') (TficfLanguage l l') = tficf' c c' l l'
tficf (TficfDocument d d')(TficfCorpus c c') = tficf' d d' c c'
tficf (TficfInfra d d')(TficfSupra c c') = tficf' d d' c c'
tficf _ _ = panic "Not in definition"
tficf' :: Double -> Double -> Double -> Double -> Double
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment