Commit 26fe8014 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT][Text][Metrics] TFICF.

parent 08d3adb7
...@@ -58,7 +58,7 @@ import GHC.Generics (Generic) ...@@ -58,7 +58,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Types.Node (NodeType(..)) import Gargantext.Database.Types.Node (NodeType(..))
import Gargantext.Database.Schema.Node (defaultList, HasNodeError) import Gargantext.Database.Schema.Node (defaultList, HasNodeError)
import Gargantext.Database.Schema.Ngrams (NgramsType, NgramsTypeId, ngramsTypeId, NgramsTableData'(..)) import Gargantext.Database.Schema.Ngrams (NgramsType, NgramsTypeId, ngramsTypeId, NgramsTableData(..))
import qualified Gargantext.Database.Schema.Ngrams as Ngrams import qualified Gargantext.Database.Schema.Ngrams as Ngrams
import Gargantext.Database.Schema.NodeNgram import Gargantext.Database.Schema.NodeNgram
import Gargantext.Database.Utils (Cmd) import Gargantext.Database.Utils (Cmd)
...@@ -120,10 +120,10 @@ newtype NgramsTable = NgramsTable { _ngramsTable :: [NgramsElement] } ...@@ -120,10 +120,10 @@ newtype NgramsTable = NgramsTable { _ngramsTable :: [NgramsElement] }
deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show) deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
-- | TODO Check N and Weight -- | TODO Check N and Weight
toNgramsElement :: [NgramsTableData'] -> [NgramsElement] toNgramsElement :: [NgramsTableData] -> [NgramsElement]
toNgramsElement ns = map toNgramsElement' ns toNgramsElement ns = map toNgramsElement' ns
where where
toNgramsElement' (NgramsTableData' _ p t _ lt w) = NgramsElement t lt' (round w) p' c' toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
where where
p' = case p of p' = case p of
Nothing -> Nothing Nothing -> Nothing
...@@ -132,14 +132,14 @@ toNgramsElement ns = map toNgramsElement' ns ...@@ -132,14 +132,14 @@ toNgramsElement ns = map toNgramsElement' ns
lt' = maybe (panic "API.Ngrams: listypeId") identity lt lt' = maybe (panic "API.Ngrams: listypeId") identity lt
mapParent :: Map Int Text mapParent :: Map Int Text
mapParent = fromListWith (<>) $ map (\(NgramsTableData' i _ t _ _ _) -> (i,t)) ns mapParent = fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
mapChildren :: Map Text (Set Text) mapChildren :: Map Text (Set Text)
mapChildren = mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent)) mapChildren = mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
$ fromListWith (<>) $ fromListWith (<>)
$ map (first fromJust) $ map (first fromJust)
$ filter (isJust . fst) $ filter (isJust . fst)
$ map (\(NgramsTableData' _ p t _ _ _) -> (p, Set.singleton t)) ns $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
instance Arbitrary NgramsTable where instance Arbitrary NgramsTable where
......
...@@ -18,3 +18,6 @@ module Gargantext.Core.Types.Individu ...@@ -18,3 +18,6 @@ module Gargantext.Core.Types.Individu
import Data.Text (Text) import Data.Text (Text)
type Username = Text type Username = Text
type UsernameMaster = Username
type UsernameSimple = Username
...@@ -281,7 +281,10 @@ flowList uId cId ngs = do ...@@ -281,7 +281,10 @@ flowList uId cId ngs = do
pure lId pure lId
flowListUser :: HasNodeError err => UserId -> CorpusId -> Cmd err NodeId flowListUser :: HasNodeError err => UserId -> CorpusId -> Cmd err NodeId
flowListUser uId cId = getOrMkList cId uId flowListUser uId cId = do
lid <- getOrMkList cId uId
-- is <- insertLists lId $ ngrams2list ngs
pure lid
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -319,3 +322,11 @@ insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing ( ...@@ -319,3 +322,11 @@ insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing (
| (l,(ngt, ng)) <- lngs | (l,(ngt, ng)) <- lngs
] ]
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -53,6 +53,11 @@ import Prelude (Enum, Bounded, minBound, maxBound, Functor) ...@@ -53,6 +53,11 @@ import Prelude (Enum, Bounded, minBound, maxBound, Functor)
import qualified Data.Set as DS import qualified Data.Set as DS
import qualified Database.PostgreSQL.Simple as PGS import qualified Database.PostgreSQL.Simple as PGS
type NgramsTerms = Text
type NgramsId = Int
type Size = Int
--{- --{-
data NgramsPoly id terms n = NgramsDb { ngrams_id :: id data NgramsPoly id terms n = NgramsDb { ngrams_id :: id
, ngrams_terms :: terms , ngrams_terms :: terms
...@@ -128,10 +133,6 @@ ngramsTypeId NgramsTerms = 4 ...@@ -128,10 +133,6 @@ ngramsTypeId NgramsTerms = 4
fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
fromNgramsTypeId id = lookup id $ fromList [(ngramsTypeId nt,nt) | nt <- [minBound .. maxBound] :: [NgramsType]] fromNgramsTypeId id = lookup id $ fromList [(ngramsTypeId nt,nt) | nt <- [minBound .. maxBound] :: [NgramsType]]
type NgramsTerms = Text
type NgramsId = Int
type Size = Int
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO put it in Gargantext.Text.Ngrams -- | TODO put it in Gargantext.Text.Ngrams
data Ngrams = Ngrams { _ngramsTerms :: Text data Ngrams = Ngrams { _ngramsTerms :: Text
...@@ -232,7 +233,7 @@ queryInsertNgrams = [sql| ...@@ -232,7 +233,7 @@ queryInsertNgrams = [sql|
getNgramsTableDb :: NodeType -> NgramsType getNgramsTableDb :: NodeType -> NgramsType
-> NgramsTableParamUser -> NgramsTableParamUser
-> Limit -> Offset -> Limit -> Offset
-> Cmd err [NgramsTableData'] -> Cmd err [NgramsTableData]
getNgramsTableDb nt ngrt ntp limit_ offset_ = do getNgramsTableDb nt ngrt ntp limit_ offset_ = do
...@@ -245,7 +246,7 @@ getNgramsTableDb nt ngrt ntp limit_ offset_ = do ...@@ -245,7 +246,7 @@ getNgramsTableDb nt ngrt ntp limit_ offset_ = do
listMasterId <- maybe (panic "error master list") (view node_id) <$> head <$> getListsWithParentId corpusMasterId listMasterId <- maybe (panic "error master list") (view node_id) <$> head <$> getListsWithParentId corpusMasterId
getNgramsTableData' nt ngrt ntp (NgramsTableParam listMasterId corpusMasterId) limit_ offset_ getNgramsTableData nt ngrt ntp (NgramsTableParam listMasterId corpusMasterId) limit_ offset_
data NgramsTableParam = data NgramsTableParam =
NgramsTableParam { _nt_listId :: NodeId NgramsTableParam { _nt_listId :: NodeId
...@@ -255,44 +256,24 @@ data NgramsTableParam = ...@@ -255,44 +256,24 @@ data NgramsTableParam =
type NgramsTableParamUser = NgramsTableParam type NgramsTableParamUser = NgramsTableParam
type NgramsTableParamMaster = NgramsTableParam type NgramsTableParamMaster = NgramsTableParam
data NgramsTableData = NgramsTableData { _ntd_ngrams :: Text
, _ntd_n :: Int
, _ntd_listType :: Maybe ListType
, _ntd_weight :: Double
} deriving (Show)
getNgramsTableData :: NodeType -> NgramsType
-> NgramsTableParamUser -> NgramsTableParamMaster
-> Limit -> Offset
-> Cmd err [NgramsTableData]
getNgramsTableData nodeT ngrmT (NgramsTableParam ul uc) (NgramsTableParam ml mc) limit_ offset_ =
trace ("Ngrams table params" <> show params) <$>
map (\(t,n,nt,w) -> NgramsTableData t n (fromListTypeId nt) w) <$>
runPGSQuery querySelectTableNgrams params
where
nodeTId = nodeTypeId nodeT
ngrmTId = ngramsTypeId ngrmT
params = (ul,uc,nodeTId,ngrmTId,ml,mc,nodeTId,ngrmTId,uc) :.
(limit_, offset_)
data NgramsTableData' = NgramsTableData' { _ntd2_id :: Int data NgramsTableData = NgramsTableData { _ntd_id :: Int
, _ntd2_parent_id :: Maybe Int , _ntd_parent_id :: Maybe Int
, _ntd2_terms :: Text , _ntd_terms :: Text
, _ntd2_n :: Int , _ntd_n :: Int
, _ntd2_listType :: Maybe ListType , _ntd_listType :: Maybe ListType
, _ntd2_weight :: Double , _ntd_weight :: Double
} deriving (Show) } deriving (Show)
getNgramsTableData' :: NodeType -> NgramsType getNgramsTableData :: NodeType -> NgramsType
-> NgramsTableParamUser -> NgramsTableParamMaster -> NgramsTableParamUser -> NgramsTableParamMaster
-> Limit -> Offset -> Limit -> Offset
-> Cmd err [NgramsTableData'] -> Cmd err [NgramsTableData]
getNgramsTableData' nodeT ngrmT (NgramsTableParam ul uc) (NgramsTableParam ml mc) limit_ offset_ = getNgramsTableData nodeT ngrmT (NgramsTableParam ul uc) (NgramsTableParam ml mc) limit_ offset_ =
trace ("Ngrams table params: " <> show params) <$> trace ("Ngrams table params: " <> show params) <$>
map (\(i,p,t,n,lt,w) -> NgramsTableData' i p t n (fromListTypeId lt) w) <$> map (\(i,p,t,n,lt,w) -> NgramsTableData i p t n (fromListTypeId lt) w) <$>
runPGSQuery querySelectTableNgramsTrees params runPGSQuery querySelectTableNgramsTrees params
where where
nodeTId = nodeTypeId nodeT nodeTId = nodeTypeId nodeT
......
...@@ -527,7 +527,7 @@ mkRoot uname uId = case uId > 0 of ...@@ -527,7 +527,7 @@ mkRoot uname uId = case uId > 0 of
mkCorpus :: Maybe Name -> Maybe HyperdataCorpus -> ParentId -> UserId -> Cmd err [CorpusId] mkCorpus :: Maybe Name -> Maybe HyperdataCorpus -> ParentId -> UserId -> Cmd err [CorpusId]
mkCorpus n h p u = insertNodesR [nodeCorpusW n h p u] mkCorpus n h p u = insertNodesR [nodeCorpusW n h p u]
getOrMkList :: HasNodeError err => ParentId -> UserId -> Cmd err NodeId getOrMkList :: HasNodeError err => ParentId -> UserId -> Cmd err ListId
getOrMkList pId uId = getOrMkList pId uId =
maybe (mkList' pId uId) (pure . view node_id) . headMay =<< getListsWithParentId pId maybe (mkList' pId uId) (pure . view node_id) . headMay =<< getListsWithParentId pId
where where
......
...@@ -7,38 +7,43 @@ Maintainer : team@gargantext.org ...@@ -7,38 +7,43 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
Definition of TFICF Definition of TFICF : Term Frequency - Inverse of Context Frequency
-} -}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Metrics.TFICF where module Gargantext.Text.Metrics.TFICF where
import GHC.Generics (Generic) --import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms)
import Data.Maybe (Maybe) data TficfContext n m = TficfLanguage n m | TficfCorpus n m | TficfDocument n m
import Data.Text (Text) deriving (Show)
import Text.Show (Show())
-- import Gargantext.Types data Tficf = Tficf { tficf_ngramsId :: NgramsId
import Gargantext.Prelude , tficf_ngramsTerms :: NgramsTerms
, tficf_score :: Double
}
data Context = Corpus | Document type SupraContext = TficfContext
deriving (Show, Generic) type InfraContext = TficfContext
data TFICF = TFICF { _tficfTerms :: Text -- | TFICF is a generalization of TFIDF
, _tficfContext1 :: Context -- https://en.wikipedia.org/wiki/Tf%E2%80%93idf
, _tficfContext2 :: Context tficf :: InfraContext Double Double -> SupraContext Double Double -> Double
, _tficfScore :: Maybe Double tficf (TficfCorpus c c') (TficfLanguage l l') = tficf' c c' l l'
} deriving (Show, Generic) tficf (TficfDocument d d')(TficfCorpus c c') = tficf' d d' c c'
tficf _ _ = panic "Not in definition"
tficf' :: Double -> Double -> Double -> Double -> Double
tficf' c c' l l'
| c <= c' && l < l' = (c/c') / log (l/l')
| otherwise = panic "Frequency impossible"
--tfidf :: Text -> TFICF
--tfidf txt = TFICF txt Document Corpus score
-- where
-- score = Nothing
tficf_example :: [(Double,Double,Double,Double)]
tficf_example = undefined
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment