Commit 26fe8014 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT][Text][Metrics] TFICF.

parent 08d3adb7
......@@ -58,7 +58,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Types.Node (NodeType(..))
import Gargantext.Database.Schema.Node (defaultList, HasNodeError)
import Gargantext.Database.Schema.Ngrams (NgramsType, NgramsTypeId, ngramsTypeId, NgramsTableData'(..))
import Gargantext.Database.Schema.Ngrams (NgramsType, NgramsTypeId, ngramsTypeId, NgramsTableData(..))
import qualified Gargantext.Database.Schema.Ngrams as Ngrams
import Gargantext.Database.Schema.NodeNgram
import Gargantext.Database.Utils (Cmd)
......@@ -120,10 +120,10 @@ newtype NgramsTable = NgramsTable { _ngramsTable :: [NgramsElement] }
deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
-- | TODO Check N and Weight
toNgramsElement :: [NgramsTableData'] -> [NgramsElement]
toNgramsElement :: [NgramsTableData] -> [NgramsElement]
toNgramsElement ns = map toNgramsElement' ns
where
toNgramsElement' (NgramsTableData' _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
where
p' = case p of
Nothing -> Nothing
......@@ -132,14 +132,14 @@ toNgramsElement ns = map toNgramsElement' ns
lt' = maybe (panic "API.Ngrams: listypeId") identity lt
mapParent :: Map Int Text
mapParent = fromListWith (<>) $ map (\(NgramsTableData' i _ t _ _ _) -> (i,t)) ns
mapParent = fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns
mapChildren :: Map Text (Set Text)
mapChildren = mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
$ fromListWith (<>)
$ map (first fromJust)
$ filter (isJust . fst)
$ map (\(NgramsTableData' _ p t _ _ _) -> (p, Set.singleton t)) ns
$ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns
instance Arbitrary NgramsTable where
......
......@@ -18,3 +18,6 @@ module Gargantext.Core.Types.Individu
import Data.Text (Text)
type Username = Text
type UsernameMaster = Username
type UsernameSimple = Username
......@@ -281,7 +281,10 @@ flowList uId cId ngs = do
pure lId
flowListUser :: HasNodeError err => UserId -> CorpusId -> Cmd err NodeId
flowListUser uId cId = getOrMkList cId uId
flowListUser uId cId = do
lid <- getOrMkList cId uId
-- is <- insertLists lId $ ngrams2list ngs
pure lid
------------------------------------------------------------------------
......@@ -319,3 +322,11 @@ insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing (
| (l,(ngt, ng)) <- lngs
]
------------------------------------------------------------------------
......@@ -53,6 +53,11 @@ import Prelude (Enum, Bounded, minBound, maxBound, Functor)
import qualified Data.Set as DS
import qualified Database.PostgreSQL.Simple as PGS
type NgramsTerms = Text
type NgramsId = Int
type Size = Int
--{-
data NgramsPoly id terms n = NgramsDb { ngrams_id :: id
, ngrams_terms :: terms
......@@ -128,10 +133,6 @@ ngramsTypeId NgramsTerms = 4
fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType
fromNgramsTypeId id = lookup id $ fromList [(ngramsTypeId nt,nt) | nt <- [minBound .. maxBound] :: [NgramsType]]
type NgramsTerms = Text
type NgramsId = Int
type Size = Int
------------------------------------------------------------------------
-- | TODO put it in Gargantext.Text.Ngrams
data Ngrams = Ngrams { _ngramsTerms :: Text
......@@ -232,7 +233,7 @@ queryInsertNgrams = [sql|
getNgramsTableDb :: NodeType -> NgramsType
-> NgramsTableParamUser
-> Limit -> Offset
-> Cmd err [NgramsTableData']
-> Cmd err [NgramsTableData]
getNgramsTableDb nt ngrt ntp limit_ offset_ = do
......@@ -245,7 +246,7 @@ getNgramsTableDb nt ngrt ntp limit_ offset_ = do
listMasterId <- maybe (panic "error master list") (view node_id) <$> head <$> getListsWithParentId corpusMasterId
getNgramsTableData' nt ngrt ntp (NgramsTableParam listMasterId corpusMasterId) limit_ offset_
getNgramsTableData nt ngrt ntp (NgramsTableParam listMasterId corpusMasterId) limit_ offset_
data NgramsTableParam =
NgramsTableParam { _nt_listId :: NodeId
......@@ -255,44 +256,24 @@ data NgramsTableParam =
type NgramsTableParamUser = NgramsTableParam
type NgramsTableParamMaster = NgramsTableParam
data NgramsTableData = NgramsTableData { _ntd_ngrams :: Text
, _ntd_n :: Int
, _ntd_listType :: Maybe ListType
, _ntd_weight :: Double
} deriving (Show)
getNgramsTableData :: NodeType -> NgramsType
-> NgramsTableParamUser -> NgramsTableParamMaster
-> Limit -> Offset
-> Cmd err [NgramsTableData]
getNgramsTableData nodeT ngrmT (NgramsTableParam ul uc) (NgramsTableParam ml mc) limit_ offset_ =
trace ("Ngrams table params" <> show params) <$>
map (\(t,n,nt,w) -> NgramsTableData t n (fromListTypeId nt) w) <$>
runPGSQuery querySelectTableNgrams params
where
nodeTId = nodeTypeId nodeT
ngrmTId = ngramsTypeId ngrmT
params = (ul,uc,nodeTId,ngrmTId,ml,mc,nodeTId,ngrmTId,uc) :.
(limit_, offset_)
data NgramsTableData' = NgramsTableData' { _ntd2_id :: Int
, _ntd2_parent_id :: Maybe Int
, _ntd2_terms :: Text
, _ntd2_n :: Int
, _ntd2_listType :: Maybe ListType
, _ntd2_weight :: Double
data NgramsTableData = NgramsTableData { _ntd_id :: Int
, _ntd_parent_id :: Maybe Int
, _ntd_terms :: Text
, _ntd_n :: Int
, _ntd_listType :: Maybe ListType
, _ntd_weight :: Double
} deriving (Show)
getNgramsTableData' :: NodeType -> NgramsType
getNgramsTableData :: NodeType -> NgramsType
-> NgramsTableParamUser -> NgramsTableParamMaster
-> Limit -> Offset
-> Cmd err [NgramsTableData']
getNgramsTableData' nodeT ngrmT (NgramsTableParam ul uc) (NgramsTableParam ml mc) limit_ offset_ =
-> Cmd err [NgramsTableData]
getNgramsTableData nodeT ngrmT (NgramsTableParam ul uc) (NgramsTableParam ml mc) limit_ offset_ =
trace ("Ngrams table params: " <> show params) <$>
map (\(i,p,t,n,lt,w) -> NgramsTableData' i p t n (fromListTypeId lt) w) <$>
map (\(i,p,t,n,lt,w) -> NgramsTableData i p t n (fromListTypeId lt) w) <$>
runPGSQuery querySelectTableNgramsTrees params
where
nodeTId = nodeTypeId nodeT
......
......@@ -527,7 +527,7 @@ mkRoot uname uId = case uId > 0 of
mkCorpus :: Maybe Name -> Maybe HyperdataCorpus -> ParentId -> UserId -> Cmd err [CorpusId]
mkCorpus n h p u = insertNodesR [nodeCorpusW n h p u]
getOrMkList :: HasNodeError err => ParentId -> UserId -> Cmd err NodeId
getOrMkList :: HasNodeError err => ParentId -> UserId -> Cmd err ListId
getOrMkList pId uId =
maybe (mkList' pId uId) (pure . view node_id) . headMay =<< getListsWithParentId pId
where
......
......@@ -7,38 +7,43 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Definition of TFICF
Definition of TFICF : Term Frequency - Inverse of Context Frequency
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Metrics.TFICF where
import GHC.Generics (Generic)
--import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms)
import Data.Maybe (Maybe)
import Data.Text (Text)
import Text.Show (Show())
data TficfContext n m = TficfLanguage n m | TficfCorpus n m | TficfDocument n m
deriving (Show)
-- import Gargantext.Types
import Gargantext.Prelude
data Tficf = Tficf { tficf_ngramsId :: NgramsId
, tficf_ngramsTerms :: NgramsTerms
, tficf_score :: Double
}
data Context = Corpus | Document
deriving (Show, Generic)
type SupraContext = TficfContext
type InfraContext = TficfContext
data TFICF = TFICF { _tficfTerms :: Text
, _tficfContext1 :: Context
, _tficfContext2 :: Context
, _tficfScore :: Maybe Double
} deriving (Show, Generic)
-- | TFICF is a generalization of TFIDF
-- https://en.wikipedia.org/wiki/Tf%E2%80%93idf
tficf :: InfraContext Double Double -> SupraContext Double Double -> Double
tficf (TficfCorpus c c') (TficfLanguage l l') = tficf' c c' l l'
tficf (TficfDocument d d')(TficfCorpus c c') = tficf' d d' c c'
tficf _ _ = panic "Not in definition"
tficf' :: Double -> Double -> Double -> Double -> Double
tficf' c c' l l'
| c <= c' && l < l' = (c/c') / log (l/l')
| otherwise = panic "Frequency impossible"
--tfidf :: Text -> TFICF
--tfidf txt = TFICF txt Document Corpus score
-- where
-- score = Nothing
tficf_example :: [(Double,Double,Double,Double)]
tficf_example = undefined
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment