diff --git a/src/Gargantext/API/Ngrams.hs b/src/Gargantext/API/Ngrams.hs index d95194c52ae89ea659c4f224e5d1536eb0405da8..98769d375c2d57ed1e1158664637dfbe7c3a2007 100644 --- a/src/Gargantext/API/Ngrams.hs +++ b/src/Gargantext/API/Ngrams.hs @@ -58,7 +58,7 @@ import GHC.Generics (Generic) import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Database.Types.Node (NodeType(..)) import Gargantext.Database.Schema.Node (defaultList, HasNodeError) -import Gargantext.Database.Schema.Ngrams (NgramsType, NgramsTypeId, ngramsTypeId, NgramsTableData'(..)) +import Gargantext.Database.Schema.Ngrams (NgramsType, NgramsTypeId, ngramsTypeId, NgramsTableData(..)) import qualified Gargantext.Database.Schema.Ngrams as Ngrams import Gargantext.Database.Schema.NodeNgram import Gargantext.Database.Utils (Cmd) @@ -120,10 +120,10 @@ newtype NgramsTable = NgramsTable { _ngramsTable :: [NgramsElement] } deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show) -- | TODO Check N and Weight -toNgramsElement :: [NgramsTableData'] -> [NgramsElement] +toNgramsElement :: [NgramsTableData] -> [NgramsElement] toNgramsElement ns = map toNgramsElement' ns where - toNgramsElement' (NgramsTableData' _ p t _ lt w) = NgramsElement t lt' (round w) p' c' + toNgramsElement' (NgramsTableData _ p t _ lt w) = NgramsElement t lt' (round w) p' c' where p' = case p of Nothing -> Nothing @@ -132,14 +132,14 @@ toNgramsElement ns = map toNgramsElement' ns lt' = maybe (panic "API.Ngrams: listypeId") identity lt mapParent :: Map Int Text - mapParent = fromListWith (<>) $ map (\(NgramsTableData' i _ t _ _ _) -> (i,t)) ns + mapParent = fromListWith (<>) $ map (\(NgramsTableData i _ t _ _ _) -> (i,t)) ns mapChildren :: Map Text (Set Text) mapChildren = mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent)) $ fromListWith (<>) $ map (first fromJust) $ filter (isJust . fst) - $ map (\(NgramsTableData' _ p t _ _ _) -> (p, Set.singleton t)) ns + $ map (\(NgramsTableData _ p t _ _ _) -> (p, Set.singleton t)) ns instance Arbitrary NgramsTable where diff --git a/src/Gargantext/Core/Types/Individu.hs b/src/Gargantext/Core/Types/Individu.hs index cadfe6296a6bf02339aeab5b406d6dbc0c1cc9a0..cf2acf460be845b26b1b7957ea043295a67067ea 100644 --- a/src/Gargantext/Core/Types/Individu.hs +++ b/src/Gargantext/Core/Types/Individu.hs @@ -18,3 +18,6 @@ module Gargantext.Core.Types.Individu import Data.Text (Text) type Username = Text +type UsernameMaster = Username +type UsernameSimple = Username + diff --git a/src/Gargantext/Database/Flow.hs b/src/Gargantext/Database/Flow.hs index 08f0fdff3f93d42516108136bcde31a0a1459524..86c439e7da5f7415bd5a39d62a627158b7ae757f 100644 --- a/src/Gargantext/Database/Flow.hs +++ b/src/Gargantext/Database/Flow.hs @@ -281,7 +281,10 @@ flowList uId cId ngs = do pure lId flowListUser :: HasNodeError err => UserId -> CorpusId -> Cmd err NodeId -flowListUser uId cId = getOrMkList cId uId +flowListUser uId cId = do + lid <- getOrMkList cId uId + -- is <- insertLists lId $ ngrams2list ngs + pure lid ------------------------------------------------------------------------ @@ -319,3 +322,11 @@ insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing ( | (l,(ngt, ng)) <- lngs ] ------------------------------------------------------------------------ + + + + + + + + diff --git a/src/Gargantext/Database/Schema/Ngrams.hs b/src/Gargantext/Database/Schema/Ngrams.hs index d05e67fa739d937968f954a0a5cdba14fc44d96f..3119f585289c8208cc81ca36c6175495108995bb 100644 --- a/src/Gargantext/Database/Schema/Ngrams.hs +++ b/src/Gargantext/Database/Schema/Ngrams.hs @@ -53,6 +53,11 @@ import Prelude (Enum, Bounded, minBound, maxBound, Functor) import qualified Data.Set as DS import qualified Database.PostgreSQL.Simple as PGS + +type NgramsTerms = Text +type NgramsId = Int +type Size = Int + --{- data NgramsPoly id terms n = NgramsDb { ngrams_id :: id , ngrams_terms :: terms @@ -128,10 +133,6 @@ ngramsTypeId NgramsTerms = 4 fromNgramsTypeId :: NgramsTypeId -> Maybe NgramsType fromNgramsTypeId id = lookup id $ fromList [(ngramsTypeId nt,nt) | nt <- [minBound .. maxBound] :: [NgramsType]] -type NgramsTerms = Text -type NgramsId = Int -type Size = Int - ------------------------------------------------------------------------ -- | TODO put it in Gargantext.Text.Ngrams data Ngrams = Ngrams { _ngramsTerms :: Text @@ -232,7 +233,7 @@ queryInsertNgrams = [sql| getNgramsTableDb :: NodeType -> NgramsType -> NgramsTableParamUser -> Limit -> Offset - -> Cmd err [NgramsTableData'] + -> Cmd err [NgramsTableData] getNgramsTableDb nt ngrt ntp limit_ offset_ = do @@ -245,7 +246,7 @@ getNgramsTableDb nt ngrt ntp limit_ offset_ = do listMasterId <- maybe (panic "error master list") (view node_id) <$> head <$> getListsWithParentId corpusMasterId - getNgramsTableData' nt ngrt ntp (NgramsTableParam listMasterId corpusMasterId) limit_ offset_ + getNgramsTableData nt ngrt ntp (NgramsTableParam listMasterId corpusMasterId) limit_ offset_ data NgramsTableParam = NgramsTableParam { _nt_listId :: NodeId @@ -255,44 +256,24 @@ data NgramsTableParam = type NgramsTableParamUser = NgramsTableParam type NgramsTableParamMaster = NgramsTableParam -data NgramsTableData = NgramsTableData { _ntd_ngrams :: Text - , _ntd_n :: Int - , _ntd_listType :: Maybe ListType - , _ntd_weight :: Double - } deriving (Show) - -getNgramsTableData :: NodeType -> NgramsType - -> NgramsTableParamUser -> NgramsTableParamMaster - -> Limit -> Offset - -> Cmd err [NgramsTableData] -getNgramsTableData nodeT ngrmT (NgramsTableParam ul uc) (NgramsTableParam ml mc) limit_ offset_ = - trace ("Ngrams table params" <> show params) <$> - map (\(t,n,nt,w) -> NgramsTableData t n (fromListTypeId nt) w) <$> - runPGSQuery querySelectTableNgrams params - where - nodeTId = nodeTypeId nodeT - ngrmTId = ngramsTypeId ngrmT - params = (ul,uc,nodeTId,ngrmTId,ml,mc,nodeTId,ngrmTId,uc) :. - (limit_, offset_) - -data NgramsTableData' = NgramsTableData' { _ntd2_id :: Int - , _ntd2_parent_id :: Maybe Int - , _ntd2_terms :: Text - , _ntd2_n :: Int - , _ntd2_listType :: Maybe ListType - , _ntd2_weight :: Double +data NgramsTableData = NgramsTableData { _ntd_id :: Int + , _ntd_parent_id :: Maybe Int + , _ntd_terms :: Text + , _ntd_n :: Int + , _ntd_listType :: Maybe ListType + , _ntd_weight :: Double } deriving (Show) -getNgramsTableData' :: NodeType -> NgramsType +getNgramsTableData :: NodeType -> NgramsType -> NgramsTableParamUser -> NgramsTableParamMaster -> Limit -> Offset - -> Cmd err [NgramsTableData'] -getNgramsTableData' nodeT ngrmT (NgramsTableParam ul uc) (NgramsTableParam ml mc) limit_ offset_ = + -> Cmd err [NgramsTableData] +getNgramsTableData nodeT ngrmT (NgramsTableParam ul uc) (NgramsTableParam ml mc) limit_ offset_ = trace ("Ngrams table params: " <> show params) <$> - map (\(i,p,t,n,lt,w) -> NgramsTableData' i p t n (fromListTypeId lt) w) <$> + map (\(i,p,t,n,lt,w) -> NgramsTableData i p t n (fromListTypeId lt) w) <$> runPGSQuery querySelectTableNgramsTrees params where nodeTId = nodeTypeId nodeT diff --git a/src/Gargantext/Database/Schema/Node.hs b/src/Gargantext/Database/Schema/Node.hs index a491c654578951f8a23e99f646631d04a2fede7c..e2cd6d23523147ac537a3bf5a3bef4f167ac98fd 100644 --- a/src/Gargantext/Database/Schema/Node.hs +++ b/src/Gargantext/Database/Schema/Node.hs @@ -527,7 +527,7 @@ mkRoot uname uId = case uId > 0 of mkCorpus :: Maybe Name -> Maybe HyperdataCorpus -> ParentId -> UserId -> Cmd err [CorpusId] mkCorpus n h p u = insertNodesR [nodeCorpusW n h p u] -getOrMkList :: HasNodeError err => ParentId -> UserId -> Cmd err NodeId +getOrMkList :: HasNodeError err => ParentId -> UserId -> Cmd err ListId getOrMkList pId uId = maybe (mkList' pId uId) (pure . view node_id) . headMay =<< getListsWithParentId pId where diff --git a/src/Gargantext/Text/Metrics/TFICF.hs b/src/Gargantext/Text/Metrics/TFICF.hs index 218f6881266566b5fca0e4f18377cba813d9c269..a1ac37e63666e9a6f55aa6ea38ab2a598a6e5428 100644 --- a/src/Gargantext/Text/Metrics/TFICF.hs +++ b/src/Gargantext/Text/Metrics/TFICF.hs @@ -7,38 +7,43 @@ Maintainer : team@gargantext.org Stability : experimental Portability : POSIX -Definition of TFICF +Definition of TFICF : Term Frequency - Inverse of Context Frequency -} -{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE NoImplicitPrelude #-} - +{-# LANGUAGE OverloadedStrings #-} module Gargantext.Text.Metrics.TFICF where -import GHC.Generics (Generic) +--import Data.Text (Text) +import Gargantext.Prelude +import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms) -import Data.Maybe (Maybe) -import Data.Text (Text) -import Text.Show (Show()) +data TficfContext n m = TficfLanguage n m | TficfCorpus n m | TficfDocument n m + deriving (Show) --- import Gargantext.Types -import Gargantext.Prelude +data Tficf = Tficf { tficf_ngramsId :: NgramsId + , tficf_ngramsTerms :: NgramsTerms + , tficf_score :: Double +} -data Context = Corpus | Document - deriving (Show, Generic) +type SupraContext = TficfContext +type InfraContext = TficfContext -data TFICF = TFICF { _tficfTerms :: Text - , _tficfContext1 :: Context - , _tficfContext2 :: Context - , _tficfScore :: Maybe Double - } deriving (Show, Generic) +-- | TFICF is a generalization of TFIDF +-- https://en.wikipedia.org/wiki/Tf%E2%80%93idf +tficf :: InfraContext Double Double -> SupraContext Double Double -> Double +tficf (TficfCorpus c c') (TficfLanguage l l') = tficf' c c' l l' +tficf (TficfDocument d d')(TficfCorpus c c') = tficf' d d' c c' +tficf _ _ = panic "Not in definition" +tficf' :: Double -> Double -> Double -> Double -> Double +tficf' c c' l l' + | c <= c' && l < l' = (c/c') / log (l/l') + | otherwise = panic "Frequency impossible" ---tfidf :: Text -> TFICF ---tfidf txt = TFICF txt Document Corpus score --- where --- score = Nothing +tficf_example :: [(Double,Double,Double,Double)] +tficf_example = undefined