Commit 42e3688f authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FLOW] adding TFICF fun to flow.

parent dec1cb78
...@@ -47,6 +47,7 @@ import Gargantext.Database.Flow.Utils (insertToNodeNgrams) ...@@ -47,6 +47,7 @@ import Gargantext.Database.Flow.Utils (insertToNodeNgrams)
import Gargantext.Text.Terms (extractTerms) import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Metrics.TFICF (Tficf(..)) import Gargantext.Text.Metrics.TFICF (Tficf(..))
import qualified Gargantext.Database.Node.Document.Add as Doc (add) import qualified Gargantext.Database.Node.Document.Add as Doc (add)
import Gargantext.Database.Metrics.NgramsByNode (getTficf', sortTficf, ngramsGroup)
import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..)) import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Root (getRoot) import Gargantext.Database.Root (getRoot)
import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId) import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
...@@ -91,7 +92,13 @@ flowCorpus userName ff fp corpusName = do ...@@ -91,7 +92,13 @@ flowCorpus userName ff fp corpusName = do
_ <- Doc.add userCorpusId $ concat ids _ <- Doc.add userCorpusId $ concat ids
-- User List Flow -- User List Flow
-- ngs <- getNgramsElementsWithParentNodeId masterCorpusId let masterCorpusId = 2
-- /!\ this extract NgramsTerms Only
_ngs <- sortTficf <$> getTficf' userCorpusId masterCorpusId (ngramsGroup EN 2)
-- TODO getNgramsElement of NgramsType...
--ngs <- getNgramsElementsWithParentNodeId masterCorpusId
--_masterListId <- flowList masterUserId masterCorpusId ngs --_masterListId <- flowList masterUserId masterCorpusId ngs
--_userListId <- flowListUser userId userCorpusId ngs 100 --_userListId <- flowListUser userId userCorpusId ngs 100
...@@ -131,8 +138,6 @@ insertMasterDocs hs = do ...@@ -131,8 +138,6 @@ insertMasterDocs hs = do
getUserCorpusNgrams :: FlowCmdM env ServantErr m getUserCorpusNgrams :: FlowCmdM env ServantErr m
=> CorpusId -> m [Ngrams] => CorpusId -> m [Ngrams]
getUserCorpusNgrams = undefined getUserCorpusNgrams = undefined
...@@ -140,11 +145,6 @@ getUserCorpusNgrams = undefined ...@@ -140,11 +145,6 @@ getUserCorpusNgrams = undefined
type CorpusName = Text type CorpusName = Text
getOrMkRootWithCorpus :: HasNodeError err getOrMkRootWithCorpus :: HasNodeError err
...@@ -252,7 +252,7 @@ extractNgramsT' doc = do ...@@ -252,7 +252,7 @@ extractNgramsT' doc = do
<> [(a', DM.singleton Authors 1) | a' <- authors ] <> [(a', DM.singleton Authors 1) | a' <- authors ]
<> [(t', DM.singleton NgramsTerms 1) | t' <- terms' ] <> [(t', DM.singleton NgramsTerms 1) | t' <- terms' ]
--{-
filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int) filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int)
-> Map Ngrams (Map NgramsType Int) -> Map Ngrams (Map NgramsType Int)
filterNgramsT s ms = DM.fromList $ map (\a -> filter' s a) $ DM.toList ms filterNgramsT s ms = DM.fromList $ map (\a -> filter' s a) $ DM.toList ms
...@@ -260,7 +260,7 @@ filterNgramsT s ms = DM.fromList $ map (\a -> filter' s a) $ DM.toList ms ...@@ -260,7 +260,7 @@ filterNgramsT s ms = DM.fromList $ map (\a -> filter' s a) $ DM.toList ms
filter' s' (ng@(Ngrams t n),y) = case (Text.length t) < s' of filter' s' (ng@(Ngrams t n),y) = case (Text.length t) < s' of
True -> (ng,y) True -> (ng,y)
False -> (Ngrams (Text.take s' t) n , y) False -> (Ngrams (Text.take s' t) n , y)
--}
documentIdWithNgrams :: HasNodeError err documentIdWithNgrams :: HasNodeError err
=> (HyperdataDocument => (HyperdataDocument
...@@ -285,7 +285,9 @@ mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f ...@@ -285,7 +285,9 @@ mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f
nId = documentId $ documentWithId d nId = documentId $ documentWithId d
------------------------------------------------------------------------ ------------------------------------------------------------------------
flowListBase :: FlowCmdM env err m => ListId -> Map NgramsType [NgramsElement] -> m () flowListBase :: FlowCmdM env err m
=> ListId -> Map NgramsType [NgramsElement]
-> m ()
flowListBase lId ngs = do flowListBase lId ngs = do
-- compute Candidate / Map -- compute Candidate / Map
mapM_ (\(typeList, ngElmts) -> putListNgrams lId typeList ngElmts) $ toList ngs mapM_ (\(typeList, ngElmts) -> putListNgrams lId typeList ngElmts) $ toList ngs
...@@ -294,18 +296,9 @@ flowList :: FlowCmdM env err m => UserId -> CorpusId ...@@ -294,18 +296,9 @@ flowList :: FlowCmdM env err m => UserId -> CorpusId
-> Map NgramsType [NgramsElement] -> Map NgramsType [NgramsElement]
-> m ListId -> m ListId
flowList uId cId ngs = do flowList uId cId ngs = do
--printDebug "ngs:" ngs
lId <- getOrMkList cId uId lId <- getOrMkList cId uId
printDebug "listId flowList" lId printDebug "listId flowList" lId
--printDebug "ngs" (DM.keys ngs)
-- TODO grouping
-- TODO needs rework
-- let groupEd = groupNgramsBy (\(NgramsT t1 n1) (NgramsT t2 n2) -> if (((==) t1 t2) && ((==) n1 n2)) then (Just (n1,n2)) else Nothing) ngs
-- _ <- insertGroups lId groupEd
flowListBase lId ngs flowListBase lId ngs
pure lId pure lId
flowListUser :: FlowCmdM env err m flowListUser :: FlowCmdM env err m
...@@ -315,16 +308,11 @@ flowListUser :: FlowCmdM env err m ...@@ -315,16 +308,11 @@ flowListUser :: FlowCmdM env err m
-> m ListId -> m ListId
flowListUser uId cId ngsM _n = do flowListUser uId cId ngsM _n = do
lId <- getOrMkList cId uId lId <- getOrMkList cId uId
{-
ngs <- take n <$> sortWith tficf_score
<$> getTficf userMaster cId lId NgramsTerms
-}
let ngs = [] let ngs = []
trace ("flowListBase" <> show lId) flowListBase lId ngsM trace ("flowListBase" <> show lId) flowListBase lId ngsM
putListNgrams lId NgramsTerms $ putListNgrams lId NgramsTerms $
[ mkNgramsElement (tficf_ngramsTerms ng) GraphList Nothing mempty [ mkNgramsElement (tficf_ngramsTerms ng) GraphList Nothing mempty
| ng <- ngs | ng <- ngs
...@@ -332,19 +320,6 @@ flowListUser uId cId ngsM _n = do ...@@ -332,19 +320,6 @@ flowListUser uId cId ngsM _n = do
pure lId pure lId
------------------------------------------------------------------------
{-
TODO rework:
* quadratic
* DM.keys called twice
groupNgramsBy :: (NgramsT NgramsIndexed -> NgramsT NgramsIndexed -> Maybe (NgramsIndexed, NgramsIndexed))
-> Map (NgramsT NgramsIndexed) (Map NodeId Int)
-> Map NgramsIndexed NgramsIndexed
groupNgramsBy isEqual cId = DM.fromList $ catMaybes [ isEqual n1 n2 | n1 <- DM.keys cId, n2 <- DM.keys cId]
-}
------------------------------------------------------------------------ ------------------------------------------------------------------------
ngrams2list :: Map NgramsIndexed (Map NgramsType a) ngrams2list :: Map NgramsIndexed (Map NgramsType a)
-> [(ListType, (NgramsType, NgramsIndexed))] -> [(ListType, (NgramsType, NgramsIndexed))]
......
...@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org ...@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
Ngrams by node enable special metrics. Ngrams by node enable contextual metrics.
-} -}
...@@ -29,6 +29,7 @@ import Gargantext.Database.Config (nodeTypeId) ...@@ -29,6 +29,7 @@ import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..)) import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId) import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Utils (Cmd, runPGSQuery) import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Metrics.TFICF -- (tficf) import Gargantext.Text.Metrics.TFICF -- (tficf)
import Gargantext.Text.Terms.Mono.Stem (stem) import Gargantext.Text.Terms.Mono.Stem (stem)
...@@ -53,7 +54,9 @@ ngramsGroup l n = Text.intercalate " " ...@@ -53,7 +54,9 @@ ngramsGroup l n = Text.intercalate " "
sortTficf :: (Map Text (Double, Set Text)) sortTficf :: (Map Text (Double, Set Text))
-> [(Double, Set Text)] -> [(Double, Set Text)]
sortTficf = List.reverse . List.sortOn fst . elems sortTficf = List.reverse
. List.sortOn fst
. elems
getTficf' :: UserCorpusId -> MasterCorpusId -> (Text -> Text) getTficf' :: UserCorpusId -> MasterCorpusId -> (Text -> Text)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment