Commit 42e3688f authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FLOW] adding TFICF fun to flow.

parent dec1cb78
......@@ -47,6 +47,7 @@ import Gargantext.Database.Flow.Utils (insertToNodeNgrams)
import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Metrics.TFICF (Tficf(..))
import qualified Gargantext.Database.Node.Document.Add as Doc (add)
import Gargantext.Database.Metrics.NgramsByNode (getTficf', sortTficf, ngramsGroup)
import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Root (getRoot)
import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
......@@ -91,7 +92,13 @@ flowCorpus userName ff fp corpusName = do
_ <- Doc.add userCorpusId $ concat ids
-- User List Flow
-- ngs <- getNgramsElementsWithParentNodeId masterCorpusId
let masterCorpusId = 2
-- /!\ this extract NgramsTerms Only
_ngs <- sortTficf <$> getTficf' userCorpusId masterCorpusId (ngramsGroup EN 2)
-- TODO getNgramsElement of NgramsType...
--ngs <- getNgramsElementsWithParentNodeId masterCorpusId
--_masterListId <- flowList masterUserId masterCorpusId ngs
--_userListId <- flowListUser userId userCorpusId ngs 100
......@@ -131,8 +138,6 @@ insertMasterDocs hs = do
getUserCorpusNgrams :: FlowCmdM env ServantErr m
=> CorpusId -> m [Ngrams]
getUserCorpusNgrams = undefined
......@@ -140,11 +145,6 @@ getUserCorpusNgrams = undefined
type CorpusName = Text
getOrMkRootWithCorpus :: HasNodeError err
......@@ -252,7 +252,7 @@ extractNgramsT' doc = do
<> [(a', DM.singleton Authors 1) | a' <- authors ]
<> [(t', DM.singleton NgramsTerms 1) | t' <- terms' ]
--{-
filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int)
-> Map Ngrams (Map NgramsType Int)
filterNgramsT s ms = DM.fromList $ map (\a -> filter' s a) $ DM.toList ms
......@@ -260,7 +260,7 @@ filterNgramsT s ms = DM.fromList $ map (\a -> filter' s a) $ DM.toList ms
filter' s' (ng@(Ngrams t n),y) = case (Text.length t) < s' of
True -> (ng,y)
False -> (Ngrams (Text.take s' t) n , y)
--}
documentIdWithNgrams :: HasNodeError err
=> (HyperdataDocument
......@@ -285,7 +285,9 @@ mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f
nId = documentId $ documentWithId d
------------------------------------------------------------------------
flowListBase :: FlowCmdM env err m => ListId -> Map NgramsType [NgramsElement] -> m ()
flowListBase :: FlowCmdM env err m
=> ListId -> Map NgramsType [NgramsElement]
-> m ()
flowListBase lId ngs = do
-- compute Candidate / Map
mapM_ (\(typeList, ngElmts) -> putListNgrams lId typeList ngElmts) $ toList ngs
......@@ -294,18 +296,9 @@ flowList :: FlowCmdM env err m => UserId -> CorpusId
-> Map NgramsType [NgramsElement]
-> m ListId
flowList uId cId ngs = do
--printDebug "ngs:" ngs
lId <- getOrMkList cId uId
printDebug "listId flowList" lId
--printDebug "ngs" (DM.keys ngs)
-- TODO grouping
-- TODO needs rework
-- let groupEd = groupNgramsBy (\(NgramsT t1 n1) (NgramsT t2 n2) -> if (((==) t1 t2) && ((==) n1 n2)) then (Just (n1,n2)) else Nothing) ngs
-- _ <- insertGroups lId groupEd
flowListBase lId ngs
pure lId
flowListUser :: FlowCmdM env err m
......@@ -315,16 +308,11 @@ flowListUser :: FlowCmdM env err m
-> m ListId
flowListUser uId cId ngsM _n = do
lId <- getOrMkList cId uId
{-
ngs <- take n <$> sortWith tficf_score
<$> getTficf userMaster cId lId NgramsTerms
-}
let ngs = []
trace ("flowListBase" <> show lId) flowListBase lId ngsM
putListNgrams lId NgramsTerms $
[ mkNgramsElement (tficf_ngramsTerms ng) GraphList Nothing mempty
| ng <- ngs
......@@ -332,19 +320,6 @@ flowListUser uId cId ngsM _n = do
pure lId
------------------------------------------------------------------------
{-
TODO rework:
* quadratic
* DM.keys called twice
groupNgramsBy :: (NgramsT NgramsIndexed -> NgramsT NgramsIndexed -> Maybe (NgramsIndexed, NgramsIndexed))
-> Map (NgramsT NgramsIndexed) (Map NodeId Int)
-> Map NgramsIndexed NgramsIndexed
groupNgramsBy isEqual cId = DM.fromList $ catMaybes [ isEqual n1 n2 | n1 <- DM.keys cId, n2 <- DM.keys cId]
-}
------------------------------------------------------------------------
ngrams2list :: Map NgramsIndexed (Map NgramsType a)
-> [(ListType, (NgramsType, NgramsIndexed))]
......
......@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Ngrams by node enable special metrics.
Ngrams by node enable contextual metrics.
-}
......@@ -29,6 +29,7 @@ import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Prelude
import Gargantext.Text.Metrics.TFICF -- (tficf)
import Gargantext.Text.Terms.Mono.Stem (stem)
......@@ -53,7 +54,9 @@ ngramsGroup l n = Text.intercalate " "
sortTficf :: (Map Text (Double, Set Text))
-> [(Double, Set Text)]
sortTficf = List.reverse . List.sortOn fst . elems
sortTficf = List.reverse
. List.sortOn fst
. elems
getTficf' :: UserCorpusId -> MasterCorpusId -> (Text -> Text)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment