Commit 88befdc7 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[TERMS] clean/refacto before integration to workflow.

parent 5e68e57c
......@@ -186,6 +186,7 @@ insertMasterDocs c lang hs = do
ids <- insertDb masterUserId masterCorpusId hs'
let documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' hs')
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
maps <- mapNodeIdNgrams <$> documentIdWithNgrams (extractNgramsT lang) documentsWithId
terms2id <- insertNgrams $ Map.keys maps
let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps
......
......@@ -49,7 +49,7 @@ import qualified Data.List as List
import qualified Data.Text as Text
import Gargantext.Text (sentences)
import Gargantext.Text.Terms.Mono.Token.En (tokenize)
import Gargantext.Text.Eleve (mainEleve)
import Gargantext.Text.Terms.Eleve (mainEleve)
data TermType lang
= Mono { _tt_lang :: lang }
......@@ -88,7 +88,7 @@ isPunctuation x = List.elem x $ (Text.pack . pure)
-- | Unsupervised ngrams extraction
-- language agnostic extraction
-- TODO: remove IO
-- TODO: BlockText
-- TODO: newtype BlockText
extractTermsUnsupervised :: Int -> Text -> [[Text]]
extractTermsUnsupervised n =
List.nub
......
{-|
Module : Gargantext.Text.Eleve
Module : Gargantext.Text.Terms.Eleve
Description : Unsupervized Word segmentation
Copyright : (c) CNRS, 2019-Present
License : AGPL + CECILL v3
......@@ -38,7 +38,7 @@ Notes for current implementation:
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Text.Eleve where
module Gargantext.Text.Terms.Eleve where
-- import Debug.Trace (trace)
-- import Debug.SimpleReflect
......
......@@ -75,7 +75,7 @@ getGraph nId = do
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) NgramsTerms (Map.keys ngs)
graph <- liftIO $ cooc2graph myCooc
graph <- liftIO $ cooc2graph 3 myCooc
pure $ set graph_metadata (Just metadata) graph
......
......@@ -35,11 +35,13 @@ import qualified Data.Vector.Storable as Vec
import qualified Data.Map as Map
import qualified Data.List as List
cooc2graph :: (Map (Text, Text) Int) -> IO Graph
cooc2graph myCooc = do
type Threshold = Int
cooc2graph :: Threshold -> (Map (Text, Text) Int) -> IO Graph
cooc2graph threshold myCooc = do
let (ti, _) = createIndices myCooc
myCooc4 = toIndex ti myCooc
matCooc = map2mat (0) (Map.size ti) myCooc4
myCooc' = toIndex ti myCooc
matCooc = map2mat (0) (Map.size ti) $ Map.filter (>threshold) myCooc'
distanceMat = measureConditional matCooc
distanceMap = Map.filter (>0.01) $ mat2map distanceMat
......@@ -50,7 +52,7 @@ cooc2graph myCooc = do
let bridgeness' = bridgeness 300 partitions distanceMap
let confluence' = confluence (Map.keys bridgeness') 3 True False
data2graph (Map.toList ti) myCooc4 bridgeness' confluence' partitions
data2graph (Map.toList ti) myCooc' bridgeness' confluence' partitions
----------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment