Commit 88befdc7 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[TERMS] clean/refacto before integration to workflow.

parent 5e68e57c
...@@ -186,6 +186,7 @@ insertMasterDocs c lang hs = do ...@@ -186,6 +186,7 @@ insertMasterDocs c lang hs = do
ids <- insertDb masterUserId masterCorpusId hs' ids <- insertDb masterUserId masterCorpusId hs'
let documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' hs') let documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' hs')
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
maps <- mapNodeIdNgrams <$> documentIdWithNgrams (extractNgramsT lang) documentsWithId maps <- mapNodeIdNgrams <$> documentIdWithNgrams (extractNgramsT lang) documentsWithId
terms2id <- insertNgrams $ Map.keys maps terms2id <- insertNgrams $ Map.keys maps
let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps
......
...@@ -49,7 +49,7 @@ import qualified Data.List as List ...@@ -49,7 +49,7 @@ import qualified Data.List as List
import qualified Data.Text as Text import qualified Data.Text as Text
import Gargantext.Text (sentences) import Gargantext.Text (sentences)
import Gargantext.Text.Terms.Mono.Token.En (tokenize) import Gargantext.Text.Terms.Mono.Token.En (tokenize)
import Gargantext.Text.Eleve (mainEleve) import Gargantext.Text.Terms.Eleve (mainEleve)
data TermType lang data TermType lang
= Mono { _tt_lang :: lang } = Mono { _tt_lang :: lang }
...@@ -88,7 +88,7 @@ isPunctuation x = List.elem x $ (Text.pack . pure) ...@@ -88,7 +88,7 @@ isPunctuation x = List.elem x $ (Text.pack . pure)
-- | Unsupervised ngrams extraction -- | Unsupervised ngrams extraction
-- language agnostic extraction -- language agnostic extraction
-- TODO: remove IO -- TODO: remove IO
-- TODO: BlockText -- TODO: newtype BlockText
extractTermsUnsupervised :: Int -> Text -> [[Text]] extractTermsUnsupervised :: Int -> Text -> [[Text]]
extractTermsUnsupervised n = extractTermsUnsupervised n =
List.nub List.nub
......
{-| {-|
Module : Gargantext.Text.Eleve Module : Gargantext.Text.Terms.Eleve
Description : Unsupervized Word segmentation Description : Unsupervized Word segmentation
Copyright : (c) CNRS, 2019-Present Copyright : (c) CNRS, 2019-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -38,7 +38,7 @@ Notes for current implementation: ...@@ -38,7 +38,7 @@ Notes for current implementation:
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
module Gargantext.Text.Eleve where module Gargantext.Text.Terms.Eleve where
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
-- import Debug.SimpleReflect -- import Debug.SimpleReflect
......
...@@ -75,7 +75,7 @@ getGraph nId = do ...@@ -75,7 +75,7 @@ getGraph nId = do
<$> groupNodesByNgrams ngs <$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) NgramsTerms (Map.keys ngs) <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) NgramsTerms (Map.keys ngs)
graph <- liftIO $ cooc2graph myCooc graph <- liftIO $ cooc2graph 3 myCooc
pure $ set graph_metadata (Just metadata) graph pure $ set graph_metadata (Just metadata) graph
......
...@@ -35,11 +35,13 @@ import qualified Data.Vector.Storable as Vec ...@@ -35,11 +35,13 @@ import qualified Data.Vector.Storable as Vec
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.List as List import qualified Data.List as List
cooc2graph :: (Map (Text, Text) Int) -> IO Graph type Threshold = Int
cooc2graph myCooc = do
cooc2graph :: Threshold -> (Map (Text, Text) Int) -> IO Graph
cooc2graph threshold myCooc = do
let (ti, _) = createIndices myCooc let (ti, _) = createIndices myCooc
myCooc4 = toIndex ti myCooc myCooc' = toIndex ti myCooc
matCooc = map2mat (0) (Map.size ti) myCooc4 matCooc = map2mat (0) (Map.size ti) $ Map.filter (>threshold) myCooc'
distanceMat = measureConditional matCooc distanceMat = measureConditional matCooc
distanceMap = Map.filter (>0.01) $ mat2map distanceMat distanceMap = Map.filter (>0.01) $ mat2map distanceMat
...@@ -50,7 +52,7 @@ cooc2graph myCooc = do ...@@ -50,7 +52,7 @@ cooc2graph myCooc = do
let bridgeness' = bridgeness 300 partitions distanceMap let bridgeness' = bridgeness 300 partitions distanceMap
let confluence' = confluence (Map.keys bridgeness') 3 True False let confluence' = confluence (Map.keys bridgeness') 3 True False
data2graph (Map.toList ti) myCooc4 bridgeness' confluence' partitions data2graph (Map.toList ti) myCooc' bridgeness' confluence' partitions
---------------------------------------------------------- ----------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment