Unverified Commit a26bdc84 authored by Nicolas Pouillard's avatar Nicolas Pouillard
parents ad14d93d e0f3433b
...@@ -40,7 +40,7 @@ main = do ...@@ -40,7 +40,7 @@ main = do
-} -}
let cmdCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId let cmdCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
cmdCorpus = flowCorpus (cs user) CsvHalFormat corpusPath (cs name) cmdCorpus = flowCorpus (cs user) (cs name) CsvHalFormat corpusPath
-- cmd = {-createUsers >>-} cmdCorpus -- cmd = {-createUsers >>-} cmdCorpus
......
...@@ -885,23 +885,6 @@ getNgramsTableMap nodeId ngramsType = do ...@@ -885,23 +885,6 @@ getNgramsTableMap nodeId ngramsType = do
pure $ Versioned (repo ^. r_version) pure $ Versioned (repo ^. r_version)
(repo ^. r_state . at ngramsType . _Just . at nodeId . _Just) (repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
-- UNUSED
_getListNgrams :: RepoCmdM env err m
=> [NodeId] -> NgramsType -> m (Versioned ListNgrams)
_getListNgrams nodeIds ngramsType = do
v <- view repoVar
repo <- liftIO $ readMVar v
let
ngramsMap = repo ^. r_state . at ngramsType . _Just
ngrams =
Map.unionsWith mergeNgramsElement
[ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
pure $ Versioned (repo ^. r_version)
$ NgramsTable (ngramsElementFromRepo <$> Map.toList ngrams)
type MinSize = Int type MinSize = Int
type MaxSize = Int type MaxSize = Int
......
{-|
Module : Gargantext.API.Ngrams.Tools
Description : Tools to manage Ngrams Elements (from the API)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.API.Ngrams.Tools
where
import Control.Concurrent
import Control.Lens (_Just, (^.), at, view)
import Control.Monad.Reader
import Data.Map.Strict (Map)
import Data.Set (Set)
import Data.Text (Text)
import Data.Validity
import Gargantext.API.Ngrams
import Gargantext.Core.Types (ListType(..), NodeId, ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
type RootTerm = Text
getListNgrams :: RepoCmdM env err m
=> [ListId] -> NgramsType
-> m (Map Text NgramsRepoElement)
getListNgrams nodeIds ngramsType = do
v <- view repoVar
repo <- liftIO $ readMVar v
let
ngramsMap = repo ^. r_state . at ngramsType . _Just
ngrams = Map.unionsWith mergeNgramsElement
[ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
pure ngrams
mapTermListRoot :: RepoCmdM env err m
=> [ListId] -> NgramsType
-> m (Map Text (ListType, (Maybe Text)))
mapTermListRoot nodeIds ngramsType = do
ngrams <- getListNgrams nodeIds ngramsType
pure $ Map.fromList [(t, (_nre_list nre, _nre_root nre))
| (t, nre) <- Map.toList ngrams
]
filterListWithRoot :: ListType -> Map Text (ListType, Maybe Text)
-> Map Text (Maybe RootTerm)
filterListWithRoot lt m = Map.fromList
$ map (\(t,(_,r)) -> (t,r))
$ filter isGraphTerm (Map.toList m)
where
isGraphTerm (_t,(l, maybeRoot)) = case maybeRoot of
Nothing -> l == lt
Just r -> case Map.lookup r m of
Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r
Just (l',_) -> l' == lt
groupNodesByNgrams :: Map Text (Maybe RootTerm)
-> Map Text (Set NodeId)
-> Map Text (Set NodeId)
groupNodesByNgrams syn occs = Map.fromListWith (<>) occs'
where
occs' = map toSyn (Map.toList occs)
toSyn (t,ns) = case Map.lookup t syn of
Nothing -> panic $ "Garg.API.Ngrams.Tools: groupNodesByNgrams, unknown key: " <> t
Just r -> case r of
Nothing -> (t, ns)
Just r' -> (r',ns)
getCoocByNgrams :: Map Text (Set NodeId) -> Map (Text, Text) Int
getCoocByNgrams m =
Map.fromList [((t1,t2)
,maybe 0 Set.size $ Set.intersection
<$> Map.lookup t1 m
<*> Map.lookup t2 m
) | (t1,t2) <- listToCombi identity $ Map.keys m
]
...@@ -10,7 +10,7 @@ Portability : POSIX ...@@ -10,7 +10,7 @@ Portability : POSIX
Node API Node API
-} -}
{-# OPTIONS_GHC -fno-warn-name-shadowing -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-} {-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
...@@ -21,7 +21,6 @@ Node API ...@@ -21,7 +21,6 @@ Node API
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
-------------------------------------------------------------------
module Gargantext.API.Node module Gargantext.API.Node
( module Gargantext.API.Node ( module Gargantext.API.Node
, HyperdataAny(..) , HyperdataAny(..)
...@@ -32,47 +31,41 @@ module Gargantext.API.Node ...@@ -32,47 +31,41 @@ module Gargantext.API.Node
, HyperdataDocument(..) , HyperdataDocument(..)
, HyperdataDocumentV3(..) , HyperdataDocumentV3(..)
) where ) where
-------------------------------------------------------------------
import Control.Lens (prism', set) import Control.Lens (prism', set)
import Control.Monad.IO.Class (liftIO)
import Control.Monad ((>>)) import Control.Monad ((>>))
--import System.IO (putStrLn, readFile) import Control.Monad.IO.Class (liftIO)
import qualified Data.Map as Map
import Data.Aeson (FromJSON, ToJSON) import Data.Aeson (FromJSON, ToJSON)
import Data.Text (Text())
import Data.Swagger import Data.Swagger
import Data.Text (Text())
import Data.Time (UTCTime) import Data.Time (UTCTime)
import Debug.Trace (trace)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Servant
import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, TableNgramsApiGet, tableNgramsPatch, getTableNgrams, HasRepo) import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, TableNgramsApiGet, tableNgramsPatch, getTableNgrams, HasRepo)
import Gargantext.Prelude import Gargantext.API.Ngrams.Tools
import Gargantext.Database.Types.Node import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery)
import Gargantext.Database.Utils -- (Cmd, CmdM) import Gargantext.Core.Types (Offset, Limit, ListType(..))
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..)) import Gargantext.Core.Types.Main (Tree, NodeTree)
import Gargantext.Database.Node.Children (getChildren)
import qualified Gargantext.Database.Node.Update as U (update, Update(..))
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),FacetChart,runViewAuthorsDoc) import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),FacetChart,runViewAuthorsDoc)
import Gargantext.Database.Tree (treeDB, HasTreeError(..), TreeError(..)) import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Metrics.Count (getNgramsByNode) import Gargantext.Database.Node.Children (getChildren)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..))
import Gargantext.Database.Schema.Node (defaultList) import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Schema.NodeNode (nodesToFavorite, nodesToTrash) import Gargantext.Database.Schema.NodeNode (nodesToFavorite, nodesToTrash)
import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Tree (treeDB, HasTreeError(..), TreeError(..))
import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery) import Gargantext.Database.Types.Node
import Gargantext.Text.Metrics.Count (coocOn) import Gargantext.Database.Types.Node (CorpusId, ContactId)
-- Graph import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude
import Gargantext.Text.Flow (cooc2graph) import Gargantext.Text.Flow (cooc2graph)
import Gargantext.Viz.Graph hiding (Node)-- (Graph(_graph_metadata),LegendField(..), GraphMetadata(..),readGraphFromJson,defaultGraph) import Gargantext.Viz.Graph hiding (Node)-- (Graph(_graph_metadata),LegendField(..), GraphMetadata(..),readGraphFromJson,defaultGraph)
-- import Gargantext.Core (Lang(..)) import Servant
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Types.Main (Tree, NodeTree)
import Gargantext.Database.Types.Node (CorpusId, ContactId)
-- import Gargantext.Text.Terms (TermType(..))
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Map as Map
import qualified Gargantext.Database.Node.Update as U (update, Update(..))
type GargServer api = type GargServer api =
forall env m. forall env m.
...@@ -281,7 +274,6 @@ type GraphAPI = Get '[JSON] Graph ...@@ -281,7 +274,6 @@ type GraphAPI = Get '[JSON] Graph
graphAPI :: NodeId -> GargServer GraphAPI graphAPI :: NodeId -> GargServer GraphAPI
graphAPI nId = do graphAPI nId = do
nodeGraph <- getNode nId HyperdataGraph nodeGraph <- getNode nId HyperdataGraph
let metadata = GraphMetadata "Title" [maybe 0 identity $ _node_parentId nodeGraph] let metadata = GraphMetadata "Title" [maybe 0 identity $ _node_parentId nodeGraph]
...@@ -290,17 +282,16 @@ graphAPI nId = do ...@@ -290,17 +282,16 @@ graphAPI nId = do
] ]
-- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10]) -- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10])
let cId = maybe (panic "no parentId") identity $ _node_parentId nodeGraph let cId = maybe (panic "no parentId") identity $ _node_parentId nodeGraph
_lId <- defaultList cId lId <- defaultList cId
-- lId' <- listsWith masterUser
--myCooc <- getCoocByDocDev cId lId -- (lid' <> [lId]) ngs <- filterListWithRoot GraphTerm <$> mapTermListRoot [lId] NgramsTerms
myCooc <- Map.filter (>2) <$> coocOn identity <$> getNgramsByNode cId NgramsTerms
liftIO $ set graph_metadata (Just metadata) <$> cooc2graph myCooc myCooc <- Map.filter (>1) <$> getCoocByNgrams
<$> groupNodesByNgrams ngs
-- <$> maybe defaultGraph identity <$> getNodesByNgramsOnlyUser cId NgramsTerms (Map.keys ngs)
-- <$> readGraphFromJson "purescript-gargantext/dist/examples/imtNew.json"
-- t <- textFlow (Mono EN) (Contexts contextText) liftIO $ trace (show myCooc) $ set graph_metadata (Just metadata) <$> cooc2graph myCooc
-- liftIO $ liftIO $ pure $ maybe t identity maybeGraph
-- TODO what do we get about the node? to replace contextText
instance HasNodeError ServantErr where instance HasNodeError ServantErr where
_NodeError = prism' mk (const Nothing) -- $ panic "HasNodeError ServantErr: not a prism") _NodeError = prism' mk (const Nothing) -- $ panic "HasNodeError ServantErr: not a prism")
...@@ -337,7 +328,7 @@ treeAPI = treeDB ...@@ -337,7 +328,7 @@ treeAPI = treeDB
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Check if the name is less than 255 char -- | Check if the name is less than 255 char
rename :: NodeId -> RenameNode -> Cmd err [Int] rename :: NodeId -> RenameNode -> Cmd err [Int]
rename nId (RenameNode name) = U.update (U.Rename nId name) rename nId (RenameNode name') = U.update (U.Rename nId name')
getTable :: NodeId -> Maybe TabType getTable :: NodeId -> Maybe TabType
-> Maybe Offset -> Maybe Limit -> Maybe Offset -> Maybe Limit
...@@ -361,7 +352,7 @@ getChart :: NodeId -> Maybe UTCTime -> Maybe UTCTime ...@@ -361,7 +352,7 @@ getChart :: NodeId -> Maybe UTCTime -> Maybe UTCTime
getChart _ _ _ = undefined -- TODO getChart _ _ _ = undefined -- TODO
postNode :: HasNodeError err => UserId -> NodeId -> PostNode -> Cmd err [NodeId] postNode :: HasNodeError err => UserId -> NodeId -> PostNode -> Cmd err [NodeId]
postNode uId pId (PostNode name nt) = mkNodeWithParent nt (Just pId) uId name postNode uId pId (PostNode nodeName nt) = mkNodeWithParent nt (Just pId) uId nodeName
putNode :: NodeId -> Cmd err Int putNode :: NodeId -> Cmd err Int
putNode = undefined -- TODO putNode = undefined -- TODO
......
...@@ -46,6 +46,7 @@ import Gargantext.Core (Lang(..)) ...@@ -46,6 +46,7 @@ import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (NodePoly(..), Terms(..)) import Gargantext.Core.Types (NodePoly(..), Terms(..))
import Gargantext.Core.Types.Individu (Username) import Gargantext.Core.Types.Individu (Username)
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Database.TextSearch (searchInDatabase)
import Gargantext.Database.Config (userMaster, corpusMasterName) import Gargantext.Database.Config (userMaster, corpusMasterName)
import Gargantext.Database.Flow.Utils (insertToNodeNgrams) import Gargantext.Database.Flow.Utils (insertToNodeNgrams)
import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..)) import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
...@@ -61,6 +62,7 @@ import Gargantext.Text.List ...@@ -61,6 +62,7 @@ import Gargantext.Text.List
import Gargantext.Text.Parsers (parseDocs, FileFormat) import Gargantext.Text.Parsers (parseDocs, FileFormat)
import Gargantext.Text.Terms (TermType(..)) import Gargantext.Text.Terms (TermType(..))
import Gargantext.Text.Terms (extractTerms) import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
import Servant (ServantErr) import Servant (ServantErr)
import System.FilePath (FilePath) import System.FilePath (FilePath)
import qualified Data.Map as DM import qualified Data.Map as DM
...@@ -76,8 +78,21 @@ type FlowCmdM env err m = ...@@ -76,8 +78,21 @@ type FlowCmdM env err m =
flowCorpus :: FlowCmdM env ServantErr m flowCorpus :: FlowCmdM env ServantErr m
=> Username -> FileFormat -> FilePath -> CorpusName -> m CorpusId => Username -> CorpusName -> FileFormat -> FilePath -> m CorpusId
flowCorpus userName ff fp corpusName = do flowCorpus u cn ff fp = do
ids <- flowCorpusMaster ff fp
flowCorpusUser u cn ids
flowCorpusSearchInDatabase :: FlowCmdM env ServantErr m
=> Username -> Text -> m CorpusId
flowCorpusSearchInDatabase u q = do
(_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus userMaster ""
ids <- chunkAlong 10000 10000 <$> map fst <$> searchInDatabase cId (stemIt q)
flowCorpusUser u q ids
flowCorpusMaster :: FlowCmdM env ServantErr m => FileFormat -> FilePath -> m [[NodeId]]
flowCorpusMaster ff fp = do
-- Master Flow -- Master Flow
docs <- map addUniqIdsDoc <$> liftIO (parseDocs ff fp) docs <- map addUniqIdsDoc <$> liftIO (parseDocs ff fp)
...@@ -90,7 +105,11 @@ flowCorpus userName ff fp corpusName = do ...@@ -90,7 +105,11 @@ flowCorpus userName ff fp corpusName = do
-- TODO: chunkAlongNoRest or chunkAlongWithRest -- TODO: chunkAlongNoRest or chunkAlongWithRest
-- default behavior: NoRest -- default behavior: NoRest
ids <- mapM insertMasterDocs $ chunkAlong 10000 10000 docs ids <- mapM insertMasterDocs $ chunkAlong 10000 10000 docs
pure ids
flowCorpusUser :: FlowCmdM env ServantErr m => Username -> CorpusName -> [[NodeId]] -> m CorpusId
flowCorpusUser userName corpusName ids = do
-- User Flow -- User Flow
(userId, _rootId, userCorpusId) <- getOrMkRootWithCorpus userName corpusName (userId, _rootId, userCorpusId) <- getOrMkRootWithCorpus userName corpusName
-- TODO: check if present already, ignore -- TODO: check if present already, ignore
...@@ -280,11 +299,12 @@ mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f ...@@ -280,11 +299,12 @@ mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f
nId = documentId $ documentWithId d nId = documentId $ documentWithId d
------------------------------------------------------------------------ ------------------------------------------------------------------------
flowListBase :: FlowCmdM env err m listInsert :: FlowCmdM env err m
=> ListId -> Map NgramsType [NgramsElement] => ListId -> Map NgramsType [NgramsElement]
-> m () -> m ()
flowListBase lId ngs = do listInsert lId ngs = mapM_ (\(typeList, ngElmts)
mapM_ (\(typeList, ngElmts) -> putListNgrams lId typeList ngElmts) $ toList ngs -> putListNgrams lId typeList ngElmts
) $ toList ngs
flowList :: FlowCmdM env err m => UserId -> CorpusId flowList :: FlowCmdM env err m => UserId -> CorpusId
-> Map NgramsType [NgramsElement] -> Map NgramsType [NgramsElement]
...@@ -292,7 +312,7 @@ flowList :: FlowCmdM env err m => UserId -> CorpusId ...@@ -292,7 +312,7 @@ flowList :: FlowCmdM env err m => UserId -> CorpusId
flowList uId cId ngs = do flowList uId cId ngs = do
lId <- getOrMkList cId uId lId <- getOrMkList cId uId
printDebug "listId flowList" lId printDebug "listId flowList" lId
flowListBase lId ngs listInsert lId ngs
pure lId pure lId
......
...@@ -137,18 +137,6 @@ getOccByNgramsOnly :: CorpusId -> NgramsType -> [Text] ...@@ -137,18 +137,6 @@ getOccByNgramsOnly :: CorpusId -> NgramsType -> [Text]
getOccByNgramsOnly cId nt ngs = Map.map Set.size getOccByNgramsOnly cId nt ngs = Map.map Set.size
<$> getNodesByNgramsOnlyUser cId nt ngs <$> getNodesByNgramsOnlyUser cId nt ngs
-- TODO add groups
getCoocByNgramsOnly :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map (Text,Text) Int)
getCoocByNgramsOnly cId nt ngs = do
ngs' <- getNodesByNgramsOnlyUser cId nt ngs
pure $ Map.fromList [( (t1,t2)
, maybe 0 Set.size $ Set.intersection
<$> Map.lookup t1 ngs'
<*> Map.lookup t2 ngs'
)
| (t1,t2) <- listToCombi identity $ Map.keys ngs']
getNodesByNgramsOnlyUser :: CorpusId -> NgramsType -> [Text] -> Cmd err (Map Text (Set NodeId)) getNodesByNgramsOnlyUser :: CorpusId -> NgramsType -> [Text] -> Cmd err (Map Text (Set NodeId))
getNodesByNgramsOnlyUser cId nt ngs = fromListWith (<>) <$> map (\(n,t) -> (t, Set.singleton n)) getNodesByNgramsOnlyUser cId nt ngs = fromListWith (<>) <$> map (\(n,t) -> (t, Set.singleton n))
<$> selectNgramsOnlyByNodeUser cId nt ngs <$> selectNgramsOnlyByNodeUser cId nt ngs
......
...@@ -36,7 +36,7 @@ import Gargantext.Core.Types (CorpusId) ...@@ -36,7 +36,7 @@ import Gargantext.Core.Types (CorpusId)
--import Gargantext.Database.Types.Node --import Gargantext.Database.Types.Node
import Gargantext.Prelude import Gargantext.Prelude
--import Gargantext.Text.Context (splitBy, SplitContext(Sentences)) --import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..)) --import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..))
--import Gargantext.Text.Metrics.Count (coocOn) --import Gargantext.Text.Metrics.Count (coocOn)
--import Gargantext.Text.Parsers.CSV --import Gargantext.Text.Parsers.CSV
--import Gargantext.Text.Terms (TermType, extractTerms) --import Gargantext.Text.Terms (TermType, extractTerms)
...@@ -120,21 +120,21 @@ cooc2graph :: (Map (Text, Text) Int) -> IO Graph ...@@ -120,21 +120,21 @@ cooc2graph :: (Map (Text, Text) Int) -> IO Graph
cooc2graph myCooc = do cooc2graph myCooc = do
--printDebug "myCooc" myCooc --printDebug "myCooc" myCooc
-- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores -- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores
{-
let myCooc3 = filterCooc ( FilterConfig (MapListSize 350 ) let myCooc3 = filterCooc ( FilterConfig (MapListSize 350 )
(InclusionSize 500 ) (InclusionSize 500 )
(SampleBins 10 ) (SampleBins 10 )
(Clusters 3 ) (Clusters 3 )
(DefaultValue 0 ) (DefaultValue 0 )
) myCooc ) myCooc
--printDebug "myCooc3 size" $ M.size myCooc3 --} --printDebug "myCooc3 size" $ M.size myCooc3
--printDebug "myCooc3" myCooc3
-- Cooc -> Matrix -- Cooc -> Matrix
let (ti, _) = createIndices myCooc3 let (ti, _) = createIndices myCooc
--printDebug "ti size" $ M.size ti --printDebug "ti size" $ M.size ti
--printDebug "ti" ti --printDebug "ti" ti
let myCooc4 = toIndex ti myCooc3 let myCooc4 = toIndex ti myCooc
--printDebug "myCooc4 size" $ M.size myCooc4 --printDebug "myCooc4 size" $ M.size myCooc4
--printDebug "myCooc4" myCooc4 --printDebug "myCooc4" myCooc4
...@@ -153,7 +153,7 @@ cooc2graph myCooc = do ...@@ -153,7 +153,7 @@ cooc2graph myCooc = do
--printDebug "distanceMap size" $ M.size distanceMap --printDebug "distanceMap size" $ M.size distanceMap
--printDebug "distanceMap" distanceMap --printDebug "distanceMap" distanceMap
-- let distance = fromIndex fi distanceMap --let distance = fromIndex fi distanceMap
--printDebug "distance" $ M.size distance --printDebug "distance" $ M.size distance
partitions <- case Map.size distanceMap > 0 of partitions <- case Map.size distanceMap > 0 of
......
...@@ -106,8 +106,8 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs ...@@ -106,8 +106,8 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
ys = take b $ drop a ns ys = take b $ drop a ns
zs = drop b $ drop a ns zs = drop b $ drop a ns
a = 100 a = 1
b = 1000 b = 10000
isStopTerm :: Text -> Bool isStopTerm :: Text -> Bool
isStopTerm x = Text.length x < 3 isStopTerm x = Text.length x < 3
......
...@@ -38,28 +38,25 @@ data SampleBins = SampleBins Double ...@@ -38,28 +38,25 @@ data SampleBins = SampleBins Double
data Clusters = Clusters Int data Clusters = Clusters Int
data DefaultValue = DefaultValue Int data DefaultValue = DefaultValue Int
data FilterConfig = FilterConfig { fc_mapListSize :: MapListSize data FilterConfig = FilterConfig
, fc_inclusionSize :: InclusionSize { fc_mapListSize :: MapListSize
, fc_sampleBins :: SampleBins , fc_inclusionSize :: InclusionSize
, fc_clusters :: Clusters , fc_sampleBins :: SampleBins
, fc_defaultValue :: DefaultValue , fc_clusters :: Clusters
} , fc_defaultValue :: DefaultValue
}
filterCooc :: (Show t, Ord t) => FilterConfig -> Map (t, t) Int -> Map (t, t) Int filterCooc :: (Show t, Ord t) => FilterConfig -> Map (t, t) Int -> Map (t, t) Int
filterCooc fc cc = (filterCooc' fc) ts cc filterCooc fc cc = (filterCooc' fc) ts cc
where where
ts = map _scored_terms $ takeSome fc $ coocScored cc ts = map _scored_terms $ takeSome fc $ coocScored cc
filterCooc' :: (Show t, Ord t) => FilterConfig -> [t] -> Map (t, t) Int -> Map (t, t) Int filterCooc' :: (Show t, Ord t) => FilterConfig -> [t] -> Map (t, t) Int -> Map (t, t) Int
filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m =
-- trace ("coocScored " <> show ts) $ -- trace ("coocScored " <> show ts) $
foldl' (\m' k -> M.insert k (maybe dv identity $ M.lookup k m) m') foldl' (\m' k -> M.insert k (maybe dv identity $ M.lookup k m) m')
M.empty selection M.empty (listToCombi identity ts)
where
selection = [(x,y) | x <- ts
, y <- ts
, x > y
]
-- | Map list creation -- | Map list creation
...@@ -70,7 +67,8 @@ filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = ...@@ -70,7 +67,8 @@ filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m =
takeSome :: Ord t => FilterConfig -> [Scored t] -> [Scored t] takeSome :: Ord t => FilterConfig -> [Scored t] -> [Scored t]
takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Clusters _) _) scores = L.take l takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Clusters _) _) scores = L.take l
$ takeSample n m $ takeSample n m
$ L.take l' $ reverse $ sortWith (Down . _scored_incExc) scores $ L.take l'
$ reverse $ sortWith (Down . _scored_incExc) scores
-- splitKmeans k scores -- splitKmeans k scores
where where
-- TODO: benchmark with accelerate-example kmeans version -- TODO: benchmark with accelerate-example kmeans version
...@@ -90,16 +88,17 @@ takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Cluste ...@@ -90,16 +88,17 @@ takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Cluste
$ sortWith (Down . _scored_speGen) xs $ sortWith (Down . _scored_speGen) xs
data Scored ts = Scored { _scored_terms :: !ts data Scored ts = Scored
, _scored_incExc :: !InclusionExclusion { _scored_terms :: !ts
, _scored_speGen :: !SpecificityGenericity , _scored_incExc :: !InclusionExclusion
} deriving (Show) , _scored_speGen :: !SpecificityGenericity
} deriving (Show)
-- TODO in the textflow we end up needing these indices, it might be better -- TODO in the textflow we end up needing these indices, it might be better
-- to compute them earlier and pass them around. -- to compute them earlier and pass them around.
coocScored :: Ord t => Map (t,t) Int -> [Scored t] coocScored :: Ord t => Map (t,t) Int -> [Scored t]
coocScored m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (M.toList fi) scores coocScored m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (M.toList fi) scores
where where
(ti,fi) = createIndices m (ti, fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat ti m (is, ss) = incExcSpeGen $ cooc2mat ti m
scores = DAA.toList $ DAA.run $ DAA.zip (DAA.use is) (DAA.use ss) scores = DAA.toList $ DAA.run $ DAA.zip (DAA.use is) (DAA.use ss)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment