Unverified Commit a26bdc84 authored by Nicolas Pouillard's avatar Nicolas Pouillard
parents ad14d93d e0f3433b
Pipeline #263 failed with stage
......@@ -40,7 +40,7 @@ main = do
-}
let cmdCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
cmdCorpus = flowCorpus (cs user) CsvHalFormat corpusPath (cs name)
cmdCorpus = flowCorpus (cs user) (cs name) CsvHalFormat corpusPath
-- cmd = {-createUsers >>-} cmdCorpus
......
......@@ -885,23 +885,6 @@ getNgramsTableMap nodeId ngramsType = do
pure $ Versioned (repo ^. r_version)
(repo ^. r_state . at ngramsType . _Just . at nodeId . _Just)
-- UNUSED
_getListNgrams :: RepoCmdM env err m
=> [NodeId] -> NgramsType -> m (Versioned ListNgrams)
_getListNgrams nodeIds ngramsType = do
v <- view repoVar
repo <- liftIO $ readMVar v
let
ngramsMap = repo ^. r_state . at ngramsType . _Just
ngrams =
Map.unionsWith mergeNgramsElement
[ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
pure $ Versioned (repo ^. r_version)
$ NgramsTable (ngramsElementFromRepo <$> Map.toList ngrams)
type MinSize = Int
type MaxSize = Int
......
{-|
Module : Gargantext.API.Ngrams.Tools
Description : Tools to manage Ngrams Elements (from the API)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.API.Ngrams.Tools
where
import Control.Concurrent
import Control.Lens (_Just, (^.), at, view)
import Control.Monad.Reader
import Data.Map.Strict (Map)
import Data.Set (Set)
import Data.Text (Text)
import Data.Validity
import Gargantext.API.Ngrams
import Gargantext.Core.Types (ListType(..), NodeId, ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
type RootTerm = Text
getListNgrams :: RepoCmdM env err m
=> [ListId] -> NgramsType
-> m (Map Text NgramsRepoElement)
getListNgrams nodeIds ngramsType = do
v <- view repoVar
repo <- liftIO $ readMVar v
let
ngramsMap = repo ^. r_state . at ngramsType . _Just
ngrams = Map.unionsWith mergeNgramsElement
[ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
pure ngrams
mapTermListRoot :: RepoCmdM env err m
=> [ListId] -> NgramsType
-> m (Map Text (ListType, (Maybe Text)))
mapTermListRoot nodeIds ngramsType = do
ngrams <- getListNgrams nodeIds ngramsType
pure $ Map.fromList [(t, (_nre_list nre, _nre_root nre))
| (t, nre) <- Map.toList ngrams
]
filterListWithRoot :: ListType -> Map Text (ListType, Maybe Text)
-> Map Text (Maybe RootTerm)
filterListWithRoot lt m = Map.fromList
$ map (\(t,(_,r)) -> (t,r))
$ filter isGraphTerm (Map.toList m)
where
isGraphTerm (_t,(l, maybeRoot)) = case maybeRoot of
Nothing -> l == lt
Just r -> case Map.lookup r m of
Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r
Just (l',_) -> l' == lt
groupNodesByNgrams :: Map Text (Maybe RootTerm)
-> Map Text (Set NodeId)
-> Map Text (Set NodeId)
groupNodesByNgrams syn occs = Map.fromListWith (<>) occs'
where
occs' = map toSyn (Map.toList occs)
toSyn (t,ns) = case Map.lookup t syn of
Nothing -> panic $ "Garg.API.Ngrams.Tools: groupNodesByNgrams, unknown key: " <> t
Just r -> case r of
Nothing -> (t, ns)
Just r' -> (r',ns)
getCoocByNgrams :: Map Text (Set NodeId) -> Map (Text, Text) Int
getCoocByNgrams m =
Map.fromList [((t1,t2)
,maybe 0 Set.size $ Set.intersection
<$> Map.lookup t1 m
<*> Map.lookup t2 m
) | (t1,t2) <- listToCombi identity $ Map.keys m
]
......@@ -10,7 +10,7 @@ Portability : POSIX
Node API
-}
{-# OPTIONS_GHC -fno-warn-name-shadowing -fno-warn-orphans #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
......@@ -21,7 +21,6 @@ Node API
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
-------------------------------------------------------------------
module Gargantext.API.Node
( module Gargantext.API.Node
, HyperdataAny(..)
......@@ -32,47 +31,41 @@ module Gargantext.API.Node
, HyperdataDocument(..)
, HyperdataDocumentV3(..)
) where
-------------------------------------------------------------------
import Control.Lens (prism', set)
import Control.Monad.IO.Class (liftIO)
import Control.Monad ((>>))
--import System.IO (putStrLn, readFile)
import qualified Data.Map as Map
import Control.Monad.IO.Class (liftIO)
import Data.Aeson (FromJSON, ToJSON)
import Data.Text (Text())
import Data.Swagger
import Data.Text (Text())
import Data.Time (UTCTime)
import Debug.Trace (trace)
import GHC.Generics (Generic)
import Servant
import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, TableNgramsApiGet, tableNgramsPatch, getTableNgrams, HasRepo)
import Gargantext.Prelude
import Gargantext.Database.Types.Node
import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..))
import Gargantext.Database.Node.Children (getChildren)
import qualified Gargantext.Database.Node.Update as U (update, Update(..))
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery)
import Gargantext.Core.Types (Offset, Limit, ListType(..))
import Gargantext.Core.Types.Main (Tree, NodeTree)
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),FacetChart,runViewAuthorsDoc)
import Gargantext.Database.Tree (treeDB, HasTreeError(..), TreeError(..))
import Gargantext.Database.Metrics.Count (getNgramsByNode)
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Node.Children (getChildren)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..))
import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Schema.NodeNode (nodesToFavorite, nodesToTrash)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery)
import Gargantext.Text.Metrics.Count (coocOn)
-- Graph
import Gargantext.Database.Tree (treeDB, HasTreeError(..), TreeError(..))
import Gargantext.Database.Types.Node
import Gargantext.Database.Types.Node (CorpusId, ContactId)
import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude
import Gargantext.Text.Flow (cooc2graph)
import Gargantext.Viz.Graph hiding (Node)-- (Graph(_graph_metadata),LegendField(..), GraphMetadata(..),readGraphFromJson,defaultGraph)
-- import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Types.Main (Tree, NodeTree)
import Gargantext.Database.Types.Node (CorpusId, ContactId)
-- import Gargantext.Text.Terms (TermType(..))
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Map as Map
import qualified Gargantext.Database.Node.Update as U (update, Update(..))
type GargServer api =
forall env m.
......@@ -281,7 +274,6 @@ type GraphAPI = Get '[JSON] Graph
graphAPI :: NodeId -> GargServer GraphAPI
graphAPI nId = do
nodeGraph <- getNode nId HyperdataGraph
let metadata = GraphMetadata "Title" [maybe 0 identity $ _node_parentId nodeGraph]
......@@ -290,17 +282,16 @@ graphAPI nId = do
]
-- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10])
let cId = maybe (panic "no parentId") identity $ _node_parentId nodeGraph
_lId <- defaultList cId
-- lId' <- listsWith masterUser
--myCooc <- getCoocByDocDev cId lId -- (lid' <> [lId])
myCooc <- Map.filter (>2) <$> coocOn identity <$> getNgramsByNode cId NgramsTerms
liftIO $ set graph_metadata (Just metadata) <$> cooc2graph myCooc
-- <$> maybe defaultGraph identity
-- <$> readGraphFromJson "purescript-gargantext/dist/examples/imtNew.json"
-- t <- textFlow (Mono EN) (Contexts contextText)
-- liftIO $ liftIO $ pure $ maybe t identity maybeGraph
-- TODO what do we get about the node? to replace contextText
lId <- defaultList cId
ngs <- filterListWithRoot GraphTerm <$> mapTermListRoot [lId] NgramsTerms
myCooc <- Map.filter (>1) <$> getCoocByNgrams
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId NgramsTerms (Map.keys ngs)
liftIO $ trace (show myCooc) $ set graph_metadata (Just metadata) <$> cooc2graph myCooc
instance HasNodeError ServantErr where
_NodeError = prism' mk (const Nothing) -- $ panic "HasNodeError ServantErr: not a prism")
......@@ -337,7 +328,7 @@ treeAPI = treeDB
------------------------------------------------------------------------
-- | Check if the name is less than 255 char
rename :: NodeId -> RenameNode -> Cmd err [Int]
rename nId (RenameNode name) = U.update (U.Rename nId name)
rename nId (RenameNode name') = U.update (U.Rename nId name')
getTable :: NodeId -> Maybe TabType
-> Maybe Offset -> Maybe Limit
......@@ -361,7 +352,7 @@ getChart :: NodeId -> Maybe UTCTime -> Maybe UTCTime
getChart _ _ _ = undefined -- TODO
postNode :: HasNodeError err => UserId -> NodeId -> PostNode -> Cmd err [NodeId]
postNode uId pId (PostNode name nt) = mkNodeWithParent nt (Just pId) uId name
postNode uId pId (PostNode nodeName nt) = mkNodeWithParent nt (Just pId) uId nodeName
putNode :: NodeId -> Cmd err Int
putNode = undefined -- TODO
......
......@@ -46,6 +46,7 @@ import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (NodePoly(..), Terms(..))
import Gargantext.Core.Types.Individu (Username)
import Gargantext.Core.Types.Main
import Gargantext.Database.TextSearch (searchInDatabase)
import Gargantext.Database.Config (userMaster, corpusMasterName)
import Gargantext.Database.Flow.Utils (insertToNodeNgrams)
import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
......@@ -61,6 +62,7 @@ import Gargantext.Text.List
import Gargantext.Text.Parsers (parseDocs, FileFormat)
import Gargantext.Text.Terms (TermType(..))
import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
import Servant (ServantErr)
import System.FilePath (FilePath)
import qualified Data.Map as DM
......@@ -76,8 +78,21 @@ type FlowCmdM env err m =
flowCorpus :: FlowCmdM env ServantErr m
=> Username -> FileFormat -> FilePath -> CorpusName -> m CorpusId
flowCorpus userName ff fp corpusName = do
=> Username -> CorpusName -> FileFormat -> FilePath -> m CorpusId
flowCorpus u cn ff fp = do
ids <- flowCorpusMaster ff fp
flowCorpusUser u cn ids
flowCorpusSearchInDatabase :: FlowCmdM env ServantErr m
=> Username -> Text -> m CorpusId
flowCorpusSearchInDatabase u q = do
(_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus userMaster ""
ids <- chunkAlong 10000 10000 <$> map fst <$> searchInDatabase cId (stemIt q)
flowCorpusUser u q ids
flowCorpusMaster :: FlowCmdM env ServantErr m => FileFormat -> FilePath -> m [[NodeId]]
flowCorpusMaster ff fp = do
-- Master Flow
docs <- map addUniqIdsDoc <$> liftIO (parseDocs ff fp)
......@@ -90,7 +105,11 @@ flowCorpus userName ff fp corpusName = do
-- TODO: chunkAlongNoRest or chunkAlongWithRest
-- default behavior: NoRest
ids <- mapM insertMasterDocs $ chunkAlong 10000 10000 docs
pure ids
flowCorpusUser :: FlowCmdM env ServantErr m => Username -> CorpusName -> [[NodeId]] -> m CorpusId
flowCorpusUser userName corpusName ids = do
-- User Flow
(userId, _rootId, userCorpusId) <- getOrMkRootWithCorpus userName corpusName
-- TODO: check if present already, ignore
......@@ -280,11 +299,12 @@ mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f
nId = documentId $ documentWithId d
------------------------------------------------------------------------
flowListBase :: FlowCmdM env err m
listInsert :: FlowCmdM env err m
=> ListId -> Map NgramsType [NgramsElement]
-> m ()
flowListBase lId ngs = do
mapM_ (\(typeList, ngElmts) -> putListNgrams lId typeList ngElmts) $ toList ngs
listInsert lId ngs = mapM_ (\(typeList, ngElmts)
-> putListNgrams lId typeList ngElmts
) $ toList ngs
flowList :: FlowCmdM env err m => UserId -> CorpusId
-> Map NgramsType [NgramsElement]
......@@ -292,7 +312,7 @@ flowList :: FlowCmdM env err m => UserId -> CorpusId
flowList uId cId ngs = do
lId <- getOrMkList cId uId
printDebug "listId flowList" lId
flowListBase lId ngs
listInsert lId ngs
pure lId
......
......@@ -137,18 +137,6 @@ getOccByNgramsOnly :: CorpusId -> NgramsType -> [Text]
getOccByNgramsOnly cId nt ngs = Map.map Set.size
<$> getNodesByNgramsOnlyUser cId nt ngs
-- TODO add groups
getCoocByNgramsOnly :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map (Text,Text) Int)
getCoocByNgramsOnly cId nt ngs = do
ngs' <- getNodesByNgramsOnlyUser cId nt ngs
pure $ Map.fromList [( (t1,t2)
, maybe 0 Set.size $ Set.intersection
<$> Map.lookup t1 ngs'
<*> Map.lookup t2 ngs'
)
| (t1,t2) <- listToCombi identity $ Map.keys ngs']
getNodesByNgramsOnlyUser :: CorpusId -> NgramsType -> [Text] -> Cmd err (Map Text (Set NodeId))
getNodesByNgramsOnlyUser cId nt ngs = fromListWith (<>) <$> map (\(n,t) -> (t, Set.singleton n))
<$> selectNgramsOnlyByNodeUser cId nt ngs
......
......@@ -36,7 +36,7 @@ import Gargantext.Core.Types (CorpusId)
--import Gargantext.Database.Types.Node
import Gargantext.Prelude
--import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..))
--import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..))
--import Gargantext.Text.Metrics.Count (coocOn)
--import Gargantext.Text.Parsers.CSV
--import Gargantext.Text.Terms (TermType, extractTerms)
......@@ -120,21 +120,21 @@ cooc2graph :: (Map (Text, Text) Int) -> IO Graph
cooc2graph myCooc = do
--printDebug "myCooc" myCooc
-- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores
{-
let myCooc3 = filterCooc ( FilterConfig (MapListSize 350 )
(InclusionSize 500 )
(SampleBins 10 )
(Clusters 3 )
(DefaultValue 0 )
) myCooc
--printDebug "myCooc3 size" $ M.size myCooc3
--printDebug "myCooc3" myCooc3
--} --printDebug "myCooc3 size" $ M.size myCooc3
-- Cooc -> Matrix
let (ti, _) = createIndices myCooc3
let (ti, _) = createIndices myCooc
--printDebug "ti size" $ M.size ti
--printDebug "ti" ti
let myCooc4 = toIndex ti myCooc3
let myCooc4 = toIndex ti myCooc
--printDebug "myCooc4 size" $ M.size myCooc4
--printDebug "myCooc4" myCooc4
......@@ -153,7 +153,7 @@ cooc2graph myCooc = do
--printDebug "distanceMap size" $ M.size distanceMap
--printDebug "distanceMap" distanceMap
-- let distance = fromIndex fi distanceMap
--let distance = fromIndex fi distanceMap
--printDebug "distance" $ M.size distance
partitions <- case Map.size distanceMap > 0 of
......
......@@ -106,8 +106,8 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
ys = take b $ drop a ns
zs = drop b $ drop a ns
a = 100
b = 1000
a = 1
b = 10000
isStopTerm :: Text -> Bool
isStopTerm x = Text.length x < 3
......
......@@ -38,28 +38,25 @@ data SampleBins = SampleBins Double
data Clusters = Clusters Int
data DefaultValue = DefaultValue Int
data FilterConfig = FilterConfig { fc_mapListSize :: MapListSize
, fc_inclusionSize :: InclusionSize
, fc_sampleBins :: SampleBins
, fc_clusters :: Clusters
, fc_defaultValue :: DefaultValue
}
data FilterConfig = FilterConfig
{ fc_mapListSize :: MapListSize
, fc_inclusionSize :: InclusionSize
, fc_sampleBins :: SampleBins
, fc_clusters :: Clusters
, fc_defaultValue :: DefaultValue
}
filterCooc :: (Show t, Ord t) => FilterConfig -> Map (t, t) Int -> Map (t, t) Int
filterCooc fc cc = (filterCooc' fc) ts cc
where
ts = map _scored_terms $ takeSome fc $ coocScored cc
filterCooc' :: (Show t, Ord t) => FilterConfig -> [t] -> Map (t, t) Int -> Map (t, t) Int
filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m =
-- trace ("coocScored " <> show ts) $
foldl' (\m' k -> M.insert k (maybe dv identity $ M.lookup k m) m')
M.empty selection
where
selection = [(x,y) | x <- ts
, y <- ts
, x > y
]
M.empty (listToCombi identity ts)
-- | Map list creation
......@@ -70,7 +67,8 @@ filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m =
takeSome :: Ord t => FilterConfig -> [Scored t] -> [Scored t]
takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Clusters _) _) scores = L.take l
$ takeSample n m
$ L.take l' $ reverse $ sortWith (Down . _scored_incExc) scores
$ L.take l'
$ reverse $ sortWith (Down . _scored_incExc) scores
-- splitKmeans k scores
where
-- TODO: benchmark with accelerate-example kmeans version
......@@ -90,16 +88,17 @@ takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Cluste
$ sortWith (Down . _scored_speGen) xs
data Scored ts = Scored { _scored_terms :: !ts
, _scored_incExc :: !InclusionExclusion
, _scored_speGen :: !SpecificityGenericity
} deriving (Show)
data Scored ts = Scored
{ _scored_terms :: !ts
, _scored_incExc :: !InclusionExclusion
, _scored_speGen :: !SpecificityGenericity
} deriving (Show)
-- TODO in the textflow we end up needing these indices, it might be better
-- to compute them earlier and pass them around.
coocScored :: Ord t => Map (t,t) Int -> [Scored t]
coocScored m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (M.toList fi) scores
where
(ti,fi) = createIndices m
(ti, fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat ti m
scores = DAA.toList $ DAA.run $ DAA.zip (DAA.use is) (DAA.use ss)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment