Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
200
Issues
200
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
a26bdc84
Unverified
Commit
a26bdc84
authored
Mar 07, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev' of
https://gitlab.iscpif.fr/gargantext/haskell-gargantext
into dev
parents
ad14d93d
e0f3433b
Pipeline
#263
failed with stage
Changes
9
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
183 additions
and
105 deletions
+183
-105
Main.hs
bin/gargantext-import/Main.hs
+1
-1
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+0
-17
Tools.hs
src/Gargantext/API/Ngrams/Tools.hs
+97
-0
Node.hs
src/Gargantext/API/Node.hs
+34
-43
Flow.hs
src/Gargantext/Database/Flow.hs
+26
-6
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+0
-12
Flow.hs
src/Gargantext/Text/Flow.hs
+6
-6
List.hs
src/Gargantext/Text/List.hs
+2
-2
Metrics.hs
src/Gargantext/Text/Metrics.hs
+17
-18
No files found.
bin/gargantext-import/Main.hs
View file @
a26bdc84
...
...
@@ -40,7 +40,7 @@ main = do
-}
let
cmdCorpus
::
forall
m
.
FlowCmdM
DevEnv
ServantErr
m
=>
m
CorpusId
cmdCorpus
=
flowCorpus
(
cs
user
)
CsvHalFormat
corpusPath
(
cs
name
)
cmdCorpus
=
flowCorpus
(
cs
user
)
(
cs
name
)
CsvHalFormat
corpusPath
-- cmd = {-createUsers >>-} cmdCorpus
...
...
src/Gargantext/API/Ngrams.hs
View file @
a26bdc84
...
...
@@ -885,23 +885,6 @@ getNgramsTableMap nodeId ngramsType = do
pure
$
Versioned
(
repo
^.
r_version
)
(
repo
^.
r_state
.
at
ngramsType
.
_Just
.
at
nodeId
.
_Just
)
-- UNUSED
_getListNgrams
::
RepoCmdM
env
err
m
=>
[
NodeId
]
->
NgramsType
->
m
(
Versioned
ListNgrams
)
_getListNgrams
nodeIds
ngramsType
=
do
v
<-
view
repoVar
repo
<-
liftIO
$
readMVar
v
let
ngramsMap
=
repo
^.
r_state
.
at
ngramsType
.
_Just
ngrams
=
Map
.
unionsWith
mergeNgramsElement
[
ngramsMap
^.
at
nodeId
.
_Just
|
nodeId
<-
nodeIds
]
pure
$
Versioned
(
repo
^.
r_version
)
$
NgramsTable
(
ngramsElementFromRepo
<$>
Map
.
toList
ngrams
)
type
MinSize
=
Int
type
MaxSize
=
Int
...
...
src/Gargantext/API/Ngrams/Tools.hs
0 → 100644
View file @
a26bdc84
{-|
Module : Gargantext.API.Ngrams.Tools
Description : Tools to manage Ngrams Elements (from the API)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
module
Gargantext.API.Ngrams.Tools
where
import
Control.Concurrent
import
Control.Lens
(
_Just
,
(
^.
),
at
,
view
)
import
Control.Monad.Reader
import
Data.Map.Strict
(
Map
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
import
Data.Validity
import
Gargantext.API.Ngrams
import
Gargantext.Core.Types
(
ListType
(
..
),
NodeId
,
ListId
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
)
import
Gargantext.Prelude
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
type
RootTerm
=
Text
getListNgrams
::
RepoCmdM
env
err
m
=>
[
ListId
]
->
NgramsType
->
m
(
Map
Text
NgramsRepoElement
)
getListNgrams
nodeIds
ngramsType
=
do
v
<-
view
repoVar
repo
<-
liftIO
$
readMVar
v
let
ngramsMap
=
repo
^.
r_state
.
at
ngramsType
.
_Just
ngrams
=
Map
.
unionsWith
mergeNgramsElement
[
ngramsMap
^.
at
nodeId
.
_Just
|
nodeId
<-
nodeIds
]
pure
ngrams
mapTermListRoot
::
RepoCmdM
env
err
m
=>
[
ListId
]
->
NgramsType
->
m
(
Map
Text
(
ListType
,
(
Maybe
Text
)))
mapTermListRoot
nodeIds
ngramsType
=
do
ngrams
<-
getListNgrams
nodeIds
ngramsType
pure
$
Map
.
fromList
[(
t
,
(
_nre_list
nre
,
_nre_root
nre
))
|
(
t
,
nre
)
<-
Map
.
toList
ngrams
]
filterListWithRoot
::
ListType
->
Map
Text
(
ListType
,
Maybe
Text
)
->
Map
Text
(
Maybe
RootTerm
)
filterListWithRoot
lt
m
=
Map
.
fromList
$
map
(
\
(
t
,(
_
,
r
))
->
(
t
,
r
))
$
filter
isGraphTerm
(
Map
.
toList
m
)
where
isGraphTerm
(
_t
,(
l
,
maybeRoot
))
=
case
maybeRoot
of
Nothing
->
l
==
lt
Just
r
->
case
Map
.
lookup
r
m
of
Nothing
->
panic
$
"Garg.API.Ngrams.Tools: filterWithRoot, unknown key: "
<>
r
Just
(
l'
,
_
)
->
l'
==
lt
groupNodesByNgrams
::
Map
Text
(
Maybe
RootTerm
)
->
Map
Text
(
Set
NodeId
)
->
Map
Text
(
Set
NodeId
)
groupNodesByNgrams
syn
occs
=
Map
.
fromListWith
(
<>
)
occs'
where
occs'
=
map
toSyn
(
Map
.
toList
occs
)
toSyn
(
t
,
ns
)
=
case
Map
.
lookup
t
syn
of
Nothing
->
panic
$
"Garg.API.Ngrams.Tools: groupNodesByNgrams, unknown key: "
<>
t
Just
r
->
case
r
of
Nothing
->
(
t
,
ns
)
Just
r'
->
(
r'
,
ns
)
getCoocByNgrams
::
Map
Text
(
Set
NodeId
)
->
Map
(
Text
,
Text
)
Int
getCoocByNgrams
m
=
Map
.
fromList
[((
t1
,
t2
)
,
maybe
0
Set
.
size
$
Set
.
intersection
<$>
Map
.
lookup
t1
m
<*>
Map
.
lookup
t2
m
)
|
(
t1
,
t2
)
<-
listToCombi
identity
$
Map
.
keys
m
]
src/Gargantext/API/Node.hs
View file @
a26bdc84
...
...
@@ -10,7 +10,7 @@ Portability : POSIX
Node API
-}
{-# OPTIONS_GHC -fno-warn-
name-shadowing -fno-warn-
orphans #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
...
...
@@ -21,7 +21,6 @@ Node API
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
-------------------------------------------------------------------
module
Gargantext.API.Node
(
module
Gargantext
.
API
.
Node
,
HyperdataAny
(
..
)
...
...
@@ -32,47 +31,41 @@ module Gargantext.API.Node
,
HyperdataDocument
(
..
)
,
HyperdataDocumentV3
(
..
)
)
where
-------------------------------------------------------------------
import
Control.Lens
(
prism'
,
set
)
import
Control.Monad.IO.Class
(
liftIO
)
import
Control.Monad
((
>>
))
--import System.IO (putStrLn, readFile)
import
qualified
Data.Map
as
Map
import
Control.Monad.IO.Class
(
liftIO
)
import
Data.Aeson
(
FromJSON
,
ToJSON
)
import
Data.Text
(
Text
())
import
Data.Swagger
import
Data.Text
(
Text
())
import
Data.Time
(
UTCTime
)
import
Debug.Trace
(
trace
)
import
GHC.Generics
(
Generic
)
import
Servant
import
Gargantext.API.Ngrams
(
TabType
(
..
),
TableNgramsApi
,
TableNgramsApiGet
,
tableNgramsPatch
,
getTableNgrams
,
HasRepo
)
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
import
Gargantext.Database.Utils
-- (Cmd, CmdM)
import
Gargantext.Database.Schema.Node
(
getNodesWithParentId
,
getNode
,
deleteNode
,
deleteNodes
,
mkNodeWithParent
,
JSONB
,
NodeError
(
..
),
HasNodeError
(
..
))
import
Gargantext.Database.Node.Children
(
getChildren
)
import
qualified
Gargantext.Database.Node.Update
as
U
(
update
,
Update
(
..
))
import
Gargantext.API.Ngrams.Tools
import
Gargantext.API.Search
(
SearchAPI
,
searchIn
,
SearchInQuery
)
import
Gargantext.Core.Types
(
Offset
,
Limit
,
ListType
(
..
))
import
Gargantext.Core.Types.Main
(
Tree
,
NodeTree
)
import
Gargantext.Database.Facet
(
FacetDoc
,
runViewDocuments
,
OrderBy
(
..
),
FacetChart
,
runViewAuthorsDoc
)
import
Gargantext.Database.Tree
(
treeDB
,
HasTreeError
(
..
),
TreeError
(
..
))
import
Gargantext.Database.Metrics.Count
(
getNgramsByNode
)
import
Gargantext.Database.Metrics.NgramsByNode
(
getNodesByNgramsOnlyUser
)
import
Gargantext.Database.Node.Children
(
getChildren
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Node
(
getNodesWithParentId
,
getNode
,
deleteNode
,
deleteNodes
,
mkNodeWithParent
,
JSONB
,
NodeError
(
..
),
HasNodeError
(
..
))
import
Gargantext.Database.Schema.Node
(
defaultList
)
import
Gargantext.Database.Schema.NodeNode
(
nodesToFavorite
,
nodesToTrash
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.API.Search
(
SearchAPI
,
searchIn
,
SearchInQuery
)
import
Gargantext.Text.Metrics.Count
(
coocOn
)
-- Graph
import
Gargantext.Database.Tree
(
treeDB
,
HasTreeError
(
..
),
TreeError
(
..
))
import
Gargantext.Database.Types.Node
import
Gargantext.Database.Types.Node
(
CorpusId
,
ContactId
)
import
Gargantext.Database.Utils
-- (Cmd, CmdM)
import
Gargantext.Prelude
import
Gargantext.Text.Flow
(
cooc2graph
)
import
Gargantext.Viz.Graph
hiding
(
Node
)
-- (Graph(_graph_metadata),LegendField(..), GraphMetadata(..),readGraphFromJson,defaultGraph)
-- import Gargantext.Core (Lang(..))
import
Gargantext.Core.Types
(
Offset
,
Limit
)
import
Gargantext.Core.Types.Main
(
Tree
,
NodeTree
)
import
Gargantext.Database.Types.Node
(
CorpusId
,
ContactId
)
-- import Gargantext.Text.Terms (TermType(..))
import
Servant
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
(
Arbitrary
,
arbitrary
)
import
qualified
Data.Map
as
Map
import
qualified
Gargantext.Database.Node.Update
as
U
(
update
,
Update
(
..
))
type
GargServer
api
=
forall
env
m
.
...
...
@@ -281,7 +274,6 @@ type GraphAPI = Get '[JSON] Graph
graphAPI
::
NodeId
->
GargServer
GraphAPI
graphAPI
nId
=
do
nodeGraph
<-
getNode
nId
HyperdataGraph
let
metadata
=
GraphMetadata
"Title"
[
maybe
0
identity
$
_node_parentId
nodeGraph
]
...
...
@@ -290,17 +282,16 @@ graphAPI nId = do
]
-- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10])
let
cId
=
maybe
(
panic
"no parentId"
)
identity
$
_node_parentId
nodeGraph
_lId
<-
defaultList
cId
-- lId' <- listsWith masterUser
--myCooc <- getCoocByDocDev cId lId -- (lid' <> [lId])
myCooc
<-
Map
.
filter
(
>
2
)
<$>
coocOn
identity
<$>
getNgramsByNode
cId
NgramsTerms
liftIO
$
set
graph_metadata
(
Just
metadata
)
<$>
cooc2graph
myCooc
-- <$> maybe defaultGraph identity
-- <$> readGraphFromJson "purescript-gargantext/dist/examples/imtNew.json"
-- t <- textFlow (Mono EN) (Contexts contextText)
-- liftIO $ liftIO $ pure $ maybe t identity maybeGraph
-- TODO what do we get about the node? to replace contextText
lId
<-
defaultList
cId
ngs
<-
filterListWithRoot
GraphTerm
<$>
mapTermListRoot
[
lId
]
NgramsTerms
myCooc
<-
Map
.
filter
(
>
1
)
<$>
getCoocByNgrams
<$>
groupNodesByNgrams
ngs
<$>
getNodesByNgramsOnlyUser
cId
NgramsTerms
(
Map
.
keys
ngs
)
liftIO
$
trace
(
show
myCooc
)
$
set
graph_metadata
(
Just
metadata
)
<$>
cooc2graph
myCooc
instance
HasNodeError
ServantErr
where
_NodeError
=
prism'
mk
(
const
Nothing
)
-- $ panic "HasNodeError ServantErr: not a prism")
...
...
@@ -337,7 +328,7 @@ treeAPI = treeDB
------------------------------------------------------------------------
-- | Check if the name is less than 255 char
rename
::
NodeId
->
RenameNode
->
Cmd
err
[
Int
]
rename
nId
(
RenameNode
name
)
=
U
.
update
(
U
.
Rename
nId
name
)
rename
nId
(
RenameNode
name
'
)
=
U
.
update
(
U
.
Rename
nId
name'
)
getTable
::
NodeId
->
Maybe
TabType
->
Maybe
Offset
->
Maybe
Limit
...
...
@@ -361,7 +352,7 @@ getChart :: NodeId -> Maybe UTCTime -> Maybe UTCTime
getChart
_
_
_
=
undefined
-- TODO
postNode
::
HasNodeError
err
=>
UserId
->
NodeId
->
PostNode
->
Cmd
err
[
NodeId
]
postNode
uId
pId
(
PostNode
n
ame
nt
)
=
mkNodeWithParent
nt
(
Just
pId
)
uId
n
ame
postNode
uId
pId
(
PostNode
n
odeName
nt
)
=
mkNodeWithParent
nt
(
Just
pId
)
uId
nodeN
ame
putNode
::
NodeId
->
Cmd
err
Int
putNode
=
undefined
-- TODO
...
...
src/Gargantext/Database/Flow.hs
View file @
a26bdc84
...
...
@@ -46,6 +46,7 @@ import Gargantext.Core (Lang(..))
import
Gargantext.Core.Types
(
NodePoly
(
..
),
Terms
(
..
))
import
Gargantext.Core.Types.Individu
(
Username
)
import
Gargantext.Core.Types.Main
import
Gargantext.Database.TextSearch
(
searchInDatabase
)
import
Gargantext.Database.Config
(
userMaster
,
corpusMasterName
)
import
Gargantext.Database.Flow.Utils
(
insertToNodeNgrams
)
import
Gargantext.Database.Node.Document.Insert
-- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
...
...
@@ -61,6 +62,7 @@ import Gargantext.Text.List
import
Gargantext.Text.Parsers
(
parseDocs
,
FileFormat
)
import
Gargantext.Text.Terms
(
TermType
(
..
))
import
Gargantext.Text.Terms
(
extractTerms
)
import
Gargantext.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Servant
(
ServantErr
)
import
System.FilePath
(
FilePath
)
import
qualified
Data.Map
as
DM
...
...
@@ -76,8 +78,21 @@ type FlowCmdM env err m =
flowCorpus
::
FlowCmdM
env
ServantErr
m
=>
Username
->
FileFormat
->
FilePath
->
CorpusName
->
m
CorpusId
flowCorpus
userName
ff
fp
corpusName
=
do
=>
Username
->
CorpusName
->
FileFormat
->
FilePath
->
m
CorpusId
flowCorpus
u
cn
ff
fp
=
do
ids
<-
flowCorpusMaster
ff
fp
flowCorpusUser
u
cn
ids
flowCorpusSearchInDatabase
::
FlowCmdM
env
ServantErr
m
=>
Username
->
Text
->
m
CorpusId
flowCorpusSearchInDatabase
u
q
=
do
(
_masterUserId
,
_masterRootId
,
cId
)
<-
getOrMkRootWithCorpus
userMaster
""
ids
<-
chunkAlong
10000
10000
<$>
map
fst
<$>
searchInDatabase
cId
(
stemIt
q
)
flowCorpusUser
u
q
ids
flowCorpusMaster
::
FlowCmdM
env
ServantErr
m
=>
FileFormat
->
FilePath
->
m
[[
NodeId
]]
flowCorpusMaster
ff
fp
=
do
-- Master Flow
docs
<-
map
addUniqIdsDoc
<$>
liftIO
(
parseDocs
ff
fp
)
...
...
@@ -90,7 +105,11 @@ flowCorpus userName ff fp corpusName = do
-- TODO: chunkAlongNoRest or chunkAlongWithRest
-- default behavior: NoRest
ids
<-
mapM
insertMasterDocs
$
chunkAlong
10000
10000
docs
pure
ids
flowCorpusUser
::
FlowCmdM
env
ServantErr
m
=>
Username
->
CorpusName
->
[[
NodeId
]]
->
m
CorpusId
flowCorpusUser
userName
corpusName
ids
=
do
-- User Flow
(
userId
,
_rootId
,
userCorpusId
)
<-
getOrMkRootWithCorpus
userName
corpusName
-- TODO: check if present already, ignore
...
...
@@ -280,11 +299,12 @@ mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f
nId
=
documentId
$
documentWithId
d
------------------------------------------------------------------------
flowListBase
::
FlowCmdM
env
err
m
listInsert
::
FlowCmdM
env
err
m
=>
ListId
->
Map
NgramsType
[
NgramsElement
]
->
m
()
flowListBase
lId
ngs
=
do
mapM_
(
\
(
typeList
,
ngElmts
)
->
putListNgrams
lId
typeList
ngElmts
)
$
toList
ngs
listInsert
lId
ngs
=
mapM_
(
\
(
typeList
,
ngElmts
)
->
putListNgrams
lId
typeList
ngElmts
)
$
toList
ngs
flowList
::
FlowCmdM
env
err
m
=>
UserId
->
CorpusId
->
Map
NgramsType
[
NgramsElement
]
...
...
@@ -292,7 +312,7 @@ flowList :: FlowCmdM env err m => UserId -> CorpusId
flowList
uId
cId
ngs
=
do
lId
<-
getOrMkList
cId
uId
printDebug
"listId flowList"
lId
flowListBase
lId
ngs
listInsert
lId
ngs
pure
lId
...
...
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
a26bdc84
...
...
@@ -137,18 +137,6 @@ getOccByNgramsOnly :: CorpusId -> NgramsType -> [Text]
getOccByNgramsOnly
cId
nt
ngs
=
Map
.
map
Set
.
size
<$>
getNodesByNgramsOnlyUser
cId
nt
ngs
-- TODO add groups
getCoocByNgramsOnly
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
(
Text
,
Text
)
Int
)
getCoocByNgramsOnly
cId
nt
ngs
=
do
ngs'
<-
getNodesByNgramsOnlyUser
cId
nt
ngs
pure
$
Map
.
fromList
[(
(
t1
,
t2
)
,
maybe
0
Set
.
size
$
Set
.
intersection
<$>
Map
.
lookup
t1
ngs'
<*>
Map
.
lookup
t2
ngs'
)
|
(
t1
,
t2
)
<-
listToCombi
identity
$
Map
.
keys
ngs'
]
getNodesByNgramsOnlyUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsOnlyUser
cId
nt
ngs
=
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
))
<$>
selectNgramsOnlyByNodeUser
cId
nt
ngs
...
...
src/Gargantext/Text/Flow.hs
View file @
a26bdc84
...
...
@@ -36,7 +36,7 @@ import Gargantext.Core.Types (CorpusId)
--import Gargantext.Database.Types.Node
import
Gargantext.Prelude
--import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import
Gargantext.Text.Metrics
(
filterCooc
,
FilterConfig
(
..
),
Clusters
(
..
),
SampleBins
(
..
),
DefaultValue
(
..
),
MapListSize
(
..
),
InclusionSize
(
..
))
--
import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..))
--import Gargantext.Text.Metrics.Count (coocOn)
--import Gargantext.Text.Parsers.CSV
--import Gargantext.Text.Terms (TermType, extractTerms)
...
...
@@ -120,21 +120,21 @@ cooc2graph :: (Map (Text, Text) Int) -> IO Graph
cooc2graph
myCooc
=
do
--printDebug "myCooc" myCooc
-- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores
{-
let myCooc3 = filterCooc ( FilterConfig (MapListSize 350 )
(InclusionSize 500 )
(SampleBins 10 )
(Clusters 3 )
(DefaultValue 0 )
) myCooc
--printDebug "myCooc3 size" $ M.size myCooc3
--printDebug "myCooc3" myCooc3
--}
--printDebug "myCooc3 size" $ M.size myCooc3
-- Cooc -> Matrix
let
(
ti
,
_
)
=
createIndices
myCooc
3
let
(
ti
,
_
)
=
createIndices
myCooc
--printDebug "ti size" $ M.size ti
--printDebug "ti" ti
let
myCooc4
=
toIndex
ti
myCooc
3
let
myCooc4
=
toIndex
ti
myCooc
--printDebug "myCooc4 size" $ M.size myCooc4
--printDebug "myCooc4" myCooc4
...
...
@@ -153,7 +153,7 @@ cooc2graph myCooc = do
--printDebug "distanceMap size" $ M.size distanceMap
--printDebug "distanceMap" distanceMap
--
let distance = fromIndex fi distanceMap
--
let distance = fromIndex fi distanceMap
--printDebug "distance" $ M.size distance
partitions
<-
case
Map
.
size
distanceMap
>
0
of
...
...
src/Gargantext/Text/List.hs
View file @
a26bdc84
...
...
@@ -106,8 +106,8 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
ys
=
take
b
$
drop
a
ns
zs
=
drop
b
$
drop
a
ns
a
=
1
00
b
=
1000
a
=
1
b
=
1000
0
isStopTerm
::
Text
->
Bool
isStopTerm
x
=
Text
.
length
x
<
3
...
...
src/Gargantext/Text/Metrics.hs
View file @
a26bdc84
...
...
@@ -38,28 +38,25 @@ data SampleBins = SampleBins Double
data
Clusters
=
Clusters
Int
data
DefaultValue
=
DefaultValue
Int
data
FilterConfig
=
FilterConfig
{
fc_mapListSize
::
MapListSize
,
fc_inclusionSize
::
InclusionSize
,
fc_sampleBins
::
SampleBins
,
fc_clusters
::
Clusters
,
fc_defaultValue
::
DefaultValue
}
data
FilterConfig
=
FilterConfig
{
fc_mapListSize
::
MapListSize
,
fc_inclusionSize
::
InclusionSize
,
fc_sampleBins
::
SampleBins
,
fc_clusters
::
Clusters
,
fc_defaultValue
::
DefaultValue
}
filterCooc
::
(
Show
t
,
Ord
t
)
=>
FilterConfig
->
Map
(
t
,
t
)
Int
->
Map
(
t
,
t
)
Int
filterCooc
fc
cc
=
(
filterCooc'
fc
)
ts
cc
where
ts
=
map
_scored_terms
$
takeSome
fc
$
coocScored
cc
filterCooc'
::
(
Show
t
,
Ord
t
)
=>
FilterConfig
->
[
t
]
->
Map
(
t
,
t
)
Int
->
Map
(
t
,
t
)
Int
filterCooc'
(
FilterConfig
_
_
_
_
(
DefaultValue
dv
))
ts
m
=
-- trace ("coocScored " <> show ts) $
foldl'
(
\
m'
k
->
M
.
insert
k
(
maybe
dv
identity
$
M
.
lookup
k
m
)
m'
)
M
.
empty
selection
where
selection
=
[(
x
,
y
)
|
x
<-
ts
,
y
<-
ts
,
x
>
y
]
M
.
empty
(
listToCombi
identity
ts
)
-- | Map list creation
...
...
@@ -70,7 +67,8 @@ filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m =
takeSome
::
Ord
t
=>
FilterConfig
->
[
Scored
t
]
->
[
Scored
t
]
takeSome
(
FilterConfig
(
MapListSize
l
)
(
InclusionSize
l'
)
(
SampleBins
s
)
(
Clusters
_
)
_
)
scores
=
L
.
take
l
$
takeSample
n
m
$
L
.
take
l'
$
reverse
$
sortWith
(
Down
.
_scored_incExc
)
scores
$
L
.
take
l'
$
reverse
$
sortWith
(
Down
.
_scored_incExc
)
scores
-- splitKmeans k scores
where
-- TODO: benchmark with accelerate-example kmeans version
...
...
@@ -90,16 +88,17 @@ takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Cluste
$
sortWith
(
Down
.
_scored_speGen
)
xs
data
Scored
ts
=
Scored
{
_scored_terms
::
!
ts
,
_scored_incExc
::
!
InclusionExclusion
,
_scored_speGen
::
!
SpecificityGenericity
}
deriving
(
Show
)
data
Scored
ts
=
Scored
{
_scored_terms
::
!
ts
,
_scored_incExc
::
!
InclusionExclusion
,
_scored_speGen
::
!
SpecificityGenericity
}
deriving
(
Show
)
-- TODO in the textflow we end up needing these indices, it might be better
-- to compute them earlier and pass them around.
coocScored
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
[
Scored
t
]
coocScored
m
=
zipWith
(
\
(
_
,
t
)
(
inc
,
spe
)
->
Scored
t
inc
spe
)
(
M
.
toList
fi
)
scores
where
(
ti
,
fi
)
=
createIndices
m
(
ti
,
fi
)
=
createIndices
m
(
is
,
ss
)
=
incExcSpeGen
$
cooc2mat
ti
m
scores
=
DAA
.
toList
$
DAA
.
run
$
DAA
.
zip
(
DAA
.
use
is
)
(
DAA
.
use
ss
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment