Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
161
Issues
161
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
a26bdc84
Unverified
Commit
a26bdc84
authored
Mar 07, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev' of
https://gitlab.iscpif.fr/gargantext/haskell-gargantext
into dev
parents
ad14d93d
e0f3433b
Pipeline
#263
failed with stage
Changes
9
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
183 additions
and
105 deletions
+183
-105
Main.hs
bin/gargantext-import/Main.hs
+1
-1
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+0
-17
Tools.hs
src/Gargantext/API/Ngrams/Tools.hs
+97
-0
Node.hs
src/Gargantext/API/Node.hs
+34
-43
Flow.hs
src/Gargantext/Database/Flow.hs
+26
-6
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+0
-12
Flow.hs
src/Gargantext/Text/Flow.hs
+6
-6
List.hs
src/Gargantext/Text/List.hs
+2
-2
Metrics.hs
src/Gargantext/Text/Metrics.hs
+17
-18
No files found.
bin/gargantext-import/Main.hs
View file @
a26bdc84
...
...
@@ -40,7 +40,7 @@ main = do
-}
let
cmdCorpus
::
forall
m
.
FlowCmdM
DevEnv
ServantErr
m
=>
m
CorpusId
cmdCorpus
=
flowCorpus
(
cs
user
)
CsvHalFormat
corpusPath
(
cs
name
)
cmdCorpus
=
flowCorpus
(
cs
user
)
(
cs
name
)
CsvHalFormat
corpusPath
-- cmd = {-createUsers >>-} cmdCorpus
...
...
src/Gargantext/API/Ngrams.hs
View file @
a26bdc84
...
...
@@ -885,23 +885,6 @@ getNgramsTableMap nodeId ngramsType = do
pure
$
Versioned
(
repo
^.
r_version
)
(
repo
^.
r_state
.
at
ngramsType
.
_Just
.
at
nodeId
.
_Just
)
-- UNUSED
_getListNgrams
::
RepoCmdM
env
err
m
=>
[
NodeId
]
->
NgramsType
->
m
(
Versioned
ListNgrams
)
_getListNgrams
nodeIds
ngramsType
=
do
v
<-
view
repoVar
repo
<-
liftIO
$
readMVar
v
let
ngramsMap
=
repo
^.
r_state
.
at
ngramsType
.
_Just
ngrams
=
Map
.
unionsWith
mergeNgramsElement
[
ngramsMap
^.
at
nodeId
.
_Just
|
nodeId
<-
nodeIds
]
pure
$
Versioned
(
repo
^.
r_version
)
$
NgramsTable
(
ngramsElementFromRepo
<$>
Map
.
toList
ngrams
)
type
MinSize
=
Int
type
MaxSize
=
Int
...
...
src/Gargantext/API/Ngrams/Tools.hs
0 → 100644
View file @
a26bdc84
{-|
Module : Gargantext.API.Ngrams.Tools
Description : Tools to manage Ngrams Elements (from the API)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
module
Gargantext.API.Ngrams.Tools
where
import
Control.Concurrent
import
Control.Lens
(
_Just
,
(
^.
),
at
,
view
)
import
Control.Monad.Reader
import
Data.Map.Strict
(
Map
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
import
Data.Validity
import
Gargantext.API.Ngrams
import
Gargantext.Core.Types
(
ListType
(
..
),
NodeId
,
ListId
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
)
import
Gargantext.Prelude
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
type
RootTerm
=
Text
getListNgrams
::
RepoCmdM
env
err
m
=>
[
ListId
]
->
NgramsType
->
m
(
Map
Text
NgramsRepoElement
)
getListNgrams
nodeIds
ngramsType
=
do
v
<-
view
repoVar
repo
<-
liftIO
$
readMVar
v
let
ngramsMap
=
repo
^.
r_state
.
at
ngramsType
.
_Just
ngrams
=
Map
.
unionsWith
mergeNgramsElement
[
ngramsMap
^.
at
nodeId
.
_Just
|
nodeId
<-
nodeIds
]
pure
ngrams
mapTermListRoot
::
RepoCmdM
env
err
m
=>
[
ListId
]
->
NgramsType
->
m
(
Map
Text
(
ListType
,
(
Maybe
Text
)))
mapTermListRoot
nodeIds
ngramsType
=
do
ngrams
<-
getListNgrams
nodeIds
ngramsType
pure
$
Map
.
fromList
[(
t
,
(
_nre_list
nre
,
_nre_root
nre
))
|
(
t
,
nre
)
<-
Map
.
toList
ngrams
]
filterListWithRoot
::
ListType
->
Map
Text
(
ListType
,
Maybe
Text
)
->
Map
Text
(
Maybe
RootTerm
)
filterListWithRoot
lt
m
=
Map
.
fromList
$
map
(
\
(
t
,(
_
,
r
))
->
(
t
,
r
))
$
filter
isGraphTerm
(
Map
.
toList
m
)
where
isGraphTerm
(
_t
,(
l
,
maybeRoot
))
=
case
maybeRoot
of
Nothing
->
l
==
lt
Just
r
->
case
Map
.
lookup
r
m
of
Nothing
->
panic
$
"Garg.API.Ngrams.Tools: filterWithRoot, unknown key: "
<>
r
Just
(
l'
,
_
)
->
l'
==
lt
groupNodesByNgrams
::
Map
Text
(
Maybe
RootTerm
)
->
Map
Text
(
Set
NodeId
)
->
Map
Text
(
Set
NodeId
)
groupNodesByNgrams
syn
occs
=
Map
.
fromListWith
(
<>
)
occs'
where
occs'
=
map
toSyn
(
Map
.
toList
occs
)
toSyn
(
t
,
ns
)
=
case
Map
.
lookup
t
syn
of
Nothing
->
panic
$
"Garg.API.Ngrams.Tools: groupNodesByNgrams, unknown key: "
<>
t
Just
r
->
case
r
of
Nothing
->
(
t
,
ns
)
Just
r'
->
(
r'
,
ns
)
getCoocByNgrams
::
Map
Text
(
Set
NodeId
)
->
Map
(
Text
,
Text
)
Int
getCoocByNgrams
m
=
Map
.
fromList
[((
t1
,
t2
)
,
maybe
0
Set
.
size
$
Set
.
intersection
<$>
Map
.
lookup
t1
m
<*>
Map
.
lookup
t2
m
)
|
(
t1
,
t2
)
<-
listToCombi
identity
$
Map
.
keys
m
]
src/Gargantext/API/Node.hs
View file @
a26bdc84
...
...
@@ -10,7 +10,7 @@ Portability : POSIX
Node API
-}
{-# OPTIONS_GHC -fno-warn-
name-shadowing -fno-warn-
orphans #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
...
...
@@ -21,7 +21,6 @@ Node API
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
-------------------------------------------------------------------
module
Gargantext.API.Node
(
module
Gargantext
.
API
.
Node
,
HyperdataAny
(
..
)
...
...
@@ -32,47 +31,41 @@ module Gargantext.API.Node
,
HyperdataDocument
(
..
)
,
HyperdataDocumentV3
(
..
)
)
where
-------------------------------------------------------------------
import
Control.Lens
(
prism'
,
set
)
import
Control.Monad.IO.Class
(
liftIO
)
import
Control.Monad
((
>>
))
--import System.IO (putStrLn, readFile)
import
qualified
Data.Map
as
Map
import
Control.Monad.IO.Class
(
liftIO
)
import
Data.Aeson
(
FromJSON
,
ToJSON
)
import
Data.Text
(
Text
())
import
Data.Swagger
import
Data.Text
(
Text
())
import
Data.Time
(
UTCTime
)
import
Debug.Trace
(
trace
)
import
GHC.Generics
(
Generic
)
import
Servant
import
Gargantext.API.Ngrams
(
TabType
(
..
),
TableNgramsApi
,
TableNgramsApiGet
,
tableNgramsPatch
,
getTableNgrams
,
HasRepo
)
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
import
Gargantext.Database.Utils
-- (Cmd, CmdM)
import
Gargantext.Database.Schema.Node
(
getNodesWithParentId
,
getNode
,
deleteNode
,
deleteNodes
,
mkNodeWithParent
,
JSONB
,
NodeError
(
..
),
HasNodeError
(
..
))
import
Gargantext.Database.Node.Children
(
getChildren
)
import
qualified
Gargantext.Database.Node.Update
as
U
(
update
,
Update
(
..
))
import
Gargantext.API.Ngrams.Tools
import
Gargantext.API.Search
(
SearchAPI
,
searchIn
,
SearchInQuery
)
import
Gargantext.Core.Types
(
Offset
,
Limit
,
ListType
(
..
))
import
Gargantext.Core.Types.Main
(
Tree
,
NodeTree
)
import
Gargantext.Database.Facet
(
FacetDoc
,
runViewDocuments
,
OrderBy
(
..
),
FacetChart
,
runViewAuthorsDoc
)
import
Gargantext.Database.Tree
(
treeDB
,
HasTreeError
(
..
),
TreeError
(
..
))
import
Gargantext.Database.Metrics.Count
(
getNgramsByNode
)
import
Gargantext.Database.Metrics.NgramsByNode
(
getNodesByNgramsOnlyUser
)
import
Gargantext.Database.Node.Children
(
getChildren
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Node
(
getNodesWithParentId
,
getNode
,
deleteNode
,
deleteNodes
,
mkNodeWithParent
,
JSONB
,
NodeError
(
..
),
HasNodeError
(
..
))
import
Gargantext.Database.Schema.Node
(
defaultList
)
import
Gargantext.Database.Schema.NodeNode
(
nodesToFavorite
,
nodesToTrash
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.API.Search
(
SearchAPI
,
searchIn
,
SearchInQuery
)
import
Gargantext.Text.Metrics.Count
(
coocOn
)
-- Graph
import
Gargantext.Database.Tree
(
treeDB
,
HasTreeError
(
..
),
TreeError
(
..
))
import
Gargantext.Database.Types.Node
import
Gargantext.Database.Types.Node
(
CorpusId
,
ContactId
)
import
Gargantext.Database.Utils
-- (Cmd, CmdM)
import
Gargantext.Prelude
import
Gargantext.Text.Flow
(
cooc2graph
)
import
Gargantext.Viz.Graph
hiding
(
Node
)
-- (Graph(_graph_metadata),LegendField(..), GraphMetadata(..),readGraphFromJson,defaultGraph)
-- import Gargantext.Core (Lang(..))
import
Gargantext.Core.Types
(
Offset
,
Limit
)
import
Gargantext.Core.Types.Main
(
Tree
,
NodeTree
)
import
Gargantext.Database.Types.Node
(
CorpusId
,
ContactId
)
-- import Gargantext.Text.Terms (TermType(..))
import
Servant
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
(
Arbitrary
,
arbitrary
)
import
qualified
Data.Map
as
Map
import
qualified
Gargantext.Database.Node.Update
as
U
(
update
,
Update
(
..
))
type
GargServer
api
=
forall
env
m
.
...
...
@@ -281,7 +274,6 @@ type GraphAPI = Get '[JSON] Graph
graphAPI
::
NodeId
->
GargServer
GraphAPI
graphAPI
nId
=
do
nodeGraph
<-
getNode
nId
HyperdataGraph
let
metadata
=
GraphMetadata
"Title"
[
maybe
0
identity
$
_node_parentId
nodeGraph
]
...
...
@@ -290,17 +282,16 @@ graphAPI nId = do
]
-- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10])
let
cId
=
maybe
(
panic
"no parentId"
)
identity
$
_node_parentId
nodeGraph
_lId
<-
defaultList
cId
-- lId' <- listsWith masterUser
--myCooc <- getCoocByDocDev cId lId -- (lid' <> [lId])
myCooc
<-
Map
.
filter
(
>
2
)
<$>
coocOn
identity
<$>
getNgramsByNode
cId
NgramsTerms
liftIO
$
set
graph_metadata
(
Just
metadata
)
<$>
cooc2graph
myCooc
-- <$> maybe defaultGraph identity
-- <$> readGraphFromJson "purescript-gargantext/dist/examples/imtNew.json"
-- t <- textFlow (Mono EN) (Contexts contextText)
-- liftIO $ liftIO $ pure $ maybe t identity maybeGraph
-- TODO what do we get about the node? to replace contextText
lId
<-
defaultList
cId
ngs
<-
filterListWithRoot
GraphTerm
<$>
mapTermListRoot
[
lId
]
NgramsTerms
myCooc
<-
Map
.
filter
(
>
1
)
<$>
getCoocByNgrams
<$>
groupNodesByNgrams
ngs
<$>
getNodesByNgramsOnlyUser
cId
NgramsTerms
(
Map
.
keys
ngs
)
liftIO
$
trace
(
show
myCooc
)
$
set
graph_metadata
(
Just
metadata
)
<$>
cooc2graph
myCooc
instance
HasNodeError
ServantErr
where
_NodeError
=
prism'
mk
(
const
Nothing
)
-- $ panic "HasNodeError ServantErr: not a prism")
...
...
@@ -337,7 +328,7 @@ treeAPI = treeDB
------------------------------------------------------------------------
-- | Check if the name is less than 255 char
rename
::
NodeId
->
RenameNode
->
Cmd
err
[
Int
]
rename
nId
(
RenameNode
name
)
=
U
.
update
(
U
.
Rename
nId
name
)
rename
nId
(
RenameNode
name
'
)
=
U
.
update
(
U
.
Rename
nId
name'
)
getTable
::
NodeId
->
Maybe
TabType
->
Maybe
Offset
->
Maybe
Limit
...
...
@@ -361,7 +352,7 @@ getChart :: NodeId -> Maybe UTCTime -> Maybe UTCTime
getChart
_
_
_
=
undefined
-- TODO
postNode
::
HasNodeError
err
=>
UserId
->
NodeId
->
PostNode
->
Cmd
err
[
NodeId
]
postNode
uId
pId
(
PostNode
n
ame
nt
)
=
mkNodeWithParent
nt
(
Just
pId
)
uId
n
ame
postNode
uId
pId
(
PostNode
n
odeName
nt
)
=
mkNodeWithParent
nt
(
Just
pId
)
uId
nodeN
ame
putNode
::
NodeId
->
Cmd
err
Int
putNode
=
undefined
-- TODO
...
...
src/Gargantext/Database/Flow.hs
View file @
a26bdc84
...
...
@@ -46,6 +46,7 @@ import Gargantext.Core (Lang(..))
import
Gargantext.Core.Types
(
NodePoly
(
..
),
Terms
(
..
))
import
Gargantext.Core.Types.Individu
(
Username
)
import
Gargantext.Core.Types.Main
import
Gargantext.Database.TextSearch
(
searchInDatabase
)
import
Gargantext.Database.Config
(
userMaster
,
corpusMasterName
)
import
Gargantext.Database.Flow.Utils
(
insertToNodeNgrams
)
import
Gargantext.Database.Node.Document.Insert
-- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
...
...
@@ -61,6 +62,7 @@ import Gargantext.Text.List
import
Gargantext.Text.Parsers
(
parseDocs
,
FileFormat
)
import
Gargantext.Text.Terms
(
TermType
(
..
))
import
Gargantext.Text.Terms
(
extractTerms
)
import
Gargantext.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Servant
(
ServantErr
)
import
System.FilePath
(
FilePath
)
import
qualified
Data.Map
as
DM
...
...
@@ -76,8 +78,21 @@ type FlowCmdM env err m =
flowCorpus
::
FlowCmdM
env
ServantErr
m
=>
Username
->
FileFormat
->
FilePath
->
CorpusName
->
m
CorpusId
flowCorpus
userName
ff
fp
corpusName
=
do
=>
Username
->
CorpusName
->
FileFormat
->
FilePath
->
m
CorpusId
flowCorpus
u
cn
ff
fp
=
do
ids
<-
flowCorpusMaster
ff
fp
flowCorpusUser
u
cn
ids
flowCorpusSearchInDatabase
::
FlowCmdM
env
ServantErr
m
=>
Username
->
Text
->
m
CorpusId
flowCorpusSearchInDatabase
u
q
=
do
(
_masterUserId
,
_masterRootId
,
cId
)
<-
getOrMkRootWithCorpus
userMaster
""
ids
<-
chunkAlong
10000
10000
<$>
map
fst
<$>
searchInDatabase
cId
(
stemIt
q
)
flowCorpusUser
u
q
ids
flowCorpusMaster
::
FlowCmdM
env
ServantErr
m
=>
FileFormat
->
FilePath
->
m
[[
NodeId
]]
flowCorpusMaster
ff
fp
=
do
-- Master Flow
docs
<-
map
addUniqIdsDoc
<$>
liftIO
(
parseDocs
ff
fp
)
...
...
@@ -90,7 +105,11 @@ flowCorpus userName ff fp corpusName = do
-- TODO: chunkAlongNoRest or chunkAlongWithRest
-- default behavior: NoRest
ids
<-
mapM
insertMasterDocs
$
chunkAlong
10000
10000
docs
pure
ids
flowCorpusUser
::
FlowCmdM
env
ServantErr
m
=>
Username
->
CorpusName
->
[[
NodeId
]]
->
m
CorpusId
flowCorpusUser
userName
corpusName
ids
=
do
-- User Flow
(
userId
,
_rootId
,
userCorpusId
)
<-
getOrMkRootWithCorpus
userName
corpusName
-- TODO: check if present already, ignore
...
...
@@ -280,11 +299,12 @@ mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f
nId
=
documentId
$
documentWithId
d
------------------------------------------------------------------------
flowListBase
::
FlowCmdM
env
err
m
listInsert
::
FlowCmdM
env
err
m
=>
ListId
->
Map
NgramsType
[
NgramsElement
]
->
m
()
flowListBase
lId
ngs
=
do
mapM_
(
\
(
typeList
,
ngElmts
)
->
putListNgrams
lId
typeList
ngElmts
)
$
toList
ngs
listInsert
lId
ngs
=
mapM_
(
\
(
typeList
,
ngElmts
)
->
putListNgrams
lId
typeList
ngElmts
)
$
toList
ngs
flowList
::
FlowCmdM
env
err
m
=>
UserId
->
CorpusId
->
Map
NgramsType
[
NgramsElement
]
...
...
@@ -292,7 +312,7 @@ flowList :: FlowCmdM env err m => UserId -> CorpusId
flowList
uId
cId
ngs
=
do
lId
<-
getOrMkList
cId
uId
printDebug
"listId flowList"
lId
flowListBase
lId
ngs
listInsert
lId
ngs
pure
lId
...
...
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
a26bdc84
...
...
@@ -137,18 +137,6 @@ getOccByNgramsOnly :: CorpusId -> NgramsType -> [Text]
getOccByNgramsOnly
cId
nt
ngs
=
Map
.
map
Set
.
size
<$>
getNodesByNgramsOnlyUser
cId
nt
ngs
-- TODO add groups
getCoocByNgramsOnly
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
(
Text
,
Text
)
Int
)
getCoocByNgramsOnly
cId
nt
ngs
=
do
ngs'
<-
getNodesByNgramsOnlyUser
cId
nt
ngs
pure
$
Map
.
fromList
[(
(
t1
,
t2
)
,
maybe
0
Set
.
size
$
Set
.
intersection
<$>
Map
.
lookup
t1
ngs'
<*>
Map
.
lookup
t2
ngs'
)
|
(
t1
,
t2
)
<-
listToCombi
identity
$
Map
.
keys
ngs'
]
getNodesByNgramsOnlyUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsOnlyUser
cId
nt
ngs
=
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
))
<$>
selectNgramsOnlyByNodeUser
cId
nt
ngs
...
...
src/Gargantext/Text/Flow.hs
View file @
a26bdc84
...
...
@@ -36,7 +36,7 @@ import Gargantext.Core.Types (CorpusId)
--import Gargantext.Database.Types.Node
import
Gargantext.Prelude
--import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import
Gargantext.Text.Metrics
(
filterCooc
,
FilterConfig
(
..
),
Clusters
(
..
),
SampleBins
(
..
),
DefaultValue
(
..
),
MapListSize
(
..
),
InclusionSize
(
..
))
--
import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..))
--import Gargantext.Text.Metrics.Count (coocOn)
--import Gargantext.Text.Parsers.CSV
--import Gargantext.Text.Terms (TermType, extractTerms)
...
...
@@ -120,21 +120,21 @@ cooc2graph :: (Map (Text, Text) Int) -> IO Graph
cooc2graph
myCooc
=
do
--printDebug "myCooc" myCooc
-- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores
{-
let myCooc3 = filterCooc ( FilterConfig (MapListSize 350 )
(InclusionSize 500 )
(SampleBins 10 )
(Clusters 3 )
(DefaultValue 0 )
) myCooc
--printDebug "myCooc3 size" $ M.size myCooc3
--printDebug "myCooc3" myCooc3
--}
--printDebug "myCooc3 size" $ M.size myCooc3
-- Cooc -> Matrix
let
(
ti
,
_
)
=
createIndices
myCooc
3
let
(
ti
,
_
)
=
createIndices
myCooc
--printDebug "ti size" $ M.size ti
--printDebug "ti" ti
let
myCooc4
=
toIndex
ti
myCooc
3
let
myCooc4
=
toIndex
ti
myCooc
--printDebug "myCooc4 size" $ M.size myCooc4
--printDebug "myCooc4" myCooc4
...
...
@@ -153,7 +153,7 @@ cooc2graph myCooc = do
--printDebug "distanceMap size" $ M.size distanceMap
--printDebug "distanceMap" distanceMap
--
let distance = fromIndex fi distanceMap
--
let distance = fromIndex fi distanceMap
--printDebug "distance" $ M.size distance
partitions
<-
case
Map
.
size
distanceMap
>
0
of
...
...
src/Gargantext/Text/List.hs
View file @
a26bdc84
...
...
@@ -106,8 +106,8 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
ys
=
take
b
$
drop
a
ns
zs
=
drop
b
$
drop
a
ns
a
=
1
00
b
=
1000
a
=
1
b
=
1000
0
isStopTerm
::
Text
->
Bool
isStopTerm
x
=
Text
.
length
x
<
3
...
...
src/Gargantext/Text/Metrics.hs
View file @
a26bdc84
...
...
@@ -38,28 +38,25 @@ data SampleBins = SampleBins Double
data
Clusters
=
Clusters
Int
data
DefaultValue
=
DefaultValue
Int
data
FilterConfig
=
FilterConfig
{
fc_mapListSize
::
MapListSize
,
fc_inclusionSize
::
InclusionSize
,
fc_sampleBins
::
SampleBins
,
fc_clusters
::
Clusters
,
fc_defaultValue
::
DefaultValue
}
data
FilterConfig
=
FilterConfig
{
fc_mapListSize
::
MapListSize
,
fc_inclusionSize
::
InclusionSize
,
fc_sampleBins
::
SampleBins
,
fc_clusters
::
Clusters
,
fc_defaultValue
::
DefaultValue
}
filterCooc
::
(
Show
t
,
Ord
t
)
=>
FilterConfig
->
Map
(
t
,
t
)
Int
->
Map
(
t
,
t
)
Int
filterCooc
fc
cc
=
(
filterCooc'
fc
)
ts
cc
where
ts
=
map
_scored_terms
$
takeSome
fc
$
coocScored
cc
filterCooc'
::
(
Show
t
,
Ord
t
)
=>
FilterConfig
->
[
t
]
->
Map
(
t
,
t
)
Int
->
Map
(
t
,
t
)
Int
filterCooc'
(
FilterConfig
_
_
_
_
(
DefaultValue
dv
))
ts
m
=
-- trace ("coocScored " <> show ts) $
foldl'
(
\
m'
k
->
M
.
insert
k
(
maybe
dv
identity
$
M
.
lookup
k
m
)
m'
)
M
.
empty
selection
where
selection
=
[(
x
,
y
)
|
x
<-
ts
,
y
<-
ts
,
x
>
y
]
M
.
empty
(
listToCombi
identity
ts
)
-- | Map list creation
...
...
@@ -70,7 +67,8 @@ filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m =
takeSome
::
Ord
t
=>
FilterConfig
->
[
Scored
t
]
->
[
Scored
t
]
takeSome
(
FilterConfig
(
MapListSize
l
)
(
InclusionSize
l'
)
(
SampleBins
s
)
(
Clusters
_
)
_
)
scores
=
L
.
take
l
$
takeSample
n
m
$
L
.
take
l'
$
reverse
$
sortWith
(
Down
.
_scored_incExc
)
scores
$
L
.
take
l'
$
reverse
$
sortWith
(
Down
.
_scored_incExc
)
scores
-- splitKmeans k scores
where
-- TODO: benchmark with accelerate-example kmeans version
...
...
@@ -90,16 +88,17 @@ takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Cluste
$
sortWith
(
Down
.
_scored_speGen
)
xs
data
Scored
ts
=
Scored
{
_scored_terms
::
!
ts
,
_scored_incExc
::
!
InclusionExclusion
,
_scored_speGen
::
!
SpecificityGenericity
}
deriving
(
Show
)
data
Scored
ts
=
Scored
{
_scored_terms
::
!
ts
,
_scored_incExc
::
!
InclusionExclusion
,
_scored_speGen
::
!
SpecificityGenericity
}
deriving
(
Show
)
-- TODO in the textflow we end up needing these indices, it might be better
-- to compute them earlier and pass them around.
coocScored
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
[
Scored
t
]
coocScored
m
=
zipWith
(
\
(
_
,
t
)
(
inc
,
spe
)
->
Scored
t
inc
spe
)
(
M
.
toList
fi
)
scores
where
(
ti
,
fi
)
=
createIndices
m
(
ti
,
fi
)
=
createIndices
m
(
is
,
ss
)
=
incExcSpeGen
$
cooc2mat
ti
m
scores
=
DAA
.
toList
$
DAA
.
run
$
DAA
.
zip
(
DAA
.
use
is
)
(
DAA
.
use
ss
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment