Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
05aa3d7e
Commit
05aa3d7e
authored
Apr 28, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[INDEXING] WIP
parent
44be4e4c
Pipeline
#1453
failed with stage
Changes
8
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
149 additions
and
41 deletions
+149
-41
Main.hs
bin/gargantext-phylo/Main.hs
+0
-9
Count.hs
src/Gargantext/API/Count.hs
+4
-11
Dev.hs
src/Gargantext/API/Dev.hs
+11
-4
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+10
-2
List.hs
src/Gargantext/API/Ngrams/List.hs
+47
-5
Flow.hs
src/Gargantext/Core/Text/Flow.hs
+6
-6
WithList.hs
src/Gargantext/Core/Text/Terms/WithList.hs
+19
-4
Index.hs
src/Gargantext/Database/Action/Index.hs
+52
-0
No files found.
bin/gargantext-phylo/Main.hs
View file @
05aa3d7e
...
@@ -103,18 +103,9 @@ getJson path = L.readFile path
...
@@ -103,18 +103,9 @@ getJson path = L.readFile path
-- | Parse | --
-- | Parse | --
---------------
---------------
-- | To filter the Ngrams of a document based on the termList
-- | To filter the Ngrams of a document based on the termList
filterTerms
::
Patterns
->
(
a
,
Text
)
->
(
a
,
[
Text
])
filterTerms
::
Patterns
->
(
a
,
Text
)
->
(
a
,
[
Text
])
filterTerms
patterns
(
y
,
d
)
=
(
y
,
termsInText
patterns
d
)
filterTerms
patterns
(
y
,
d
)
=
(
y
,
termsInText
patterns
d
)
where
--------------------------------------
termsInText
::
Patterns
->
Text
->
[
Text
]
termsInText
pats
txt
=
DL
.
nub
$
DL
.
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
--------------------------------------
-- | To transform a Csv nfile into a readable corpus
-- | To transform a Csv nfile into a readable corpus
...
...
src/Gargantext/API/Count.hs
View file @
05aa3d7e
...
@@ -10,8 +10,6 @@ Portability : POSIX
...
@@ -10,8 +10,6 @@ Portability : POSIX
Count API part of Gargantext.
Count API part of Gargantext.
-}
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveAnyClass #-}
...
@@ -19,23 +17,18 @@ Count API part of Gargantext.
...
@@ -19,23 +17,18 @@ Count API part of Gargantext.
module
Gargantext.API.Count
module
Gargantext.API.Count
where
where
import
GHC.Generics
(
Generic
)
import
Data.Aeson
hiding
(
Error
)
import
Data.Aeson
hiding
(
Error
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Either
import
Data.Either
import
Data.List
(
permutations
)
import
Data.List
(
permutations
)
import
Data.Swagger
import
Data.Swagger
import
Data.Text
(
Text
,
pack
)
import
Data.Text
(
Text
,
pack
)
import
GHC.Generics
(
Generic
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Prelude
import
Servant
import
Servant
import
Test.QuickCheck.Arbitrary
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck
(
elements
)
-- import Control.Applicative ((<*>))
import
Test.QuickCheck.Arbitrary
import
Gargantext.Prelude
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-- TODO-ACCESS: CanCount
-- TODO-ACCESS: CanCount
...
...
src/Gargantext/API/Dev.hs
View file @
05aa3d7e
-- |
{-|
Module : Gargantext.API.Dev
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
-- Use only for dev/repl
-- Use only for dev/repl
module
Gargantext.API.Dev
where
module
Gargantext.API.Dev
where
...
@@ -17,7 +26,6 @@ import Gargantext.Prelude
...
@@ -17,7 +26,6 @@ import Gargantext.Prelude
import
Gargantext.Prelude.Config
(
GargConfig
(
..
),
readConfig
)
import
Gargantext.Prelude.Config
(
GargConfig
(
..
),
readConfig
)
-------------------------------------------------------------------
-------------------------------------------------------------------
withDevEnv
::
IniPath
->
(
DevEnv
->
IO
a
)
->
IO
a
withDevEnv
::
IniPath
->
(
DevEnv
->
IO
a
)
->
IO
a
withDevEnv
iniPath
k
=
do
withDevEnv
iniPath
k
=
do
env
<-
newDevEnv
env
<-
newDevEnv
...
@@ -38,7 +46,6 @@ withDevEnv iniPath k = do
...
@@ -38,7 +46,6 @@ withDevEnv iniPath k = do
}
}
-- | Run Cmd Sugar for the Repl (GHCI)
-- | Run Cmd Sugar for the Repl (GHCI)
runCmdRepl
::
Show
err
=>
Cmd''
DevEnv
err
a
->
IO
a
runCmdRepl
::
Show
err
=>
Cmd''
DevEnv
err
a
->
IO
a
runCmdRepl
f
=
withDevEnv
"gargantext.ini"
$
\
env
->
runCmdDev
env
f
runCmdRepl
f
=
withDevEnv
"gargantext.ini"
$
\
env
->
runCmdDev
env
f
...
@@ -62,4 +69,4 @@ runCmdDevServantErr :: DevEnv -> Cmd' DevEnv ServerError a -> IO a
...
@@ -62,4 +69,4 @@ runCmdDevServantErr :: DevEnv -> Cmd' DevEnv ServerError a -> IO a
runCmdDevServantErr
=
runCmdDev
runCmdDevServantErr
=
runCmdDev
runCmdReplEasy
::
Cmd''
DevEnv
GargError
a
->
IO
a
runCmdReplEasy
::
Cmd''
DevEnv
GargError
a
->
IO
a
runCmdReplEasy
f
=
withDevEnv
"gargantext.ini"
$
\
env
->
runCmdDev
env
f
runCmdReplEasy
f
=
withDevEnv
"gargantext.ini"
$
\
env
->
runCmdDev
env
f
\ No newline at end of file
src/Gargantext/API/Ngrams.hs
View file @
05aa3d7e
...
@@ -514,7 +514,9 @@ getTableNgrams _nType nId tabType listId limit_ offset
...
@@ -514,7 +514,9 @@ getTableNgrams _nType nId tabType listId limit_ offset
filteredNodes
::
Map
NgramsTerm
NgramsElement
->
[
NgramsElement
]
filteredNodes
::
Map
NgramsTerm
NgramsElement
->
[
NgramsElement
]
filteredNodes
tableMap
=
rootOf
<$>
list
&
filter
selected_node
filteredNodes
tableMap
=
rootOf
<$>
list
&
filter
selected_node
where
where
rootOf
ne
=
maybe
ne
(
\
r
->
fromMaybe
(
panic
"getTableNgrams: invalid root"
)
(
tableMap
^.
at
r
))
rootOf
ne
=
maybe
ne
(
\
r
->
fromMaybe
(
panic
"getTableNgrams: invalid root"
)
(
tableMap
^.
at
r
)
)
(
ne
^.
ne_root
)
(
ne
^.
ne_root
)
list
=
tableMap
^..
each
list
=
tableMap
^..
each
...
@@ -523,7 +525,9 @@ getTableNgrams _nType nId tabType listId limit_ offset
...
@@ -523,7 +525,9 @@ getTableNgrams _nType nId tabType listId limit_ offset
selectAndPaginate
tableMap
=
roots
<>
inners
selectAndPaginate
tableMap
=
roots
<>
inners
where
where
list
=
tableMap
^..
each
list
=
tableMap
^..
each
rootOf
ne
=
maybe
ne
(
\
r
->
fromMaybe
(
panic
"getTableNgrams: invalid root"
)
(
tableMap
^.
at
r
))
rootOf
ne
=
maybe
ne
(
\
r
->
fromMaybe
(
panic
"getTableNgrams: invalid root"
)
(
tableMap
^.
at
r
)
)
(
ne
^.
ne_root
)
(
ne
^.
ne_root
)
selected_nodes
=
list
&
take
limit_
selected_nodes
=
list
&
take
limit_
.
drop
offset'
.
drop
offset'
...
@@ -777,3 +781,7 @@ listNgramsChangedSince listId ngramsType version
...
@@ -777,3 +781,7 @@ listNgramsChangedSince listId ngramsType version
Versioned
<$>
currentVersion
<*>
pure
True
Versioned
<$>
currentVersion
<*>
pure
True
|
otherwise
=
|
otherwise
=
tableNgramsPull
listId
ngramsType
version
&
mapped
.
v_data
%~
(
==
mempty
)
tableNgramsPull
listId
ngramsType
version
&
mapped
.
v_data
%~
(
==
mempty
)
src/Gargantext/API/Ngrams/List.hs
View file @
05aa3d7e
...
@@ -15,6 +15,7 @@ Portability : POSIX
...
@@ -15,6 +15,7 @@ Portability : POSIX
module
Gargantext.API.Ngrams.List
module
Gargantext.API.Ngrams.List
where
where
import
Data.Maybe
(
catMaybes
)
import
Control.Lens
hiding
(
elements
)
import
Control.Lens
hiding
(
elements
)
import
Data.Aeson
import
Data.Aeson
import
Data.Map
(
toList
,
fromList
)
import
Data.Map
(
toList
,
fromList
)
...
@@ -22,21 +23,27 @@ import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
...
@@ -22,21 +23,27 @@ import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
import
Data.Text
(
Text
,
concat
,
pack
)
import
Data.Text
(
Text
,
concat
,
pack
)
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
)
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
,
NgramsTerm
)
import
Gargantext.API.Ngrams.Types
(
RepoCmdM
,
Versioned
(
..
),
NgramsList
)
import
Gargantext.API.Ngrams.Types
(
RepoCmdM
,
Versioned
(
..
),
NgramsList
,
NgramsTerm
(
..
)
)
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.API.Prelude
(
GargServer
,
GargNoServer
)
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getOccByNgramsOnlyFast'
)
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypes
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypes
,
NgramsType
(
..
))
import
Gargantext.Database.Query.Table.Node
(
getDocumentsWithParentId
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Network.HTTP.Media
((
//
),
(
/:
))
import
Network.HTTP.Media
((
//
),
(
/:
))
import
Servant
import
Servant
import
Servant.Job.Async
import
Servant.Job.Async
import
Servant.Job.Utils
(
jsonOptions
)
import
Servant.Job.Utils
(
jsonOptions
)
import
Web.FormUrlEncoded
(
FromForm
)
import
Web.FormUrlEncoded
(
FromForm
)
------------------------------------------------------------------------
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.Text
as
Text
------------------------------------------------------------------------
------------------------------------------------------------------------
type
API
=
Get
'[
J
SON
,
HTML
]
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
type
API
=
Get
'[
J
SON
,
HTML
]
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
...
@@ -83,6 +90,41 @@ post l m = do
...
@@ -83,6 +90,41 @@ post l m = do
-- TODO reindex
-- TODO reindex
pure
True
pure
True
-----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here)
reIndexWith
::
CorpusId
->
ListId
->
NgramsType
->
[
NgramsTerm
]
->
GargNoServer
()
reIndexWith
cId
lId
nt
ts
=
do
docs
<-
getDocumentsWithParentId
cId
-- Taking the ngrams with 0 occurrences only (orphans)
orphans
<-
map
(
\
k
->
([
unNgramsTerm
k
],
[]
))
<$>
HashMap
.
keys
<$>
HashMap
.
filter
(
==
0
)
<$>
getOccByNgramsOnlyFast'
cId
lId
nt
ts
-- Checking Text documents where orphans match
let
docMatched
=
map
(
\
doc
->
(
doc
^.
node_id
,
termsInText
(
buildPatterns
orphans
)
(
Text
.
unlines
$
catMaybes
[
doc
^.
node_hyperdata
.
hd_title
,
doc
^.
node_hyperdata
.
hd_abstract
]
)
)
)
docs
-- Saving the indexation in database
pure
()
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
type
PostAPI
=
Summary
"Update List"
type
PostAPI
=
Summary
"Update List"
...
...
src/Gargantext/Core/Text/Flow.hs
View file @
05aa3d7e
...
@@ -23,12 +23,12 @@ import GHC.IO (FilePath)
...
@@ -23,12 +23,12 @@ import GHC.IO (FilePath)
import
Gargantext.Core.Types
(
CorpusId
)
import
Gargantext.Core.Types
(
CorpusId
)
{-
{-
____
_ _
____
_____ _
/ ___| __ _ _ __ __ _ __ _ _ _
_ | |_ _____ _| |_
/ ___| __ _ _ __ __ _ __ _ _ _
|_ _|____ _| |_
| | _ / _` | '__/ _` |/ _` | '_ \|
__
/ _ \ \/ / __|
| | _ / _` | '__/ _` |/ _` | '_ \|
|
/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | |
| __/> <| |_
| |_| | (_| | | | (_| | (_| | | | | |
__/> <| |_
\____|\__,_|_| \__, |\__,_|_| |_|
\__
\___/_/\_\\__|
\____|\__,_|_| \__, |\__,_|_| |_|
_|
\___/_/\_\\__|
|___/
|___/
-}
-}
...
...
src/Gargantext/Core/Text/Terms/WithList.hs
View file @
05aa3d7e
...
@@ -17,14 +17,14 @@ module Gargantext.Core.Text.Terms.WithList where
...
@@ -17,14 +17,14 @@ module Gargantext.Core.Text.Terms.WithList where
import
Data.List
(
null
)
import
Data.List
(
null
)
import
Data.Ord
import
Data.Ord
import
Data.Text
(
Text
,
concat
)
import
Data.Text
(
Text
,
concat
,
unwords
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Terms.Mono
(
monoTextsBySentence
)
import
Gargantext.Core.Text.Terms.Mono
(
monoTextsBySentence
)
import
Prelude
(
error
)
import
Prelude
(
error
)
import
qualified
Data.Algorithms.KMP
as
KMP
import
qualified
Data.Algorithms.KMP
as
KMP
import
qualified
Data.IntMap.Strict
as
IntMap
import
qualified
Data.IntMap.Strict
as
IntMap
import
qualified
Data.List
as
List
------------------------------------------------------------------------
------------------------------------------------------------------------
data
Pattern
=
Pattern
data
Pattern
=
Pattern
...
@@ -67,6 +67,19 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
...
@@ -67,6 +67,19 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
Pattern
(
KMP
.
build
alt
)
(
length
alt
)
label
Pattern
(
KMP
.
build
alt
)
(
length
alt
)
label
--(Terms label $ Set.empty) -- TODO check stems
--(Terms label $ Set.empty) -- TODO check stems
--------------------------------------------------------------------------
-- Utils
type
BlockText
=
Text
type
MatchedText
=
Text
termsInText
::
Patterns
->
BlockText
->
[
MatchedText
]
termsInText
pats
txt
=
List
.
nub
$
List
.
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
--------------------------------------------------------------------------
extractTermsWithList
::
Patterns
->
Text
->
Corpus
[
Text
]
extractTermsWithList
::
Patterns
->
Text
->
Corpus
[
Text
]
extractTermsWithList
pats
=
map
(
replaceTerms
pats
)
.
monoTextsBySentence
extractTermsWithList
pats
=
map
(
replaceTerms
pats
)
.
monoTextsBySentence
...
@@ -78,7 +91,9 @@ extractTermsWithList' :: Patterns -> Text -> [Text]
...
@@ -78,7 +91,9 @@ extractTermsWithList' :: Patterns -> Text -> [Text]
extractTermsWithList'
pats
=
map
(
concat
.
map
concat
.
replaceTerms
pats
)
extractTermsWithList'
pats
=
map
(
concat
.
map
concat
.
replaceTerms
pats
)
.
monoTextsBySentence
.
monoTextsBySentence
--------------------------------------------------------------------------
{- | Not used
filterWith :: TermList
filterWith :: TermList
-> (a -> Text)
-> (a -> Text)
-> [a]
-> [a]
...
@@ -96,4 +111,4 @@ filterWith' termList f f' xs = f' xs
...
@@ -96,4 +111,4 @@ filterWith' termList f f' xs = f' xs
$ map f xs
$ map f xs
where
where
pats = buildPatterns termList
pats = buildPatterns termList
-}
src/Gargantext/Database/Action/Index.hs
0 → 100644
View file @
05aa3d7e
{-|
Module : Gargantext.Database.Action.Index
Description : Indexation tools
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main Database functions for Gargantext.API.Node.Update
UpdateNodeParamsTexts { methodTexts :: Granularity }
data Granularity = NewNgrams | NewTexts | Both
deriving (Generic, Eq, Ord, Enum, Bounded)
-- TODO add option for type of ngrams
-}
module
Gargantext.Database.Action.Index
where
import
Data.List
(
nub
)
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
filterTerms
,
termsInText
)
index
::
CorpusId
->
Granularity
->
Cmd
err
[
Int
]
index
cId
NewNgrams
=
do
ngrams
<-
get
ngrams
with
zero
count
texts
<-
get
all
text
to
index
indexSave
text
(
buildPatterns
ngrams
)
index
cId
NewTexts
=
do
ngrams
<-
get
all
ngrams
texts
<-
get
text
with
zero
count
indexSave
text
(
buildPatterns
ngrams
)
index
cId
Both
=
do
r1
<-
index
cId
NewNgrams
r2
<-
index
cId
NewTexts
pure
$
r1
<>
r2
indexSave
::
[
Document
]
->
Pattern
->
Cmd
err
[
Int
]
indexSave
corpus
p
=
do
indexedDoc
<-
map
(
filterTerms
patterns
)
corpus
saveIndexDoc
ngramsTextId
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment