Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
05aa3d7e
Commit
05aa3d7e
authored
Apr 28, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[INDEXING] WIP
parent
44be4e4c
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
149 additions
and
41 deletions
+149
-41
Main.hs
bin/gargantext-phylo/Main.hs
+0
-9
Count.hs
src/Gargantext/API/Count.hs
+4
-11
Dev.hs
src/Gargantext/API/Dev.hs
+11
-4
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+10
-2
List.hs
src/Gargantext/API/Ngrams/List.hs
+47
-5
Flow.hs
src/Gargantext/Core/Text/Flow.hs
+6
-6
WithList.hs
src/Gargantext/Core/Text/Terms/WithList.hs
+19
-4
Index.hs
src/Gargantext/Database/Action/Index.hs
+52
-0
No files found.
bin/gargantext-phylo/Main.hs
View file @
05aa3d7e
...
...
@@ -103,18 +103,9 @@ getJson path = L.readFile path
-- | Parse | --
---------------
-- | To filter the Ngrams of a document based on the termList
filterTerms
::
Patterns
->
(
a
,
Text
)
->
(
a
,
[
Text
])
filterTerms
patterns
(
y
,
d
)
=
(
y
,
termsInText
patterns
d
)
where
--------------------------------------
termsInText
::
Patterns
->
Text
->
[
Text
]
termsInText
pats
txt
=
DL
.
nub
$
DL
.
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
--------------------------------------
-- | To transform a Csv nfile into a readable corpus
...
...
src/Gargantext/API/Count.hs
View file @
05aa3d7e
...
...
@@ -10,8 +10,6 @@ Portability : POSIX
Count API part of Gargantext.
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE DeriveAnyClass #-}
...
...
@@ -19,23 +17,18 @@ Count API part of Gargantext.
module
Gargantext.API.Count
where
import
GHC.Generics
(
Generic
)
import
Data.Aeson
hiding
(
Error
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Either
import
Data.List
(
permutations
)
import
Data.Swagger
import
Data.Text
(
Text
,
pack
)
import
GHC.Generics
(
Generic
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Prelude
import
Servant
import
Test.QuickCheck.Arbitrary
import
Test.QuickCheck
(
elements
)
-- import Control.Applicative ((<*>))
import
Gargantext.Prelude
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Test.QuickCheck.Arbitrary
-----------------------------------------------------------------------
-- TODO-ACCESS: CanCount
...
...
src/Gargantext/API/Dev.hs
View file @
05aa3d7e
-- |
{-|
Module : Gargantext.API.Dev
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
-- Use only for dev/repl
module
Gargantext.API.Dev
where
...
...
@@ -17,7 +26,6 @@ import Gargantext.Prelude
import
Gargantext.Prelude.Config
(
GargConfig
(
..
),
readConfig
)
-------------------------------------------------------------------
withDevEnv
::
IniPath
->
(
DevEnv
->
IO
a
)
->
IO
a
withDevEnv
iniPath
k
=
do
env
<-
newDevEnv
...
...
@@ -38,7 +46,6 @@ withDevEnv iniPath k = do
}
-- | Run Cmd Sugar for the Repl (GHCI)
runCmdRepl
::
Show
err
=>
Cmd''
DevEnv
err
a
->
IO
a
runCmdRepl
f
=
withDevEnv
"gargantext.ini"
$
\
env
->
runCmdDev
env
f
...
...
@@ -62,4 +69,4 @@ runCmdDevServantErr :: DevEnv -> Cmd' DevEnv ServerError a -> IO a
runCmdDevServantErr
=
runCmdDev
runCmdReplEasy
::
Cmd''
DevEnv
GargError
a
->
IO
a
runCmdReplEasy
f
=
withDevEnv
"gargantext.ini"
$
\
env
->
runCmdDev
env
f
\ No newline at end of file
runCmdReplEasy
f
=
withDevEnv
"gargantext.ini"
$
\
env
->
runCmdDev
env
f
src/Gargantext/API/Ngrams.hs
View file @
05aa3d7e
...
...
@@ -514,7 +514,9 @@ getTableNgrams _nType nId tabType listId limit_ offset
filteredNodes
::
Map
NgramsTerm
NgramsElement
->
[
NgramsElement
]
filteredNodes
tableMap
=
rootOf
<$>
list
&
filter
selected_node
where
rootOf
ne
=
maybe
ne
(
\
r
->
fromMaybe
(
panic
"getTableNgrams: invalid root"
)
(
tableMap
^.
at
r
))
rootOf
ne
=
maybe
ne
(
\
r
->
fromMaybe
(
panic
"getTableNgrams: invalid root"
)
(
tableMap
^.
at
r
)
)
(
ne
^.
ne_root
)
list
=
tableMap
^..
each
...
...
@@ -523,7 +525,9 @@ getTableNgrams _nType nId tabType listId limit_ offset
selectAndPaginate
tableMap
=
roots
<>
inners
where
list
=
tableMap
^..
each
rootOf
ne
=
maybe
ne
(
\
r
->
fromMaybe
(
panic
"getTableNgrams: invalid root"
)
(
tableMap
^.
at
r
))
rootOf
ne
=
maybe
ne
(
\
r
->
fromMaybe
(
panic
"getTableNgrams: invalid root"
)
(
tableMap
^.
at
r
)
)
(
ne
^.
ne_root
)
selected_nodes
=
list
&
take
limit_
.
drop
offset'
...
...
@@ -777,3 +781,7 @@ listNgramsChangedSince listId ngramsType version
Versioned
<$>
currentVersion
<*>
pure
True
|
otherwise
=
tableNgramsPull
listId
ngramsType
version
&
mapped
.
v_data
%~
(
==
mempty
)
src/Gargantext/API/Ngrams/List.hs
View file @
05aa3d7e
...
...
@@ -15,6 +15,7 @@ Portability : POSIX
module
Gargantext.API.Ngrams.List
where
import
Data.Maybe
(
catMaybes
)
import
Control.Lens
hiding
(
elements
)
import
Data.Aeson
import
Data.Map
(
toList
,
fromList
)
...
...
@@ -22,21 +23,27 @@ import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
import
Data.Text
(
Text
,
concat
,
pack
)
import
GHC.Generics
(
Generic
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
)
import
Gargantext.API.Ngrams.Types
(
RepoCmdM
,
Versioned
(
..
),
NgramsList
)
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
,
NgramsTerm
)
import
Gargantext.API.Ngrams.Types
(
RepoCmdM
,
Versioned
(
..
),
NgramsList
,
NgramsTerm
(
..
)
)
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.API.Prelude
(
GargServer
,
GargNoServer
)
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getOccByNgramsOnlyFast'
)
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypes
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypes
,
NgramsType
(
..
))
import
Gargantext.Database.Query.Table.Node
(
getDocumentsWithParentId
)
import
Gargantext.Prelude
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Network.HTTP.Media
((
//
),
(
/:
))
import
Servant
import
Servant.Job.Async
import
Servant.Job.Utils
(
jsonOptions
)
import
Web.FormUrlEncoded
(
FromForm
)
------------------------------------------------------------------------
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.Text
as
Text
------------------------------------------------------------------------
type
API
=
Get
'[
J
SON
,
HTML
]
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
...
...
@@ -83,6 +90,41 @@ post l m = do
-- TODO reindex
pure
True
-----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here)
reIndexWith
::
CorpusId
->
ListId
->
NgramsType
->
[
NgramsTerm
]
->
GargNoServer
()
reIndexWith
cId
lId
nt
ts
=
do
docs
<-
getDocumentsWithParentId
cId
-- Taking the ngrams with 0 occurrences only (orphans)
orphans
<-
map
(
\
k
->
([
unNgramsTerm
k
],
[]
))
<$>
HashMap
.
keys
<$>
HashMap
.
filter
(
==
0
)
<$>
getOccByNgramsOnlyFast'
cId
lId
nt
ts
-- Checking Text documents where orphans match
let
docMatched
=
map
(
\
doc
->
(
doc
^.
node_id
,
termsInText
(
buildPatterns
orphans
)
(
Text
.
unlines
$
catMaybes
[
doc
^.
node_hyperdata
.
hd_title
,
doc
^.
node_hyperdata
.
hd_abstract
]
)
)
)
docs
-- Saving the indexation in database
pure
()
------------------------------------------------------------------------
------------------------------------------------------------------------
type
PostAPI
=
Summary
"Update List"
...
...
src/Gargantext/Core/Text/Flow.hs
View file @
05aa3d7e
...
...
@@ -23,12 +23,12 @@ import GHC.IO (FilePath)
import
Gargantext.Core.Types
(
CorpusId
)
{-
____
_ _
/ ___| __ _ _ __ __ _ __ _ _ _
_ | |_ _____ _| |_
| | _ / _` | '__/ _` |/ _` | '_ \|
__
/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | |
| __/> <| |_
\____|\__,_|_| \__, |\__,_|_| |_|
\__
\___/_/\_\\__|
|___/
____
_____ _
/ ___| __ _ _ __ __ _ __ _ _ _
|_ _|____ _| |_
| | _ / _` | '__/ _` |/ _` | '_ \|
|
/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | |
__/> <| |_
\____|\__,_|_| \__, |\__,_|_| |_|
_|
\___/_/\_\\__|
|___/
-}
...
...
src/Gargantext/Core/Text/Terms/WithList.hs
View file @
05aa3d7e
...
...
@@ -17,14 +17,14 @@ module Gargantext.Core.Text.Terms.WithList where
import
Data.List
(
null
)
import
Data.Ord
import
Data.Text
(
Text
,
concat
)
import
Data.Text
(
Text
,
concat
,
unwords
)
import
Gargantext.Prelude
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Terms.Mono
(
monoTextsBySentence
)
import
Prelude
(
error
)
import
qualified
Data.Algorithms.KMP
as
KMP
import
qualified
Data.IntMap.Strict
as
IntMap
import
qualified
Data.IntMap.Strict
as
IntMap
import
qualified
Data.List
as
List
------------------------------------------------------------------------
data
Pattern
=
Pattern
...
...
@@ -67,6 +67,19 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
Pattern
(
KMP
.
build
alt
)
(
length
alt
)
label
--(Terms label $ Set.empty) -- TODO check stems
--------------------------------------------------------------------------
-- Utils
type
BlockText
=
Text
type
MatchedText
=
Text
termsInText
::
Patterns
->
BlockText
->
[
MatchedText
]
termsInText
pats
txt
=
List
.
nub
$
List
.
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
--------------------------------------------------------------------------
extractTermsWithList
::
Patterns
->
Text
->
Corpus
[
Text
]
extractTermsWithList
pats
=
map
(
replaceTerms
pats
)
.
monoTextsBySentence
...
...
@@ -78,7 +91,9 @@ extractTermsWithList' :: Patterns -> Text -> [Text]
extractTermsWithList'
pats
=
map
(
concat
.
map
concat
.
replaceTerms
pats
)
.
monoTextsBySentence
--------------------------------------------------------------------------
{- | Not used
filterWith :: TermList
-> (a -> Text)
-> [a]
...
...
@@ -96,4 +111,4 @@ filterWith' termList f f' xs = f' xs
$ map f xs
where
pats = buildPatterns termList
-}
src/Gargantext/Database/Action/Index.hs
0 → 100644
View file @
05aa3d7e
{-|
Module : Gargantext.Database.Action.Index
Description : Indexation tools
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main Database functions for Gargantext.API.Node.Update
UpdateNodeParamsTexts { methodTexts :: Granularity }
data Granularity = NewNgrams | NewTexts | Both
deriving (Generic, Eq, Ord, Enum, Bounded)
-- TODO add option for type of ngrams
-}
module
Gargantext.Database.Action.Index
where
import
Data.List
(
nub
)
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
filterTerms
,
termsInText
)
index
::
CorpusId
->
Granularity
->
Cmd
err
[
Int
]
index
cId
NewNgrams
=
do
ngrams
<-
get
ngrams
with
zero
count
texts
<-
get
all
text
to
index
indexSave
text
(
buildPatterns
ngrams
)
index
cId
NewTexts
=
do
ngrams
<-
get
all
ngrams
texts
<-
get
text
with
zero
count
indexSave
text
(
buildPatterns
ngrams
)
index
cId
Both
=
do
r1
<-
index
cId
NewNgrams
r2
<-
index
cId
NewTexts
pure
$
r1
<>
r2
indexSave
::
[
Document
]
->
Pattern
->
Cmd
err
[
Int
]
indexSave
corpus
p
=
do
indexedDoc
<-
map
(
filterTerms
patterns
)
corpus
saveIndexDoc
ngramsTextId
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment