Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
9f5df649
Commit
9f5df649
authored
Apr 29, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Indexation function to test
parent
05aa3d7e
Pipeline
#1454
canceled with stage
Changes
9
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
73 additions
and
43 deletions
+73
-43
List.hs
src/Gargantext/API/Ngrams/List.hs
+57
-25
Tools.hs
src/Gargantext/API/Ngrams/Tools.hs
+5
-6
List.hs
src/Gargantext/Core/Text/List.hs
+0
-1
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+1
-2
LegacyMain.hs
src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
+3
-2
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+2
-1
Utils.hs
src/Gargantext/Database/Action/Flow/Utils.hs
+3
-3
Index.hs
src/Gargantext/Database/Action/Index.hs
+2
-1
Ngrams.hs
src/Gargantext/Database/Query/Table/Ngrams.hs
+0
-2
No files found.
src/Gargantext/API/Ngrams/List.hs
View file @
9f5df649
...
@@ -15,34 +15,43 @@ Portability : POSIX
...
@@ -15,34 +15,43 @@ Portability : POSIX
module
Gargantext.API.Ngrams.List
module
Gargantext.API.Ngrams.List
where
where
import
Data.Maybe
(
catMaybes
)
import
Control.Lens
hiding
(
elements
,
Indexed
)
import
Control.Lens
hiding
(
elements
)
import
Data.Aeson
import
Data.Aeson
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
toList
,
fromList
)
import
Data.Map
(
toList
,
fromList
)
import
Data.Maybe
(
catMaybes
)
import
Data.Set
(
Set
)
import
Data.Swagger
(
ToSchema
,
declareNamedSchema
,
genericDeclareNamedSchema
)
import
Data.Swagger
(
ToSchema
,
declareNamedSchema
,
genericDeclareNamedSchema
)
import
Data.Text
(
Text
,
concat
,
pack
)
import
Data.Text
(
Text
,
concat
,
pack
)
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
,
NgramsTerm
)
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
)
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
(
RepoCmdM
,
Versioned
(
..
),
NgramsList
,
NgramsTerm
(
..
))
import
Gargantext.API.Ngrams.Types
(
RepoCmdM
,
Versioned
(
..
),
NgramsList
,
NgramsTerm
(
..
))
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Prelude
(
GargServer
,
GargNoServer
)
import
Gargantext.API.Prelude
(
GargServer
,
GargNoServer
)
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Flow.Utils
(
insertDocNgrams
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getOccByNgramsOnlyFast'
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getOccByNgramsOnlyFast'
)
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypes
,
NgramsType
(
..
))
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Ngrams
(
insertNgrams
)
import
Gargantext.Database.Query.Table.Node
(
getDocumentsWithParentId
)
import
Gargantext.Database.Query.Table.Node
(
getDocumentsWithParentId
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Network.HTTP.Media
((
//
),
(
/:
))
import
Network.HTTP.Media
((
//
),
(
/:
))
import
Servant
import
Servant
import
Servant.Job.Async
import
Servant.Job.Async
import
Servant.Job.Utils
(
jsonOptions
)
import
Servant.Job.Utils
(
jsonOptions
)
import
Web.FormUrlEncoded
(
FromForm
)
import
Web.FormUrlEncoded
(
FromForm
)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Text
as
Text
import
qualified
Data.Text
as
Text
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -64,9 +73,9 @@ get :: RepoCmdM env err m =>
...
@@ -64,9 +73,9 @@ get :: RepoCmdM env err m =>
ListId
->
m
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
ListId
->
m
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
get
lId
=
do
get
lId
=
do
lst
<-
get'
lId
lst
<-
get'
lId
let
(
NodeId
id
)
=
lId
let
(
NodeId
id
'
)
=
lId
return
$
addHeader
(
concat
[
"attachment; filename=GarganText_NgramsList-"
return
$
addHeader
(
concat
[
"attachment; filename=GarganText_NgramsList-"
,
pack
$
show
id
,
pack
$
show
id
'
,
".json"
,
".json"
]
]
)
lst
)
lst
...
@@ -96,35 +105,58 @@ post l m = do
...
@@ -96,35 +105,58 @@ post l m = do
reIndexWith
::
CorpusId
reIndexWith
::
CorpusId
->
ListId
->
ListId
->
NgramsType
->
NgramsType
->
[
NgramsTerm
]
->
Set
ListType
->
GargNoServer
()
->
GargNoServer
()
reIndexWith
cId
lId
nt
ts
=
do
reIndexWith
cId
lId
nt
lts
=
do
docs
<-
getDocumentsWithParentId
cId
-- Getting [NgramsTerm]
ts
<-
List
.
concat
<$>
map
(
\
(
k
,
vs
)
->
k
:
vs
)
<$>
HashMap
.
toList
<$>
getTermsWith
identity
[
lId
]
nt
lts
-- printDebug "ts" ts
-- Taking the ngrams with 0 occurrences only (orphans)
-- Taking the ngrams with 0 occurrences only (orphans)
orphans
<-
map
(
\
k
->
([
unNgramsTerm
k
],
[]
))
orphans
<-
HashMap
.
keys
<$>
HashMap
.
keys
<$>
HashMap
.
filter
(
==
0
)
<$>
HashMap
.
filter
(
==
0
)
<$>
getOccByNgramsOnlyFast'
cId
lId
nt
ts
<$>
getOccByNgramsOnlyFast'
cId
lId
nt
ts
-- Getting the Id of orphan ngrams
mapTextNgramsId
<-
insertNgrams
(
map
(
text2ngrams
.
unNgramsTerm
)
orphans
)
printDebug
"orphans"
orphans
-- Get all documents of the corpus
docs
<-
getDocumentsWithParentId
cId
-- Checking Text documents where orphans match
-- Checking Text documents where orphans match
-- TODO Tests here
let
let
docMatched
=
ngramsByDoc
=
List
.
concat
map
(
\
doc
->
(
doc
^.
node_id
$
map
(
\
doc
->
List
.
zip
,
termsInText
(
buildPatterns
orphans
)
(
termsInText
(
buildPatterns
$
map
(
\
k
->
([
unNgramsTerm
k
],
[]
))
orphans
)
(
Text
.
unlines
$
Text
.
unlines
$
catMaybes
$
catMaybes
[
doc
^.
node_hyperdata
.
hd_title
[
doc
^.
node_hyperdata
.
hd_title
,
doc
^.
node_hyperdata
.
hd_abstract
,
doc
^.
node_hyperdata
.
hd_abstract
]
]
)
)
)
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
node_id
)
1
)]]
)
)
docs
)
docs
-- Saving the indexation in database
-- Saving the indexation in database
_
<-
insertDocNgrams
lId
(
HashMap
.
fromList
$
catMaybes
$
map
(
\
(
t
,
d
)
->
(,)
<$>
toIndexedNgrams
mapTextNgramsId
t
<*>
Just
d
)
ngramsByDoc
)
pure
()
pure
()
toIndexedNgrams
::
HashMap
Text
NgramsId
->
Text
->
Maybe
(
Indexed
Int
Ngrams
)
toIndexedNgrams
m
t
=
Indexed
<$>
i
<*>
n
where
i
=
HashMap
.
lookup
t
m
n
=
Just
(
text2ngrams
t
)
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
type
PostAPI
=
Summary
"Update List"
type
PostAPI
=
Summary
"Update List"
...
...
src/Gargantext/API/Ngrams/Tools.hs
View file @
9f5df649
...
@@ -26,8 +26,8 @@ import Gargantext.Core.Types (ListType(..), NodeId, ListId)
...
@@ -26,8 +26,8 @@ import Gargantext.Core.Types (ListType(..), NodeId, ListId)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HM
import
qualified
Data.HashMap.Strict
as
HM
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
mergeNgramsElement
::
NgramsRepoElement
->
NgramsRepoElement
->
NgramsRepoElement
mergeNgramsElement
::
NgramsRepoElement
->
NgramsRepoElement
->
NgramsRepoElement
mergeNgramsElement
_neOld
neNew
=
neNew
mergeNgramsElement
_neOld
neNew
=
neNew
...
@@ -50,7 +50,6 @@ listNgramsFromRepo nodeIds ngramsType repo = ngrams
...
@@ -50,7 +50,6 @@ listNgramsFromRepo nodeIds ngramsType repo = ngrams
[
ngramsMap
^.
at
nodeId
.
_Just
|
nodeId
<-
nodeIds
]
[
ngramsMap
^.
at
nodeId
.
_Just
|
nodeId
<-
nodeIds
]
-- TODO-ACCESS: We want to do the security check before entering here.
-- TODO-ACCESS: We want to do the security check before entering here.
-- Add a static capability parameter would be nice.
-- Add a static capability parameter would be nice.
-- Ideally this is the access to `repoVar` which needs to
-- Ideally this is the access to `repoVar` which needs to
...
@@ -62,12 +61,12 @@ getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> get
...
@@ -62,12 +61,12 @@ getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> get
getTermsWith
::
(
RepoCmdM
env
err
m
,
Eq
a
,
Hashable
a
)
getTermsWith
::
(
RepoCmdM
env
err
m
,
Eq
a
,
Hashable
a
)
=>
(
NgramsTerm
->
a
)
->
[
ListId
]
=>
(
NgramsTerm
->
a
)
->
[
ListId
]
->
NgramsType
->
ListType
->
NgramsType
->
Set
ListType
->
m
(
HashMap
a
[
a
])
->
m
(
HashMap
a
[
a
])
getTermsWith
f
ls
ngt
lt
=
HM
.
fromListWith
(
<>
)
getTermsWith
f
ls
ngt
lt
s
=
HM
.
fromListWith
(
<>
)
<$>
map
toTreeWith
<$>
map
toTreeWith
<$>
HM
.
toList
<$>
HM
.
toList
<$>
HM
.
filter
(
\
f'
->
fst
f'
==
lt
)
<$>
HM
.
filter
(
\
f'
->
Set
.
member
(
fst
f'
)
lts
)
<$>
mapTermListRoot
ls
ngt
<$>
mapTermListRoot
ls
ngt
<$>
getRepo
<$>
getRepo
where
where
...
...
src/Gargantext/Core/Text/List.hs
View file @
9f5df649
...
@@ -150,7 +150,6 @@ getGroupParams gp _ = pure gp
...
@@ -150,7 +150,6 @@ getGroupParams gp _ = pure gp
-- TODO use ListIds
-- TODO use ListIds
buildNgramsTermsList
::
(
HasNodeError
err
buildNgramsTermsList
::
(
HasNodeError
err
,
CmdM
env
err
m
,
CmdM
env
err
m
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
9f5df649
...
@@ -144,7 +144,6 @@ extracted2ngrams :: ExtractedNgrams -> Ngrams
...
@@ -144,7 +144,6 @@ extracted2ngrams :: ExtractedNgrams -> Ngrams
extracted2ngrams
(
SimpleNgrams
ng
)
=
ng
extracted2ngrams
(
SimpleNgrams
ng
)
=
ng
extracted2ngrams
(
EnrichedNgrams
ng
)
=
view
np_form
ng
extracted2ngrams
(
EnrichedNgrams
ng
)
=
view
np_form
ng
---------------------------
---------------------------
insertExtractedNgrams
::
[
ExtractedNgrams
]
->
Cmd
err
(
HashMap
Text
NgramsId
)
insertExtractedNgrams
::
[
ExtractedNgrams
]
->
Cmd
err
(
HashMap
Text
NgramsId
)
insertExtractedNgrams
ngs
=
do
insertExtractedNgrams
ngs
=
do
...
@@ -216,6 +215,6 @@ text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
...
@@ -216,6 +215,6 @@ text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
isPunctuation
::
Text
->
Bool
isPunctuation
::
Text
->
Bool
isPunctuation
x
=
List
.
elem
x
$
(
Text
.
pack
.
pure
)
isPunctuation
x
=
List
.
elem
x
$
(
Text
.
pack
.
pure
)
<$>
(
"!?(),;."
::
String
)
<$>
(
"!?(),;.
:
"
::
String
)
src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
View file @
9f5df649
...
@@ -41,7 +41,8 @@ import Gargantext.Core.Types
...
@@ -41,7 +41,8 @@ import Gargantext.Core.Types
-- import Gargantext.Core.Viz.Phylo.View.Export
-- import Gargantext.Core.Viz.Phylo.View.Export
-- import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
-- import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.Text
as
Text
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
type
MinSizeBranch
=
Int
type
MinSizeBranch
=
Int
...
@@ -51,7 +52,7 @@ flowPhylo :: FlowCmdM env err m
...
@@ -51,7 +52,7 @@ flowPhylo :: FlowCmdM env err m
flowPhylo
cId
=
do
flowPhylo
cId
=
do
list
<-
defaultList
cId
list
<-
defaultList
cId
termList
<-
HashMap
.
toList
<$>
getTermsWith
(
Text
.
words
.
unNgramsTerm
)
[
list
]
NgramsTerms
MapTerm
termList
<-
HashMap
.
toList
<$>
getTermsWith
(
Text
.
words
.
unNgramsTerm
)
[
list
]
NgramsTerms
(
Set
.
singleton
MapTerm
)
docs'
<-
catMaybes
docs'
<-
catMaybes
<$>
map
(
\
h
->
(,)
<$>
_hd_publication_year
h
<$>
map
(
\
h
->
(,)
<$>
_hd_publication_year
h
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
9f5df649
...
@@ -236,7 +236,8 @@ flowCorpusUser l user corpusName ctype ids = do
...
@@ -236,7 +236,8 @@ flowCorpusUser l user corpusName ctype ids = do
-- printDebug "Node Text Ids:" tId
-- printDebug "Node Text Ids:" tId
-- User List Flow
-- User List Flow
(
masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMk_RootWithCorpus
(
UserName
userMaster
)
(
Left
""
)
ctype
(
masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMk_RootWithCorpus
(
UserName
userMaster
)
(
Left
""
)
ctype
--let gp = (GroupParams l 2 3 (StopSize 3))
--let gp = (GroupParams l 2 3 (StopSize 3))
let
gp
=
GroupWithPosTag
l
CoreNLP
HashMap
.
empty
let
gp
=
GroupWithPosTag
l
CoreNLP
HashMap
.
empty
...
...
src/Gargantext/Database/Action/Flow/Utils.hs
View file @
9f5df649
...
@@ -37,10 +37,10 @@ docNgrams2nodeNodeNgrams :: CorpusId
...
@@ -37,10 +37,10 @@ docNgrams2nodeNodeNgrams :: CorpusId
docNgrams2nodeNodeNgrams
cId
(
DocNgrams
d
n
nt
w
)
=
docNgrams2nodeNodeNgrams
cId
(
DocNgrams
d
n
nt
w
)
=
NodeNodeNgrams
cId
d
n
nt
w
NodeNodeNgrams
cId
d
n
nt
w
data
DocNgrams
=
DocNgrams
{
dn_doc_id
::
DocId
data
DocNgrams
=
DocNgrams
{
dn_doc_id
::
DocId
,
dn_ngrams_id
::
Int
,
dn_ngrams_id
::
Int
,
dn_ngrams_type
::
NgramsTypeId
,
dn_ngrams_type
::
NgramsTypeId
,
dn_weight
::
Double
,
dn_weight
::
Double
}
}
insertDocNgramsOn
::
CorpusId
insertDocNgramsOn
::
CorpusId
...
...
src/Gargantext/Database/Action/Index.hs
View file @
9f5df649
...
@@ -20,6 +20,7 @@ data Granularity = NewNgrams | NewTexts | Both
...
@@ -20,6 +20,7 @@ data Granularity = NewNgrams | NewTexts | Both
module
Gargantext.Database.Action.Index
module
Gargantext.Database.Action.Index
where
where
{-
import Data.List (nub)
import Data.List (nub)
import Gargantext.Core.Text.Terms.WithList (buildPatterns, filterTerms, termsInText)
import Gargantext.Core.Text.Terms.WithList (buildPatterns, filterTerms, termsInText)
...
@@ -44,7 +45,7 @@ indexSave :: [Document] -> Pattern -> Cmd err [Int]
...
@@ -44,7 +45,7 @@ indexSave :: [Document] -> Pattern -> Cmd err [Int]
indexSave corpus p = do
indexSave corpus p = do
indexedDoc <- map (filterTerms patterns) corpus
indexedDoc <- map (filterTerms patterns) corpus
saveIndexDoc ngramsTextId
saveIndexDoc ngramsTextId
-}
...
...
src/Gargantext/Database/Query/Table/Ngrams.hs
View file @
9f5df649
...
@@ -101,5 +101,3 @@ queryInsertNgrams = [sql|
...
@@ -101,5 +101,3 @@ queryInsertNgrams = [sql|
FROM input_rows
FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index
JOIN ngrams c USING (terms); -- columns of unique index
|]
|]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment