Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
9f5df649
Commit
9f5df649
authored
Apr 29, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Indexation function to test
parent
05aa3d7e
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
73 additions
and
43 deletions
+73
-43
List.hs
src/Gargantext/API/Ngrams/List.hs
+57
-25
Tools.hs
src/Gargantext/API/Ngrams/Tools.hs
+5
-6
List.hs
src/Gargantext/Core/Text/List.hs
+0
-1
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+1
-2
LegacyMain.hs
src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
+3
-2
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+2
-1
Utils.hs
src/Gargantext/Database/Action/Flow/Utils.hs
+3
-3
Index.hs
src/Gargantext/Database/Action/Index.hs
+2
-1
Ngrams.hs
src/Gargantext/Database/Query/Table/Ngrams.hs
+0
-2
No files found.
src/Gargantext/API/Ngrams/List.hs
View file @
9f5df649
...
...
@@ -15,34 +15,43 @@ Portability : POSIX
module
Gargantext.API.Ngrams.List
where
import
Data.Maybe
(
catMaybes
)
import
Control.Lens
hiding
(
elements
)
import
Control.Lens
hiding
(
elements
,
Indexed
)
import
Data.Aeson
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
toList
,
fromList
)
import
Data.Maybe
(
catMaybes
)
import
Data.Set
(
Set
)
import
Data.Swagger
(
ToSchema
,
declareNamedSchema
,
genericDeclareNamedSchema
)
import
Data.Text
(
Text
,
concat
,
pack
)
import
GHC.Generics
(
Generic
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
,
NgramsTerm
)
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
)
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
(
RepoCmdM
,
Versioned
(
..
),
NgramsList
,
NgramsTerm
(
..
))
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Prelude
(
GargServer
,
GargNoServer
)
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Flow.Utils
(
insertDocNgrams
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getOccByNgramsOnlyFast'
)
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypes
,
NgramsType
(
..
))
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Ngrams
(
insertNgrams
)
import
Gargantext.Database.Query.Table.Node
(
getDocumentsWithParentId
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Prelude
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Network.HTTP.Media
((
//
),
(
/:
))
import
Servant
import
Servant.Job.Async
import
Servant.Job.Utils
(
jsonOptions
)
import
Web.FormUrlEncoded
(
FromForm
)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Text
as
Text
------------------------------------------------------------------------
...
...
@@ -64,9 +73,9 @@ get :: RepoCmdM env err m =>
ListId
->
m
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
get
lId
=
do
lst
<-
get'
lId
let
(
NodeId
id
)
=
lId
let
(
NodeId
id
'
)
=
lId
return
$
addHeader
(
concat
[
"attachment; filename=GarganText_NgramsList-"
,
pack
$
show
id
,
pack
$
show
id
'
,
".json"
]
)
lst
...
...
@@ -96,35 +105,58 @@ post l m = do
reIndexWith
::
CorpusId
->
ListId
->
NgramsType
->
[
NgramsTerm
]
->
Set
ListType
->
GargNoServer
()
reIndexWith
cId
lId
nt
ts
=
do
docs
<-
getDocumentsWithParentId
cId
reIndexWith
cId
lId
nt
lts
=
do
-- Getting [NgramsTerm]
ts
<-
List
.
concat
<$>
map
(
\
(
k
,
vs
)
->
k
:
vs
)
<$>
HashMap
.
toList
<$>
getTermsWith
identity
[
lId
]
nt
lts
-- printDebug "ts" ts
-- Taking the ngrams with 0 occurrences only (orphans)
orphans
<-
map
(
\
k
->
([
unNgramsTerm
k
],
[]
))
<$>
HashMap
.
keys
orphans
<-
HashMap
.
keys
<$>
HashMap
.
filter
(
==
0
)
<$>
getOccByNgramsOnlyFast'
cId
lId
nt
ts
-- Getting the Id of orphan ngrams
mapTextNgramsId
<-
insertNgrams
(
map
(
text2ngrams
.
unNgramsTerm
)
orphans
)
printDebug
"orphans"
orphans
-- Get all documents of the corpus
docs
<-
getDocumentsWithParentId
cId
-- Checking Text documents where orphans match
-- TODO Tests here
let
docMatched
=
map
(
\
doc
->
(
doc
^.
node_id
,
termsInText
(
buildPatterns
orphans
)
(
Text
.
unlines
$
catMaybes
[
doc
^.
node_hyperdata
.
hd_title
,
doc
^.
node_hyperdata
.
hd_abstract
]
ngramsByDoc
=
List
.
concat
$
map
(
\
doc
->
List
.
zip
(
termsInText
(
buildPatterns
$
map
(
\
k
->
([
unNgramsTerm
k
],
[]
))
orphans
)
$
Text
.
unlines
$
catMaybes
[
doc
^.
node_hyperdata
.
hd_title
,
doc
^.
node_hyperdata
.
hd_abstract
]
)
)
)
docs
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
node_id
)
1
)]]
)
)
docs
-- Saving the indexation in database
_
<-
insertDocNgrams
lId
(
HashMap
.
fromList
$
catMaybes
$
map
(
\
(
t
,
d
)
->
(,)
<$>
toIndexedNgrams
mapTextNgramsId
t
<*>
Just
d
)
ngramsByDoc
)
pure
()
toIndexedNgrams
::
HashMap
Text
NgramsId
->
Text
->
Maybe
(
Indexed
Int
Ngrams
)
toIndexedNgrams
m
t
=
Indexed
<$>
i
<*>
n
where
i
=
HashMap
.
lookup
t
m
n
=
Just
(
text2ngrams
t
)
------------------------------------------------------------------------
------------------------------------------------------------------------
type
PostAPI
=
Summary
"Update List"
...
...
src/Gargantext/API/Ngrams/Tools.hs
View file @
9f5df649
...
...
@@ -26,8 +26,8 @@ import Gargantext.Core.Types (ListType(..), NodeId, ListId)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
)
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HM
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
mergeNgramsElement
::
NgramsRepoElement
->
NgramsRepoElement
->
NgramsRepoElement
mergeNgramsElement
_neOld
neNew
=
neNew
...
...
@@ -50,7 +50,6 @@ listNgramsFromRepo nodeIds ngramsType repo = ngrams
[
ngramsMap
^.
at
nodeId
.
_Just
|
nodeId
<-
nodeIds
]
-- TODO-ACCESS: We want to do the security check before entering here.
-- Add a static capability parameter would be nice.
-- Ideally this is the access to `repoVar` which needs to
...
...
@@ -62,12 +61,12 @@ getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> get
getTermsWith
::
(
RepoCmdM
env
err
m
,
Eq
a
,
Hashable
a
)
=>
(
NgramsTerm
->
a
)
->
[
ListId
]
->
NgramsType
->
ListType
->
NgramsType
->
Set
ListType
->
m
(
HashMap
a
[
a
])
getTermsWith
f
ls
ngt
lt
=
HM
.
fromListWith
(
<>
)
getTermsWith
f
ls
ngt
lt
s
=
HM
.
fromListWith
(
<>
)
<$>
map
toTreeWith
<$>
HM
.
toList
<$>
HM
.
filter
(
\
f'
->
fst
f'
==
lt
)
<$>
HM
.
filter
(
\
f'
->
Set
.
member
(
fst
f'
)
lts
)
<$>
mapTermListRoot
ls
ngt
<$>
getRepo
where
...
...
src/Gargantext/Core/Text/List.hs
View file @
9f5df649
...
...
@@ -150,7 +150,6 @@ getGroupParams gp _ = pure gp
-- TODO use ListIds
buildNgramsTermsList
::
(
HasNodeError
err
,
CmdM
env
err
m
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
9f5df649
...
...
@@ -144,7 +144,6 @@ extracted2ngrams :: ExtractedNgrams -> Ngrams
extracted2ngrams
(
SimpleNgrams
ng
)
=
ng
extracted2ngrams
(
EnrichedNgrams
ng
)
=
view
np_form
ng
---------------------------
insertExtractedNgrams
::
[
ExtractedNgrams
]
->
Cmd
err
(
HashMap
Text
NgramsId
)
insertExtractedNgrams
ngs
=
do
...
...
@@ -216,6 +215,6 @@ text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
isPunctuation
::
Text
->
Bool
isPunctuation
x
=
List
.
elem
x
$
(
Text
.
pack
.
pure
)
<$>
(
"!?(),;."
::
String
)
<$>
(
"!?(),;.
:
"
::
String
)
src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
View file @
9f5df649
...
...
@@ -41,7 +41,8 @@ import Gargantext.Core.Types
-- import Gargantext.Core.Viz.Phylo.View.Export
-- import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.Text
as
Text
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
type
MinSizeBranch
=
Int
...
...
@@ -51,7 +52,7 @@ flowPhylo :: FlowCmdM env err m
flowPhylo
cId
=
do
list
<-
defaultList
cId
termList
<-
HashMap
.
toList
<$>
getTermsWith
(
Text
.
words
.
unNgramsTerm
)
[
list
]
NgramsTerms
MapTerm
termList
<-
HashMap
.
toList
<$>
getTermsWith
(
Text
.
words
.
unNgramsTerm
)
[
list
]
NgramsTerms
(
Set
.
singleton
MapTerm
)
docs'
<-
catMaybes
<$>
map
(
\
h
->
(,)
<$>
_hd_publication_year
h
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
9f5df649
...
...
@@ -236,7 +236,8 @@ flowCorpusUser l user corpusName ctype ids = do
-- printDebug "Node Text Ids:" tId
-- User List Flow
(
masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMk_RootWithCorpus
(
UserName
userMaster
)
(
Left
""
)
ctype
(
masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMk_RootWithCorpus
(
UserName
userMaster
)
(
Left
""
)
ctype
--let gp = (GroupParams l 2 3 (StopSize 3))
let
gp
=
GroupWithPosTag
l
CoreNLP
HashMap
.
empty
...
...
src/Gargantext/Database/Action/Flow/Utils.hs
View file @
9f5df649
...
...
@@ -37,10 +37,10 @@ docNgrams2nodeNodeNgrams :: CorpusId
docNgrams2nodeNodeNgrams
cId
(
DocNgrams
d
n
nt
w
)
=
NodeNodeNgrams
cId
d
n
nt
w
data
DocNgrams
=
DocNgrams
{
dn_doc_id
::
DocId
,
dn_ngrams_id
::
Int
data
DocNgrams
=
DocNgrams
{
dn_doc_id
::
DocId
,
dn_ngrams_id
::
Int
,
dn_ngrams_type
::
NgramsTypeId
,
dn_weight
::
Double
,
dn_weight
::
Double
}
insertDocNgramsOn
::
CorpusId
...
...
src/Gargantext/Database/Action/Index.hs
View file @
9f5df649
...
...
@@ -20,6 +20,7 @@ data Granularity = NewNgrams | NewTexts | Both
module
Gargantext.Database.Action.Index
where
{-
import Data.List (nub)
import Gargantext.Core.Text.Terms.WithList (buildPatterns, filterTerms, termsInText)
...
...
@@ -44,7 +45,7 @@ indexSave :: [Document] -> Pattern -> Cmd err [Int]
indexSave corpus p = do
indexedDoc <- map (filterTerms patterns) corpus
saveIndexDoc ngramsTextId
-}
...
...
src/Gargantext/Database/Query/Table/Ngrams.hs
View file @
9f5df649
...
...
@@ -101,5 +101,3 @@ queryInsertNgrams = [sql|
FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index
|]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment