Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
c5fd09cc
Verified
Commit
c5fd09cc
authored
Dec 06, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[ngrams] rewrite NgramsElement type to use NgramsRepoElement
Also, Tokenize.hs added for future endpoint for tokenization
parent
22d25fbe
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
72 additions
and
50 deletions
+72
-50
gargantext.cabal
gargantext.cabal
+1
-0
EnvTypes.hs
src/Gargantext/API/Admin/EnvTypes.hs
+15
-5
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+5
-5
Types.hs
src/Gargantext/API/Ngrams/Types.hs
+13
-34
Tokenize.hs
src/Gargantext/Core/Text/Terms/Tokenize.hs
+29
-0
Types.hs
src/Gargantext/Core/Types.hs
+1
-0
List.hs
src/Gargantext/Database/Action/Flow/List.hs
+8
-6
No files found.
gargantext.cabal
View file @
c5fd09cc
...
...
@@ -275,6 +275,7 @@ library
Gargantext.Core.Text.Terms.Multi.Group
Gargantext.Core.Text.Terms.Multi.CoreNLP
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
Gargantext.Core.Text.Terms.Tokenize
Gargantext.Core.Text.Upload
Gargantext.Core.Types.Search
Gargantext.Core.Utils.DateUtils
...
...
src/Gargantext/API/Admin/EnvTypes.hs
View file @
c5fd09cc
-- |
{-|
Module : Gargantext.API.Admin.EnvTypes
Description : Env
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
...
...
@@ -106,10 +115,11 @@ data GargJob
|
RecomputeGraphJob
deriving
(
Show
,
Eq
,
Ord
,
Enum
,
Bounded
)
-- Do /not/ treat the data types of this type as strict, because it's convenient
-- to be able to partially initialise things like an 'Env' during tests, without
-- having to specify /everything/. This means that when we /construct/ an 'Env',
-- we need to remember to force the fields to WHNF at that point.
-- | Do /not/ treat the data types of this type as strict, because
-- it's convenient to be able to partially initialise things like an
-- 'Env' during tests, without having to specify /everything/. This
-- means that when we /construct/ an 'Env', we need to remember to
-- force the fields to WHNF at that point.
data
Env
=
Env
{
_env_settings
::
~
Settings
,
_env_logger
::
~
(
Logger
(
GargM
Env
BackendInternalError
))
...
...
src/Gargantext/API/Ngrams.hs
View file @
c5fd09cc
...
...
@@ -547,13 +547,13 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
-- /CAREFUL/: The root we select might /not/ have the same 'listType' we are
-- filtering for, in which case we have to change its type to match, if needed.
rootOf
::
Map
NgramsTerm
NgramsElement
->
NgramsElement
->
NgramsElement
rootOf
tblMap
ne
=
case
ne
^.
ne_root
of
rootOf
tblMap
ne
=
case
ne
^.
ne_
nre
.
nre_
root
of
Nothing
->
ne
Just
rootKey
|
Just
r
<-
tblMap
^.
at
rootKey
-- NOTE(adinapoli) It's unclear what is the correct behaviour here: should
-- we override the type or we filter out the node altogether?
->
over
ne_list
(
\
oldList
->
fromMaybe
oldList
_nsq_listType
)
r
->
over
(
ne_nre
.
nre_list
)
(
\
oldList
->
fromMaybe
oldList
_nsq_listType
)
r
|
otherwise
->
ne
...
...
@@ -561,7 +561,7 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
-- mandated by 'NgramsSearchQuery'.
matchingNode
::
NgramsElement
->
Bool
matchingNode
inputNode
=
let
nodeSize
=
inputNode
^.
ne_size
let
nodeSize
=
inputNode
^.
ne_
nre
.
nre_
size
matchesListType
=
maybe
(
const
True
)
(
==
)
_nsq_listType
respectsMinSize
=
maybe
(
const
True
)
(
<=
)
(
getMinSize
<$>
_nsq_minSize
)
respectsMaxSize
=
maybe
(
const
True
)
(
>=
)
(
getMaxSize
<$>
_nsq_maxSize
)
...
...
@@ -569,7 +569,7 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
in
respectsMinSize
nodeSize
&&
respectsMaxSize
nodeSize
&&
_nsq_searchQuery
(
inputNode
^.
ne_ngrams
)
&&
matchesListType
(
inputNode
^.
ne_list
)
&&
matchesListType
(
inputNode
^.
ne_
nre
.
nre_
list
)
sortOnOrder
::
Maybe
OrderBy
->
([
NgramsElement
]
->
[
NgramsElement
])
sortOnOrder
Nothing
=
sortOnOrder
(
Just
ScoreDesc
)
...
...
@@ -593,7 +593,7 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
where
addSubitemsOccurrences
::
NgramsElement
->
NgramsElement
addSubitemsOccurrences
e
=
e
{
_ne_occurrences
=
foldl'
alterOccurrences
(
e
^.
ne_occurrences
)
(
e
^.
ne_children
)
}
e
{
_ne_occurrences
=
foldl'
alterOccurrences
(
e
^.
ne_occurrences
)
(
e
^.
ne_
nre
.
nre_
children
)
}
alterOccurrences
::
Set
ContextId
->
NgramsTerm
->
Set
ContextId
alterOccurrences
occs
t
=
case
Map
.
lookup
t
tblMap
of
...
...
src/Gargantext/API/Ngrams/Types.hs
View file @
c5fd09cc
...
...
@@ -171,12 +171,8 @@ instance Serialise (MSet NgramsTerm)
data
NgramsElement
=
NgramsElement
{
_ne_ngrams
::
NgramsTerm
,
_ne_size
::
Int
,
_ne_list
::
ListType
,
_ne_occurrences
::
Set
ContextId
,
_ne_root
::
Maybe
NgramsTerm
,
_ne_parent
::
Maybe
NgramsTerm
,
_ne_children
::
MSet
NgramsTerm
,
_ne_nre
::
NgramsRepoElement
}
deriving
(
Ord
,
Eq
,
Show
,
Generic
)
...
...
@@ -189,7 +185,15 @@ mkNgramsElement :: NgramsTerm
->
MSet
NgramsTerm
->
NgramsElement
mkNgramsElement
ngrams
list'
rp
children
=
NgramsElement
ngrams
(
size
(
unNgramsTerm
ngrams
))
list'
mempty
(
_rp_root
<$>
rp
)
(
_rp_parent
<$>
rp
)
children
NgramsElement
{
_ne_ngrams
=
ngrams
,
_ne_occurrences
=
mempty
,
_ne_nre
=
nre
}
where
nre
=
NgramsRepoElement
{
_nre_size
=
size
$
unNgramsTerm
ngrams
,
_nre_list
=
list'
,
_nre_root
=
_rp_root
<$>
rp
,
_nre_parent
=
_rp_parent
<$>
rp
,
_nre_children
=
children
}
newNgramsElement
::
Maybe
ListType
->
NgramsTerm
->
NgramsElement
newNgramsElement
mayList
ngrams
=
...
...
@@ -587,36 +591,11 @@ instance Applicable NgramsTablePatch (Maybe NgramsTableMap) where
ngramsElementToRepo
::
NgramsElement
->
NgramsRepoElement
ngramsElementToRepo
(
NgramsElement
{
_ne_size
=
s
,
_ne_list
=
l
,
_ne_root
=
r
,
_ne_parent
=
p
,
_ne_children
=
c
})
=
NgramsRepoElement
{
_nre_size
=
s
,
_nre_list
=
l
,
_nre_parent
=
p
,
_nre_root
=
r
,
_nre_children
=
c
}
ngramsElementToRepo
(
NgramsElement
{
_ne_nre
=
nre
})
=
nre
ngramsElementFromRepo
::
NgramsTerm
->
NgramsRepoElement
->
NgramsElement
ngramsElementFromRepo
ngrams
(
NgramsRepoElement
{
_nre_size
=
s
,
_nre_list
=
l
,
_nre_parent
=
p
,
_nre_root
=
r
,
_nre_children
=
c
})
=
NgramsElement
{
_ne_size
=
s
,
_ne_list
=
l
,
_ne_root
=
r
,
_ne_parent
=
p
,
_ne_children
=
c
ngramsElementFromRepo
ngrams
nre
=
NgramsElement
{
_ne_nre
=
nre
,
_ne_ngrams
=
ngrams
,
_ne_occurrences
=
mempty
-- panic $ "API.Ngrams.Types._ne_occurrences"
{-
...
...
src/Gargantext/Core/Text/Terms/Tokenize.hs
0 → 100644
View file @
c5fd09cc
{-|
Module : Gargantext.Core.Text.Terms.Tokenize
Description : String tokenization
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module
Gargantext.Core.Text.Terms.Tokenize
where
import
Control.Lens
(
view
)
import
Control.Monad.Trans.Control
(
MonadBaseControl
)
import
Gargantext.Core
(
Lang
)
import
Gargantext.Core.NLP
(
nlpServerGet
,
HasNLPServer
)
import
Gargantext.Core.Text.Terms.Multi
(
tokenTags
)
import
Gargantext.Core.Types
(
TokenTag
(
..
))
import
Gargantext.Prelude
tokenize
::
(
HasNLPServer
env
,
MonadReader
env
m
,
MonadBaseControl
IO
m
)
=>
Lang
->
Text
->
m
[
TokenTag
]
tokenize
lang
txt
=
do
nlp
<-
view
(
nlpServerGet
lang
)
liftBase
$
concat
<$>
tokenTags
nlp
lang
txt
src/Gargantext/Core/Types.hs
View file @
c5fd09cc
...
...
@@ -157,6 +157,7 @@ data TokenTag = TokenTag { _my_token_word :: [Text]
,
_my_token_offset_begin
::
Int
,
_my_token_offset_end
::
Int
}
deriving
(
Show
)
$
(
deriveJSON
(
unPrefix
"_my_token_"
)
''
T
okenTag
)
-- | NOTE: Combining 'TokenTag' doesn't make much sense to me. And
-- lemma combining is just wrong. You can't just "cat" <> "woman" to
...
...
src/Gargantext/Database/Action/Flow/List.hs
View file @
c5fd09cc
...
...
@@ -125,12 +125,14 @@ toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
,
_nng_ngrams_class
=
Nothing
,
_nng_ngrams_weight
=
0
}
|
(
NgramsElement
{
_ne_ngrams
=
NgramsTerm
ngrams_terms'
,
_ne_size
=
_size
,
_ne_list
=
list_type
,
_ne_occurrences
=
_occ
,
_ne_root
=
_root
,
_ne_parent
=
_parent
,
_ne_children
=
_children
-- , _ne_occurrences = _occ
,
_ne_nre
=
NgramsRepoElement
{
_nre_list
=
list_type
-- , _nre_size = _size
-- , _nre_root = _root
-- , _nre_parent = _parent
-- , _nre_children = _children
}
}
)
<-
elms
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment