Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
80fbde18
Commit
80fbde18
authored
Apr 19, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FACTORING] G.Text.Terms.
parent
269eba92
Pipeline
#828
failed with stage
Changes
6
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
112 additions
and
110 deletions
+112
-110
Types.hs
src/Gargantext/Core/Flow/Types.hs
+16
-23
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+24
-43
List.hs
src/Gargantext/Database/Action/Flow/List.hs
+1
-0
Types.hs
src/Gargantext/Database/Action/Flow/Types.hs
+9
-18
Text.hs
src/Gargantext/Text.hs
+8
-18
Terms.hs
src/Gargantext/Text/Terms.hs
+54
-8
No files found.
src/Gargantext/Core/Flow/Types.hs
View file @
80fbde18
...
...
@@ -20,39 +20,17 @@ module Gargantext.Core.Flow.Types where
import
Control.Lens
(
Lens
'
)
import
Data.Map
(
Map
)
import
Data.Maybe
(
Maybe
)
import
Data.Text
(
Text
)
import
Gargantext.Core
(
Lang
)
import
Gargantext.Text
(
HasText
(
..
))
import
Gargantext.Core.Types.Main
(
HashId
)
import
Gargantext.Database.Action.Query.Node.Contact
-- (HyperdataContact(..))
import
Gargantext.Database.Action.Query.Node.Document.Insert
(
AddUniqId
,
InsertDb
)
import
Gargantext.Database.Admin.Types.Node
-- (HyperdataDocument(..))
import
Gargantext.Database.Admin.Utils
(
Cmd
)
import
Gargantext.Database.Schema.Ngrams
(
Ngrams
,
NgramsType
)
import
Gargantext.Prelude
import
Gargantext.Text.Terms
(
TermType
)
type
FlowCorpus
a
=
(
AddUniqId
a
,
UniqId
a
,
InsertDb
a
,
ExtractNgramsT
a
,
HasText
a
)
class
UniqId
a
where
uniqId
::
Lens'
a
(
Maybe
HashId
)
class
ExtractNgramsT
h
where
extractNgramsT
::
HasText
h
=>
TermType
Lang
->
h
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
class
HasText
h
where
hasText
::
h
->
[
Text
]
instance
UniqId
HyperdataDocument
where
uniqId
=
hyperdataDocument_uniqId
...
...
@@ -60,3 +38,18 @@ instance UniqId HyperdataDocument
instance
UniqId
HyperdataContact
where
uniqId
=
hc_uniqId
data
DocumentIdWithNgrams
a
=
DocumentIdWithNgrams
{
documentWithId
::
!
(
DocumentWithId
a
)
,
document_ngrams
::
!
(
Map
Ngrams
(
Map
NgramsType
Int
))
}
deriving
(
Show
)
data
DocumentWithId
a
=
DocumentWithId
{
documentId
::
!
NodeId
,
documentData
::
!
a
}
deriving
(
Show
)
instance
HasText
a
=>
HasText
(
DocumentWithId
a
)
where
hasText
(
DocumentWithId
_
a
)
=
hasText
a
src/Gargantext/Database/Action/Flow.hs
View file @
80fbde18
...
...
@@ -58,7 +58,6 @@ import Data.Swagger
import
Data.Text
(
splitOn
,
intercalate
)
import
Data.Traversable
(
traverse
)
import
Data.Tuple.Extra
(
first
,
second
)
import
Debug.Trace
(
trace
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Flow.Types
import
Gargantext.Core.Types
(
Terms
(
..
))
...
...
@@ -82,18 +81,15 @@ import Gargantext.Database.Schema.NodeNodeNgrams2 -- (NodeNodeNgrams2, insertNod
import
Gargantext.Ext.IMT
(
toSchoolName
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Ext.IMTUser
(
deserialiseImtUsersFromFile
)
import
Gargantext.Text
import
Gargantext.Prelude
import
Gargantext.Text.Corpus.Parsers
(
parseFile
,
FileFormat
)
import
Gargantext.Text.List
(
buildNgramsLists
,
StopSize
(
..
))
import
Gargantext.Text.Terms
(
TermType
(
..
),
tt_lang
,
extractTerms
,
uniText
)
import
Gargantext.Text.Terms.Eleve
(
buildTries
,
toToken
)
import
Gargantext.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Gargantext.Text.Terms
import
GHC.Generics
(
Generic
)
import
Prelude
(
String
)
import
System.FilePath
(
FilePath
)
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Text
as
Text
import
qualified
Gargantext.Database.Action.Query.Node.Document.Add
as
Doc
(
add
)
import
qualified
Gargantext.Text.Corpus.API
as
API
...
...
@@ -272,9 +268,9 @@ insertMasterDocs c lang hs = do
-- insertDocNgrams
_return
<-
insertNodeNodeNgrams2
$
catMaybes
[
NodeNodeNgrams2
<$>
Just
nId
<*>
getCgramsId
mapCgramsId
ngrams_type
(
_ngramsTerms
terms
)
<*>
getCgramsId
mapCgramsId
ngrams_type
(
_ngramsTerms
terms
''
)
<*>
Just
(
fromIntegral
w
::
Double
)
|
(
terms
,
mapNgramsTypes
)
<-
Map
.
toList
maps
|
(
terms
''
,
mapNgramsTypes
)
<-
Map
.
toList
maps
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
,
(
nId
,
w
)
<-
Map
.
toList
mapNodeIdWeight
]
...
...
@@ -287,22 +283,8 @@ insertMasterDocs c lang hs = do
pure
ids'
withLang
::
HasText
a
=>
TermType
Lang
->
[
DocumentWithId
a
]
->
TermType
Lang
withLang
(
Unsupervised
l
n
s
m
)
ns
=
Unsupervised
l
n
s
m'
where
m'
=
case
m
of
Nothing
->
trace
(
"buildTries here"
::
String
)
$
Just
$
buildTries
n
(
fmap
toToken
$
uniText
$
Text
.
intercalate
" . "
$
List
.
concat
$
map
hasText
ns
)
just_m
->
just_m
withLang
l
_
=
l
------------------------------------------------------------------------
------------------------------------------------------------------------
...
...
@@ -335,6 +317,24 @@ instance HasText HyperdataContact
where
hasText
=
undefined
------------------------------------------------------------------------
------------------------------------------------------------------------
documentIdWithNgrams
::
HasNodeError
err
=>
(
a
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
)))
->
[
DocumentWithId
a
]
->
Cmd
err
[
DocumentIdWithNgrams
a
]
documentIdWithNgrams
f
=
traverse
toDocumentIdWithNgrams
where
toDocumentIdWithNgrams
d
=
do
e
<-
f
$
documentData
d
pure
$
DocumentIdWithNgrams
d
e
------------------------------------------------------------------------
instance
ExtractNgramsT
HyperdataContact
where
extractNgramsT
l
hc
=
filterNgramsT
255
<$>
extract
l
hc
...
...
@@ -387,23 +387,4 @@ instance ExtractNgramsT HyperdataDocument
<>
[(
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
<>
[(
t'
,
Map
.
singleton
NgramsTerms
1
)
|
t'
<-
terms'
]
filterNgramsT
::
Int
->
Map
Ngrams
(
Map
NgramsType
Int
)
->
Map
Ngrams
(
Map
NgramsType
Int
)
filterNgramsT
s
ms
=
Map
.
fromList
$
map
(
\
a
->
filter'
s
a
)
$
Map
.
toList
ms
where
filter'
s'
(
ng
@
(
Ngrams
t
n
),
y
)
=
case
(
Text
.
length
t
)
<
s'
of
True
->
(
ng
,
y
)
False
->
(
Ngrams
(
Text
.
take
s'
t
)
n
,
y
)
documentIdWithNgrams
::
HasNodeError
err
=>
(
a
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
)))
->
[
DocumentWithId
a
]
->
Cmd
err
[
DocumentIdWithNgrams
a
]
documentIdWithNgrams
f
=
traverse
toDocumentIdWithNgrams
where
toDocumentIdWithNgrams
d
=
do
e
<-
f
$
documentData
d
pure
$
DocumentIdWithNgrams
d
e
src/Gargantext/Database/Action/Flow/List.hs
View file @
80fbde18
...
...
@@ -30,6 +30,7 @@ import Data.Maybe (Maybe(..), catMaybes)
import
Data.Text
(
Text
)
import
Gargantext.API.Ngrams
(
NgramsElement
(
..
),
putListNgrams
)
import
Gargantext.Core.Types.Main
(
ListType
(
CandidateTerm
))
import
Gargantext.Core.Flow.Types
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Admin.Types.Node
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import
Gargantext.Database.Schema.Ngrams
-- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
...
...
src/Gargantext/Database/Action/Flow/Types.hs
View file @
80fbde18
...
...
@@ -24,14 +24,13 @@ Portability : POSIX
module
Gargantext.Database.Action.Flow.Types
where
import
Data.Map
(
Map
)
import
Gargantext.Prelude
import
Gargantext.Core.Flow.Types
import
Gargantext.Text
import
Gargantext.Text.Terms
import
Gargantext.API.Ngrams
(
HasRepoVar
,
RepoCmdM
)
import
Gargantext.Database.Schema.Ngrams
(
Ngrams
(
..
),
NgramsType
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Database.Admin.Types.Errors
(
HasNodeError
)
import
Gargantext.Database.Admin.Utils
(
CmdM
)
import
Gargantext.Database.Action.Query.Node.Document.Insert
type
FlowCmdM
env
err
m
=
(
CmdM
env
err
m
...
...
@@ -40,18 +39,10 @@ type FlowCmdM env err m =
,
HasRepoVar
env
)
data
DocumentIdWithNgrams
a
=
DocumentIdWithNgrams
{
documentWithId
::
!
(
DocumentWithId
a
)
,
document_ngrams
::
!
(
Map
Ngrams
(
Map
NgramsType
Int
))
}
deriving
(
Show
)
data
DocumentWithId
a
=
DocumentWithId
{
documentId
::
!
NodeId
,
documentData
::
!
a
}
deriving
(
Show
)
instance
HasText
a
=>
HasText
(
DocumentWithId
a
)
where
hasText
(
DocumentWithId
_
a
)
=
hasText
a
type
FlowCorpus
a
=
(
AddUniqId
a
,
UniqId
a
,
InsertDb
a
,
ExtractNgramsT
a
,
HasText
a
)
src/Gargantext/Text.hs
View file @
80fbde18
...
...
@@ -24,16 +24,13 @@ import NLP.FullStop (segment)
import
qualified
Data.Text
as
DT
-----------------------------------------------------------------
-- | Why not use data ?
data
Niveau
=
NiveauTexte
Texte
|
NiveauParagraphe
Paragraphe
|
NiveauPhrase
Phrase
|
NiveauMultiTerme
MultiTerme
|
NiveauMot
Mot
|
NiveauLettre
Lettre
deriving
(
Show
)
-- | Why use newtype ?
class
HasText
h
where
hasText
::
h
->
[
Text
]
-----------------------------------------------------------------
-- French words to distinguish contexts
newtype
Texte
=
Texte
Text
newtype
Paragraphe
=
Paragraphe
Text
newtype
Phrase
=
Phrase
Text
...
...
@@ -43,6 +40,7 @@ newtype Lettre = Lettre Text
-- | Type syn seems obvious
type
Titre
=
Phrase
-----------------------------------------------------------------
instance
Show
Texte
where
...
...
@@ -85,14 +83,6 @@ instance Collage MultiTerme Mot where
dec
(
MultiTerme
mt
)
=
map
Mot
$
DT
.
words
mt
inc
=
MultiTerme
.
DT
.
intercalate
" "
.
map
(
\
(
Mot
m
)
->
m
)
-- | We could use Type Classes but we lose the Sum Type classification
toMultiTerme
::
Niveau
->
[
MultiTerme
]
toMultiTerme
(
NiveauTexte
(
Texte
_t
))
=
undefined
toMultiTerme
(
NiveauPhrase
p
)
=
dec
p
toMultiTerme
(
NiveauMultiTerme
mt
)
=
[
mt
]
toMultiTerme
(
NiveauMot
_m
)
=
undefined
toMultiTerme
_
=
undefined
-------------------------------------------------------------------
-- Contexts of text
sentences
::
Text
->
[
Text
]
...
...
src/Gargantext/Text/Terms.hs
View file @
80fbde18
...
...
@@ -32,21 +32,29 @@ compute graph
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ConstrainedClassMethods #-}
module
Gargantext.Text.Terms
where
import
Control.Lens
import
Data.Map
(
Map
)
import
qualified
Data.Map
as
Map
import
Data.Text
(
Text
)
import
Data.Traversable
import
GHC.Base
(
String
)
import
GHC.Generics
(
Generic
)
import
Gargantext.Core
import
Gargantext.Core.Types
import
Gargantext.Core.Flow.Types
import
Gargantext.Prelude
import
Gargantext.Text
(
sentences
)
import
Gargantext.Text
(
sentences
,
HasText
(
..
)
)
import
Gargantext.Text.Terms.Eleve
(
mainEleveWith
,
Tries
,
Token
,
buildTries
,
toToken
)
import
Gargantext.Database.Schema.Ngrams
(
Ngrams
(
..
),
NgramsType
(
..
))
import
Gargantext.Text.Terms.Mono
(
monoTerms
)
import
Gargantext.Database.Admin.Utils
(
Cmd
)
import
Gargantext.Text.Terms.Mono.Stem
(
stem
)
import
Gargantext.Text.Terms.Mono.Token.En
(
tokenize
)
import
Gargantext.Text.Terms.Multi
(
multiterms
)
...
...
@@ -55,13 +63,13 @@ import qualified Data.Set as Set
import
qualified
Data.Text
as
Text
data
TermType
lang
=
Mono
{
_tt_lang
::
lang
}
|
Multi
{
_tt_lang
::
lang
}
|
MonoMulti
{
_tt_lang
::
lang
}
|
Unsupervised
{
_tt_lang
::
lang
,
_tt_windowSize
::
Int
,
_tt_ngramsSize
::
Int
,
_tt_model
::
Maybe
(
Tries
Token
(
)
)
=
Mono
{
_tt_lang
::
!
lang
}
|
Multi
{
_tt_lang
::
!
lang
}
|
MonoMulti
{
_tt_lang
::
!
lang
}
|
Unsupervised
{
_tt_lang
::
!
lang
,
_tt_windowSize
::
!
Int
,
_tt_ngramsSize
::
!
Int
,
_tt_model
::
!
(
Maybe
(
Tries
Token
()
))
}
deriving
Generic
...
...
@@ -84,7 +92,45 @@ extractTerms (Unsupervised l n s m) xs = mapM (terms (Unsupervised l n s (Just m
extractTerms
termTypeLang
xs
=
mapM
(
terms
termTypeLang
)
xs
------------------------------------------------------------------------
withLang
::
HasText
a
=>
TermType
Lang
->
[
DocumentWithId
a
]
->
TermType
Lang
withLang
(
Unsupervised
l
n
s
m
)
ns
=
Unsupervised
l
n
s
m'
where
m'
=
case
m
of
Nothing
->
-- trace ("buildTries here" :: String)
Just
$
buildTries
n
(
fmap
toToken
$
uniText
$
Text
.
intercalate
" . "
$
List
.
concat
$
map
hasText
ns
)
just_m
->
just_m
withLang
l
_
=
l
------------------------------------------------------------------------
class
ExtractNgramsT
h
where
extractNgramsT
::
HasText
h
=>
TermType
Lang
->
h
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
filterNgramsT
::
Int
->
Map
Ngrams
(
Map
NgramsType
Int
)
->
Map
Ngrams
(
Map
NgramsType
Int
)
filterNgramsT
s
ms
=
Map
.
fromList
$
map
(
\
a
->
filter'
s
a
)
$
Map
.
toList
ms
where
filter'
s'
(
ng
@
(
Ngrams
t
n
),
y
)
=
case
(
Text
.
length
t
)
<
s'
of
True
->
(
ng
,
y
)
False
->
(
Ngrams
(
Text
.
take
s'
t
)
n
,
y
)
-- =======================================================
-- | Terms from Text
-- Mono : mono terms
-- Multi : multi terms
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment