Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
7
Merge Requests
7
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8f0b7cd4
Commit
8f0b7cd4
authored
Mar 21, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[WIP] ngrams lists and groups.
parent
740badb8
Pipeline
#294
failed with stage
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
39 additions
and
24 deletions
+39
-24
Flow.hs
src/Gargantext/Database/Flow.hs
+20
-7
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+7
-7
List.hs
src/Gargantext/Text/List.hs
+12
-10
No files found.
src/Gargantext/Database/Flow.hs
View file @
8f0b7cd4
...
...
@@ -64,8 +64,9 @@ import Gargantext.Text.List (buildNgramsLists)
import
Gargantext.Text.Terms
(
TermType
(
..
))
import
Gargantext.Text.Terms
(
extractTerms
)
import
Gargantext.Text.Terms.Mono.Stem.En
(
stemIt
)
import
qualified
Gargantext.Text.Parsers.GrandDebat
as
GD
import
Servant
(
ServantErr
)
--
import System.FilePath (FilePath)
import
System.FilePath
(
FilePath
)
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
Text
import
qualified
Gargantext.Database.Node.Document.Add
as
Doc
(
add
)
...
...
@@ -77,6 +78,18 @@ type FlowCmdM env err m =
,
HasRepoVar
env
)
flowCorpusDebat
::
FlowCmdM
env
ServantErr
m
=>
Username
->
CorpusName
->
Int
->
FilePath
->
m
CorpusId
flowCorpusDebat
u
n
l
fp
=
do
docs
<-
liftIO
(
splitEvery
500
<$>
take
l
<$>
GD
.
readFile
fp
::
IO
[[
GD
.
GrandDebatReference
]]
)
flowCorpus
u
n
(
Multi
FR
)
docs
{-
flowCorpus :: FlowCmdM env ServantErr m
=> Username -> CorpusName -> TermType Lang -> FileFormat -> FilePath -> m CorpusId
...
...
@@ -91,7 +104,7 @@ flowCorpus :: (FlowCmdM env ServantErr m, ToHyperdataDocument a)
=>
Username
->
CorpusName
->
TermType
Lang
->
[[
a
]]
->
m
CorpusId
flowCorpus
u
cn
la
docs
=
do
ids
<-
mapM
((
insertMasterDocs
la
)
.
(
map
toHyperdataDocument
))
docs
flowCorpusUser
u
cn
(
concat
ids
)
flowCorpusUser
FR
u
cn
(
concat
ids
)
-- TODO query with complex query
...
...
@@ -100,12 +113,12 @@ flowCorpusSearchInDatabase :: FlowCmdM env ServantErr m
flowCorpusSearchInDatabase
u
q
=
do
(
_masterUserId
,
_masterRootId
,
cId
)
<-
getOrMkRootWithCorpus
userMaster
""
ids
<-
map
fst
<$>
searchInDatabase
cId
(
stemIt
q
)
flowCorpusUser
u
q
ids
flowCorpusUser
FR
u
q
ids
flowCorpusUser
::
FlowCmdM
env
ServantErr
m
=>
Username
->
CorpusName
->
[
NodeId
]
->
m
CorpusId
flowCorpusUser
userName
corpusName
ids
=
do
=>
Lang
->
Username
->
CorpusName
->
[
NodeId
]
->
m
CorpusId
flowCorpusUser
l
userName
corpusName
ids
=
do
-- User Flow
(
userId
,
_rootId
,
userCorpusId
)
<-
getOrMkRootWithCorpus
userName
corpusName
-- TODO: check if present already, ignore
...
...
@@ -113,12 +126,12 @@ flowCorpusUser userName corpusName ids = do
-- User List Flow
(
_masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
userMaster
""
ngs
<-
buildNgramsLists
userCorpusId
masterCorpusId
ngs
<-
buildNgramsLists
l
2
3
userCorpusId
masterCorpusId
userListId
<-
flowList
userId
userCorpusId
ngs
printDebug
"userListId"
userListId
-- User Graph Flow
--
_
<-
mkGraph
userCorpusId
userId
_
<-
mkGraph
userCorpusId
userId
-- User Dashboard Flow
-- _ <- mkDashboard userCorpusId userId
...
...
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
8f0b7cd4
...
...
@@ -44,13 +44,13 @@ import qualified Database.PostgreSQL.Simple as DPS
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
ngramsGroup
::
Lang
->
Int
->
Int
->
Text
->
Text
ngramsGroup
l
m
n
=
Text
.
intercalate
" "
.
map
(
stem
l
)
.
take
n
.
List
.
sort
.
(
List
.
filter
(
\
t
->
Text
.
length
t
>
m
))
.
Text
.
splitOn
" "
.
Text
.
replace
"-"
" "
ngramsGroup
l
_m
_
n
=
Text
.
intercalate
" "
.
map
(
stem
l
)
--
. take n
.
List
.
sort
--
. (List.filter (\t -> Text.length t > m))
.
Text
.
splitOn
" "
.
Text
.
replace
"-"
" "
sortTficf
::
(
Map
Text
(
Double
,
Set
Text
))
...
...
src/Gargantext/Text/List.hs
View file @
8f0b7cd4
...
...
@@ -26,6 +26,7 @@ import Gargantext.Database.Metrics.NgramsByNode (getTficf', sortTficf, ngramsGro
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Utils
(
Cmd
)
import
Gargantext.Prelude
--import Gargantext.Text.Terms (TermType(..))
import
qualified
Data.Char
as
Char
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
...
...
@@ -33,10 +34,10 @@ import qualified Data.Set as Set
import
qualified
Data.Text
as
Text
-- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists
::
UserCorpusId
->
MasterCorpusId
buildNgramsLists
::
Lang
->
Int
->
Int
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
uCid
mCid
buildNgramsLists
l
n
m
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
l
n
m
uCid
mCid
othersTerms
<-
mapM
(
buildNgramsOthersList
uCid
identity
)
[
Authors
,
Sources
,
Institutes
]
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
...
...
@@ -53,13 +54,14 @@ buildNgramsOthersList uCid groupIt nt = do
]
-- TODO remove hard coded parameters
buildNgramsTermsList
::
UserCorpusId
->
MasterCorpusId
buildNgramsTermsList
::
Lang
->
Int
->
Int
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
uCid
mCid
=
do
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
(
ngramsGroup
EN
4
2
)
buildNgramsTermsList
l
n
m
uCid
mCid
=
do
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
(
ngramsGroup
l
n
m
)
--printDebug "candidate" (length candidates)
let
termList
=
toTermList
(
isStopTerm
.
fst
)
candidates
--let termList = toTermList (isStopTerm . fst) candidates
let
termList
=
toTermList
((
\
_
->
False
)
.
fst
)
candidates
--printDebug "termlist" (length termList)
let
ngs
=
List
.
concat
$
map
toNgramsElement
termList
...
...
@@ -98,14 +100,14 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
ys
=
take
b
$
drop
a
ns
zs
=
drop
b
$
drop
a
ns
a
=
5
0
b
=
10
00
a
=
1
0
b
=
4
00
isStopTerm
::
Text
->
Bool
isStopTerm
x
=
Text
.
length
x
<
3
||
not
(
all
Char
.
isAlpha
(
Text
.
unpack
x'
))
where
x'
=
foldl
(
\
t
->
Text
.
replace
t
""
)
x'
=
foldl
(
\
t
->
Text
.
replace
t
"
a
"
)
x
[
"-"
,
" "
,
"/"
,
"("
,
")"
]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment