Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8f0b7cd4
Commit
8f0b7cd4
authored
Mar 21, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[WIP] ngrams lists and groups.
parent
740badb8
Pipeline
#294
failed with stage
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
39 additions
and
24 deletions
+39
-24
Flow.hs
src/Gargantext/Database/Flow.hs
+20
-7
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+7
-7
List.hs
src/Gargantext/Text/List.hs
+12
-10
No files found.
src/Gargantext/Database/Flow.hs
View file @
8f0b7cd4
...
@@ -64,8 +64,9 @@ import Gargantext.Text.List (buildNgramsLists)
...
@@ -64,8 +64,9 @@ import Gargantext.Text.List (buildNgramsLists)
import
Gargantext.Text.Terms
(
TermType
(
..
))
import
Gargantext.Text.Terms
(
TermType
(
..
))
import
Gargantext.Text.Terms
(
extractTerms
)
import
Gargantext.Text.Terms
(
extractTerms
)
import
Gargantext.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Gargantext.Text.Terms.Mono.Stem.En
(
stemIt
)
import
qualified
Gargantext.Text.Parsers.GrandDebat
as
GD
import
Servant
(
ServantErr
)
import
Servant
(
ServantErr
)
--
import System.FilePath (FilePath)
import
System.FilePath
(
FilePath
)
import
qualified
Data.Map
as
DM
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
Text
import
qualified
Data.Text
as
Text
import
qualified
Gargantext.Database.Node.Document.Add
as
Doc
(
add
)
import
qualified
Gargantext.Database.Node.Document.Add
as
Doc
(
add
)
...
@@ -77,6 +78,18 @@ type FlowCmdM env err m =
...
@@ -77,6 +78,18 @@ type FlowCmdM env err m =
,
HasRepoVar
env
,
HasRepoVar
env
)
)
flowCorpusDebat
::
FlowCmdM
env
ServantErr
m
=>
Username
->
CorpusName
->
Int
->
FilePath
->
m
CorpusId
flowCorpusDebat
u
n
l
fp
=
do
docs
<-
liftIO
(
splitEvery
500
<$>
take
l
<$>
GD
.
readFile
fp
::
IO
[[
GD
.
GrandDebatReference
]]
)
flowCorpus
u
n
(
Multi
FR
)
docs
{-
{-
flowCorpus :: FlowCmdM env ServantErr m
flowCorpus :: FlowCmdM env ServantErr m
=> Username -> CorpusName -> TermType Lang -> FileFormat -> FilePath -> m CorpusId
=> Username -> CorpusName -> TermType Lang -> FileFormat -> FilePath -> m CorpusId
...
@@ -91,7 +104,7 @@ flowCorpus :: (FlowCmdM env ServantErr m, ToHyperdataDocument a)
...
@@ -91,7 +104,7 @@ flowCorpus :: (FlowCmdM env ServantErr m, ToHyperdataDocument a)
=>
Username
->
CorpusName
->
TermType
Lang
->
[[
a
]]
->
m
CorpusId
=>
Username
->
CorpusName
->
TermType
Lang
->
[[
a
]]
->
m
CorpusId
flowCorpus
u
cn
la
docs
=
do
flowCorpus
u
cn
la
docs
=
do
ids
<-
mapM
((
insertMasterDocs
la
)
.
(
map
toHyperdataDocument
))
docs
ids
<-
mapM
((
insertMasterDocs
la
)
.
(
map
toHyperdataDocument
))
docs
flowCorpusUser
u
cn
(
concat
ids
)
flowCorpusUser
FR
u
cn
(
concat
ids
)
-- TODO query with complex query
-- TODO query with complex query
...
@@ -100,12 +113,12 @@ flowCorpusSearchInDatabase :: FlowCmdM env ServantErr m
...
@@ -100,12 +113,12 @@ flowCorpusSearchInDatabase :: FlowCmdM env ServantErr m
flowCorpusSearchInDatabase
u
q
=
do
flowCorpusSearchInDatabase
u
q
=
do
(
_masterUserId
,
_masterRootId
,
cId
)
<-
getOrMkRootWithCorpus
userMaster
""
(
_masterUserId
,
_masterRootId
,
cId
)
<-
getOrMkRootWithCorpus
userMaster
""
ids
<-
map
fst
<$>
searchInDatabase
cId
(
stemIt
q
)
ids
<-
map
fst
<$>
searchInDatabase
cId
(
stemIt
q
)
flowCorpusUser
u
q
ids
flowCorpusUser
FR
u
q
ids
flowCorpusUser
::
FlowCmdM
env
ServantErr
m
flowCorpusUser
::
FlowCmdM
env
ServantErr
m
=>
Username
->
CorpusName
->
[
NodeId
]
->
m
CorpusId
=>
Lang
->
Username
->
CorpusName
->
[
NodeId
]
->
m
CorpusId
flowCorpusUser
userName
corpusName
ids
=
do
flowCorpusUser
l
userName
corpusName
ids
=
do
-- User Flow
-- User Flow
(
userId
,
_rootId
,
userCorpusId
)
<-
getOrMkRootWithCorpus
userName
corpusName
(
userId
,
_rootId
,
userCorpusId
)
<-
getOrMkRootWithCorpus
userName
corpusName
-- TODO: check if present already, ignore
-- TODO: check if present already, ignore
...
@@ -113,12 +126,12 @@ flowCorpusUser userName corpusName ids = do
...
@@ -113,12 +126,12 @@ flowCorpusUser userName corpusName ids = do
-- User List Flow
-- User List Flow
(
_masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
userMaster
""
(
_masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
userMaster
""
ngs
<-
buildNgramsLists
userCorpusId
masterCorpusId
ngs
<-
buildNgramsLists
l
2
3
userCorpusId
masterCorpusId
userListId
<-
flowList
userId
userCorpusId
ngs
userListId
<-
flowList
userId
userCorpusId
ngs
printDebug
"userListId"
userListId
printDebug
"userListId"
userListId
-- User Graph Flow
-- User Graph Flow
--
_
<-
mkGraph
userCorpusId
userId
_
<-
mkGraph
userCorpusId
userId
-- User Dashboard Flow
-- User Dashboard Flow
-- _ <- mkDashboard userCorpusId userId
-- _ <- mkDashboard userCorpusId userId
...
...
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
8f0b7cd4
...
@@ -44,13 +44,13 @@ import qualified Database.PostgreSQL.Simple as DPS
...
@@ -44,13 +44,13 @@ import qualified Database.PostgreSQL.Simple as DPS
-- a first grouping option to user and get some
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
-- enriched data to better learn and improve that algo
ngramsGroup
::
Lang
->
Int
->
Int
->
Text
->
Text
ngramsGroup
::
Lang
->
Int
->
Int
->
Text
->
Text
ngramsGroup
l
m
n
=
Text
.
intercalate
" "
ngramsGroup
l
_m
_
n
=
Text
.
intercalate
" "
.
map
(
stem
l
)
.
map
(
stem
l
)
.
take
n
--
. take n
.
List
.
sort
.
List
.
sort
.
(
List
.
filter
(
\
t
->
Text
.
length
t
>
m
))
--
. (List.filter (\t -> Text.length t > m))
.
Text
.
splitOn
" "
.
Text
.
splitOn
" "
.
Text
.
replace
"-"
" "
.
Text
.
replace
"-"
" "
sortTficf
::
(
Map
Text
(
Double
,
Set
Text
))
sortTficf
::
(
Map
Text
(
Double
,
Set
Text
))
...
...
src/Gargantext/Text/List.hs
View file @
8f0b7cd4
...
@@ -26,6 +26,7 @@ import Gargantext.Database.Metrics.NgramsByNode (getTficf', sortTficf, ngramsGro
...
@@ -26,6 +26,7 @@ import Gargantext.Database.Metrics.NgramsByNode (getTficf', sortTficf, ngramsGro
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Utils
(
Cmd
)
import
Gargantext.Database.Utils
(
Cmd
)
import
Gargantext.Prelude
import
Gargantext.Prelude
--import Gargantext.Text.Terms (TermType(..))
import
qualified
Data.Char
as
Char
import
qualified
Data.Char
as
Char
import
qualified
Data.List
as
List
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Map
as
Map
...
@@ -33,10 +34,10 @@ import qualified Data.Set as Set
...
@@ -33,10 +34,10 @@ import qualified Data.Set as Set
import
qualified
Data.Text
as
Text
import
qualified
Data.Text
as
Text
-- | TODO improve grouping functions of Authors, Sources, Institutes..
-- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists
::
UserCorpusId
->
MasterCorpusId
buildNgramsLists
::
Lang
->
Int
->
Int
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
uCid
mCid
=
do
buildNgramsLists
l
n
m
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
uCid
mCid
ngTerms
<-
buildNgramsTermsList
l
n
m
uCid
mCid
othersTerms
<-
mapM
(
buildNgramsOthersList
uCid
identity
)
[
Authors
,
Sources
,
Institutes
]
othersTerms
<-
mapM
(
buildNgramsOthersList
uCid
identity
)
[
Authors
,
Sources
,
Institutes
]
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
...
@@ -53,13 +54,14 @@ buildNgramsOthersList uCid groupIt nt = do
...
@@ -53,13 +54,14 @@ buildNgramsOthersList uCid groupIt nt = do
]
]
-- TODO remove hard coded parameters
-- TODO remove hard coded parameters
buildNgramsTermsList
::
UserCorpusId
->
MasterCorpusId
buildNgramsTermsList
::
Lang
->
Int
->
Int
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
uCid
mCid
=
do
buildNgramsTermsList
l
n
m
uCid
mCid
=
do
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
(
ngramsGroup
EN
4
2
)
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
(
ngramsGroup
l
n
m
)
--printDebug "candidate" (length candidates)
--printDebug "candidate" (length candidates)
let
termList
=
toTermList
(
isStopTerm
.
fst
)
candidates
--let termList = toTermList (isStopTerm . fst) candidates
let
termList
=
toTermList
((
\
_
->
False
)
.
fst
)
candidates
--printDebug "termlist" (length termList)
--printDebug "termlist" (length termList)
let
ngs
=
List
.
concat
$
map
toNgramsElement
termList
let
ngs
=
List
.
concat
$
map
toNgramsElement
termList
...
@@ -98,14 +100,14 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
...
@@ -98,14 +100,14 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
ys
=
take
b
$
drop
a
ns
ys
=
take
b
$
drop
a
ns
zs
=
drop
b
$
drop
a
ns
zs
=
drop
b
$
drop
a
ns
a
=
5
0
a
=
1
0
b
=
10
00
b
=
4
00
isStopTerm
::
Text
->
Bool
isStopTerm
::
Text
->
Bool
isStopTerm
x
=
Text
.
length
x
<
3
isStopTerm
x
=
Text
.
length
x
<
3
||
not
(
all
Char
.
isAlpha
(
Text
.
unpack
x'
))
||
not
(
all
Char
.
isAlpha
(
Text
.
unpack
x'
))
where
where
x'
=
foldl
(
\
t
->
Text
.
replace
t
""
)
x'
=
foldl
(
\
t
->
Text
.
replace
t
"
a
"
)
x
x
[
"-"
,
" "
,
"/"
,
"("
,
")"
]
[
"-"
,
" "
,
"/"
,
"("
,
")"
]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment