Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
45cddcc1
Unverified
Commit
45cddcc1
authored
Mar 21, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev' of
https://gitlab.iscpif.fr/gargantext/haskell-gargantext
into dev
parents
863694e6
8f0b7cd4
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
130 additions
and
105 deletions
+130
-105
Main.hs
bin/gargantext-import/Main.hs
+25
-13
Flow.hs
src/Gargantext/Database/Flow.hs
+83
-71
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+9
-11
List.hs
src/Gargantext/Text/List.hs
+12
-10
GrandDebat.hs
src/Gargantext/Text/Parsers/GrandDebat.hs
+1
-0
No files found.
bin/gargantext-import/Main.hs
View file @
45cddcc1
...
@@ -19,10 +19,11 @@ Import a corpus binary.
...
@@ -19,10 +19,11 @@ Import a corpus binary.
module
Main
where
module
Main
where
import
Prelude
(
read
)
import
Control.Exception
(
finally
)
import
Control.Exception
(
finally
)
import
Servant
(
ServantErr
)
import
Servant
(
ServantErr
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Database.Flow
(
FlowCmdM
,
flowCorpus
''
)
import
Gargantext.Database.Flow
(
FlowCmdM
,
flowCorpus
)
import
Gargantext.Text.Parsers
(
FileFormat
(
CsvHalFormat
))
import
Gargantext.Text.Parsers
(
FileFormat
(
CsvHalFormat
))
import
Gargantext.Database.Utils
(
Cmd
,
)
import
Gargantext.Database.Utils
(
Cmd
,
)
import
Gargantext.Database.Types.Node
(
CorpusId
)
import
Gargantext.Database.Types.Node
(
CorpusId
)
...
@@ -38,27 +39,38 @@ import Control.Monad.IO.Class (liftIO)
...
@@ -38,27 +39,38 @@ import Control.Monad.IO.Class (liftIO)
main
::
IO
()
main
::
IO
()
main
=
do
main
=
do
[
user
,
iniPath
,
name
,
corpusPath
]
<-
getArgs
[
user
Create
,
user
,
name
,
iniPath
,
limit
,
corpusPath
]
<-
getArgs
--{-
--{-
let
createUsers
::
Cmd
ServantErr
Int64
let
createUsers
::
Cmd
ServantErr
Int64
createUsers
=
insertUsersDemo
createUsers
=
insertUsersDemo
{-
{-
let c
md
Corpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
let c
sv
Corpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
c
mdCorpus = flowCorpus (cs user) (cs name) (Mono
EN) CsvHalFormat corpusPath
c
svCorpus = flowCorpus (cs user) (cs name) (Multi
EN) CsvHalFormat corpusPath
--}
--}
let
cmdCorpus
::
forall
m
.
FlowCmdM
DevEnv
ServantErr
m
=>
m
[
CorpusId
]
let
debatCorpus
::
forall
m
.
FlowCmdM
DevEnv
ServantErr
m
=>
m
CorpusId
cmdCorpus
=
do
debatCorpus
=
do
docs
<-
liftIO
(
splitEvery
3000
<$>
readFile
corpusPath
::
IO
[[
GrandDebatReference
]])
docs
<-
liftIO
(
splitEvery
500
ids
<-
flowCorpus''
(
Text
.
pack
user
)
(
Text
.
pack
name
)
(
Mono
FR
)
docs
<$>
take
(
read
limit
::
Int
)
pure
ids
<$>
readFile
corpusPath
::
IO
[[
GrandDebatReference
]]
)
flowCorpus
(
Text
.
pack
user
)
(
Text
.
pack
name
)
(
Multi
FR
)
docs
-- cmd = {-createUsers >>-} cmdCorpus
env
<-
newDevEnvWith
iniPath
env
<-
newDevEnvWith
iniPath
-- Better if we keep only one call to runCmdDev.
-- Better if we keep only one call to runCmdDev.
_
<-
runCmdDev
env
createUsers
_
<-
if
userCreate
==
"true"
_
<-
runCmdDev
env
cmdCorpus
then
runCmdDev
env
createUsers
else
pure
0
--(cs "false")
_
<-
runCmdDev
env
debatCorpus
{-
_ <- if corpusType == "csv"
then runCmdDev env csvCorpus
else if corpusType == "debat"
then runCmdDev env debatCorpus
else panic "corpusType unknown: try \"csv\" or \"debat\""
-}
pure
()
pure
()
src/Gargantext/Database/Flow.hs
View file @
45cddcc1
...
@@ -32,6 +32,7 @@ module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
...
@@ -32,6 +32,7 @@ module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
--import Gargantext.Database.Schema.User (insertUsers, simpleUser, gargantuaUser)
--import Gargantext.Database.Schema.User (insertUsers, simpleUser, gargantuaUser)
--import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
--import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
--import Gargantext.Text.Metrics.TFICF (Tficf(..))
--import Gargantext.Text.Metrics.TFICF (Tficf(..))
--import Debug.Trace (trace)
import
Control.Monad
(
mapM_
)
import
Control.Monad
(
mapM_
)
import
Control.Monad.IO.Class
(
liftIO
)
import
Control.Monad.IO.Class
(
liftIO
)
import
Data.List
(
concat
)
import
Data.List
(
concat
)
...
@@ -58,11 +59,12 @@ import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), N
...
@@ -58,11 +59,12 @@ import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), N
import
Gargantext.Database.Utils
(
Cmd
,
CmdM
)
import
Gargantext.Database.Utils
(
Cmd
,
CmdM
)
import
Gargantext.Ext.IMT
(
toSchoolName
)
import
Gargantext.Ext.IMT
(
toSchoolName
)
import
Gargantext.Prelude
import
Gargantext.Prelude
--import Gargantext.Text.List
import
Gargantext.Text.List
(
buildNgramsLists
)
import
Gargantext.Text.Parsers
(
parseDocs
,
FileFormat
)
--
import Gargantext.Text.Parsers (parseDocs, FileFormat)
import
Gargantext.Text.Terms
(
TermType
(
..
))
import
Gargantext.Text.Terms
(
TermType
(
..
))
import
Gargantext.Text.Terms
(
extractTerms
)
import
Gargantext.Text.Terms
(
extractTerms
)
import
Gargantext.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Gargantext.Text.Terms.Mono.Stem.En
(
stemIt
)
import
qualified
Gargantext.Text.Parsers.GrandDebat
as
GD
import
Servant
(
ServantErr
)
import
Servant
(
ServantErr
)
import
System.FilePath
(
FilePath
)
import
System.FilePath
(
FilePath
)
import
qualified
Data.Map
as
DM
import
qualified
Data.Map
as
DM
...
@@ -76,21 +78,33 @@ type FlowCmdM env err m =
...
@@ -76,21 +78,33 @@ type FlowCmdM env err m =
,
HasRepoVar
env
,
HasRepoVar
env
)
)
flowCorpusDebat
::
FlowCmdM
env
ServantErr
m
=>
Username
->
CorpusName
->
Int
->
FilePath
->
m
CorpusId
flowCorpusDebat
u
n
l
fp
=
do
docs
<-
liftIO
(
splitEvery
500
<$>
take
l
<$>
GD
.
readFile
fp
::
IO
[[
GD
.
GrandDebatReference
]]
)
flowCorpus
u
n
(
Multi
FR
)
docs
{-
flowCorpus :: FlowCmdM env ServantErr m
flowCorpus :: FlowCmdM env ServantErr m
=> Username -> CorpusName -> TermType Lang -> FileFormat -> FilePath -> m CorpusId
=> Username -> CorpusName -> TermType Lang -> FileFormat -> FilePath -> m CorpusId
flowCorpus
u
cn
la
ff
fp
=
liftIO
(
parseDocs
ff
fp
)
>>=
\
docs
->
flowCorpus'
u
cn
la
docs
flowCorpus u cn la ff fp =
undefined --
liftIO (parseDocs ff fp) >>= \docs -> flowCorpus' u cn la docs
--{-
flowCorpus''' :: (FlowCmdM env ServantErr m, ToHyperdataDocument a)
flowCorpus''
::
(
FlowCmdM
env
ServantErr
m
,
ToHyperdataDocument
a
)
=> Username -> CorpusName -> TermType Lang -> [[a]] -> m [CorpusId]
=> Username -> CorpusName -> TermType Lang -> [[a]] -> m [CorpusId]
flowCorpus''
u
cn
la
docs
=
mapM
(
\
doc
->
flowCorpus'
u
cn
la
doc
)
docs
flowCorpus''
'
u cn la docs = mapM (\doc -> flowCorpus' u cn la doc) docs
--}
--}
flowCorpus
'
::
(
FlowCmdM
env
ServantErr
m
,
ToHyperdataDocument
a
)
flowCorpus
::
(
FlowCmdM
env
ServantErr
m
,
ToHyperdataDocument
a
)
=>
Username
->
CorpusName
->
TermType
Lang
->
[
a
]
->
m
CorpusId
=>
Username
->
CorpusName
->
TermType
Lang
->
[
[
a
]
]
->
m
CorpusId
flowCorpus
'
u
cn
la
docs
=
do
flowCorpus
u
cn
la
docs
=
do
ids
<-
flowCorpusMaster
la
(
map
toHyperdataDocument
docs
)
ids
<-
mapM
((
insertMasterDocs
la
)
.
(
map
toHyperdataDocument
))
docs
flowCorpusUser
u
cn
ids
flowCorpusUser
FR
u
cn
(
concat
ids
)
-- TODO query with complex query
-- TODO query with complex query
...
@@ -99,29 +113,25 @@ flowCorpusSearchInDatabase :: FlowCmdM env ServantErr m
...
@@ -99,29 +113,25 @@ flowCorpusSearchInDatabase :: FlowCmdM env ServantErr m
flowCorpusSearchInDatabase
u
q
=
do
flowCorpusSearchInDatabase
u
q
=
do
(
_masterUserId
,
_masterRootId
,
cId
)
<-
getOrMkRootWithCorpus
userMaster
""
(
_masterUserId
,
_masterRootId
,
cId
)
<-
getOrMkRootWithCorpus
userMaster
""
ids
<-
map
fst
<$>
searchInDatabase
cId
(
stemIt
q
)
ids
<-
map
fst
<$>
searchInDatabase
cId
(
stemIt
q
)
flowCorpusUser
u
q
ids
flowCorpusUser
FR
u
q
ids
-- TODO uniformize language of corpus
flowCorpusMaster
::
FlowCmdM
env
ServantErr
m
=>
TermType
Lang
->
[
HyperdataDocument
]
->
m
[
NodeId
]
flowCorpusMaster
la
hd
=
(
insertMasterDocs
la
)
$
(
map
addUniqIdsDoc
)
hd
flowCorpusUser
::
FlowCmdM
env
ServantErr
m
=>
Username
->
CorpusName
->
[
NodeId
]
->
m
CorpusId
flowCorpusUser
::
FlowCmdM
env
ServantErr
m
flowCorpusUser
userName
corpusName
ids
=
do
=>
Lang
->
Username
->
CorpusName
->
[
NodeId
]
->
m
CorpusId
flowCorpusUser
l
userName
corpusName
ids
=
do
-- User Flow
-- User Flow
(
_
userId
,
_rootId
,
userCorpusId
)
<-
getOrMkRootWithCorpus
userName
corpusName
(
userId
,
_rootId
,
userCorpusId
)
<-
getOrMkRootWithCorpus
userName
corpusName
-- TODO: check if present already, ignore
-- TODO: check if present already, ignore
_
<-
Doc
.
add
userCorpusId
ids
_
<-
Doc
.
add
userCorpusId
ids
-- User List Flow
-- User List Flow
--
(_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster ""
(
_masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
userMaster
""
--ngs <- buildNgramsLists
userCorpusId masterCorpusId
ngs
<-
buildNgramsLists
l
2
3
userCorpusId
masterCorpusId
--
userListId <- flowList userId userCorpusId ngs
userListId
<-
flowList
userId
userCorpusId
ngs
--
printDebug "userListId" userListId
printDebug
"userListId"
userListId
-- User Graph Flow
-- User Graph Flow
--
_
<-
mkGraph
userCorpusId
userId
_
<-
mkGraph
userCorpusId
userId
-- User Dashboard Flow
-- User Dashboard Flow
-- _ <- mkDashboard userCorpusId userId
-- _ <- mkDashboard userCorpusId userId
...
@@ -134,20 +144,23 @@ flowCorpusUser userName corpusName ids = do
...
@@ -134,20 +144,23 @@ flowCorpusUser userName corpusName ids = do
insertMasterDocs
::
FlowCmdM
env
ServantErr
m
insertMasterDocs
::
FlowCmdM
env
ServantErr
m
=>
TermType
Lang
->
[
HyperdataDocument
]
->
m
[
DocId
]
=>
TermType
Lang
->
[
HyperdataDocument
]
->
m
[
DocId
]
insertMasterDocs
lang
hs
=
do
insertMasterDocs
lang
hs
=
do
let
hyperdataDocuments'
=
map
(
\
h
->
ToDbDocument
h
)
hs
(
masterUserId
,
_
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
userMaster
corpusMasterName
(
masterUserId
,
_
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
userMaster
corpusMasterName
ids
<-
insertDocuments
masterUserId
masterCorpusId
NodeDocument
hyperdataDocuments'
let
documentsWithId
=
mergeData
(
toInserted
ids
)
(
toInsert
hs
)
-- TODO Type NodeDocumentUnicised
let
hs'
=
map
addUniqIdsDoc
hs
ids
<-
insertDocuments
masterUserId
masterCorpusId
NodeDocument
$
map
ToDbDocument
hs'
-- ^ TODO Type class to insert Doc
-- ^ TODO Type Class AddUnicity where unicity = addUnicity
let
documentsWithId
=
mergeData
(
toInserted
ids
)
(
toInsert
hs'
)
docsWithNgrams
<-
documentIdWithNgrams
(
extractNgramsT
lang
)
documentsWithId
docsWithNgrams
<-
documentIdWithNgrams
(
extractNgramsT
lang
)
documentsWithId
let
maps
=
mapNodeIdNgrams
docsWithNgrams
let
maps
=
mapNodeIdNgrams
docsWithNgrams
terms2id
<-
insertNgrams
$
DM
.
keys
maps
terms2id
<-
insertNgrams
$
DM
.
keys
maps
let
indexedNgrams
=
DM
.
mapKeys
(
indexNgrams
terms2id
)
maps
let
indexedNgrams
=
DM
.
mapKeys
(
indexNgrams
terms2id
)
maps
_
<-
insertToNodeNgrams
indexedNgrams
_
<-
insertToNodeNgrams
indexedNgrams
pure
$
map
reId
ids
pure
$
map
reId
ids
...
@@ -220,51 +233,50 @@ data DocumentIdWithNgrams = DocumentIdWithNgrams
...
@@ -220,51 +233,50 @@ data DocumentIdWithNgrams = DocumentIdWithNgrams
,
document_ngrams
::
!
(
Map
Ngrams
(
Map
NgramsType
Int
))
,
document_ngrams
::
!
(
Map
Ngrams
(
Map
NgramsType
Int
))
}
deriving
(
Show
)
}
deriving
(
Show
)
-- TODO extractNgrams according to Type of Data
extractNgramsT
::
HasNodeError
err
extractNgramsT
::
HasNodeError
err
=>
TermType
Lang
->
HyperdataDocument
=>
TermType
Lang
->
HyperdataDocument
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT
lang
hd
=
filterNgramsT
255
<$>
extractNgramsT'
lang
hd
extractNgramsT
lang
hd
=
filterNgramsT
255
<$>
extractNgramsT'
lang
hd
extractNgramsT'
::
HasNodeError
err
=>
TermType
Lang
->
HyperdataDocument
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT'
lang
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
_hyperdataDocument_source
doc
institutes
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
map
toSchoolName
.
(
splitOn
", "
))
$
_hyperdataDocument_institutes
doc
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
", "
)
$
_hyperdataDocument_authors
doc
leText
=
catMaybes
[
_hyperdataDocument_title
doc
,
_hyperdataDocument_abstract
doc
]
terms'
<-
map
text2ngrams
<$>
map
(
intercalate
" "
.
_terms_label
)
<$>
concat
<$>
liftIO
(
extractTerms
lang
leText
)
pure
$
DM
.
fromList
$
[(
source
,
DM
.
singleton
Sources
1
)]
<>
[(
i'
,
DM
.
singleton
Institutes
1
)
|
i'
<-
institutes
]
<>
[(
a'
,
DM
.
singleton
Authors
1
)
|
a'
<-
authors
]
<>
[(
t'
,
DM
.
singleton
NgramsTerms
1
)
|
t'
<-
terms'
]
filterNgramsT
::
Int
->
Map
Ngrams
(
Map
NgramsType
Int
)
->
Map
Ngrams
(
Map
NgramsType
Int
)
filterNgramsT
s
ms
=
DM
.
fromList
$
map
(
\
a
->
filter'
s
a
)
$
DM
.
toList
ms
where
where
filter'
s'
(
ng
@
(
Ngrams
t
n
),
y
)
=
case
(
Text
.
length
t
)
<
s'
of
extractNgramsT'
::
HasNodeError
err
True
->
(
ng
,
y
)
=>
TermType
Lang
->
HyperdataDocument
False
->
(
Ngrams
(
Text
.
take
s'
t
)
n
,
y
)
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT'
lang'
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
_hyperdataDocument_source
doc
institutes
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
map
toSchoolName
.
(
splitOn
", "
))
$
_hyperdataDocument_institutes
doc
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
", "
)
$
_hyperdataDocument_authors
doc
leText
=
catMaybes
[
_hyperdataDocument_title
doc
,
_hyperdataDocument_abstract
doc
]
terms'
<-
map
text2ngrams
<$>
map
(
intercalate
" "
.
_terms_label
)
<$>
concat
<$>
liftIO
(
extractTerms
lang'
leText
)
pure
$
DM
.
fromList
$
[(
source
,
DM
.
singleton
Sources
1
)]
<>
[(
i'
,
DM
.
singleton
Institutes
1
)
|
i'
<-
institutes
]
<>
[(
a'
,
DM
.
singleton
Authors
1
)
|
a'
<-
authors
]
<>
[(
t'
,
DM
.
singleton
NgramsTerms
1
)
|
t'
<-
terms'
]
filterNgramsT
::
Int
->
Map
Ngrams
(
Map
NgramsType
Int
)
->
Map
Ngrams
(
Map
NgramsType
Int
)
filterNgramsT
s
ms
=
DM
.
fromList
$
map
(
\
a
->
filter'
s
a
)
$
DM
.
toList
ms
where
filter'
s'
(
ng
@
(
Ngrams
t
n
),
y
)
=
case
(
Text
.
length
t
)
<
s'
of
True
->
(
ng
,
y
)
False
->
(
Ngrams
(
Text
.
take
s'
t
)
n
,
y
)
documentIdWithNgrams
::
HasNodeError
err
documentIdWithNgrams
::
HasNodeError
err
...
...
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
45cddcc1
...
@@ -44,13 +44,13 @@ import qualified Database.PostgreSQL.Simple as DPS
...
@@ -44,13 +44,13 @@ import qualified Database.PostgreSQL.Simple as DPS
-- a first grouping option to user and get some
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
-- enriched data to better learn and improve that algo
ngramsGroup
::
Lang
->
Int
->
Int
->
Text
->
Text
ngramsGroup
::
Lang
->
Int
->
Int
->
Text
->
Text
ngramsGroup
l
m
n
=
Text
.
intercalate
" "
ngramsGroup
l
_m
_
n
=
Text
.
intercalate
" "
.
map
(
stem
l
)
.
map
(
stem
l
)
.
take
n
--
. take n
.
List
.
sort
.
List
.
sort
.
(
List
.
filter
(
\
t
->
Text
.
length
t
>
m
))
--
. (List.filter (\t -> Text.length t > m))
.
Text
.
splitOn
" "
.
Text
.
splitOn
" "
.
Text
.
replace
"-"
" "
.
Text
.
replace
"-"
" "
sortTficf
::
(
Map
Text
(
Double
,
Set
Text
))
sortTficf
::
(
Map
Text
(
Double
,
Set
Text
))
...
@@ -171,8 +171,6 @@ queryNgramsOnlyByNodeUser = [sql|
...
@@ -171,8 +171,6 @@ queryNgramsOnlyByNodeUser = [sql|
GROUP BY nng.node_id, ng.terms
GROUP BY nng.node_id, ng.terms
|]
|]
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO filter by language, database, any social field
-- | TODO filter by language, database, any social field
getNodesByNgramsMaster
::
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsMaster
::
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
...
@@ -183,10 +181,10 @@ selectNgramsByNodeMaster :: UserCorpusId -> MasterCorpusId -> Cmd err [(NodeId,
...
@@ -183,10 +181,10 @@ selectNgramsByNodeMaster :: UserCorpusId -> MasterCorpusId -> Cmd err [(NodeId,
selectNgramsByNodeMaster
ucId
mcId
=
runPGSQuery
selectNgramsByNodeMaster
ucId
mcId
=
runPGSQuery
queryNgramsByNodeMaster
queryNgramsByNodeMaster
(
ucId
(
ucId
,
nodeTypeId
NodeDocument
,
nodeTypeId
NodeDocument
,
ngramsTypeId
NgramsTerms
,
ngramsTypeId
NgramsTerms
,
mcId
,
mcId
,
nodeTypeId
NodeDocument
,
nodeTypeId
NodeDocument
,
ngramsTypeId
NgramsTerms
,
ngramsTypeId
NgramsTerms
)
)
...
...
src/Gargantext/Text/List.hs
View file @
45cddcc1
...
@@ -26,6 +26,7 @@ import Gargantext.Database.Metrics.NgramsByNode (getTficf', sortTficf, ngramsGro
...
@@ -26,6 +26,7 @@ import Gargantext.Database.Metrics.NgramsByNode (getTficf', sortTficf, ngramsGro
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Utils
(
Cmd
)
import
Gargantext.Database.Utils
(
Cmd
)
import
Gargantext.Prelude
import
Gargantext.Prelude
--import Gargantext.Text.Terms (TermType(..))
import
qualified
Data.Char
as
Char
import
qualified
Data.Char
as
Char
import
qualified
Data.List
as
List
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Map
as
Map
...
@@ -33,10 +34,10 @@ import qualified Data.Set as Set
...
@@ -33,10 +34,10 @@ import qualified Data.Set as Set
import
qualified
Data.Text
as
Text
import
qualified
Data.Text
as
Text
-- | TODO improve grouping functions of Authors, Sources, Institutes..
-- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists
::
UserCorpusId
->
MasterCorpusId
buildNgramsLists
::
Lang
->
Int
->
Int
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
uCid
mCid
=
do
buildNgramsLists
l
n
m
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
uCid
mCid
ngTerms
<-
buildNgramsTermsList
l
n
m
uCid
mCid
othersTerms
<-
mapM
(
buildNgramsOthersList
uCid
identity
)
[
Authors
,
Sources
,
Institutes
]
othersTerms
<-
mapM
(
buildNgramsOthersList
uCid
identity
)
[
Authors
,
Sources
,
Institutes
]
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
...
@@ -53,13 +54,14 @@ buildNgramsOthersList uCid groupIt nt = do
...
@@ -53,13 +54,14 @@ buildNgramsOthersList uCid groupIt nt = do
]
]
-- TODO remove hard coded parameters
-- TODO remove hard coded parameters
buildNgramsTermsList
::
UserCorpusId
->
MasterCorpusId
buildNgramsTermsList
::
Lang
->
Int
->
Int
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
uCid
mCid
=
do
buildNgramsTermsList
l
n
m
uCid
mCid
=
do
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
(
ngramsGroup
EN
4
2
)
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
(
ngramsGroup
l
n
m
)
--printDebug "candidate" (length candidates)
--printDebug "candidate" (length candidates)
let
termList
=
toTermList
(
isStopTerm
.
fst
)
candidates
--let termList = toTermList (isStopTerm . fst) candidates
let
termList
=
toTermList
((
\
_
->
False
)
.
fst
)
candidates
--printDebug "termlist" (length termList)
--printDebug "termlist" (length termList)
let
ngs
=
List
.
concat
$
map
toNgramsElement
termList
let
ngs
=
List
.
concat
$
map
toNgramsElement
termList
...
@@ -98,14 +100,14 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
...
@@ -98,14 +100,14 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
ys
=
take
b
$
drop
a
ns
ys
=
take
b
$
drop
a
ns
zs
=
drop
b
$
drop
a
ns
zs
=
drop
b
$
drop
a
ns
a
=
5
0
a
=
1
0
b
=
10
00
b
=
4
00
isStopTerm
::
Text
->
Bool
isStopTerm
::
Text
->
Bool
isStopTerm
x
=
Text
.
length
x
<
3
isStopTerm
x
=
Text
.
length
x
<
3
||
not
(
all
Char
.
isAlpha
(
Text
.
unpack
x'
))
||
not
(
all
Char
.
isAlpha
(
Text
.
unpack
x'
))
where
where
x'
=
foldl
(
\
t
->
Text
.
replace
t
""
)
x'
=
foldl
(
\
t
->
Text
.
replace
t
"
a
"
)
x
x
[
"-"
,
" "
,
"/"
,
"("
,
")"
]
[
"-"
,
" "
,
"/"
,
"("
,
")"
]
src/Gargantext/Text/Parsers/GrandDebat.hs
View file @
45cddcc1
...
@@ -97,6 +97,7 @@ class ReadFile a
...
@@ -97,6 +97,7 @@ class ReadFile a
instance
ReadFile
[
GrandDebatReference
]
instance
ReadFile
[
GrandDebatReference
]
where
where
-- | read json: 3 version below are working but with increased optimization
--readFile fp = maybe [] identity <$> decode <$> DBL.readFile fp
--readFile fp = maybe [] identity <$> decode <$> DBL.readFile fp
--readFile fp = either (panic . Text.pack) identity <$> P.eitherDecode <$> DBL.readFile fp
--readFile fp = either (panic . Text.pack) identity <$> P.eitherDecode <$> DBL.readFile fp
readFile
fp
=
P
.
parseLazyByteString
(
P
.
arrayOf
P
.
value
)
<$>
DBL
.
readFile
fp
readFile
fp
=
P
.
parseLazyByteString
(
P
.
arrayOf
P
.
value
)
<$>
DBL
.
readFile
fp
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment