Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
c67c2137
Commit
c67c2137
authored
Mar 05, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FLOW][NGRAMS] Lists adding others Ngrams.
parent
9be9b7e0
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
59 additions
and
48 deletions
+59
-48
Flow.hs
src/Gargantext/Database/Flow.hs
+23
-37
Count.hs
src/Gargantext/Database/Metrics/Count.hs
+1
-1
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+7
-7
List.hs
src/Gargantext/Text/List.hs
+28
-3
No files found.
src/Gargantext/Database/Flow.hs
View file @
c67c2137
...
...
@@ -24,48 +24,48 @@ Portability : POSIX
module
Gargantext.Database.Flow
-- (flowDatabase, ngrams2list)
where
--import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId)
--import Gargantext.Database.Metrics.TFICF (getTficf)
--import Gargantext.Database.Node.Contact (HyperdataContact(..))
--import Gargantext.Database.Schema.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
--import Gargantext.Database.Schema.NodeNgramsNgrams (NodeNgramsNgramsPoly(..), insertNodeNgramsNgramsNew)
--import Gargantext.Database.Schema.User (insertUsers, simpleUser, gargantuaUser)
--import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
--import Gargantext.Text.Metrics.TFICF (Tficf(..))
import
Control.Monad
(
mapM_
)
import
Control.Monad.IO.Class
(
liftIO
)
--import Gargantext.Database.Node.Contact (HyperdataContact(..)
)
import
Data.List
(
concat
)
import
Data.Map
(
Map
,
lookup
,
toList
)
import
Data.Maybe
(
Maybe
(
..
),
catMaybes
)
import
Data.Monoid
import
Data.Text
(
Text
,
splitOn
,
intercalate
)
import
qualified
Data.Text
as
Text
import
Data.List
(
concat
)
import
GHC.Show
(
Show
)
import
Gargantext.API.Ngrams
(
HasRepoVar
)
import
Gargantext.API.Ngrams
(
NgramsElement
,
putListNgrams
,
RepoCmdM
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
(
NodePoly
(
..
),
Terms
(
..
))
import
Gargantext.Core.Types.Individu
(
Username
)
import
Gargantext.Core.Types.Main
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Config
(
userMaster
,
corpusMasterName
)
import
Gargantext.Database.Flow.Utils
(
insertToNodeNgrams
)
--import Gargantext.Database.Metrics.TFICF (getTficf)
import
Gargantext.Text.Terms
(
extractTerms
)
--import Gargantext.Text.Metrics.TFICF (Tficf(..))
import
qualified
Gargantext.Database.Node.Document.Add
as
Doc
(
add
)
import
Gargantext.Database.Node.Document.Insert
-- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import
Gargantext.Database.Root
(
getRoot
)
import
Gargantext.Database.Schema.Ngrams
-- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import
Gargantext.Database.Schema.Node
-- (mkRoot, mkCorpus, getOrMkList, mkGraph, mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
-- import Gargantext.Database.Schema.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
--import Gargantext.Database.Schema.NodeNgramsNgrams (NodeNgramsNgramsPoly(..), insertNodeNgramsNgramsNew)
--import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId)
import
Gargantext.Database.Schema.User
(
getUser
,
UserLight
(
..
))
import
Gargantext.Database.Types.Node
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import
Gargantext.Database.Utils
(
Cmd
,
CmdM
)
import
Gargantext.Text.Terms
(
TermType
(
..
))
import
Gargantext.Ext.IMT
(
toSchoolName
)
--import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
import
Gargantext.Prelude
import
Gargantext.Text.List
import
Gargantext.Text.Parsers
(
parseDocs
,
FileFormat
)
import
System.FilePath
(
FilePath
)
import
Gargantext.
API.Ngrams
(
HasRepoVar
)
import
Gargantext.Text.Terms
(
TermType
(
..
)
)
import
Gargantext.
Text.Terms
(
extractTerms
)
import
Servant
(
ServantErr
)
import
Gargantext.API.Ngrams
(
NgramsElement
,
putListNgrams
,
RepoCmdM
)
--import Gargantext.Database.Schema.User (insertUsers, simpleUser, gargantuaUser)
import
System.FilePath
(
FilePath
)
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
Text
import
qualified
Gargantext.Database.Node.Document.Add
as
Doc
(
add
)
type
FlowCmdM
env
err
m
=
(
CmdM
env
err
m
...
...
@@ -86,10 +86,9 @@ flowCorpus userName ff fp corpusName = do
-- TODO uniformize language of corpus
-- TODO ChunkAlong is not the right function here
-- chunkAlong 10 10 [1..15] == [1..10]
-- BUG: what about the rest (divMod 15 10)?
-- but if temporary enables big corpora insert for tests
-- BUG: what about the rest of (divMod 15 10)?
-- TODO: chunkAlongNoRest or chunkAlongWithRest
-- default: NoRest
-- default
behavior
: NoRest
ids
<-
mapM
insertMasterDocs
$
chunkAlong
10000
10000
docs
-- User Flow
...
...
@@ -100,11 +99,11 @@ flowCorpus userName ff fp corpusName = do
-- User List Flow
(
_masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
userMaster
""
-- /!\ this extract NgramsTerms Only
ngs
<-
buildNgramsList
userCorpusId
masterCorpusId
ngs
<-
buildNgramsList
s
userCorpusId
masterCorpusId
--printDebug "ngs" ngs
--
TODO getNgramsElement of NgramsType...
--TODO getNgramsElement of NgramsType...
--ngs <- getNgramsElementsWithParentNodeId masterCorpusId
userListId
<-
flowList
userId
userCorpusId
ngs
userListId
<-
flowList
userId
userCorpusId
ngs
printDebug
"userListId"
userListId
-- User Graph Flow
...
...
@@ -124,8 +123,7 @@ insertMasterDocs :: FlowCmdM env ServantErr m
insertMasterDocs
hs
=
do
let
hyperdataDocuments'
=
map
(
\
h
->
ToDbDocument
h
)
hs
-- TODO put in State Monad
(
masterUserId
,
_
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
userMaster
corpusMasterName
ids
<-
insertDocuments
masterUserId
masterCorpusId
NodeDocument
hyperdataDocuments'
...
...
@@ -134,22 +132,13 @@ insertMasterDocs hs = do
let
maps
=
mapNodeIdNgrams
docsWithNgrams
--printDebug "maps" (maps)
terms2id
<-
insertNgrams
$
DM
.
keys
maps
let
indexedNgrams
=
DM
.
mapKeys
(
indexNgrams
terms2id
)
maps
--printDebug "inserted ngrams" indexedNgrams
_
<-
insertToNodeNgrams
indexedNgrams
pure
$
map
reId
ids
getUserCorpusNgrams
::
FlowCmdM
env
ServantErr
m
=>
CorpusId
->
m
[
Ngrams
]
getUserCorpusNgrams
=
undefined
type
CorpusName
=
Text
getOrMkRootWithCorpus
::
HasNodeError
err
...
...
@@ -188,7 +177,6 @@ getOrMkRootWithCorpus username cName = do
------------------------------------------------------------------------
toInsert
::
[
HyperdataDocument
]
->
Map
HashId
HyperdataDocument
toInsert
=
DM
.
fromList
.
map
(
\
d
->
(
maybe
err
identity
(
_hyperdataDocument_uniqId
d
),
d
))
...
...
@@ -219,7 +207,6 @@ data DocumentIdWithNgrams = DocumentIdWithNgrams
,
document_ngrams
::
!
(
Map
Ngrams
(
Map
NgramsType
Int
))
}
deriving
(
Show
)
-- TODO group terms
extractNgramsT
::
HasNodeError
err
=>
HyperdataDocument
...
...
@@ -297,7 +284,6 @@ flowListBase :: FlowCmdM env err m
=>
ListId
->
Map
NgramsType
[
NgramsElement
]
->
m
()
flowListBase
lId
ngs
=
do
-- compute Candidate / Map
mapM_
(
\
(
typeList
,
ngElmts
)
->
putListNgrams
lId
typeList
ngElmts
)
$
toList
ngs
flowList
::
FlowCmdM
env
err
m
=>
UserId
->
CorpusId
...
...
src/Gargantext/Database/Metrics/Count.hs
View file @
c67c2137
...
...
@@ -245,7 +245,7 @@ countCorpusDocuments r cId = maybe 0 identity
<$>
runQuery'
r
cId
where
runQuery'
RoleUser
cId'
=
runPGSQuery
"SELECT count(*) from nodes_nodes nn WHERE nn.node1_id = ?"
"SELECT count(*) from nodes_nodes nn WHERE nn.node1_id = ?
AND nn.delete = False
"
(
PGS
.
Only
cId'
)
runQuery'
RoleMaster
cId'
=
runPGSQuery
"SELECT count(*) from nodes n WHERE n.parent_id = ? AND n.typename = ?"
...
...
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
c67c2137
...
...
@@ -60,7 +60,7 @@ sortTficf = List.sortOn (fst . snd) . toList
getTficf'
::
UserCorpusId
->
MasterCorpusId
->
(
Text
->
Text
)
->
Cmd
err
(
Map
Text
(
Double
,
Set
Text
))
getTficf'
u
m
f
=
do
u'
<-
getNodesByNgramsUser
u
u'
<-
getNodesByNgramsUser
u
NgramsTerms
m'
<-
getNodesByNgramsMaster
u
m
pure
$
toTficfData
(
countNodesByNgramsWith
f
u'
)
...
...
@@ -104,16 +104,16 @@ groupNodesByNgramsWith f m =
$
toList
m
------------------------------------------------------------------------
getNodesByNgramsUser
::
CorpusId
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsUser
cId
=
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
))
<$>
selectNgramsByNodeUser
cId
getNodesByNgramsUser
::
CorpusId
->
NgramsType
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsUser
cId
nt
=
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
))
<$>
selectNgramsByNodeUser
cId
nt
selectNgramsByNodeUser
::
CorpusId
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeUser
cId
=
runPGSQuery
selectNgramsByNodeUser
::
CorpusId
->
NgramsType
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeUser
cId
nt
=
runPGSQuery
queryNgramsByNodeUser
(
cId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
NgramsTerms
,
ngramsTypeId
nt
)
queryNgramsByNodeUser
::
DPS
.
Query
...
...
src/Gargantext/Text/List.hs
View file @
c67c2137
...
...
@@ -26,7 +26,7 @@ import Data.Text (Text)
import
Gargantext.API.Ngrams
(
NgramsElement
,
mkNgramsElement
,
mSetFromList
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
)
import
Gargantext.Database.Metrics.NgramsByNode
(
getTficf'
,
sortTficf
,
ngramsGroup
)
import
Gargantext.Database.Metrics.NgramsByNode
(
getTficf'
,
sortTficf
,
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Utils
(
Cmd
)
import
Gargantext.Prelude
...
...
@@ -35,9 +35,31 @@ import qualified Data.Set as Set
import
qualified
Data.Text
as
Text
import
qualified
Data.List
as
List
buildNgramsList
::
UserCorpusId
->
MasterCorpusId
-- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists
::
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
uCid
mCid
othersTerms
<-
mapM
(
buildNgramsOthersList
uCid
identity
)
[
Authors
,
Sources
,
Institutes
]
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
buildNgramsOthersList
::
UserCorpusId
->
(
Text
->
Text
)
->
NgramsType
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsOthersList
uCid
groupIt
nt
=
do
ngs
<-
groupNodesByNgramsWith
groupIt
<$>
getNodesByNgramsUser
uCid
nt
pure
$
Map
.
fromList
[(
nt
,
[
mkNgramsElement
t
CandidateTerm
Nothing
(
mSetFromList
[]
)
|
(
t
,
_ns
)
<-
Map
.
toList
ngs
]
)
]
buildNgramsTermsList
::
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsList
uCid
mCid
=
do
buildNgrams
Terms
List
uCid
mCid
=
do
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
(
ngramsGroup
EN
2
)
--printDebug "candidate" (length candidates)
...
...
@@ -65,6 +87,9 @@ toNgramsElement (listType, (_stem, (_score, setNgrams))) =
)
children
toTermList
::
(
a
->
Bool
)
->
[
a
]
->
[(
ListType
,
a
)]
toTermList
stop
ns
=
map
(
toTermList'
stop
CandidateTerm
)
xs
<>
map
(
toTermList'
stop
GraphTerm
)
ys
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment