Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
cb739183
Commit
cb739183
authored
Jul 13, 2023
by
Alexandre Delanoë
Committed by
Alfredo Di Napoli
Jul 13, 2023
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[SPECS] Main Specifications to add Chinese lang
parent
f0674838
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
34 additions
and
18 deletions
+34
-18
List.hs
src/Gargantext/API/Ngrams/List.hs
+12
-10
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+3
-3
WithList.hs
src/Gargantext/Core/Text/Terms/WithList.hs
+16
-3
Tools.hs
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
+2
-1
LegacyMain.hs
src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
+1
-1
No files found.
src/Gargantext/API/Ngrams/List.hs
View file @
cb739183
...
...
@@ -24,6 +24,7 @@ import Data.Set (Set)
import
Data.Text
(
Text
,
concat
,
pack
,
splitOn
)
import
Data.Vector
(
Vector
)
import
Gargantext.API.Admin.EnvTypes
(
Env
,
GargJob
(
..
))
import
Gargantext.Core
(
Lang
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
setListNgrams
)
import
Gargantext.API.Ngrams.List.Types
...
...
@@ -34,7 +35,7 @@ import Gargantext.API.Prelude (GargServer, GargM, GargError)
import
Gargantext.API.Types
import
Gargantext.Core.NodeStory
import
Gargantext.Core.Text.Terms
(
ExtractedNgrams
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
MatchedText
,
buildPatterns
,
termsInText
)
import
Gargantext.Core.Text.Terms.WithList
(
MatchedText
,
buildPatterns
With
,
termsInText
)
import
Gargantext.Core.Types
(
TermsCount
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Database.Action.Flow
(
saveDocNgramsWith
)
...
...
@@ -142,7 +143,7 @@ setList l m = do
pure
True
------------------------------------------------------------------------
-- | Re-index documents of a corpus with n
ew ngrams (called orphans here)
-- | Re-index documents of a corpus with n
grams in the list
reIndexWith
::
(
HasNodeStory
env
err
m
,
FlowCmdM
env
err
m
)
...
...
@@ -153,6 +154,7 @@ reIndexWith :: ( HasNodeStory env err m
->
m
()
reIndexWith
cId
lId
nt
lts
=
do
-- printDebug "(cId,lId,nt,lts)" (cId, lId, nt, lts)
-- corpus_node <- getNode cId -- (Proxy :: Proxy HyperdataCorpus)
-- Getting [NgramsTerm]
ts
<-
List
.
concat
...
...
@@ -166,24 +168,24 @@ reIndexWith cId lId nt lts = do
let
-- fromListWith (<>)
ngramsByDoc
=
map
(
HashMap
.
fromListWith
(
Map
.
unionWith
(
Map
.
unionWith
(
\
(
_a
,
b
)
(
_a'
,
b'
)
->
(
1
,
b
+
b'
)))))
$
map
(
map
(
\
((
k
,
cnt
),
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
over
(
traverse
.
traverse
)
(
\
p
->
(
p
,
cnt
))
v
)))
$
map
(
docNgrams
nt
ts
)
docs
-- printDebug "ngramsByDoc: " ngramsByDoc
$
map
(
map
(
\
((
k
,
cnt
),
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
over
(
traverse
.
traverse
)
(
\
p
->
(
p
,
cnt
))
v
)))
$
map
(
docNgrams
Nothing
{-here lang-}
nt
ts
)
docs
-- Saving the indexation in database
_
<-
mapM
(
saveDocNgramsWith
lId
)
ngramsByDoc
-- _ <- refreshNgramsMaterialized
pure
()
docNgrams
::
NgramsType
docNgrams
::
Maybe
Lang
->
NgramsType
->
[
NgramsTerm
]
->
Gargantext
.
Database
.
Admin
.
Types
.
Node
.
Context
HyperdataDocument
->
[((
MatchedText
,
TermsCount
),
Map
NgramsType
(
Map
NodeId
Int
))]
docNgrams
nt
ts
doc
=
docNgrams
lang
nt
ts
doc
=
List
.
zip
(
termsInText
(
buildPatterns
$
map
(
\
k
->
(
Text
.
splitOn
" "
$
unNgramsTerm
k
,
[]
))
ts
)
(
termsInText
lang
(
buildPatternsWith
lang
ts
)
$
Text
.
unlines
$
catMaybes
[
doc
^.
context_hyperdata
.
hd_title
,
doc
^.
context_hyperdata
.
hd_abstract
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
cb739183
...
...
@@ -205,7 +205,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- printDebug "[addToCorpusWithQuery] datafield" datafield
-- printDebug "[addToCorpusWithQuery] flowListWith" flw
-- TODO
-- TODO
: update Node Corpus with the Lang
-- n <- getNode cid
-- let n.wq_lang = l
-- saveNode n
...
...
@@ -270,7 +270,7 @@ addToCorpusWithForm user cid (NewWithForm ft ff d l _n sel) jobHandle = do
-- printDebug "[addToCorpusWithForm] fileType" ft
-- printDebug "[addToCorpusWithForm] fileFormat" ff
-- TODO
-- TODO
: update Node Corpus with the Lang
-- n <- getNode cid
-- let n.wq_lang = l
-- saveNode n
...
...
@@ -380,7 +380,7 @@ addToCorpusWithFile :: (HasSettings env, FlowCmdM env err m, MonadJobStatus m)
->
m
()
addToCorpusWithFile
user
cid
nwf
@
(
NewWithFile
_d
_l
fName
)
jobHandle
=
do
-- TODO
-- TODO
: update Node Corpus with the Lang
-- n <- getNode cid
-- let n.wq_lang = l
-- saveNode n
...
...
src/Gargantext/Core/Text/Terms/WithList.hs
View file @
cb739183
...
...
@@ -18,7 +18,9 @@ module Gargantext.Core.Text.Terms.WithList where
import
Data.List
(
null
)
import
Data.Ord
import
Data.Text
(
Text
,
concat
,
unwords
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Prelude
import
Gargantext.Core
(
Lang
(
ZH
))
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Terms.Mono
(
monoTextsBySentence
)
import
Gargantext.Core.Types
(
TermsCount
)
...
...
@@ -27,6 +29,8 @@ import Prelude (error)
import
qualified
Data.Algorithms.KMP
as
KMP
import
qualified
Data.IntMap.Strict
as
IntMap
import
qualified
Data.List
as
List
import
qualified
Data.Text
as
Text
------------------------------------------------------------------------
data
Pattern
=
Pattern
...
...
@@ -63,6 +67,10 @@ replaceTerms rplaceTerms pats terms = go 0
merge
(
len1
,
lab1
)
(
len2
,
lab2
)
=
if
len2
<
len1
then
(
len1
,
lab1
)
else
(
len2
,
lab2
)
buildPatternsWith
::
Maybe
Lang
->
[
NgramsTerm
]
->
Patterns
buildPatternsWith
(
Just
ZH
)
ts
=
buildPatterns
$
map
(
\
k
->
(
Text
.
chunksOf
1
$
unNgramsTerm
k
,
[]
))
ts
buildPatternsWith
_
ts
=
buildPatterns
$
map
(
\
k
->
(
Text
.
splitOn
" "
$
unNgramsTerm
k
,
[]
))
ts
buildPatterns
::
TermList
->
Patterns
buildPatterns
=
sortWith
(
Down
.
_pat_length
)
.
concatMap
buildPattern
where
...
...
@@ -78,14 +86,14 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
--------------------------------------------------------------------------
-- Utils
type
MatchedText
=
Text
termsInText
::
Patterns
->
Text
->
[(
MatchedText
,
TermsCount
)]
termsInText
pats
txt
=
groupWithCounts
termsInText
::
Maybe
Lang
->
Patterns
->
Text
->
[(
MatchedText
,
TermsCount
)]
termsInText
(
Just
ZH
)
pats
txt
=
termsInText
Nothing
pats
(
addSpaces
txt
)
termsInText
_
pats
txt
=
groupWithCounts
$
List
.
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
--------------------------------------------------------------------------
extractTermsWithList
::
Patterns
->
Text
->
Corpus
[
Text
]
extractTermsWithList
pats
=
map
(
replaceTerms
KeepAll
pats
)
.
monoTextsBySentence
...
...
@@ -97,6 +105,11 @@ extractTermsWithList' :: Patterns -> Text -> [Text]
extractTermsWithList'
pats
=
map
(
concat
.
map
concat
.
replaceTerms
KeepAll
pats
)
.
monoTextsBySentence
--------------------------------------------------------------------------
addSpaces
::
Text
->
Text
addSpaces
=
(
Text
.
intercalate
" "
)
.
(
Text
.
chunksOf
1
)
--------------------------------------------------------------------------
{- | Not used
...
...
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
View file @
cb739183
...
...
@@ -107,8 +107,9 @@ corpusIdtoDocuments timeUnit corpusId = do
Just
termList'
->
buildPatterns
termList'
pure
$
map
(
toPhyloDocs
patterns
timeUnit
)
(
map
_context_hyperdata
docs
)
-- TODO: Add lang to enable Chinese phylomemy
termsInText'
::
Patterns
->
Text
->
[
Text
]
termsInText'
p
t
=
(
map
fst
)
$
termsInText
p
t
termsInText'
p
t
=
(
map
fst
)
$
termsInText
Nothing
p
t
toPhyloDocs
::
Patterns
->
TimeUnit
->
HyperdataDocument
->
Document
toPhyloDocs
patterns
time
d
=
...
...
src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
View file @
cb739183
...
...
@@ -65,7 +65,7 @@ flowPhylo cId = do
patterns
=
buildPatterns
termList
-- | To filter the Ngrams of a document based on the termList
filterTerms
::
Patterns
->
(
Date
,
Text
)
->
(
Date
,
[
Text
])
filterTerms
patterns'
(
y
,
d
)
=
(
y
,
fst
<$>
termsInText
patterns'
d
)
filterTerms
patterns'
(
y
,
d
)
=
(
y
,
fst
<$>
termsInText
Nothing
patterns'
d
)
docs
=
map
((
\
(
y
,
t
)
->
Document
y
t
)
.
filterTerms
patterns
)
docs'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment