Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
5f31a345
Commit
5f31a345
authored
Jun 26, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[REFACT] before scoring new ngrams lists.
parent
bfac1c97
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
63 additions
and
50 deletions
+63
-50
Main.hs
bin/gargantext-import/Main.hs
+5
-2
FrontEnd.hs
src/Gargantext/API/FrontEnd.hs
+1
-1
Metrics.hs
src/Gargantext/API/Metrics.hs
+29
-1
Node.hs
src/Gargantext/API/Node.hs
+2
-26
Flow.hs
src/Gargantext/Database/Flow.hs
+8
-2
Lists.hs
src/Gargantext/Database/Lists.hs
+4
-4
Metrics.hs
src/Gargantext/Database/Metrics.hs
+8
-7
List.hs
src/Gargantext/Text/List.hs
+2
-3
Terms.hs
src/Gargantext/Text/Terms.hs
+4
-4
No files found.
bin/gargantext-import/Main.hs
View file @
5f31a345
...
...
@@ -45,8 +45,11 @@ main = do
let
createUsers
::
Cmd
ServantErr
Int64
createUsers
=
insertUsersDemo
let
cmd
::
forall
m
.
FlowCmdM
DevEnv
ServantErr
m
=>
m
CorpusId
cmd
=
flowCorpusFile
(
cs
user
)
(
cs
name
)
(
read
limit
::
Int
)
(
Unsupervised
EN
5
1
Nothing
)
CsvHalFormat
corpusPath
let
--tt = (Unsupervised EN 5 1 Nothing)
tt
=
(
Mono
EN
)
cmd
::
forall
m
.
FlowCmdM
DevEnv
ServantErr
m
=>
m
CorpusId
cmd
=
flowCorpusFile
(
cs
user
)
(
cs
name
)
(
read
limit
::
Int
)
tt
CsvHalFormat
corpusPath
{-
let debatCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
debatCorpus = do
...
...
src/Gargantext/API/FrontEnd.hs
View file @
5f31a345
...
...
@@ -18,7 +18,7 @@ Loads all static file for the front-end.
---------------------------------------------------------------------
module
Gargantext.API.FrontEnd
where
import
Servant.Static.TH
(
createApiAndServerDecs
)
import
Servant.Static.TH
(
createApiAndServerDecs
)
---------------------------------------------------------------------
$
(
createApiAndServerDecs
"FrontEndAPI"
"frontEndServer"
"purescript-gargantext/dist"
)
...
...
src/Gargantext/API/Metrics.hs
View file @
5f31a345
...
...
@@ -33,14 +33,18 @@ import GHC.Generics (Generic)
import
Gargantext.Core.Types
(
ListType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Utils
import
Gargantext.Core.Types
(
CorpusId
)
import
Gargantext.Core.Types
(
CorpusId
,
ListId
,
Limit
)
import
Gargantext.Prelude
import
Gargantext.API.Ngrams
import
Gargantext.Text.Metrics
(
Scored
(
..
))
import
Gargantext.API.Ngrams.NTree
import
Gargantext.Database.Flow
import
Gargantext.Viz.Chart
import
Servant
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
(
Arbitrary
,
arbitrary
)
import
qualified
Data.Map
as
Map
import
qualified
Gargantext.Database.Metrics
as
Metrics
data
Metrics
=
Metrics
{
metrics_data
::
[
Metric
]}
...
...
@@ -97,6 +101,30 @@ instance Arbitrary MyTree
arbitrary
=
MyTree
<$>
arbitrary
<*>
arbitrary
<*>
arbitrary
-------------------------------------------------------------
-- | Scatter metrics API
type
ScatterAPI
=
Summary
"SepGen IncExc metrics"
:>
QueryParam
"list"
ListId
:>
QueryParamR
"ngramsType"
TabType
:>
QueryParam
"limit"
Int
:>
Get
'[
J
SON
]
Metrics
getScatter
::
FlowCmdM
env
err
m
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Limit
->
m
Metrics
getScatter
cId
maybeListId
tabType
maybeLimit
=
do
(
ngs'
,
scores
)
<-
Metrics
.
getMetrics
cId
maybeListId
tabType
maybeLimit
let
metrics
=
map
(
\
(
Scored
t
s1
s2
)
->
Metric
t
(
log'
5
s1
)
(
log'
2
s2
)
(
listType
t
ngs'
))
scores
log'
n
x
=
1
+
(
if
x
<=
0
then
0
else
(
log
$
(
10
^
(
n
::
Int
))
*
x
))
listType
t
m
=
maybe
(
panic
errorMsg
)
fst
$
Map
.
lookup
t
m
errorMsg
=
"API.Node.metrics: key absent"
pure
$
Metrics
metrics
...
...
src/Gargantext/API/Node.hs
View file @
5f31a345
...
...
@@ -65,7 +65,6 @@ import Gargantext.Database.Types.Node
import
Gargantext.Database.Utils
-- (Cmd, CmdM)
import
Gargantext.Prelude
import
Gargantext.Prelude.Utils
(
hash
)
import
Gargantext.Text.Metrics
(
Scored
(
..
))
import
Gargantext.Viz.Chart
import
Gargantext.Viz.Phylo.API
(
PhyloAPI
,
phyloAPI
)
import
Servant
...
...
@@ -74,8 +73,6 @@ import Servant.Swagger (HasSwagger(toSwagger))
import
Servant.Swagger.Internal
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
(
Arbitrary
,
arbitrary
)
import
qualified
Data.Map
as
Map
import
qualified
Gargantext.Database.Metrics
as
Metrics
import
qualified
Gargantext.Database.Node.Update
as
U
(
update
,
Update
(
..
))
{-
...
...
@@ -144,7 +141,7 @@ type NodeAPI a = Get '[JSON] (Node a)
:>
SearchAPI
-- VIZ
:<|>
"metrics"
:>
Metrics
API
:<|>
"metrics"
:>
Scatter
API
:<|>
"chart"
:>
ChartApi
:<|>
"pie"
:>
PieApi
:<|>
"tree"
:>
TreeApi
...
...
@@ -187,7 +184,7 @@ nodeAPI p uId id
:<|>
delDocs
id
:<|>
searchIn
id
:<|>
get
Metrics
id
:<|>
get
Scatter
id
:<|>
getChart
id
:<|>
getPie
id
:<|>
getTree
id
...
...
@@ -375,27 +372,6 @@ putNode = undefined -- TODO
query
::
Monad
m
=>
Text
->
m
Text
query
s
=
pure
s
-------------------------------------------------------------
type
MetricsAPI
=
Summary
"SepGen IncExc metrics"
:>
QueryParam
"list"
ListId
:>
QueryParamR
"ngramsType"
TabType
:>
QueryParam
"limit"
Int
:>
Get
'[
J
SON
]
Metrics
getMetrics
::
NodeId
->
GargServer
MetricsAPI
getMetrics
cId
maybeListId
tabType
maybeLimit
=
do
(
ngs'
,
scores
)
<-
Metrics
.
getMetrics'
cId
maybeListId
tabType
maybeLimit
let
metrics
=
map
(
\
(
Scored
t
s1
s2
)
->
Metric
t
(
log'
5
s1
)
(
log'
2
s2
)
(
listType
t
ngs'
))
scores
log'
n
x
=
1
+
(
if
x
<=
0
then
0
else
(
log
$
(
10
^
(
n
::
Int
))
*
x
))
listType
t
m
=
maybe
(
panic
errorMsg
)
fst
$
Map
.
lookup
t
m
errorMsg
=
"API.Node.metrics: key absent"
pure
$
Metrics
metrics
-------------------------------------------------------------
type
Hash
=
Text
data
FileType
=
CSV
|
PresseRIS
...
...
src/Gargantext/Database/Flow.hs
View file @
5f31a345
...
...
@@ -217,8 +217,14 @@ insertMasterDocs c lang hs = do
fixLang
(
Unsupervised
l
n
s
m
)
=
Unsupervised
l
n
s
m'
where
m'
=
case
m
of
Nothing
->
trace
(
"buildTries here"
::
String
)
$
Just
$
buildTries
n
(
fmap
toToken
$
uniText
$
Text
.
intercalate
" "
$
List
.
concat
$
map
hasText
documentsWithId
)
m''
->
m''
Nothing
->
trace
(
"buildTries here"
::
String
)
$
Just
$
buildTries
n
(
fmap
toToken
$
uniText
$
Text
.
intercalate
" . "
$
List
.
concat
$
map
hasText
documentsWithId
)
just_m
->
just_m
fixLang
l
=
l
lang'
=
fixLang
lang
...
...
src/Gargantext/Database/Lists.hs
View file @
5f31a345
...
...
@@ -39,7 +39,7 @@ import qualified Gargantext.Database.Metrics as Metrics
{-
trainModel :: FlowCmdM env ServantErr m
=> Username -> m Score
trainMode u = do
trainMode
l
u = do
rootId <- _node_id <$> getRoot u
(id:ids) <- getCorporaWithParentId rootId
(s,_model) <- case length ids >0 of
...
...
@@ -48,11 +48,11 @@ trainMode u = do
--}
getMetrics
::
FlowCmdM
env
err
m
getMetrics
'
::
FlowCmdM
env
err
m
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Int
->
m
(
Map
.
Map
ListType
[
Vec
.
Vector
Double
])
getMetrics
cId
maybeListId
tabType
maybeLimit
=
do
(
ngs'
,
scores
)
<-
Metrics
.
getMetrics
'
cId
maybeListId
tabType
maybeLimit
getMetrics
'
cId
maybeListId
tabType
maybeLimit
=
do
(
ngs'
,
scores
)
<-
Metrics
.
getMetrics
cId
maybeListId
tabType
maybeLimit
let
metrics
=
map
(
\
(
Scored
t
s1
s2
)
->
(
listType
t
ngs'
,
[
Vec
.
fromList
[
s1
,
s2
]]))
scores
...
...
src/Gargantext/Database/Metrics.hs
View file @
5f31a345
...
...
@@ -24,26 +24,27 @@ import Gargantext.API.Ngrams (TabType(..), ngramsTypeFromTabType)
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
groupNodesByNgrams
,
Diagonal
(
..
),
getCoocByNgrams
,
mapTermListRoot
,
RootTerm
)
import
Gargantext.Core.Types
(
ListType
(
..
),
Limit
,
NodeType
(
..
))
import
Gargantext.Database.Flow
(
FlowCmdM
)
import
Gargantext.Database.Metrics.NgramsByNode
(
getNodesByNgramsOnlyUser
,
getTficfWith
)
import
Gargantext.Database.Metrics.NgramsByNode
(
getNodesByNgramsOnlyUser
{-, getTficfWith-}
)
import
Gargantext.Database.Node.Select
import
Gargantext.Database.Schema.Node
(
defaultList
)
import
Gargantext.Database.Types.Node
(
ListId
,
CorpusId
,
HyperdataCorpus
)
import
Gargantext.Database.Flow
(
getOrMkRootWithCorpus
)
import
Gargantext.Database.Types.Node
(
ListId
,
CorpusId
{-, HyperdataCorpus-}
)
--
import Gargantext.Database.Flow (getOrMkRootWithCorpus)
import
Gargantext.Database.Config
(
userMaster
)
import
Gargantext.Prelude
import
Gargantext.Text.Metrics
(
scored
,
Scored
(
..
),
localMetrics
,
toScored
)
import
Gargantext.Text.Metrics
(
scored
,
Scored
(
..
),
localMetrics
{-, toScored-}
)
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector.Storable
as
Vec
getMetrics
'
::
FlowCmdM
env
err
m
getMetrics
::
FlowCmdM
env
err
m
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Limit
->
m
(
Map
Text
(
ListType
,
Maybe
Text
),
[
Scored
Text
])
getMetrics
'
cId
maybeListId
tabType
maybeLimit
=
do
getMetrics
cId
maybeListId
tabType
maybeLimit
=
do
(
ngs
,
_
,
myCooc
)
<-
getNgramsCooc
cId
maybeListId
tabType
maybeLimit
pure
(
ngs
,
scored
myCooc
)
{- | TODO remove unused function
getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text])
...
...
@@ -58,7 +59,7 @@ getMetrics cId maybeListId tabType maybeLimit = do
metrics' <- getTficfWith cId masterCorpusId (lIds <> [lId]) (ngramsTypeFromTabType tabType) ngs'
pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics'])
-}
getLocalMetrics
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Limit
...
...
src/Gargantext/Text/List.hs
View file @
5f31a345
...
...
@@ -43,7 +43,6 @@ data NgramsListBuilder = BuilderStepO { stemSize :: Int
|
BuilderStepN
{
withModel
::
Model
}
data
StopSize
=
StopSize
{
unStopSize
::
Int
}
-- | TODO improve grouping functions of Authors, Sources, Institutes..
...
...
@@ -111,8 +110,8 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
ys
=
take
b
$
drop
a
ns
zs
=
drop
b
$
drop
a
ns
a
=
3
b
=
50
0
a
=
3
00
b
=
35
0
isStopTerm
::
StopSize
->
Text
->
Bool
isStopTerm
(
StopSize
n
)
x
=
Text
.
length
x
<
n
||
any
isStopChar
(
Text
.
unpack
x
)
...
...
src/Gargantext/Text/Terms.hs
View file @
5f31a345
...
...
@@ -83,8 +83,6 @@ extractTerms (Unsupervised l n s m) xs = mapM (terms (Unsupervised l n s (Just m
extractTerms
termTypeLang
xs
=
mapM
(
terms
termTypeLang
)
xs
------------------------------------------------------------------------
-- | Terms from Text
-- Mono : mono terms
...
...
@@ -132,8 +130,10 @@ newTries :: Int -> Text -> Tries Token ()
newTries
n
t
=
buildTries
n
(
fmap
toToken
$
uniText
t
)
uniText
::
Text
->
[[
Text
]]
uniText
=
map
(
List
.
filter
(
not
.
isPunctuation
))
uniText
=
-- map (map (Text.toLower))
map
(
List
.
filter
(
not
.
isPunctuation
))
.
map
tokenize
.
sentences
-- | TODO get sentences according to lang
.
Text
.
toLower
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment