Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8403b183
Commit
8403b183
authored
Apr 10, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIS] Frequent Item Set DSL.
parent
f1c1609c
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
155 additions
and
10 deletions
+155
-10
package.yaml
package.yaml
+1
-0
Ngrams.hs
src/Gargantext/Ngrams.hs
+52
-8
FrequentItemSet.hs
src/Gargantext/Ngrams/FrequentItemSet.hs
+90
-0
Prelude.hs
src/Gargantext/Prelude.hs
+10
-2
stack.yaml
stack.yaml
+2
-0
No files found.
package.yaml
View file @
8403b183
...
...
@@ -84,6 +84,7 @@ library:
-
http-api-data
-
http-types
-
hxt
-
hlcm
-
ini
-
jose-jwt
-
lens
...
...
src/Gargantext/Ngrams.hs
View file @
8403b183
{-|
Module : Gargantext.Ngrams
Description : Ngrams tools
Copyright : (c) CNRS, 201
7
Copyright : (c) CNRS, 201
8
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
...
...
@@ -21,7 +21,7 @@ module Gargantext.Ngrams ( module Gargantext.Ngrams.Letters
,
module
Gargantext
.
Ngrams
.
Occurrences
,
module
Gargantext
.
Ngrams
.
TextMining
,
module
Gargantext
.
Ngrams
.
Metrics
,
ngrams
,
occ
urrence
s
,
ngrams
,
occ
,
sumOcc
,
text2fi
s
--, module Gargantext.Ngrams.Words
)
where
...
...
@@ -30,26 +30,34 @@ import Gargantext.Ngrams.Letters
import
Gargantext.Ngrams.CoreNLP
import
Gargantext.Ngrams.Parser
import
Gargantext.Ngrams.Occurrences
import
Gargantext.Ngrams.TextMining
--import Gargantext.Ngrams.Words
import
Gargantext.Ngrams.Metrics
import
qualified
Gargantext.Ngrams.FrequentItemSet
as
FIS
-----------------------------------------------------------------
import
Data.Char
(
Char
,
isAlpha
,
isSpace
)
import
Data.Text
(
Text
,
words
,
filter
,
toLower
)
import
Data.Map.Strict
(
Map
,
empty
,
insertWith
)
import
Data.Map.Strict
(
Map
,
empty
,
keys
,
insertWith
,
unionWith
,
fromList
,
lookupIndex
)
import
qualified
Data.Map.Strict
as
M
(
filter
)
import
Data.Foldable
(
foldl'
)
import
Gargantext.Prelude
hiding
(
filter
)
import
qualified
Data.List
as
L
(
filter
)
-- Maybe useful later:
--import NLP.Stemmer (stem, Stemmer(..))
--import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
--import Language.Aspell.Options (ACOption(..))
-- | Number of times an item occurs (the value type of the occurrence maps
-- built by 'occ' and 'sumOcc').
type Occ = Int
-- | Position assigned to an item by 'indexNgram' / 'ngramIndex'.
type Index = Int
-- | NOTE(review): presumably a minimum-frequency threshold; it is not used by
-- any definition visible here — confirm before relying on it.
type FreqMin = Int
-- | Tokenise a text: drop every character rejected by 'isGram', lower-case
-- what remains, then hand the result to 'monograms'.
ngrams :: Text -> [Text]
ngrams = monograms . toLower . filter isGram
...
...
@@ -59,12 +67,48 @@ monograms = words
-- | Decide whether a character is kept when tokenising: dashes, slashes,
-- alphabetic characters and white space are kept, everything else is not.
isGram :: Char -> Bool
isGram c = case c of
  '-' -> True
  '/' -> True
  _   -> isAlpha c || isSpace c
-- | Count how many times each element appears in the list.
-- The result maps every distinct element to its number of occurrences.
occurrences :: Ord a => [a] -> Map a Int
occurrences = foldl' bump empty
  where
    -- Add one to the counter of @item@, creating it at 1 when absent.
    bump counts item = insertWith (+) item 1 counts
-- | Compute the occurrences (occ): map every distinct element of the list to
-- the number of times it appears.
occ :: Ord a => [a] -> Map a Occ
occ = foldl' countOne empty
  where
    -- Add one to the counter of @item@, creating it at 1 when absent.
    countOne acc item = insertWith (+) item 1 acc
-- TODO add groups and filter stops
-- | Merge a list of occurrence maps into one, adding up the counts of keys
-- that appear in several maps. An empty list gives the empty map.
sumOcc :: Ord a => [Map a Occ] -> Map a Occ
sumOcc = foldl' (unionWith (+)) empty
-- | Keep only the entries whose count is strictly greater than 1, i.e. drop
-- every item that occurs a single time.
noApax :: Ord a => Map a Occ -> Map a Occ
noApax = M.filter (\count -> count > 1)
-- | Index ngrams from Map: number the keys of the occurrence map starting at
-- 1, in the order returned by 'keys', and return the index-to-key map.
--
-- /!\ indexes are not the same: 'indexWith' gets its positions from
-- 'lookupIndex' instead of from this numbering.
indexNgram :: Ord a => Map a Occ -> Map Index a
indexNgram m = fromList $ zip [1 ..] $ keys m
-- | Index ngrams from Map: number the keys of the occurrence map starting at
-- 1, in the order returned by 'keys', and return the key-to-index map.
ngramIndex :: Ord a => Map a Occ -> Map a Index
ngramIndex m = fromList $ zip (keys m) [1 ..]
-- | Replace every item by its position in the map, as given by
-- 'lookupIndex'. Items that are not keys of the map are dropped from the
-- result.
indexWith :: Ord a => Map a Occ -> [a] -> [Int]
indexWith m = unMaybe . map (`lookupIndex` m)
-- | Build the global occurrence map of a list of item lists (per-list counts
-- from 'occ', summed with 'sumOcc') and re-express every inner list as
-- positions in that map (via 'indexWith').
indexIt :: Ord a => [[a]] -> (Map a Int, [[Int]])
indexIt xs =
  let counts = sumOcc (map occ xs)
   in (counts, map (indexWith counts) xs)
-- | Index the item lists with 'indexIt', then run 'FIS.all' with frequency
-- @n@ on the indexed lists.
-- Returns the occurrence map alongside the frequent item sets found.
list2fis :: Ord a => FIS.Frequency -> [[a]] -> (Map a Int, [FIS.Fis])
list2fis n xs =
  let (counts, indexed) = indexIt xs
   in (counts, FIS.all n indexed)
-- | Tokenise every text with 'ngrams' and pass the token lists to
-- 'list2fis' with frequency @n@.
text2fis :: FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
text2fis n = list2fis n . map ngrams
-- | Placeholder, not implemented: the definition is 'undefined', so
-- evaluating it fails at run time.
-- NOTE(review): presumably intended as 'text2fis' restricted to item sets of
-- a given 'FIS.Size' — confirm the intent before implementing.
text2fisWith :: FIS.Size -> FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
text2fisWith = undefined
src/Gargantext/Ngrams/FrequentItemSet.hs
0 → 100644
View file @
8403b183
{-|
Module : Gargantext.Ngrams.FrequentItemSet
Description : Ngrams tools
Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Domain Specific Language to manage Frequent Item Set (FIS)
-}
module
Gargantext.Ngrams.FrequentItemSet
(
Fis
,
Size
,
occ
,
cooc
,
all
,
between
,
module
HLCM
)
where
import
Data.List
(
tail
,
filter
)
import
Data.Either
import
HLCM
import
Gargantext.Prelude
-- | Size constraint understood by 'fisWithSize': @Left n@ selects a single
-- size, @Right (a, b)@ selects a range with lower bound @a@ and upper bound
-- @b@ (both inclusive).
type Size = Either Int (Int, Int)
------------------------------------------------------------------------
-- | Occurrence is Frequent Item Set of size 1.
-- Delegates to 'fisWithSize' with @Left 1@.
occ :: Frequency -> [[Item]] -> [Fis]
occ = fisWithSize (Left 1)
-- | Cooccurrence is Frequent Item Set of size 2.
-- Delegates to 'fisWithSize' with @Left 2@.
cooc :: Frequency -> [[Item]] -> [Fis]
cooc = fisWithSize (Left 2)
-- | Every frequent item set, whatever its size: runs 'fisWith' without any
-- size predicate.
all :: Frequency -> [[Item]] -> [Fis]
all = fisWith Nothing
------------------------------------------------------------------------
-- | Frequent item sets selected by a size range: passes the two bounds to
-- 'fisWithSize' as a @Right@ size.
between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
between (lo, hi) = fisWithSize (Right (lo, hi))
-- | Frequent item sets with an upper size bound only: 'between' with a lower
-- bound of 0 and @m@ as upper bound.
-- NOTE(review): not listed in this module's export list, and the name matches
-- the @maximum@ that Gargantext.Prelude imports from Protolude — confirm
-- there is no clash at use sites.
maximum :: Int -> Frequency -> [[Item]] -> [Fis]
maximum m = between (0, m)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Data type to type the Frequent Item Set
-- TODO replace List with Set in fisItemSet
--      be careful : risks to erase HLCM behavior
type Fis = Fis' Item

-- | An item set together with its count. Values are built by 'items2fis'.
data Fis' a = Fis'
  { _fisCount   :: Int  -- ^ taken from the first element of a raw row
  , _fisItemSet :: [a]  -- ^ the remaining elements of that row
  } deriving (Show)
-- | Sugar from items to FIS: the first element of the list becomes the
-- count and the remaining elements become the item set.
-- An empty list cannot be converted and yields 'Nothing'.
items2fis :: [Item] -> Maybe Fis
items2fis []             = Nothing
items2fis (count : rest) = Just (Fis' count rest)
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Frequent item sets restricted to a requested size.
--
-- * @Left n@ keeps the item sets made of exactly @n@ items.
-- * @Right (a, b)@ keeps the item sets made of at least @a@ and at most @b@
--   items.
--
-- The predicate given to 'fisWith' sees raw rows, in which the first element
-- is the count and only the rest are items (see 'items2fis'). A row therefore
-- holds one more element than its item set, so both cases measure
-- @length row - 1@. Previously only the @Left@ case made that correction, so
-- a range was shifted by one compared with an exact size.
fisWithSize :: Size -> Frequency -> [[Item]] -> [Fis]
fisWithSize n f is = fisWith (Just hasWantedSize) f is
  where
    -- Number of items in a raw row, not counting the leading count element.
    itemCount row = length row - 1
    hasWantedSize row = case n of
      Left n'      -> itemCount row == n'
      Right (a, b) -> itemCount row >= a && itemCount row <= b
-- | Core of the DSL: run 'runLCMmatrix' on the item lists with frequency
-- @f@, optionally filter the raw rows with the given predicate, then turn
-- each remaining row into a 'Fis' with 'items2fis'. Rows that 'items2fis'
-- cannot convert are dropped by 'unMaybe'.
fisWith :: Maybe ([Item] -> Bool) -> Frequency -> [[Item]] -> [Fis]
fisWith s f is = unMaybe (map items2fis (select s (runLCMmatrix is f)))
  where
    -- No predicate: keep every row; otherwise keep the rows satisfying it.
    select Nothing     = identity
    select (Just keep) = filter keep
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
src/Gargantext/Prelude.hs
View file @
8403b183
...
...
@@ -15,17 +15,19 @@ module Gargantext.Prelude
)
where
import
Data.Maybe
(
isJust
,
fromJust
)
import
Protolude
(
Bool
(
True
,
False
),
Int
,
Double
,
Integer
,
Fractional
,
Num
,
Maybe
(
Just
,
Nothing
)
,
Floating
,
Char
,
IO
,
pure
,
(
<$>
),
panic
,
head
,
flip
,
Ord
,
Integral
,
Foldable
,
RealFrac
,
Monad
,
filter
,
reverse
,
map
,
zip
,
drop
,
take
,
zipWith
,
sum
,
fromIntegral
,
length
,
fmap
,
takeWhile
,
sqrt
,
undefined
,
identity
,
abs
,
maximum
,
minimum
,
return
,
snd
,
truncate
,
(
+
),
(
*
),
(
/
),
(
-
),
(
.
),
(
>=
),
(
$
),
(
**
),
(
^
),
(
<
),
(
>
)
,
Eq
,
(
==
),
(
<>
)
,
(
+
),
(
*
),
(
/
),
(
-
),
(
.
),
(
$
),
(
**
),
(
^
),
(
<
),
(
>
)
,
Eq
,
(
==
),
(
>=
),
(
<=
),
(
<>
)
,
(
&&
),
(
||
),
not
,
fst
,
snd
,
toS
)
...
...
@@ -208,3 +210,9 @@ zipFst f xs = zip (f xs) xs
-- | Pair every element of the list with the element at the same position in
-- the list obtained by applying @f@ to the whole input; the original element
-- comes first in each pair.
zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
zipSnd f xs = zip xs derived
  where
    derived = f xs
-- | Keep the payload of every 'Just' and discard every 'Nothing',
-- preserving the order of the input.
--
-- Written as a single-pass comprehension so that it is total: it no longer
-- goes through the partial 'fromJust', and the list is traversed once
-- instead of twice.
unMaybe :: [Maybe a] -> [a]
unMaybe ms = [x | Just x <- ms]
stack.yaml
View file @
8403b183
...
...
@@ -6,6 +6,8 @@ allow-newer: true
extra-deps
:
-
git
:
https://github.com/delanoe/data-time-segment.git
commit
:
4e3d57d80e9dfe6624c8eeaa8595fc8fe64d8723
-
git
:
https://gitlab.iscpif.fr/gargantext/hlcm.git
commit
:
6f0595d2421005837d59151a8b26eee83ebb67b5
-
git
:
https://github.com/delanoe/servant-static-th.git
commit
:
fff77e79fe94d563ab5cae2609b78c17b5c1f434
-
aeson-1.2.4.0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment