Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8403b183
Commit
8403b183
authored
Apr 10, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIS] Frequent Item Set DSL.
parent
f1c1609c
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
155 additions
and
10 deletions
+155
-10
package.yaml
package.yaml
+1
-0
Ngrams.hs
src/Gargantext/Ngrams.hs
+52
-8
FrequentItemSet.hs
src/Gargantext/Ngrams/FrequentItemSet.hs
+90
-0
Prelude.hs
src/Gargantext/Prelude.hs
+10
-2
stack.yaml
stack.yaml
+2
-0
No files found.
package.yaml
View file @
8403b183
...
...
@@ -84,6 +84,7 @@ library:
-
http-api-data
-
http-types
-
hxt
-
hlcm
-
ini
-
jose-jwt
-
lens
...
...
src/Gargantext/Ngrams.hs
View file @
8403b183
{-|
Module : Gargantext.Ngrams
Description : Ngrams tools
Copyright : (c) CNRS, 201
7
Copyright : (c) CNRS, 201
8
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
...
...
@@ -21,7 +21,7 @@ module Gargantext.Ngrams ( module Gargantext.Ngrams.Letters
,
module
Gargantext
.
Ngrams
.
Occurrences
,
module
Gargantext
.
Ngrams
.
TextMining
,
module
Gargantext
.
Ngrams
.
Metrics
,
ngrams
,
occ
urrence
s
,
ngrams
,
occ
,
sumOcc
,
text2fi
s
--, module Gargantext.Ngrams.Words
)
where
...
...
@@ -30,26 +30,34 @@ import Gargantext.Ngrams.Letters
import
Gargantext.Ngrams.CoreNLP
import
Gargantext.Ngrams.Parser
import
Gargantext.Ngrams.Occurrences
import
Gargantext.Ngrams.TextMining
--import Gargantext.Ngrams.Words
import
Gargantext.Ngrams.Metrics
import
qualified
Gargantext.Ngrams.FrequentItemSet
as
FIS
-----------------------------------------------------------------
import
Data.Char
(
Char
,
isAlpha
,
isSpace
)
import
Data.Text
(
Text
,
words
,
filter
,
toLower
)
import
Data.Map.Strict
(
Map
,
empty
,
insertWith
)
import
Data.Map.Strict
(
Map
,
empty
,
keys
,
insertWith
,
unionWith
,
fromList
,
lookupIndex
)
import
qualified
Data.Map.Strict
as
M
(
filter
)
import
Data.Foldable
(
foldl'
)
import
Gargantext.Prelude
hiding
(
filter
)
import
qualified
Data.List
as
L
(
filter
)
-- Maybe useful later:
--import NLP.Stemmer (stem, Stemmer(..))
--import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
--import Language.Aspell.Options (ACOption(..))
type
Occ
=
Int
type
Index
=
Int
type
FreqMin
=
Int
ngrams
::
Text
->
[
Text
]
ngrams
xs
=
monograms
$
toLower
$
filter
isGram
xs
...
...
@@ -59,12 +67,48 @@ monograms = words
isGram
::
Char
->
Bool
isGram
'-'
=
True
isGram
'/'
=
True
isGram
c
=
isAlpha
c
||
isSpace
c
-- | Compute the occurrences
occurrences
::
Ord
a
=>
[
a
]
->
Map
a
Int
occurrences
xs
=
foldl'
(
\
x
y
->
insertWith
(
+
)
y
1
x
)
empty
xs
-- | Compute the occurrences (occ)
occ
::
Ord
a
=>
[
a
]
->
Map
a
Occ
occ
xs
=
foldl'
(
\
x
y
->
insertWith
(
+
)
y
1
x
)
empty
xs
-- TODO add groups and filter stops
sumOcc
::
Ord
a
=>
[
Map
a
Occ
]
->
Map
a
Occ
sumOcc
xs
=
foldl'
(
\
x
y
->
unionWith
(
+
)
x
y
)
empty
xs
noApax
::
Ord
a
=>
Map
a
Occ
->
Map
a
Occ
noApax
m
=
M
.
filter
(
>
1
)
m
-- | /!\ indexes are not the same:
-- | Index ngrams from Map
indexNgram
::
Ord
a
=>
Map
a
Occ
->
Map
Index
a
indexNgram
m
=
fromList
(
zip
[
1
..
]
(
keys
m
))
-- | Index ngrams from Map
ngramIndex
::
Ord
a
=>
Map
a
Occ
->
Map
a
Index
ngramIndex
m
=
fromList
(
zip
(
keys
m
)
[
1
..
])
indexWith
::
Ord
a
=>
Map
a
Occ
->
[
a
]
->
[
Int
]
indexWith
m
xs
=
unMaybe
$
map
(
\
x
->
lookupIndex
x
m
)
xs
indexIt
::
Ord
a
=>
[[
a
]]
->
(
Map
a
Int
,
[[
Int
]])
indexIt
xs
=
(
m
,
is
)
where
m
=
sumOcc
(
map
occ
xs
)
is
=
map
(
indexWith
m
)
xs
list2fis
::
Ord
a
=>
FIS
.
Frequency
->
[[
a
]]
->
(
Map
a
Int
,
[
FIS
.
Fis
])
list2fis
n
xs
=
(
m
,
fs
)
where
(
m
,
is
)
=
indexIt
xs
fs
=
FIS
.
all
n
is
text2fis
::
FIS
.
Frequency
->
[
Text
]
->
(
Map
Text
Int
,
[
FIS
.
Fis
])
text2fis
n
xs
=
list2fis
n
(
map
ngrams
xs
)
text2fisWith
::
FIS
.
Size
->
FIS
.
Frequency
->
[
Text
]
->
(
Map
Text
Int
,
[
FIS
.
Fis
])
text2fisWith
=
undefined
src/Gargantext/Ngrams/FrequentItemSet.hs
0 → 100644
View file @
8403b183
{-|
Module : Gargantext.Ngrams.FrequentItemSet
Description : Ngrams tools
Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Domain Specific Language to manage Frequent Item Set (FIS)
-}
module
Gargantext.Ngrams.FrequentItemSet
(
Fis
,
Size
,
occ
,
cooc
,
all
,
between
,
module
HLCM
)
where
import
Data.List
(
tail
,
filter
)
import
Data.Either
import
HLCM
import
Gargantext.Prelude
type
Size
=
Either
Int
(
Int
,
Int
)
------------------------------------------------------------------------
-- | Occurrence is Frequent Item Set of size 1
occ
::
Frequency
->
[[
Item
]]
->
[
Fis
]
occ
f
is
=
fisWithSize
(
Left
1
)
f
is
-- | Cooccurrence is Frequent Item Set of size 2
cooc
::
Frequency
->
[[
Item
]]
->
[
Fis
]
cooc
f
is
=
fisWithSize
(
Left
2
)
f
is
all
::
Frequency
->
[[
Item
]]
->
[
Fis
]
all
f
is
=
fisWith
Nothing
f
is
------------------------------------------------------------------------
between
::
(
Int
,
Int
)
->
Frequency
->
[[
Item
]]
->
[
Fis
]
between
(
x
,
y
)
f
is
=
fisWithSize
(
Right
(
x
,
y
))
f
is
maximum
::
Int
->
Frequency
->
[[
Item
]]
->
[
Fis
]
maximum
m
f
is
=
between
(
0
,
m
)
f
is
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Data type to type the Frequent Item Set
-- TODO replace List with Set in fisItemSet
-- be careful : risks to erase HLCM behavior
type
Fis
=
Fis'
Item
data
Fis'
a
=
Fis'
{
_fisCount
::
Int
,
_fisItemSet
::
[
a
]
}
deriving
(
Show
)
-- | Sugar from items to FIS
items2fis
::
[
Item
]
->
Maybe
Fis
items2fis
is
=
case
head
is
of
Nothing
->
Nothing
Just
h
->
Just
(
Fis'
h
(
tail
is
))
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
fisWithSize
::
Size
->
Frequency
->
[[
Item
]]
->
[
Fis
]
fisWithSize
n
f
is
=
case
n
of
Left
n'
->
fisWith
(
Just
(
\
x
->
length
x
==
(
n'
+
1
)
))
f
is
Right
(
a
,
b
)
->
fisWith
(
Just
(
\
x
->
cond1
a
x
&&
cond2
b
x
))
f
is
where
cond1
a'
x
=
length
x
>=
a'
cond2
b'
x
=
length
x
<=
b'
fisWith
::
Maybe
([
Item
]
->
Bool
)
->
Frequency
->
[[
Item
]]
->
[
Fis
]
fisWith
s
f
is
=
unMaybe
$
map
items2fis
$
filter'
$
runLCMmatrix
is
f
where
filter'
=
case
s
of
Nothing
->
identity
Just
fun
->
filter
fun
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
src/Gargantext/Prelude.hs
View file @
8403b183
...
...
@@ -15,17 +15,19 @@ module Gargantext.Prelude
)
where
import
Data.Maybe
(
isJust
,
fromJust
)
import
Protolude
(
Bool
(
True
,
False
),
Int
,
Double
,
Integer
,
Fractional
,
Num
,
Maybe
(
Just
,
Nothing
)
,
Floating
,
Char
,
IO
,
pure
,
(
<$>
),
panic
,
head
,
flip
,
Ord
,
Integral
,
Foldable
,
RealFrac
,
Monad
,
filter
,
reverse
,
map
,
zip
,
drop
,
take
,
zipWith
,
sum
,
fromIntegral
,
length
,
fmap
,
takeWhile
,
sqrt
,
undefined
,
identity
,
abs
,
maximum
,
minimum
,
return
,
snd
,
truncate
,
(
+
),
(
*
),
(
/
),
(
-
),
(
.
),
(
>=
),
(
$
),
(
**
),
(
^
),
(
<
),
(
>
)
,
Eq
,
(
==
),
(
<>
)
,
(
+
),
(
*
),
(
/
),
(
-
),
(
.
),
(
$
),
(
**
),
(
^
),
(
<
),
(
>
)
,
Eq
,
(
==
),
(
>=
),
(
<=
),
(
<>
)
,
(
&&
),
(
||
),
not
,
fst
,
snd
,
toS
)
...
...
@@ -208,3 +210,9 @@ zipFst f xs = zip (f xs) xs
zipSnd
::
([
a
]
->
[
b
])
->
[
a
]
->
[(
a
,
b
)]
zipSnd
f
xs
=
zip
xs
(
f
xs
)
-- Just
unMaybe
::
[
Maybe
a
]
->
[
a
]
unMaybe
=
map
fromJust
.
L
.
filter
isJust
stack.yaml
View file @
8403b183
...
...
@@ -6,6 +6,8 @@ allow-newer: true
extra-deps
:
-
git
:
https://github.com/delanoe/data-time-segment.git
commit
:
4e3d57d80e9dfe6624c8eeaa8595fc8fe64d8723
-
git
:
https://gitlab.iscpif.fr/gargantext/hlcm.git
commit
:
6f0595d2421005837d59151a8b26eee83ebb67b5
-
git
:
https://github.com/delanoe/servant-static-th.git
commit
:
fff77e79fe94d563ab5cae2609b78c17b5c1f434
-
aeson-1.2.4.0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment