Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Moutinho
haskell-gargantext
Commits
f4e687d5
Commit
f4e687d5
authored
May 03, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[Structure] Ngrams -> Text.
parent
50d95f87
Changes
31
Hide whitespace changes
Inline
Side-by-side
Showing
31 changed files
with
93 additions
and
145 deletions
+93
-145
moduleDesc.model
moduleDesc.model
+0
-12
package.yaml
package.yaml
+16
-15
En.hs
src-test/Ngrams/Lang/En.hs
+1
-1
Fr.hs
src-test/Ngrams/Lang/Fr.hs
+1
-1
Occurrences.hs
src-test/Ngrams/Lang/Occurrences.hs
+1
-1
Metrics.hs
src-test/Ngrams/Metrics.hs
+1
-1
Network_hs
src/Gargantext/Network_hs
+0
-36
Text.hs
src/Gargantext/Text.hs
+20
-22
Analysis.hs
src/Gargantext/Text/Analysis.hs
+1
-1
CoreNLP.hs
src/Gargantext/Text/CoreNLP.hs
+2
-2
FrequentItemSet.hs
src/Gargantext/Text/FrequentItemSet.hs
+2
-2
Hetero.purs
src/Gargantext/Text/Hetero.purs
+3
-3
En.hs
src/Gargantext/Text/Lang/En.hs
+2
-2
Fr.hs
src/Gargantext/Text/Lang/Fr.hs
+2
-2
Letters.hs
src/Gargantext/Text/Letters.hs
+2
-2
List.hs
src/Gargantext/Text/List.hs
+3
-3
Metrics.hs
src/Gargantext/Text/Metrics.hs
+2
-2
Occurrences.hs
src/Gargantext/Text/Occurrences.hs
+2
-2
Parser.hs
src/Gargantext/Text/Parser.hs
+5
-5
Parsers.hs
src/Gargantext/Text/Parsers.hs
+2
-2
CSV_hs
src/Gargantext/Text/Parsers/CSV_hs
+0
-0
Date.hs
src/Gargantext/Text/Parsers/Date.hs
+2
-2
RIS_hs
src/Gargantext/Text/Parsers/RIS_hs
+0
-0
Utils_hs
src/Gargantext/Text/Parsers/Utils_hs
+0
-0
WOS.hs
src/Gargantext/Text/Parsers/WOS.hs
+2
-2
XML_hs
src/Gargantext/Text/Parsers/XML_hs
+0
-0
En.hs
src/Gargantext/Text/Stem/En.hs
+1
-1
TFICF.hs
src/Gargantext/Text/TFICF.hs
+2
-2
TextMining.hs
src/Gargantext/Text/TextMining.hs
+2
-2
Text.hs
src/Gargantext/Text/Token/Text.hs
+16
-19
Word2Vec_hs
src/Gargantext/Text/Word2Vec_hs
+0
-0
No files found.
moduleDesc.model
deleted
100644 → 0
View file @
50d95f87
{-|
Module : Gargantext.
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
package.yaml
View file @
f4e687d5
...
...
@@ -33,21 +33,22 @@ library:
-
Gargantext.Database.NodeNodeNgram
-
Gargantext.Database.Utils
-
Gargantext.Database.User
-
Gargantext.Ngrams
-
Gargantext.Ngrams.Analysis
-
Gargantext.Ngrams.TFICF
-
Gargantext.Ngrams.Letters
-
Gargantext.Ngrams.CoreNLP
-
Gargantext.Ngrams.Parser
-
Gargantext.Ngrams.Lang.En
-
Gargantext.Ngrams.Stem.En
-
Gargantext.Ngrams.Lang.Fr
-
Gargantext.Ngrams.Metrics
-
Gargantext.Ngrams.TextMining
-
Gargantext.Ngrams.Occurrences
-
Gargantext.Parsers
-
Gargantext.Parsers.WOS
-
Gargantext.Parsers.Date
-
Gargantext.Text
-
Gargantext.Text.Analysis
-
Gargantext.Text.TFICF
-
Gargantext.Text.Letters
-
Gargantext.Text.CoreNLP
-
Gargantext.Text.Parser
-
Gargantext.Text.Token.Text
-
Gargantext.Text.Lang.En
-
Gargantext.Text.Stem.En
-
Gargantext.Text.Lang.Fr
-
Gargantext.Text.Metrics
-
Gargantext.Text.TextMining
-
Gargantext.Text.Occurrences
-
Gargantext.Text.Parsers
-
Gargantext.Text.Parsers.WOS
-
Gargantext.Text.Parsers.Date
-
Gargantext.Prelude
-
Gargantext.API
-
Gargantext.API.Auth
...
...
src-test/Ngrams/Lang/En.hs
View file @
f4e687d5
...
...
@@ -24,7 +24,7 @@ import Test.Hspec
import
Gargantext.Prelude
import
Gargantext.Types.Main
(
Language
(
..
))
import
Gargantext.
Ngrams
.Parser
(
extractNgrams
,
selectNgrams
)
import
Gargantext.
Text
.Parser
(
extractNgrams
,
selectNgrams
)
ngramsExtractionTest
::
IO
()
...
...
src-test/Ngrams/Lang/Fr.hs
View file @
f4e687d5
...
...
@@ -21,7 +21,7 @@ import Test.Hspec
import
Gargantext.Prelude
import
Gargantext.Types.Main
(
Language
(
..
))
import
Gargantext.
Ngrams
.Parser
(
extractNgrams
,
selectNgrams
)
import
Gargantext.
Text
.Parser
(
extractNgrams
,
selectNgrams
)
ngramsExtractionTest
::
IO
()
ngramsExtractionTest
=
hspec
$
do
...
...
src-test/Ngrams/Lang/Occurrences.hs
View file @
f4e687d5
...
...
@@ -22,7 +22,7 @@ import Test.Hspec
import
Data.Either
(
Either
(
Right
))
import
Gargantext.Prelude
import
Gargantext.
Ngrams
.Occurrences
(
parseOccurrences
)
import
Gargantext.
Text
.Occurrences
(
parseOccurrences
)
parsersTest
::
IO
()
parsersTest
=
hspec
$
do
...
...
src-test/Ngrams/Metrics.hs
View file @
f4e687d5
...
...
@@ -27,7 +27,7 @@ import Test.Hspec
import
Test.QuickCheck
import
Gargantext.Prelude
import
Gargantext.
Ngrams
.Metrics
import
Gargantext.
Text
.Metrics
#
if
!
MIN_VERSION_base
(
4
,
8
,
0
)
import
Control.Applicative
...
...
src/Gargantext/Network_hs
deleted
100644 → 0
View file @
50d95f87
module Data.Gargantext.Network where
import Data.Gargantext.Prelude
import Data.Map as DM
import Data.Vector as DV
type Measure a b c = DM.Map a (DM.Map b c)
-- UTCTime Paire Granularity [Candle]
-- GargVector Paire Granularity [Candle]
type GargVector a b c = DM.Map a ( DM.Map b c)
-- GargMatrix Granularity (Paire Paire) [Candle]
type GargMatrix a b c d = DM.Map a (FolioVector b c d)
-- GargMatrix Granularity (Paire Paire) [Candle]
type GargTensor a b c d e = DM.Map a (FolioMatrix b c d e)
--data PortGarg = PortGarg { _portFolioParameters :: Parameters
-- , _portGargData :: Garg
--}
toMeasure :: Granularity -> Paire -> [Candle]
-> Measure Granularity Paire Candle
toMeasure g c1 c2 cs = DM.fromList [(g,
src/Gargantext/
Ngrams
.hs
→
src/Gargantext/
Text
.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
Module : Gargantext.
Text
Description : Ngrams tools
Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3
...
...
@@ -16,30 +16,29 @@ n non negative integer
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
(
module
Gargantext
.
Ngrams
.
Letters
--, module Gargantext.
Ngrams
.Hetero
,
module
Gargantext
.
Ngrams
.
CoreNLP
,
module
Gargantext
.
Ngrams
.
Parser
,
module
Gargantext
.
Ngrams
.
Occurrences
,
module
Gargantext
.
Ngrams
.
TextMining
,
module
Gargantext
.
Ngrams
.
Metrics
module
Gargantext.
Text
(
module
Gargantext
.
Text
.
Letters
--, module Gargantext.
Text
.Hetero
,
module
Gargantext
.
Text
.
CoreNLP
,
module
Gargantext
.
Text
.
Parser
,
module
Gargantext
.
Text
.
Occurrences
,
module
Gargantext
.
Text
.
TextMining
,
module
Gargantext
.
Text
.
Metrics
,
Ngrams
(
..
),
ngrams
,
occ
,
sumOcc
,
text2fis
,
clean
,
ListName
(
..
),
equivNgrams
,
isGram
,
sentences
,
ngramsTest
--, module Gargantext.Ngrams.Words
)
where
import
Gargantext.
Ngrams
.Letters
--import Gargantext.
Ngrams
.Hetero
import
Gargantext.
Ngrams
.CoreNLP
import
Gargantext.
Ngrams
.Parser
import
Gargantext.
Text
.Letters
--import Gargantext.
Text
.Hetero
import
Gargantext.
Text
.CoreNLP
import
Gargantext.
Text
.Parser
import
Gargantext.
Ngrams
.Occurrences
import
Gargantext.
Ngrams
.TextMining
--import Gargantext.
Ngrams
.Words
import
Gargantext.
Text
.Occurrences
import
Gargantext.
Text
.TextMining
--import Gargantext.
Text
.Words
import
Gargantext.
Ngrams
.Metrics
import
qualified
Gargantext.
Ngrams
.FrequentItemSet
as
FIS
import
Gargantext.
Text
.Metrics
import
qualified
Gargantext.
Text
.FrequentItemSet
as
FIS
-----------------------------------------------------------------
import
Data.List
(
sort
)
...
...
@@ -152,10 +151,9 @@ isStop c = c `elem` ['.','?','!']
-- | Tests
-- TODO http://hackage.haskell.org/package/tokenize-0.3.0/docs/NLP-Tokenize-Text.html
ngramsTest
=
ws
ngramsTest
fp
=
ws
where
txt
=
concat
<$>
lines
<$>
clean
<$>
readFile
"Giono-arbres.txt"
txt
=
concat
<$>
lines
<$>
clean
<$>
readFile
fp
-- | Number of sentences
ls
=
sentences
<$>
txt
-- | Number of monograms used in the full text
...
...
@@ -165,6 +163,6 @@ ngramsTest = ws
-- group ngrams
ocs
=
occ
<$>
ws
--
src/Gargantext/
Ngrams
/Analysis.hs
→
src/Gargantext/
Text
/Analysis.hs
View file @
f4e687d5
...
...
@@ -11,7 +11,7 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.Analysis
module
Gargantext.
Text
.Analysis
where
import
Gargantext.Prelude
(
undefined
,
IO
(),
Int
())
...
...
src/Gargantext/
Ngrams
/CoreNLP.hs
→
src/Gargantext/
Text
/CoreNLP.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.CoreNLP
Module : Gargantext.
Text
.CoreNLP
Description : CoreNLP module
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
...
...
@@ -16,7 +16,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module
Gargantext.
Ngrams
.CoreNLP
where
module
Gargantext.
Text
.CoreNLP
where
import
Data.Aeson.TH
(
deriveJSON
)
import
GHC.Generics
...
...
src/Gargantext/
Ngrams
/FrequentItemSet.hs
→
src/Gargantext/
Text
/FrequentItemSet.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.FrequentItemSet
Module : Gargantext.
Text
.FrequentItemSet
Description : Ngrams tools
Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3
...
...
@@ -13,7 +13,7 @@ Domain Specific Language to manage Frequent Item Set (FIS)
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.FrequentItemSet
module
Gargantext.
Text
.FrequentItemSet
(
Fis
,
Size
,
occ
,
cooc
,
all
,
between
...
...
src/Gargantext/
Ngrams
/Hetero.purs
→
src/Gargantext/
Text
/Hetero.purs
View file @
f4e687d5
module Gargantext.
Ngrams
.Hetero where
module Gargantext.
Text
.Hetero where
import GHC.Real as R
import Data.Set as S
...
...
@@ -12,8 +12,8 @@ import Gargantext.Database.Gargandb
import Gargantext.Database.Private
--import Gargantext.Utils.Chronos
import Gargantext.
Ngrams
.Words (cleanText)
import Gargantext.
Ngrams
.Count (occurrences)
import Gargantext.
Text
.Words (cleanText)
import Gargantext.
Text
.Count (occurrences)
import Gargantext.Database.Simple
...
...
src/Gargantext/
Ngrams
/Lang/En.hs
→
src/Gargantext/
Text
/Lang/En.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Lang.En
Module : Gargantext.
Text
.Lang.En
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.
Ngrams
.Lang.En
(
selectNgrams
,
groupNgrams
,
textTest
)
where
module
Gargantext.
Text
.Lang.En
(
selectNgrams
,
groupNgrams
,
textTest
)
where
import
Gargantext.Prelude
import
Data.Text
(
Text
)
...
...
src/Gargantext/
Ngrams
/Lang/Fr.hs
→
src/Gargantext/
Text
/Lang/Fr.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Lang.Fr
Module : Gargantext.
Text
.Lang.Fr
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.
Ngrams
.Lang.Fr
(
selectNgrams
,
groupNgrams
,
textTest
)
module
Gargantext.
Text
.Lang.Fr
(
selectNgrams
,
groupNgrams
,
textTest
)
where
import
Gargantext.Prelude
...
...
src/Gargantext/
Ngrams
/Letters.hs
→
src/Gargantext/
Text
/Letters.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Letters
Module : Gargantext.
Text
.Letters
Description : Ngrams.Letters module
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
...
...
@@ -14,7 +14,7 @@ Sugar to work on letters with Text.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.
Ngrams
.Letters
where
module
Gargantext.
Text
.Letters
where
import
qualified
Data.Text.Lazy
as
DTL
-- import qualified Data.Text.Lazy.IO as DTLIO
...
...
src/Gargantext/
Ngrams
/List.hs
→
src/Gargantext/
Text
/List.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.List
Module : Gargantext.
Text
.List
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -13,11 +13,11 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.List
where
module
Gargantext.
Text
.List
where
import
Data.Maybe
import
Data.List
(
filter
)
import
Gargantext.
Ngrams
import
Gargantext.
Text
import
Gargantext.Prelude
graph
::
[
Ngrams
]
->
[
Ngrams
]
...
...
src/Gargantext/
Ngrams
/Metrics.hs
→
src/Gargantext/
Text
/Metrics.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Metrics
Module : Gargantext.
Text
.Metrics
Description : Short description
Copyright : (c) Some Guy, 2013
Someone Else, 2014
...
...
@@ -13,7 +13,7 @@ Mainly reexport functions in @Data.Text.Metrics@
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.Metrics
(
levenshtein
module
Gargantext.
Text
.Metrics
(
levenshtein
,
levenshteinNorm
,
damerauLevenshtein
,
damerauLevenshteinNorm
...
...
src/Gargantext/
Ngrams
/Occurrences.hs
→
src/Gargantext/
Text
/Occurrences.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Occurrences
Module : Gargantext.
Text
.Occurrences
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.
Ngrams
.Occurrences
where
module
Gargantext.
Text
.Occurrences
where
import
Gargantext.Prelude
...
...
src/Gargantext/
Ngrams
/Parser.hs
→
src/Gargantext/
Text
/Parser.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Parser
Module : Gargantext.
Text
.Parser
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -15,15 +15,15 @@ commentary with @some markup@.
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module
Gargantext.
Ngrams
.Parser
where
module
Gargantext.
Text
.Parser
where
import
Gargantext.Prelude
import
Gargantext.
Ngrams
.CoreNLP
import
Gargantext.
Text
.CoreNLP
import
Data.Text
hiding
(
map
)
import
Gargantext.Types.Main
(
Language
(
..
))
import
qualified
Gargantext.
Ngrams
.Lang.En
as
En
import
qualified
Gargantext.
Ngrams
.Lang.Fr
as
Fr
import
qualified
Gargantext.
Text
.Lang.En
as
En
import
qualified
Gargantext.
Text
.Lang.Fr
as
Fr
type
SNgrams
=
(
Text
,
Text
,
Text
)
...
...
src/Gargantext/Parsers.hs
→
src/Gargantext/
Text/
Parsers.hs
View file @
f4e687d5
...
...
@@ -20,7 +20,7 @@ please follow the types.
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.Parsers
-- (parse, FileFormat(..))
module
Gargantext.
Text.
Parsers
-- (parse, FileFormat(..))
where
import
Gargantext.Prelude
...
...
@@ -46,7 +46,7 @@ import Path.IO (resolveFile')
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Data.String
(
String
())
import
Gargantext.Parsers.WOS
(
wosParser
)
import
Gargantext.
Text.
Parsers.WOS
(
wosParser
)
---- import Gargantext.Parsers.XML (xmlParser)
---- import Gargantext.Parsers.DOC (docParser)
---- import Gargantext.Parsers.ODT (odtParser)
...
...
src/Gargantext/Parsers/CSV_hs
→
src/Gargantext/
Text/
Parsers/CSV_hs
View file @
f4e687d5
File moved
src/Gargantext/Parsers/Date.hs
→
src/Gargantext/
Text/
Parsers/Date.hs
View file @
f4e687d5
{-|
Module : Gargantext.Parsers.Date
Module : Gargantext.
Text.
Parsers.Date
Description : Some utils to parse dates
Copyright : (c) CNRS 2017-present
License : AGPL + CECILL v3
...
...
@@ -18,7 +18,7 @@ DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Parsers.Date
(
parseDate1
,
Lang
(
FR
,
EN
),
parseDate
,
fromRFC3339
,
parseTimeOfDay
,
getMultiplicator
)
where
module
Gargantext.
Text.
Parsers.Date
(
parseDate1
,
Lang
(
FR
,
EN
),
parseDate
,
fromRFC3339
,
parseTimeOfDay
,
getMultiplicator
)
where
import
Gargantext.Prelude
import
Prelude
(
toInteger
,
div
,
otherwise
,
(
++
))
...
...
src/Gargantext/Parsers/RIS_hs
→
src/Gargantext/
Text/
Parsers/RIS_hs
View file @
f4e687d5
File moved
src/Gargantext/Parsers/Utils_hs
→
src/Gargantext/
Text/
Parsers/Utils_hs
View file @
f4e687d5
File moved
src/Gargantext/Parsers/WOS.hs
→
src/Gargantext/
Text/
Parsers/WOS.hs
View file @
f4e687d5
{-|
Module : Gargantext.Parsers.WOS
Module : Gargantext.
Text.
Parsers.WOS
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Parsers.WOS
(
wosParser
)
where
module
Gargantext.
Text.
Parsers.WOS
(
wosParser
)
where
-- TOFIX : Should import Gargantext.Prelude here
import
Prelude
hiding
(
takeWhile
,
take
,
concat
,
readFile
,
lines
,
concat
)
...
...
src/Gargantext/Parsers/XML_hs
→
src/Gargantext/
Text/
Parsers/XML_hs
View file @
f4e687d5
File moved
src/Gargantext/
Ngrams
/Stem/En.hs
→
src/Gargantext/
Text
/Stem/En.hs
View file @
f4e687d5
...
...
@@ -17,7 +17,7 @@ Adapted from:
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.Stem.En
module
Gargantext.
Text
.Stem.En
where
import
Control.Monad
...
...
src/Gargantext/
Ngrams
/TFICF.hs
→
src/Gargantext/
Text
/TFICF.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.TFICF
Module : Gargantext.
Text
.TFICF
Description : TFICF Ngrams tools
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
...
...
@@ -15,7 +15,7 @@ Definition of TFICF
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.TFICF
where
module
Gargantext.
Text
.TFICF
where
import
GHC.Generics
(
Generic
)
...
...
src/Gargantext/
Ngrams
/TextMining.hs
→
src/Gargantext/
Text
/TextMining.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.TextMining
Module : Gargantext.
Text
.TextMining
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -13,7 +13,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.TextMining
where
module
Gargantext.
Text
.TextMining
where
import
Gargantext.Prelude
import
Data.Ord
(
Ordering
(
LT
,
GT
),
compare
)
...
...
src/Gargantext/
Ngrams
/Token/Text.hs
→
src/Gargantext/
Text
/Token/Text.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Token.Text
Module : Gargantext.
Text
.Token.Text
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -13,7 +13,7 @@ Inspired from https://bitbucket.org/gchrupala/lingo/overview
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.
Ngrams
.Token.Text
module
Gargantext.
Text
.Token.Text
(
EitherList
(
..
)
,
Tokenizer
,
tokenize
...
...
@@ -32,8 +32,6 @@ where
import
qualified
Data.Char
as
Char
import
Data.Maybe
import
Control.Monad.Instances
()
import
Control.Applicative
import
Control.Monad
import
Data.Text
(
Text
)
...
...
@@ -49,6 +47,18 @@ import qualified Data.Text as T
---
-- > myTokenizer :: Tokenizer
-- > myTokenizer = whitespace >=> allPunctuation
-- examples :: [Text]
-- examples =
-- ["This shouldn't happen."
-- ,"Some 'quoted' stuff"
-- ,"This is a URL: http://example.org."
-- ,"How about an email@example.com"
-- ,"ReferenceError #1065 broke my debugger!"
-- ,"I would've gone."
-- ,"They've been there."
-- ,"Hyphen-words"
-- ,"Yes/No questions"
-- ]
---
type
Tokenizer
=
Text
->
EitherList
Text
Text
...
...
@@ -81,8 +91,8 @@ uris x | isUri x = E [Left x]
punctuation
::
Tokenizer
punctuation
=
finalPunctuation
>=>
initialPunctuation
hyphens
::
Tokenizer
hyphens
xs
=
E
[
Right
w
|
w
<-
T
.
split
(
==
'-'
)
xs
]
--
hyphens :: Tokenizer
--
hyphens xs = E [Right w | w <- T.split (=='-') xs ]
-- | Split off word-final punctuation
finalPunctuation
::
Tokenizer
...
...
@@ -152,16 +162,3 @@ unwrap :: Either a a -> a
unwrap
(
Left
x
)
=
x
unwrap
(
Right
x
)
=
x
examples
::
[
Text
]
examples
=
[
"This shouldn't happen."
,
"Some 'quoted' stuff"
,
"This is a URL: http://example.org."
,
"How about an email@example.com"
,
"ReferenceError #1065 broke my debugger!"
,
"I would've gone."
,
"They've been there."
,
"Hyphen-words"
,
"Yes/No questions"
]
src/Gargantext/
Ngrams
/Word2Vec_hs
→
src/Gargantext/
Text
/Word2Vec_hs
View file @
f4e687d5
File moved
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment