Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f4e687d5
Commit
f4e687d5
authored
May 03, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[Structure] Ngrams -> Text.
parent
50d95f87
Changes
31
Hide whitespace changes
Inline
Side-by-side
Showing
31 changed files
with
93 additions
and
145 deletions
+93
-145
moduleDesc.model
moduleDesc.model
+0
-12
package.yaml
package.yaml
+16
-15
En.hs
src-test/Ngrams/Lang/En.hs
+1
-1
Fr.hs
src-test/Ngrams/Lang/Fr.hs
+1
-1
Occurrences.hs
src-test/Ngrams/Lang/Occurrences.hs
+1
-1
Metrics.hs
src-test/Ngrams/Metrics.hs
+1
-1
Network_hs
src/Gargantext/Network_hs
+0
-36
Text.hs
src/Gargantext/Text.hs
+20
-22
Analysis.hs
src/Gargantext/Text/Analysis.hs
+1
-1
CoreNLP.hs
src/Gargantext/Text/CoreNLP.hs
+2
-2
FrequentItemSet.hs
src/Gargantext/Text/FrequentItemSet.hs
+2
-2
Hetero.purs
src/Gargantext/Text/Hetero.purs
+3
-3
En.hs
src/Gargantext/Text/Lang/En.hs
+2
-2
Fr.hs
src/Gargantext/Text/Lang/Fr.hs
+2
-2
Letters.hs
src/Gargantext/Text/Letters.hs
+2
-2
List.hs
src/Gargantext/Text/List.hs
+3
-3
Metrics.hs
src/Gargantext/Text/Metrics.hs
+2
-2
Occurrences.hs
src/Gargantext/Text/Occurrences.hs
+2
-2
Parser.hs
src/Gargantext/Text/Parser.hs
+5
-5
Parsers.hs
src/Gargantext/Text/Parsers.hs
+2
-2
CSV_hs
src/Gargantext/Text/Parsers/CSV_hs
+0
-0
Date.hs
src/Gargantext/Text/Parsers/Date.hs
+2
-2
RIS_hs
src/Gargantext/Text/Parsers/RIS_hs
+0
-0
Utils_hs
src/Gargantext/Text/Parsers/Utils_hs
+0
-0
WOS.hs
src/Gargantext/Text/Parsers/WOS.hs
+2
-2
XML_hs
src/Gargantext/Text/Parsers/XML_hs
+0
-0
En.hs
src/Gargantext/Text/Stem/En.hs
+1
-1
TFICF.hs
src/Gargantext/Text/TFICF.hs
+2
-2
TextMining.hs
src/Gargantext/Text/TextMining.hs
+2
-2
Text.hs
src/Gargantext/Text/Token/Text.hs
+16
-19
Word2Vec_hs
src/Gargantext/Text/Word2Vec_hs
+0
-0
No files found.
moduleDesc.model
deleted
100644 → 0
View file @
50d95f87
{-|
Module : Gargantext.
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
package.yaml
View file @
f4e687d5
...
...
@@ -33,21 +33,22 @@ library:
-
Gargantext.Database.NodeNodeNgram
-
Gargantext.Database.Utils
-
Gargantext.Database.User
-
Gargantext.Ngrams
-
Gargantext.Ngrams.Analysis
-
Gargantext.Ngrams.TFICF
-
Gargantext.Ngrams.Letters
-
Gargantext.Ngrams.CoreNLP
-
Gargantext.Ngrams.Parser
-
Gargantext.Ngrams.Lang.En
-
Gargantext.Ngrams.Stem.En
-
Gargantext.Ngrams.Lang.Fr
-
Gargantext.Ngrams.Metrics
-
Gargantext.Ngrams.TextMining
-
Gargantext.Ngrams.Occurrences
-
Gargantext.Parsers
-
Gargantext.Parsers.WOS
-
Gargantext.Parsers.Date
-
Gargantext.Text
-
Gargantext.Text.Analysis
-
Gargantext.Text.TFICF
-
Gargantext.Text.Letters
-
Gargantext.Text.CoreNLP
-
Gargantext.Text.Parser
-
Gargantext.Text.Token.Text
-
Gargantext.Text.Lang.En
-
Gargantext.Text.Stem.En
-
Gargantext.Text.Lang.Fr
-
Gargantext.Text.Metrics
-
Gargantext.Text.TextMining
-
Gargantext.Text.Occurrences
-
Gargantext.Text.Parsers
-
Gargantext.Text.Parsers.WOS
-
Gargantext.Text.Parsers.Date
-
Gargantext.Prelude
-
Gargantext.API
-
Gargantext.API.Auth
...
...
src-test/Ngrams/Lang/En.hs
View file @
f4e687d5
...
...
@@ -24,7 +24,7 @@ import Test.Hspec
import
Gargantext.Prelude
import
Gargantext.Types.Main
(
Language
(
..
))
import
Gargantext.
Ngrams
.Parser
(
extractNgrams
,
selectNgrams
)
import
Gargantext.
Text
.Parser
(
extractNgrams
,
selectNgrams
)
ngramsExtractionTest
::
IO
()
...
...
src-test/Ngrams/Lang/Fr.hs
View file @
f4e687d5
...
...
@@ -21,7 +21,7 @@ import Test.Hspec
import
Gargantext.Prelude
import
Gargantext.Types.Main
(
Language
(
..
))
import
Gargantext.
Ngrams
.Parser
(
extractNgrams
,
selectNgrams
)
import
Gargantext.
Text
.Parser
(
extractNgrams
,
selectNgrams
)
ngramsExtractionTest
::
IO
()
ngramsExtractionTest
=
hspec
$
do
...
...
src-test/Ngrams/Lang/Occurrences.hs
View file @
f4e687d5
...
...
@@ -22,7 +22,7 @@ import Test.Hspec
import
Data.Either
(
Either
(
Right
))
import
Gargantext.Prelude
import
Gargantext.
Ngrams
.Occurrences
(
parseOccurrences
)
import
Gargantext.
Text
.Occurrences
(
parseOccurrences
)
parsersTest
::
IO
()
parsersTest
=
hspec
$
do
...
...
src-test/Ngrams/Metrics.hs
View file @
f4e687d5
...
...
@@ -27,7 +27,7 @@ import Test.Hspec
import
Test.QuickCheck
import
Gargantext.Prelude
import
Gargantext.
Ngrams
.Metrics
import
Gargantext.
Text
.Metrics
#
if
!
MIN_VERSION_base
(
4
,
8
,
0
)
import
Control.Applicative
...
...
src/Gargantext/Network_hs
deleted
100644 → 0
View file @
50d95f87
module Data.Gargantext.Network where
import Data.Gargantext.Prelude
import Data.Map as DM
import Data.Vector as DV
type Measure a b c = DM.Map a (DM.Map b c)
-- UTCTime Paire Granularity [Candle]
-- GargVector Paire Granularity [Candle]
type GargVector a b c = DM.Map a ( DM.Map b c)
-- GargMatrix Granularity (Paire Paire) [Candle]
type GargMatrix a b c d = DM.Map a (FolioVector b c d)
-- GargMatrix Granularity (Paire Paire) [Candle]
type GargTensor a b c d e = DM.Map a (FolioMatrix b c d e)
--data PortGarg = PortGarg { _portFolioParameters :: Parameters
-- , _portGargData :: Garg
--}
toMeasure :: Granularity -> Paire -> [Candle]
-> Measure Granularity Paire Candle
toMeasure g c1 c2 cs = DM.fromList [(g,
src/Gargantext/
Ngrams
.hs
→
src/Gargantext/
Text
.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
Module : Gargantext.
Text
Description : Ngrams tools
Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3
...
...
@@ -16,30 +16,29 @@ n non negative integer
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
(
module
Gargantext
.
Ngrams
.
Letters
--, module Gargantext.
Ngrams
.Hetero
,
module
Gargantext
.
Ngrams
.
CoreNLP
,
module
Gargantext
.
Ngrams
.
Parser
,
module
Gargantext
.
Ngrams
.
Occurrences
,
module
Gargantext
.
Ngrams
.
TextMining
,
module
Gargantext
.
Ngrams
.
Metrics
module
Gargantext.
Text
(
module
Gargantext
.
Text
.
Letters
--, module Gargantext.
Text
.Hetero
,
module
Gargantext
.
Text
.
CoreNLP
,
module
Gargantext
.
Text
.
Parser
,
module
Gargantext
.
Text
.
Occurrences
,
module
Gargantext
.
Text
.
TextMining
,
module
Gargantext
.
Text
.
Metrics
,
Ngrams
(
..
),
ngrams
,
occ
,
sumOcc
,
text2fis
,
clean
,
ListName
(
..
),
equivNgrams
,
isGram
,
sentences
,
ngramsTest
--, module Gargantext.Ngrams.Words
)
where
import
Gargantext.
Ngrams
.Letters
--import Gargantext.
Ngrams
.Hetero
import
Gargantext.
Ngrams
.CoreNLP
import
Gargantext.
Ngrams
.Parser
import
Gargantext.
Text
.Letters
--import Gargantext.
Text
.Hetero
import
Gargantext.
Text
.CoreNLP
import
Gargantext.
Text
.Parser
import
Gargantext.
Ngrams
.Occurrences
import
Gargantext.
Ngrams
.TextMining
--import Gargantext.
Ngrams
.Words
import
Gargantext.
Text
.Occurrences
import
Gargantext.
Text
.TextMining
--import Gargantext.
Text
.Words
import
Gargantext.
Ngrams
.Metrics
import
qualified
Gargantext.
Ngrams
.FrequentItemSet
as
FIS
import
Gargantext.
Text
.Metrics
import
qualified
Gargantext.
Text
.FrequentItemSet
as
FIS
-----------------------------------------------------------------
import
Data.List
(
sort
)
...
...
@@ -152,10 +151,9 @@ isStop c = c `elem` ['.','?','!']
-- | Tests
-- TODO http://hackage.haskell.org/package/tokenize-0.3.0/docs/NLP-Tokenize-Text.html
ngramsTest
=
ws
ngramsTest
fp
=
ws
where
txt
=
concat
<$>
lines
<$>
clean
<$>
readFile
"Giono-arbres.txt"
txt
=
concat
<$>
lines
<$>
clean
<$>
readFile
fp
-- | Number of sentences
ls
=
sentences
<$>
txt
-- | Number of monograms used in the full text
...
...
@@ -165,6 +163,6 @@ ngramsTest = ws
-- group ngrams
ocs
=
occ
<$>
ws
--
src/Gargantext/
Ngrams
/Analysis.hs
→
src/Gargantext/
Text
/Analysis.hs
View file @
f4e687d5
...
...
@@ -11,7 +11,7 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.Analysis
module
Gargantext.
Text
.Analysis
where
import
Gargantext.Prelude
(
undefined
,
IO
(),
Int
())
...
...
src/Gargantext/
Ngrams
/CoreNLP.hs
→
src/Gargantext/
Text
/CoreNLP.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.CoreNLP
Module : Gargantext.
Text
.CoreNLP
Description : CoreNLP module
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
...
...
@@ -16,7 +16,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module
Gargantext.
Ngrams
.CoreNLP
where
module
Gargantext.
Text
.CoreNLP
where
import
Data.Aeson.TH
(
deriveJSON
)
import
GHC.Generics
...
...
src/Gargantext/
Ngrams
/FrequentItemSet.hs
→
src/Gargantext/
Text
/FrequentItemSet.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.FrequentItemSet
Module : Gargantext.
Text
.FrequentItemSet
Description : Ngrams tools
Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3
...
...
@@ -13,7 +13,7 @@ Domain Specific Language to manage Frequent Item Set (FIS)
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.FrequentItemSet
module
Gargantext.
Text
.FrequentItemSet
(
Fis
,
Size
,
occ
,
cooc
,
all
,
between
...
...
src/Gargantext/
Ngrams
/Hetero.purs
→
src/Gargantext/
Text
/Hetero.purs
View file @
f4e687d5
module Gargantext.
Ngrams
.Hetero where
module Gargantext.
Text
.Hetero where
import GHC.Real as R
import Data.Set as S
...
...
@@ -12,8 +12,8 @@ import Gargantext.Database.Gargandb
import Gargantext.Database.Private
--import Gargantext.Utils.Chronos
import Gargantext.
Ngrams
.Words (cleanText)
import Gargantext.
Ngrams
.Count (occurrences)
import Gargantext.
Text
.Words (cleanText)
import Gargantext.
Text
.Count (occurrences)
import Gargantext.Database.Simple
...
...
src/Gargantext/
Ngrams
/Lang/En.hs
→
src/Gargantext/
Text
/Lang/En.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Lang.En
Module : Gargantext.
Text
.Lang.En
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.
Ngrams
.Lang.En
(
selectNgrams
,
groupNgrams
,
textTest
)
where
module
Gargantext.
Text
.Lang.En
(
selectNgrams
,
groupNgrams
,
textTest
)
where
import
Gargantext.Prelude
import
Data.Text
(
Text
)
...
...
src/Gargantext/
Ngrams
/Lang/Fr.hs
→
src/Gargantext/
Text
/Lang/Fr.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Lang.Fr
Module : Gargantext.
Text
.Lang.Fr
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.
Ngrams
.Lang.Fr
(
selectNgrams
,
groupNgrams
,
textTest
)
module
Gargantext.
Text
.Lang.Fr
(
selectNgrams
,
groupNgrams
,
textTest
)
where
import
Gargantext.Prelude
...
...
src/Gargantext/
Ngrams
/Letters.hs
→
src/Gargantext/
Text
/Letters.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Letters
Module : Gargantext.
Text
.Letters
Description : Ngrams.Letters module
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
...
...
@@ -14,7 +14,7 @@ Sugar to work on letters with Text.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.
Ngrams
.Letters
where
module
Gargantext.
Text
.Letters
where
import
qualified
Data.Text.Lazy
as
DTL
-- import qualified Data.Text.Lazy.IO as DTLIO
...
...
src/Gargantext/
Ngrams
/List.hs
→
src/Gargantext/
Text
/List.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.List
Module : Gargantext.
Text
.List
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -13,11 +13,11 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.List
where
module
Gargantext.
Text
.List
where
import
Data.Maybe
import
Data.List
(
filter
)
import
Gargantext.
Ngrams
import
Gargantext.
Text
import
Gargantext.Prelude
graph
::
[
Ngrams
]
->
[
Ngrams
]
...
...
src/Gargantext/
Ngrams
/Metrics.hs
→
src/Gargantext/
Text
/Metrics.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Metrics
Module : Gargantext.
Text
.Metrics
Description : Short description
Copyright : (c) Some Guy, 2013
Someone Else, 2014
...
...
@@ -13,7 +13,7 @@ Mainly reexport functions in @Data.Text.Metrics@
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.Metrics
(
levenshtein
module
Gargantext.
Text
.Metrics
(
levenshtein
,
levenshteinNorm
,
damerauLevenshtein
,
damerauLevenshteinNorm
...
...
src/Gargantext/
Ngrams
/Occurrences.hs
→
src/Gargantext/
Text
/Occurrences.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Occurrences
Module : Gargantext.
Text
.Occurrences
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.
Ngrams
.Occurrences
where
module
Gargantext.
Text
.Occurrences
where
import
Gargantext.Prelude
...
...
src/Gargantext/
Ngrams
/Parser.hs
→
src/Gargantext/
Text
/Parser.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Parser
Module : Gargantext.
Text
.Parser
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -15,15 +15,15 @@ commentary with @some markup@.
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module
Gargantext.
Ngrams
.Parser
where
module
Gargantext.
Text
.Parser
where
import
Gargantext.Prelude
import
Gargantext.
Ngrams
.CoreNLP
import
Gargantext.
Text
.CoreNLP
import
Data.Text
hiding
(
map
)
import
Gargantext.Types.Main
(
Language
(
..
))
import
qualified
Gargantext.
Ngrams
.Lang.En
as
En
import
qualified
Gargantext.
Ngrams
.Lang.Fr
as
Fr
import
qualified
Gargantext.
Text
.Lang.En
as
En
import
qualified
Gargantext.
Text
.Lang.Fr
as
Fr
type
SNgrams
=
(
Text
,
Text
,
Text
)
...
...
src/Gargantext/Parsers.hs
→
src/Gargantext/
Text/
Parsers.hs
View file @
f4e687d5
...
...
@@ -20,7 +20,7 @@ please follow the types.
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.Parsers
-- (parse, FileFormat(..))
module
Gargantext.
Text.
Parsers
-- (parse, FileFormat(..))
where
import
Gargantext.Prelude
...
...
@@ -46,7 +46,7 @@ import Path.IO (resolveFile')
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Data.String
(
String
())
import
Gargantext.Parsers.WOS
(
wosParser
)
import
Gargantext.
Text.
Parsers.WOS
(
wosParser
)
---- import Gargantext.Parsers.XML (xmlParser)
---- import Gargantext.Parsers.DOC (docParser)
---- import Gargantext.Parsers.ODT (odtParser)
...
...
src/Gargantext/Parsers/CSV_hs
→
src/Gargantext/
Text/
Parsers/CSV_hs
View file @
f4e687d5
File moved
src/Gargantext/Parsers/Date.hs
→
src/Gargantext/
Text/
Parsers/Date.hs
View file @
f4e687d5
{-|
Module : Gargantext.Parsers.Date
Module : Gargantext.
Text.
Parsers.Date
Description : Some utils to parse dates
Copyright : (c) CNRS 2017-present
License : AGPL + CECILL v3
...
...
@@ -18,7 +18,7 @@ DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Parsers.Date
(
parseDate1
,
Lang
(
FR
,
EN
),
parseDate
,
fromRFC3339
,
parseTimeOfDay
,
getMultiplicator
)
where
module
Gargantext.
Text.
Parsers.Date
(
parseDate1
,
Lang
(
FR
,
EN
),
parseDate
,
fromRFC3339
,
parseTimeOfDay
,
getMultiplicator
)
where
import
Gargantext.Prelude
import
Prelude
(
toInteger
,
div
,
otherwise
,
(
++
))
...
...
src/Gargantext/Parsers/RIS_hs
→
src/Gargantext/
Text/
Parsers/RIS_hs
View file @
f4e687d5
File moved
src/Gargantext/Parsers/Utils_hs
→
src/Gargantext/
Text/
Parsers/Utils_hs
View file @
f4e687d5
File moved
src/Gargantext/Parsers/WOS.hs
→
src/Gargantext/
Text/
Parsers/WOS.hs
View file @
f4e687d5
{-|
Module : Gargantext.Parsers.WOS
Module : Gargantext.
Text.
Parsers.WOS
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Parsers.WOS
(
wosParser
)
where
module
Gargantext.
Text.
Parsers.WOS
(
wosParser
)
where
-- TOFIX : Should import Gargantext.Prelude here
import
Prelude
hiding
(
takeWhile
,
take
,
concat
,
readFile
,
lines
,
concat
)
...
...
src/Gargantext/Parsers/XML_hs
→
src/Gargantext/
Text/
Parsers/XML_hs
View file @
f4e687d5
File moved
src/Gargantext/
Ngrams
/Stem/En.hs
→
src/Gargantext/
Text
/Stem/En.hs
View file @
f4e687d5
...
...
@@ -17,7 +17,7 @@ Adapted from:
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.Stem.En
module
Gargantext.
Text
.Stem.En
where
import
Control.Monad
...
...
src/Gargantext/
Ngrams
/TFICF.hs
→
src/Gargantext/
Text
/TFICF.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.TFICF
Module : Gargantext.
Text
.TFICF
Description : TFICF Ngrams tools
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
...
...
@@ -15,7 +15,7 @@ Definition of TFICF
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.TFICF
where
module
Gargantext.
Text
.TFICF
where
import
GHC.Generics
(
Generic
)
...
...
src/Gargantext/
Ngrams
/TextMining.hs
→
src/Gargantext/
Text
/TextMining.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.TextMining
Module : Gargantext.
Text
.TextMining
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -13,7 +13,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.
Ngrams
.TextMining
where
module
Gargantext.
Text
.TextMining
where
import
Gargantext.Prelude
import
Data.Ord
(
Ordering
(
LT
,
GT
),
compare
)
...
...
src/Gargantext/
Ngrams
/Token/Text.hs
→
src/Gargantext/
Text
/Token/Text.hs
View file @
f4e687d5
{-|
Module : Gargantext.
Ngrams
.Token.Text
Module : Gargantext.
Text
.Token.Text
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
...
...
@@ -13,7 +13,7 @@ Inspired from https://bitbucket.org/gchrupala/lingo/overview
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.
Ngrams
.Token.Text
module
Gargantext.
Text
.Token.Text
(
EitherList
(
..
)
,
Tokenizer
,
tokenize
...
...
@@ -32,8 +32,6 @@ where
import
qualified
Data.Char
as
Char
import
Data.Maybe
import
Control.Monad.Instances
()
import
Control.Applicative
import
Control.Monad
import
Data.Text
(
Text
)
...
...
@@ -49,6 +47,18 @@ import qualified Data.Text as T
---
-- > myTokenizer :: Tokenizer
-- > myTokenizer = whitespace >=> allPunctuation
-- examples :: [Text]
-- examples =
-- ["This shouldn't happen."
-- ,"Some 'quoted' stuff"
-- ,"This is a URL: http://example.org."
-- ,"How about an email@example.com"
-- ,"ReferenceError #1065 broke my debugger!"
-- ,"I would've gone."
-- ,"They've been there."
-- ,"Hyphen-words"
-- ,"Yes/No questions"
-- ]
---
type
Tokenizer
=
Text
->
EitherList
Text
Text
...
...
@@ -81,8 +91,8 @@ uris x | isUri x = E [Left x]
punctuation
::
Tokenizer
punctuation
=
finalPunctuation
>=>
initialPunctuation
hyphens
::
Tokenizer
hyphens
xs
=
E
[
Right
w
|
w
<-
T
.
split
(
==
'-'
)
xs
]
--
hyphens :: Tokenizer
--
hyphens xs = E [Right w | w <- T.split (=='-') xs ]
-- | Split off word-final punctuation
finalPunctuation
::
Tokenizer
...
...
@@ -152,16 +162,3 @@ unwrap :: Either a a -> a
unwrap
(
Left
x
)
=
x
unwrap
(
Right
x
)
=
x
examples
::
[
Text
]
examples
=
[
"This shouldn't happen."
,
"Some 'quoted' stuff"
,
"This is a URL: http://example.org."
,
"How about an email@example.com"
,
"ReferenceError #1065 broke my debugger!"
,
"I would've gone."
,
"They've been there."
,
"Hyphen-words"
,
"Yes/No questions"
]
src/Gargantext/
Ngrams
/Word2Vec_hs
→
src/Gargantext/
Text
/Word2Vec_hs
View file @
f4e687d5
File moved
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment