Commit bb469f39 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[TYPES] group type for lems

parent 0ad7fc65
Pipeline #1353 failed with stage
...@@ -12,6 +12,7 @@ Portability : POSIX ...@@ -12,6 +12,7 @@ Portability : POSIX
module Gargantext.Core module Gargantext.Core
where where
import Data.Text (Text)
import Data.Aeson import Data.Aeson
import Data.Either(Either(Left)) import Data.Either(Either(Left))
import Data.Hashable (Hashable) import Data.Hashable (Hashable)
...@@ -67,6 +68,9 @@ instance HasDBid Lang where ...@@ -67,6 +68,9 @@ instance HasDBid Lang where
fromDBid 2 = EN fromDBid 2 = EN
fromDBid _ = panic "HasDBid lang, not implemented" fromDBid _ = panic "HasDBid lang, not implemented"
------------------------------------------------------------------------
type Form = Text
type Lem = Text
------------------------------------------------------------------------ ------------------------------------------------------------------------
data PosTagAlgo = CoreNLP data PosTagAlgo = CoreNLP
deriving (Show, Read, Eq, Ord, Generic) deriving (Show, Read, Eq, Ord, Generic)
......
...@@ -17,16 +17,18 @@ Portability : POSIX ...@@ -17,16 +17,18 @@ Portability : POSIX
module Gargantext.Core.Text.List.Group.WithStem module Gargantext.Core.Text.List.Group.WithStem
where where
import Data.HashMap.Strict (HashMap)
import Data.HashSet (HashSet) import Data.HashSet (HashSet)
import Data.Map (Map) import Data.Map (Map)
import Data.Maybe (catMaybes) import Data.Maybe (catMaybes)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..), PosTagAlgo(..), Form, Lem)
import Gargantext.Core.Text.List.Group.Prelude import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Patch import Gargantext.Core.Text.List.Social.Patch
import Gargantext.Core.Text.List.Social.Prelude import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem (stem) import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.HashSet as Set import qualified Data.HashSet as Set
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -57,23 +59,33 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang ...@@ -57,23 +59,33 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
, unGroupParams_stopSize :: !StopSize , unGroupParams_stopSize :: !StopSize
} }
| GroupIdentity | GroupIdentity
| GroupWithPosTag { _gwl_lang :: !Lang
, _gwl_algo :: !PosTagAlgo
, _gwl_map :: !(HashMap Form Lem)
}
deriving (Eq) deriving (Eq)
------------------------------------------------------------------------ ------------------------------------------------------------------------
groupWith :: GroupParams groupWith :: GroupParams
-> NgramsTerm -> NgramsTerm
-> NgramsTerm -> NgramsTerm
groupWith GroupIdentity = identity groupWith GroupIdentity t = identity t
groupWith (GroupParams l _m _n _) = groupWith (GroupParams l _m _n _) t =
NgramsTerm NgramsTerm
. Text.intercalate " " $ Text.intercalate " "
. map (stem l) $ map (stem l)
-- . take n -- . take n
. List.sort $ List.sort
-- . (List.filter (\t -> Text.length t > m)) -- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " " $ Text.splitOn " "
. Text.replace "-" " " $ Text.replace "-" " "
. unNgramsTerm $ unNgramsTerm t
groupWith (GroupWithPosTag _ _ m) t =
case HashMap.lookup (unNgramsTerm t) m of
Nothing -> t
Just t' -> NgramsTerm t'
-------------------------------------------------------------------- --------------------------------------------------------------------
stemPatches :: GroupParams stemPatches :: GroupParams
-> HashSet NgramsTerm -> HashSet NgramsTerm
...@@ -81,7 +93,8 @@ stemPatches :: GroupParams ...@@ -81,7 +93,8 @@ stemPatches :: GroupParams
stemPatches groupParams = patches stemPatches groupParams = patches
. Map.fromListWith (<>) . Map.fromListWith (<>)
. map (\ng -> ( groupWith groupParams ng . map (\ng -> ( groupWith groupParams ng
, Set.singleton ng) , Set.singleton ng
)
) )
. Set.toList . Set.toList
......
...@@ -136,9 +136,6 @@ SELECT terms,id FROM ins_form_ret ...@@ -136,9 +136,6 @@ SELECT terms,id FROM ins_form_ret
|] |]
type Form = Text
type Lem = Text
selectLems :: [Ngrams] -> Cmd err [(Form, Lem)] selectLems :: [Ngrams] -> Cmd err [(Form, Lem)]
selectLems ns = runPGSQuery querySelectLems (PGS.Only $ Values fields (map toRow ns)) selectLems ns = runPGSQuery querySelectLems (PGS.Only $ Values fields (map toRow ns))
where where
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment