Commit e14b2fc6 authored by Alexandre Delanoë

[FEAT] Iramuteq parser (WIP)

parent 2ddd6408
Pipeline #3943 failed with stage
in 28 minutes and 33 seconds
......@@ -263,6 +263,7 @@ addToCorpusWithForm user cid (NewWithForm ft ff d l _n sel) jobHandle = do
CSV -> Parser.parseFormatC Parser.CsvGargV3
WOS -> Parser.parseFormatC Parser.WOS
PresseRIS -> Parser.parseFormatC Parser.RisPresse
Iramuteq -> Parser.parseFormatC Parser.Iramuteq
-- TODO granularity of the logStatus
let data' = case ff of
......
......@@ -14,6 +14,7 @@ data FileType = CSV
| CSV_HAL
| PresseRIS
| WOS
| Iramuteq
deriving (Eq, Show, Generic)
-- Schema instance for 'FileType'. No methods are defined here, so the class
-- defaults are used (presumably the Generic-based schema, since 'FileType'
-- derives 'Generic' -- TODO confirm).
instance ToSchema FileType
-- | Random 'FileType' values for generated examples/tests.
--
-- Only 'CSV' and 'PresseRIS' are ever produced; the other constructors are
-- never generated.
-- NOTE(review): confirm whether this restriction is intentional.
instance Arbitrary FileType where
  arbitrary = elements candidates
    where
      candidates = [CSV, PresseRIS]
......@@ -26,7 +27,8 @@ instance FromHttpApiData FileType where
parseUrlPiece "CSV_HAL" = pure CSV_HAL
parseUrlPiece "PresseRis" = pure PresseRIS
parseUrlPiece "WOS" = pure WOS
parseUrlPiece _ = pure CSV -- TODO error here
parseUrlPiece "Iramuteq" = pure Iramuteq
-- Catch-all: an unrecognised file type is reported as a parse failure
-- ('Left') instead of calling 'panic'. 'parseUrlPiece' already returns an
-- 'Either Text', so bad client input should surface as an ordinary error
-- value rather than as an exception raised from pure code.
parseUrlPiece _ = Left "[G.A.A.Node.Corpus.New] File Type not implemented (yet)"
-- | Render a 'FileType' as a URL piece.
--
-- 'PresseRIS' is written as @"PresseRis"@ because that is the spelling the
-- 'FromHttpApiData' clause for it matches; plain 'show' would produce
-- @"PresseRIS"@, which that clause does not accept, so the value would not
-- round-trip. Every other constructor is rendered with its 'Show' name.
instance ToHttpApiData FileType where
  toUrlPiece PresseRIS = pack "PresseRis"
  toUrlPiece ft        = pack (show ft)
......
......@@ -118,6 +118,16 @@ parseFormatC WOS Plain bs = do
.| mapC (map $ first WOS.keys)
.| mapC (map $ both decodeUtf8)
.| mapMC (toDoc WOS)) ) <$> eDocs
-- Plain (non-zipped) Iramuteq input.
--
-- The raw bytes are parsed with @runParser' Iramuteq@; on success the result
-- is paired with the number of parsed documents and a conduit that, for each
-- document, maps the field names through 'Iramuteq.keys', decodes both halves
-- of every pair from UTF-8, replaces underscores with spaces in the values,
-- and finally builds the document with @toDoc Iramuteq@.
parseFormatC Iramuteq Plain bs =
  pure $ fmap countedStream (runParser' Iramuteq bs)
  where
    -- Underscores in field values are turned into spaces before 'toDoc'.
    unUnderscore = map (second (Text.replace "_" " "))

    countedStream parsedDocs =
      ( Just (fromIntegral (length parsedDocs))
      , yieldMany parsedDocs
          .| mapC (map (first Iramuteq.keys))
          .| mapC (map (both decodeUtf8))
          .| mapMC (toDoc Iramuteq . unUnderscore)
      )
parseFormatC ft ZIP bs = do
path <- liftBase $ emptySystemTempFile "parsed-zip"
liftBase $ DB.writeFile path bs
......
......@@ -25,8 +25,8 @@ import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
import Data.ByteString (ByteString, intercalate)
import Gargantext.Prelude hiding (takeWhile, take)
import qualified Data.List as DL
-------------------------------------------------------------
-------------------------------------------------------------
parser :: Parser [[(ByteString, ByteString)]]
parser = do
n <- notice "TY -"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment