Commit e14b2fc6 authored by Alexandre Delanoë

[FEAT] Iramuteq parser (WIP)

parent 2ddd6408
Pipeline #3943 failed with stage
in 28 minutes and 33 seconds
...@@ -263,6 +263,7 @@ addToCorpusWithForm user cid (NewWithForm ft ff d l _n sel) jobHandle = do ...@@ -263,6 +263,7 @@ addToCorpusWithForm user cid (NewWithForm ft ff d l _n sel) jobHandle = do
CSV -> Parser.parseFormatC Parser.CsvGargV3 CSV -> Parser.parseFormatC Parser.CsvGargV3
WOS -> Parser.parseFormatC Parser.WOS WOS -> Parser.parseFormatC Parser.WOS
PresseRIS -> Parser.parseFormatC Parser.RisPresse PresseRIS -> Parser.parseFormatC Parser.RisPresse
Iramuteq -> Parser.parseFormatC Parser.Iramuteq
-- TODO granularity of the logStatus -- TODO granularity of the logStatus
let data' = case ff of let data' = case ff of
......
...@@ -14,6 +14,7 @@ data FileType = CSV ...@@ -14,6 +14,7 @@ data FileType = CSV
| CSV_HAL | CSV_HAL
| PresseRIS | PresseRIS
| WOS | WOS
| Iramuteq
deriving (Eq, Show, Generic) deriving (Eq, Show, Generic)
instance ToSchema FileType instance ToSchema FileType
instance Arbitrary FileType where arbitrary = elements [CSV, PresseRIS] instance Arbitrary FileType where arbitrary = elements [CSV, PresseRIS]
...@@ -26,7 +27,8 @@ instance FromHttpApiData FileType where ...@@ -26,7 +27,8 @@ instance FromHttpApiData FileType where
parseUrlPiece "CSV_HAL" = pure CSV_HAL parseUrlPiece "CSV_HAL" = pure CSV_HAL
parseUrlPiece "PresseRis" = pure PresseRIS parseUrlPiece "PresseRis" = pure PresseRIS
parseUrlPiece "WOS" = pure WOS parseUrlPiece "WOS" = pure WOS
parseUrlPiece _ = pure CSV -- TODO error here parseUrlPiece "Iramuteq" = pure Iramuteq
parseUrlPiece _ = panic "[G.A.A.Node.Corpus.New] File Type not implemented (yet)"
instance ToHttpApiData FileType where instance ToHttpApiData FileType where
toUrlPiece = pack . show toUrlPiece = pack . show
......
...@@ -118,6 +118,16 @@ parseFormatC WOS Plain bs = do ...@@ -118,6 +118,16 @@ parseFormatC WOS Plain bs = do
.| mapC (map $ first WOS.keys) .| mapC (map $ first WOS.keys)
.| mapC (map $ both decodeUtf8) .| mapC (map $ both decodeUtf8)
.| mapMC (toDoc WOS)) ) <$> eDocs .| mapMC (toDoc WOS)) ) <$> eDocs
-- Iramuteq input supplied as a plain (non-zipped) payload.
--
-- The bytes are handed to runParser' with the Iramuteq format. The parse
-- result is mapped over (presumably an Either; confirm against runParser'),
-- turning the parsed documents into a pair:
--   * the number of parsed documents, wrapped in Just;
--   * a conduit that yields each document, rewrites the first component of
--     every pair with Iramuteq.keys, decodes both components with decodeUtf8,
--     replaces "_" with " " in the second component, and finally builds the
--     document with toDoc Iramuteq.
parseFormatC Iramuteq Plain bs =
  pure $ (\parsed ->
            ( Just (fromIntegral (length parsed))
            , yieldMany parsed
                .| mapC (map (first Iramuteq.keys))
                .| mapC (map (both decodeUtf8))
                   -- Underscores in values are turned into spaces just before toDoc.
                .| mapMC (\fields -> toDoc Iramuteq (map (second (Text.replace "_" " ")) fields))
            )
         ) <$> runParser' Iramuteq bs
parseFormatC ft ZIP bs = do parseFormatC ft ZIP bs = do
path <- liftBase $ emptySystemTempFile "parsed-zip" path <- liftBase $ emptySystemTempFile "parsed-zip"
liftBase $ DB.writeFile path bs liftBase $ DB.writeFile path bs
......
...@@ -25,8 +25,8 @@ import Data.Attoparsec.ByteString.Char8 (isEndOfLine) ...@@ -25,8 +25,8 @@ import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
import Data.ByteString (ByteString, intercalate) import Data.ByteString (ByteString, intercalate)
import Gargantext.Prelude hiding (takeWhile, take) import Gargantext.Prelude hiding (takeWhile, take)
import qualified Data.List as DL import qualified Data.List as DL
-------------------------------------------------------------
-------------------------------------------------------------
parser :: Parser [[(ByteString, ByteString)]] parser :: Parser [[(ByteString, ByteString)]]
parser = do parser = do
n <- notice "TY -" n <- notice "TY -"
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment