Commit 8e99f0b6 authored by Mael NICOLAS

add a markupFormat field to the Page type #4

parent c63495f4
...@@ -40,11 +40,15 @@ import Data.Either ...@@ -40,11 +40,15 @@ import Data.Either
-- (since there is no abstract) will see if other data is relevant. -- (since there is no abstract) will see if other data is relevant.
data Page = Page data Page = Page
{ {
_title :: Maybe T.Text _markupFormat :: MarkupFormat
, _title :: Maybe T.Text
, _text :: Maybe T.Text , _text :: Maybe T.Text
} }
deriving (Show) deriving (Show)
data MarkupFormat = Mediawiki | Plaintext
deriving (Show)
parseRevision :: MonadThrow m => ConduitT Event o m (Maybe T.Text) parseRevision :: MonadThrow m => ConduitT Event o m (Maybe T.Text)
parseRevision = parseRevision =
tagNoAttr "{http://www.mediawiki.org/xml/export-0.10/}revision" $ do tagNoAttr "{http://www.mediawiki.org/xml/export-0.10/}revision" $ do
...@@ -55,6 +59,7 @@ parseRevision = ...@@ -55,6 +59,7 @@ parseRevision =
$ ignoreAnyTreeContent $ ignoreAnyTreeContent
return text return text
-- | Utility function that matches everything but the given tag.
tagUntil :: Name -> NameMatcher Name tagUntil :: Name -> NameMatcher Name
tagUntil name = matching (/= name) tagUntil name = matching (/= name)
...@@ -67,6 +72,8 @@ ignoreExcept name f = do ...@@ -67,6 +72,8 @@ ignoreExcept name f = do
_ <- consumeExcept name _ <- consumeExcept name
tagIgnoreAttrs (matching (==name)) f tagIgnoreAttrs (matching (==name)) f
-- | Utility function that consumes everything but the given tag;
-- useful because we have to consume all of the data.
consumeExcept :: MonadThrow m => Name -> ConduitT Event o m () consumeExcept :: MonadThrow m => Name -> ConduitT Event o m ()
consumeExcept = many_ . ignoreTreeContent . tagUntil consumeExcept = many_ . ignoreTreeContent . tagUntil
...@@ -80,19 +87,20 @@ parsePage = ...@@ -80,19 +87,20 @@ parsePage =
revision <- revision <-
parseRevision parseRevision
many_ $ ignoreAnyTreeContent many_ $ ignoreAnyTreeContent
return $ Page title revision return $ Page Mediawiki title revision
parseMediawiki :: MonadThrow m => ConduitT Event Page m (Maybe ()) parseMediawiki :: MonadThrow m => ConduitT Event Page m (Maybe ())
parseMediawiki = parseMediawiki =
tagIgnoreAttrs "{http://www.mediawiki.org/xml/export-0.10/}mediawiki" tagIgnoreAttrs "{http://www.mediawiki.org/xml/export-0.10/}mediawiki"
$ manyYield' parsePage $ manyYield' parsePage
-- | Need to wrap the result in IO to parse and to combine it. -- | Convert a Mediawiki Page to a Plaintext Page.
-- Need to wrap the result in IO to parse and to combine it.
mediawikiPageToPlain :: Page -> IO Page mediawikiPageToPlain :: Page -> IO Page
mediawikiPageToPlain page = do mediawikiPageToPlain page = do
title <- mediaToPlain $ _title page title <- mediaToPlain $ _title page
revision <- mediaToPlain $ _text page revision <- mediaToPlain $ _text page
return $ Page title revision return $ Page Plaintext title revision
where mediaToPlain media = where mediaToPlain media =
case media of case media of
(Nothing) -> return Nothing (Nothing) -> return Nothing
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment