Commit 349869d9 authored by Mudada

Super ugly inline-js parsing

parent a8a0f838
......@@ -21,6 +21,8 @@ import SEARX.Client
import Data.Foldable
import qualified Data.Text as T
import Data.Aeson
import GHC.Generics
{-
debug purpose, use getMetadataWith
......@@ -37,19 +39,39 @@ getMetadataWith2 q = do
// Title
article.close();
-}
parseWebsite :: T.Text -> IO Int
-- | Result type of 'parseWebsite'.
--
-- Every field is wrapped in 'Maybe', so a value can be built even when a
-- given piece of information is absent. The JSON instances are derived
-- rather than hand-written, so (presumably, with aeson's default generic
-- options -- confirm) the JSON keys are exactly the field names below.
--
-- NOTE(review): the field names look like they mirror the object returned
-- by Readability's @parse()@ call inside 'parseWebsite' -- confirm against
-- the Readability documentation.
--
-- NOTE(review): the selector 'length' has the same name as the Prelude
-- function; unqualified uses of @length@ in this module may be ambiguous.
data Article = Article
  { title       :: Maybe T.Text
  , byline      :: Maybe T.Text
  , dir         :: Maybe T.Text
  , content     :: Maybe T.Text
  , textContent :: Maybe T.Text
  , length      :: Maybe Int
  , excerpt     :: Maybe T.Text
  , siteName    :: Maybe T.Text
  }
  deriving (Show, Generic, ToJSON, FromJSON)
-- | Fetch a web page and run Mozilla's Readability parser over it, using a
-- Node.js session.
--
-- Steps, as written below:
--
-- 1. Create a fresh temporary directory with 'withSystemTempDirectory'.
-- 2. With that directory as the current directory, run each listed shell
--    command through 'callCommand' (npm setup, and a @git clone@ of the
--    Readability repository).
-- 3. Start a JS session whose 'nodeWorkDir' is the temporary directory and
--    evaluate the quasi-quoted JavaScript, which requests the page with
--    @request-promise-native@, wraps the response in a @JSDOM@ document,
--    runs @Readability@'s @parse()@ on it, and returns the resulting article.
--
-- The setup commands are part of this function's body, so they run on every
-- call.
--
-- NOTE(review): @$url@ inside the quasi-quote presumably splices in the
-- Haskell argument @url@ -- confirm against the inline-js quasi-quoter docs.
--
-- NOTE(review): this text appears to contain both sides of a diff hunk: two
-- command-list literals follow 'callCommand', and there are two @|]@
-- terminators (after @return 12; });@ and after @return article;@). Confirm
-- which lines are meant to remain before relying on this block.
parseWebsite :: T.Text -> IO Article
parseWebsite url =
withSystemTempDirectory "" $ \tmpdir -> do
withCurrentDirectory tmpdir $
traverse_
callCommand
-- NOTE(review): the next literal looks like the superseded (removed) command
-- list; the one after it looks like its replacement -- confirm.
["npm init --yes", "npm install --save node-readability"]
["npm init --yes", "npm install --save jsdom request-promise-native",
"git clone https://github.com/mozilla/readability.git"]
withJSSession
defJSSessionOpts {nodeWorkDir = Just tmpdir}
[block|
return 12;
});
|]
var {JSDOM} = require('jsdom');
var rp = require('request-promise-native');
var Readability = require('./readability');
const val = await rp($url);
const doc = new JSDOM(val, {url: $url})
const reader = new Readability(doc.window.document);
const article = reader.parse();
return article;
|]
specConcatEith :: Semigroup a => Either b a -> Either b a -> Either b a
specConcatEith (Left _) b = b
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment