Commit 8e257d7c authored by Mudada's avatar Mudada

Parse every result of searx, and change article -> IO article to handle
errors (maybe Either...).
parent f0a8c2e6
......@@ -3,15 +3,18 @@
module Main where
import SEARX
import SEARX.Client
import System.Directory
-- | Program entry point: calls 'getMetadataWith' for a fixed query and, on
-- success, runs 'parseWebsite' on the id of every returned hit, then prints
-- what was parsed.
--
-- NOTE(review): this listing looks like a diff rendered without +/- markers
-- or indentation, so pre- and post-commit lines appear side by side (e.g. the
-- two @res <-@ binds, and @print article@ next to @print articles@). Confirm
-- against the repository which lines belong to the committed version.
main :: IO ()
main = do
res <- getMetadataWith "ia" 1
res <- getMetadataWith "abeille" 1
case res of
-- Failure branch: only the error value is printed.
(Left err) -> print err
(Right r) -> do
-- Take the '_document_id' of each entry in '_documents_hits'; these values
-- are passed to 'parseWebsite' as URLs below.
let urls = (_document_id) <$> (_documents_hits r)
-- 'fp' is the base path handed to 'parseWebsite' and used for cleanup below.
fp <- setUpDirectory
-- Single hard-coded article URL (printed via @print article@).
article <- parseWebsite fp "https://www.lemonde.fr/politique/article/2019/10/07/apres-l-attentat-a-la-prefecture-de-police-christophe-castaner-sous-le-feu-des-critiques_6014514_823448.html"
print urls
-- Run 'parseWebsite' once per URL, in list order, collecting the results.
articles <- sequence $ parseWebsite fp <$> urls
-- Delete the "readability" directory under 'fp' once parsing is done.
-- NOTE(review): presumably created by 'setUpDirectory' — confirm.
removeDirectoryRecursive $ fp <> "/readability"
print article
print articles
......@@ -64,7 +64,7 @@ setUpDirectory =
getCurrentDirectory
parseWebsite :: FilePath -> T.Text -> IO Article
parseWebsite :: FilePath -> T.Text -> IO (Maybe Article)
parseWebsite tmpdir url =
do
withJSSession
......@@ -73,11 +73,14 @@ parseWebsite tmpdir url =
var {JSDOM} = require('jsdom');
var rp = require('request-promise-native');
var Readability = require('./readability');
const val = await rp($url);
const doc = new JSDOM(val, {url: $url})
const reader = new Readability(doc.window.document);
const article = reader.parse();
return article;
try {
const val = await rp($url);
const doc = new JSDOM(val, {url: $url})
const reader = new Readability(doc.window.document);
return reader.parse();
} catch (err) {
return null;
}
|]
specConcatEith :: Semigroup a => Either b a -> Either b a -> Either b a
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment