Commit 8e257d7c authored by Mudada

Parse every result of searx + article -> io article to handle errors (maybe either...)
parent f0a8c2e6
...@@ -3,15 +3,18 @@ ...@@ -3,15 +3,18 @@
module Main where module Main where
import SEARX import SEARX
import SEARX.Client
import System.Directory import System.Directory
main :: IO () main :: IO ()
main = do main = do
res <- getMetadataWith "ia" 1 res <- getMetadataWith "abeille" 1
case res of case res of
(Left err) -> print err (Left err) -> print err
(Right r) -> do (Right r) -> do
let urls = (_document_id) <$> (_documents_hits r)
fp <- setUpDirectory fp <- setUpDirectory
article <- parseWebsite fp "https://www.lemonde.fr/politique/article/2019/10/07/apres-l-attentat-a-la-prefecture-de-police-christophe-castaner-sous-le-feu-des-critiques_6014514_823448.html" print urls
articles <- sequence $ parseWebsite fp <$> urls
removeDirectoryRecursive $ fp <> "/readability" removeDirectoryRecursive $ fp <> "/readability"
print article print articles
...@@ -64,7 +64,7 @@ setUpDirectory = ...@@ -64,7 +64,7 @@ setUpDirectory =
getCurrentDirectory getCurrentDirectory
parseWebsite :: FilePath -> T.Text -> IO Article parseWebsite :: FilePath -> T.Text -> IO (Maybe Article)
parseWebsite tmpdir url = parseWebsite tmpdir url =
do do
withJSSession withJSSession
...@@ -73,11 +73,14 @@ parseWebsite tmpdir url = ...@@ -73,11 +73,14 @@ parseWebsite tmpdir url =
var {JSDOM} = require('jsdom'); var {JSDOM} = require('jsdom');
var rp = require('request-promise-native'); var rp = require('request-promise-native');
var Readability = require('./readability'); var Readability = require('./readability');
const val = await rp($url); try {
const doc = new JSDOM(val, {url: $url}) const val = await rp($url);
const reader = new Readability(doc.window.document); const doc = new JSDOM(val, {url: $url})
const article = reader.parse(); const reader = new Readability(doc.window.document);
return article; return reader.parse();
} catch (err) {
return null;
}
|] |]
specConcatEith :: Semigroup a => Either b a -> Either b a -> Either b a specConcatEith :: Semigroup a => Either b a -> Either b a -> Either b a
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment