Commit a03304f5 authored by Mudada

Made the SearX search request recursive.

parent 8e257d7c
@@ -5,16 +5,70 @@ module Main where
import SEARX
import SEARX.Client
import System.Directory
import Text.HTML.TagSoup
import Data.Maybe
import qualified Data.Text as T
-- NOTE(review): diff leftover — this pre-change 'main' is superseded by
-- the 'searxSearch'-based 'main' at the end of the file.  The fetched
-- result is bound but never used here; presumably the removed side of
-- the diff — confirm against the repository before keeping it.
main :: IO ()
main = do
res <- getMetadataWith "abeille" 1
-- | An 'Article' with every field unset.  Used as a safe fallback when
-- a parsed article is absent, so downstream code never needs a partial
-- accessor on 'Maybe Article'.
emptyArticle :: Article
emptyArticle =
  Article
    { title = Nothing
    , byline = Nothing
    , dir = Nothing
    , content = Nothing
    , textContent = Nothing
    , SEARX.length = Nothing
    , excerpt = Nothing
    , siteName = Nothing
    }
-- | Recursion depth for the crawl: at 0 only the current URLs are parsed.
type Depth = Int
-- | Maximum number of search hits whose URLs are followed (via 'take').
type Limit = Int
-- | Search query text forwarded to the SearX instance.
type Query = T.Text
-- | Run a SearX search and recursively parse the resulting pages.
--
-- Fetches metadata for query @q@, keeps at most @l@ result URLs, and
-- follows links found in the parsed pages @d@ levels deep.  If the
-- search itself fails, the error is reported on stdout and an empty
-- list is returned instead of being silently discarded.
searxSearch :: Query -> Limit -> Depth -> IO [Maybe Article]
searxSearch q l d = do
  res <- getMetadataWith q l
  case res of
    Left err -> do
      -- Surface the failure rather than swallowing it.
      print err
      return []
    Right r -> do
      -- Honour the caller's limit on how many hits to follow.
      let urls = take l $ _document_id <$> _documents_hits r
      parseWebsiteReq d urls
-- | Parse the given URLs into a scratch directory, then recursively
-- follow the links found in the parsed articles until the depth
-- counter reaches zero.  Results from deeper levels are prepended to
-- the current level's results.
parseWebsiteReqWithFp :: FilePath -> Depth -> [T.Text] -> IO [Maybe Article]
parseWebsiteReqWithFp fp d urls
  | d <= 0 = parseCurrent
  | otherwise = do
      articles <- parseCurrent
      -- Recurse one level shallower on the links extracted here.
      deeper <-
        parseWebsiteReqWithFp fp (d - 1) (getUrlsFromWebsite articles)
      return $ deeper <> articles
  where
    -- Parse every URL of the current level.
    parseCurrent = traverse (parseWebsite fp) urls
-- | Set up a working directory, run the recursive crawl, and guarantee
-- that the readability scratch directory is removed even when parsing
-- throws ('finally' replaces the unconditional cleanup, which the
-- original skipped on any exception).
parseWebsiteReq :: Depth -> [T.Text] -> IO [Maybe Article]
parseWebsiteReq d urls = do
  fp <- setUpDirectory
  parseWebsiteReqWithFp fp d urls
    `finally` removeDirectoryRecursive (fp <> "/readability")
-- | Extract every non-empty @href@ attribute from the HTML content of
-- the given articles.  Missing articles or missing content contribute
-- no links.
getUrlsFromWebsite :: [Maybe Article] -> [T.Text]
getUrlsFromWebsite articles =
  filter (/= "") (fromAttrib "href" <$> openTags)
  where
    -- All opening tags across every article's parsed HTML.
    openTags = filter isTagOpen (concatMap (parseTags . htmlOf) articles)
    -- HTML body of one article slot; absent article/content yields "".
    htmlOf = fromMaybe "" . content . fromMaybe emptyArticle
-- | Entry point: search "abeille", keep the first 10 hits, and follow
-- links one level deep, printing the parsed articles.
main :: IO ()
main = do
  articles <- searxSearch "abeille" 10 1
  print articles
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment