Commit a03304f5 authored by Mudada's avatar Mudada

made searx request recursive

parent 8e257d7c
...@@ -5,16 +5,70 @@ module Main where ...@@ -5,16 +5,70 @@ module Main where
import SEARX import SEARX
import SEARX.Client import SEARX.Client
import System.Directory import System.Directory
import Text.HTML.TagSoup
import Data.Maybe
import qualified Data.Text as T
main :: IO () emptyArticle :: Article
main = do emptyArticle = Article
res <- getMetadataWith "abeille" 1 {
title = Nothing
,byline = Nothing
,dir = Nothing
,content = Nothing
,textContent = Nothing
,SEARX.length = Nothing
,excerpt = Nothing
,siteName = Nothing
}
-- | Number of link-following levels still allowed; 'parseWebsiteReqWithFp'
-- stops recursing once this value is zero or below and decrements it by one
-- on every recursive step.
type Depth = Int
-- | Passed as the second argument to 'getMetadataWith' and used by
-- 'searxSearch' to cap how many hit URLs are kept.
type Limit = Int
-- | Search text handed to 'getMetadataWith' as its first argument.
type Query = T.Text
searxSearch :: Query -> Limit -> Depth -> IO [Maybe Article]
searxSearch q l d = do
res <- getMetadataWith q l
case res of case res of
(Left err) -> print err (Left err) -> return []
(Right r) -> do (Right r) -> do
let urls = (_document_id) <$> (_documents_hits r) let urls = take l $ _document_id <$> (_documents_hits r)
fp <- setUpDirectory parseWebsiteReq d urls
print urls
articles <- sequence $ parseWebsite fp <$> urls parseWebsiteReqWithFp :: FilePath -> Depth -> [T.Text] -> IO [Maybe Article]
removeDirectoryRecursive $ fp <> "/readability" parseWebsiteReqWithFp fp d urls
print articles | d <= 0 = do
art <- parseWebsite'
return art
| otherwise = do
articles <- parseWebsite'
d <-
parseWebsiteReqWithFp fp
(d - 1)
(getUrlsFromWebsite articles)
return $ d <> articles
where parseWebsite' =
sequence $ parseWebsite fp <$> urls
-- | Parse the given URLs down to the requested depth.
--
-- Obtains a directory path from 'setUpDirectory', delegates the actual
-- crawl to 'parseWebsiteReqWithFp', then removes the @readability@
-- sub-directory of that path before handing back the collected articles.
--
-- NOTE(review): the removal step is only reached when the crawl completes
-- without throwing; an exception leaves the directory in place.
parseWebsiteReq :: Depth -> [T.Text] -> IO [Maybe Article]
parseWebsiteReq d urls =
  setUpDirectory >>= \workDir ->
    parseWebsiteReqWithFp workDir d urls
      <* removeDirectoryRecursive (workDir <> "/readability")
-- | Collect every non-empty @href@ attribute found in the articles' content.
--
-- Each article's 'content' is parsed with 'parseTags' (a missing article or
-- missing content is treated as the empty text). Only opening tags are
-- inspected, and tags whose @href@ attribute is absent or empty are dropped.
-- Results keep the order of the input articles and of the tags within each.
getUrlsFromWebsite :: [Maybe Article] -> [T.Text]
getUrlsFromWebsite articles =
  [ href
  | article <- articles
  , tag <- parseTags (articleHtml article)
  , isTagOpen tag
  , let href = fromAttrib "href" tag
  , href /= ""
  ]
  where
    -- Fall back to the empty text when the article or its content is missing.
    articleHtml = fromMaybe "" . content . fromMaybe emptyArticle
-- searxSearch :: Query -> Limit -> Depth -> IO [Maybe Article]
-- | Entry point: run 'searxSearch' for @"abeille"@ with limit 10 and depth 1,
-- then print the resulting list of articles.
main :: IO ()
main = searxSearch "abeille" 10 1 >>= print
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment