[refactoring] remove stack.yaml, add Protolude, migrate to cabal

parent 7b740eee
dist-newstyle/
.stack-work/ .stack-work/
*~ *~
...@@ -3,17 +3,18 @@ ...@@ -3,17 +3,18 @@
module Main where module Main where
import Data.LanguageCodes (ISO639_1(..))
import Data.Text qualified as T
import HAL
import HAL (getMetadataWith)
import HAL.Client
import HAL.Doc
import NeatInterpolation (text) import NeatInterpolation (text)
import Network.HTTP.Client (newManager) import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings) import Network.HTTP.Client.TLS (tlsManagerSettings)
import Protolude
import Servant.Client import Servant.Client
import HAL (getMetadataWith)
import HAL.Client
import HAL.Doc
import HAL
import Tree import Tree
import qualified Data.Text as T
yearReq = [text| yearReq = [text|
...@@ -64,8 +65,7 @@ imt = [ ...@@ -64,8 +65,7 @@ imt = [
main :: IO () main :: IO ()
main = do main = do
-- res <- getMetadataWith (generateRequestByStructID "artificial intelligence" imt) (Just 0) (Just 55) -- res <- getMetadataWith (generateRequestByStructID "artificial intelligence" imt) (Just 0) (Just 55)
res <- getMetadataWith (generateRequestByStructID "artificial intelligence" imt) (Just 0) (Just 55) res <- getMetadataWith (generateRequestByStructID "artificial intelligence" imt) (Just 0) (Just 55) (Just EN)
case res of case res of
(Left err) -> print err (Left err) -> print err
(Right val) -> print $ _docs val (Right val) -> print $ _docs val
-- Generated by stack2cabal
with-compiler: ghc-9.2.8
packages:
./
allow-older: *
allow-newer: *
...@@ -24,6 +24,7 @@ source-repository head ...@@ -24,6 +24,7 @@ source-repository head
location: https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal location: https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal
library library
ghc-options: -Wall -Werror
exposed-modules: exposed-modules:
HAL HAL
HAL.Client HAL.Client
...@@ -39,28 +40,33 @@ library ...@@ -39,28 +40,33 @@ library
default-extensions: default-extensions:
DataKinds DataKinds
DeriveGeneric DeriveGeneric
ImportQualifiedPost
NamedFieldPuns NamedFieldPuns
NoImplicitPrelude
OverloadedStrings OverloadedStrings
RecordWildCards RecordWildCards
TypeOperators TypeOperators
build-depends: build-depends:
aeson aeson >= 2.1.0 && < 2.3
, base >=4.7 && <5 , base >=4.7 && <5
, bytestring , bytestring >= 0.11.0 && < 0.13
, conduit , conduit >= 1.3.5 && < 1.4
, containers , containers >= 0.6.7 && < 0.7
, data-default , data-default >= 0.7.1.1 && < 0.8
, http-client , http-client >= 0.7.13.1 && < 0.8
, http-client-tls , http-client-tls >= 0.3.6.2 && < 0.4
, lens , iso639 >= 0.1.0.3 && < 0.2
, neat-interpolation , lens >= 5.2.2 && < 5.3
, scientific , neat-interpolation >= 0.5.1.3 && < 0.6
, servant , protolude >= 0.3.3 && < 0.4
, servant-client , scientific >= 0.3.7.0 && < 0.4
, split , servant >= 0.19 && < 0.21
, text , servant-client >= 0.19 && < 0.21
, utf8-string , split >= 0.2.3.5 && < 0.3
, vector , text >= 2.0.2 && < 2.1
, text-format >= 0.3.2.1 && < 0.4
, utf8-string >= 1.0.2 && < 1.1
, vector >= 0.13.0 && < 0.14
default-language: Haskell2010 default-language: Haskell2010
executable crawlerHAL-exe executable crawlerHAL-exe
...@@ -72,30 +78,34 @@ executable crawlerHAL-exe ...@@ -72,30 +78,34 @@ executable crawlerHAL-exe
default-extensions: default-extensions:
DataKinds DataKinds
DeriveGeneric DeriveGeneric
ImportQualifiedPost
NamedFieldPuns NamedFieldPuns
NoImplicitPrelude
OverloadedStrings OverloadedStrings
RecordWildCards RecordWildCards
TypeOperators TypeOperators
ghc-options: -threaded -rtsopts -with-rtsopts=-N ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: build-depends:
aeson aeson >= 2.1.0 && < 2.3
, base >=4.7 && <5 , base >=4.7 && <5
, bytestring , bytestring >= 0.11.0 && < 0.13
, conduit , conduit >= 1.3.5 && < 1.4
, containers , containers >= 0.6.7 && < 0.7
, crawlerHAL , crawlerHAL
, data-default , data-default >= 0.7.1.1 && < 0.8
, http-client , http-client >= 0.7.13.1 && < 0.8
, http-client-tls , http-client-tls >= 0.3.6.2 && < 0.4
, lens , iso639 >= 0.1.0.3 && < 0.2
, neat-interpolation , lens >= 5.2.2 && < 5.3
, scientific , neat-interpolation >= 0.5.1.3 && < 0.6
, servant , protolude >= 0.3.3 && < 0.4
, servant-client , scientific >= 0.3.7.0 && < 0.4
, split , servant >= 0.19 && < 0.21
, text , servant-client >= 0.19 && < 0.21
, utf8-string , split >= 0.2.3.5 && < 0.3
, vector , text >= 2.0.2 && < 2.1
, utf8-string >= 1.0.2 && < 1.1
, vector >= 0.13.0 && < 0.14
default-language: Haskell2010 default-language: Haskell2010
test-suite halCrawler-test test-suite halCrawler-test
...@@ -108,28 +118,32 @@ test-suite halCrawler-test ...@@ -108,28 +118,32 @@ test-suite halCrawler-test
default-extensions: default-extensions:
DataKinds DataKinds
DeriveGeneric DeriveGeneric
ImportQualifiedPost
NamedFieldPuns NamedFieldPuns
NoImplicitPrelude
OverloadedStrings OverloadedStrings
RecordWildCards RecordWildCards
TypeOperators TypeOperators
ghc-options: -threaded -rtsopts -with-rtsopts=-N ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: build-depends:
aeson aeson >= 2.1.0 && < 2.3
, base >=4.7 && <5 , base >=4.7 && <5
, bytestring , bytestring >= 0.11.0 && < 0.13
, conduit , conduit >= 1.3.5 && < 1.4
, containers , containers >= 0.6.7 && < 0.7
, data-default , data-default >= 0.7.1.1 && < 0.8
, halCrawler , halCrawler
, http-client , http-client >= 0.7.13.1 && < 0.8
, http-client-tls , http-client-tls >= 0.3.6.2 && < 0.4
, lens , iso639 >= 0.1.0.3 && < 0.2
, neat-interpolation , lens >= 5.2.2 && < 5.3
, scientific , neat-interpolation >= 0.5.1.3 && < 0.6
, servant , protolude >= 0.3.3 && < 0.4
, servant-client , scientific >= 0.3.7.0 && < 0.4
, split , servant >= 0.19 && < 0.21
, text , servant-client >= 0.19 && < 0.21
, utf8-string , split >= 0.2.3.5 && < 0.3
, vector , text >= 2.0.2 && < 2.1
, utf8-string >= 1.0.2 && < 1.1
, vector >= 0.13.0 && < 0.14
default-language: Haskell2010 default-language: Haskell2010
name: crawlerHAL
version: 0.1.0.0
git: "https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal"
license: BSD3
author: "CNRS/IMT"
maintainer: "contact@gargantext.org"
copyright: "2019 CNRS/IMT"
extra-source-files:
- README.md
- ChangeLog.md
# Metadata used when publishing your package
# synopsis: Short description of your package
# category: Web
# To avoid duplicated efforts in documentation and dealing with the
# complications of embedding Haddock markup inside cabal files, it is
# common to point users to the README.md file.
description: Please see the README at <https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal>
dependencies:
- aeson
- base >= 4.7 && < 5
- bytestring
- conduit
- containers
- data-default
- http-client
- http-client-tls
- lens
- neat-interpolation
- scientific
- servant
- servant-client
- split
- text
- utf8-string
- vector
library:
source-dirs: src
default-extensions:
- DataKinds
- DeriveGeneric
- NamedFieldPuns
- OverloadedStrings
- RecordWildCards
- TypeOperators
executables:
crawlerHAL-exe:
main: Main.hs
source-dirs: app
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
dependencies:
- crawlerHAL
tests:
halCrawler-test:
main: Spec.hs
source-dirs: test
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
dependencies:
- halCrawler
module HAL where module HAL where
import Conduit import Conduit
import Data.Default (def) import Data.Aeson
import Data.Maybe (fromMaybe) import Data.LanguageCodes (ISO639_1(..))
import Data.Either (fromRight)
import Data.Text import Data.Text
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client (BaseUrl(..), Scheme(..), ClientM, ClientError, runClientM, mkClientEnv)
import HAL.Client import HAL.Client
import HAL.Doc.Corpus import HAL.Doc.Corpus
import HAL.Doc.Struct import HAL.Doc.Struct
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Protolude
import Servant.API import Servant.API
import Data.Aeson import Servant.Client (BaseUrl(..), Scheme(..), ClientM, ClientError, runClientM, mkClientEnv)
batchSize :: Int batchSize :: Int
batchSize = 1000 batchSize = 1000
getMetadataWith :: Text -> Maybe Int -> Maybe Integer -> IO (Either ClientError (Response Corpus)) type Query = Text
getMetadataWith q start rows = do type Start = Int
manager' <- newManager tlsManagerSettings type Limit = Integer
runHalAPIClient $ search (Just requestedFields) [q] Nothing start rows type Count = Integer
getMetadataWithC :: Text -> Maybe Int -> Maybe Integer -> IO (Either ClientError (Maybe Integer, ConduitT () Corpus IO ())) getMetadataWith :: Query
getMetadataWithC q start rows = do -> Maybe Start
manager' <- newManager tlsManagerSettings -> Maybe Limit
-> Maybe ISO639_1
-> IO (Either ClientError (Response Corpus))
getMetadataWith q start_ limit lang = do
runHalAPIClient $ search (Just $ requestedFields lang) [q] Nothing start_ limit
getMetadataWithC :: Query
-> Maybe Start
-> Maybe Limit
-> Maybe ISO639_1
-> IO (Either ClientError (Maybe Count, ConduitT () Corpus IO ()))
getMetadataWithC q start_ limit lang = do
-- First, estimate the total number of documents -- First, estimate the total number of documents
eCount <- countResults q eCount <- countResults q
pure $ get' q start rows <$> eCount pure $ get' <$> eCount
where where
get' :: Text -> Maybe Int -> Maybe Integer -> Integer -> (Maybe Integer, ConduitT () Corpus IO ()) get' :: Count
get' q start rows numFound = -> (Maybe Count, ConduitT () Corpus IO ())
get' numFound_ =
( Just numResults ( Just numResults
, yieldMany [0..] , yieldMany [0..]
.| takeC (fromInteger numPages) .| takeC (fromInteger numPages)
.| concatMapMC (getPage q start')) .| concatMapMC (getPage start'))
where where
start' = fromMaybe 0 start start' = fromMaybe 0 start_
rows' = min numFound $ fromMaybe numFound rows rows' = min numFound_ $ fromMaybe numFound_ limit
numResults = rows' - (fromIntegral start') numResults = rows' - (fromIntegral start')
numPages = numResults `div` (fromIntegral batchSize) + 1 numPages = numResults `div` (fromIntegral batchSize) + 1
getPage :: Text -> Int -> Int -> IO [Corpus] getPage :: Start -> Int -> IO [Corpus]
getPage q start pageNum = do getPage start' pageNum = do
let offset = start + pageNum * batchSize let offset = start' + pageNum * batchSize
eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just offset) (Just $ fromIntegral batchSize) eRes <- runHalAPIClient $ search (Just $ requestedFields lang) [q] Nothing (Just offset) (Just $ fromIntegral batchSize)
pure $ case eRes of pure $ case eRes of
Left _ -> [] Left _ -> []
Right (Response { _docs }) -> _docs Right (Response { _docs }) -> _docs
printDoc :: Corpus -> IO Corpus -- printDoc :: Corpus -> IO Corpus
printDoc c@(Corpus { _corpus_docid, _corpus_title }) = do -- printDoc c@(Corpus { .. }) = do
print $ show _corpus_title -- putText $ show _corpus_title
pure c -- pure c
countResults :: Text -> IO (Either ClientError Integer) countResults :: Query -> IO (Either ClientError Count)
countResults q = do countResults q = do
manager' <- newManager tlsManagerSettings
-- First, estimate the total number of documents -- First, estimate the total number of documents
eRes <- runHalAPIClient $ search (Just requestedFields) [q] Nothing (Just 0) (Just 1) :: IO (Either ClientError (Response Corpus)) eRes <- runHalAPIClient $ search (Just $ requestedFields Nothing) [q] Nothing (Just 0) (Just 1) :: IO (Either ClientError (Response Corpus))
pure $ _numFound <$> eRes pure $ _numFound <$> eRes
requestedFields :: Text requestedFields :: Maybe ISO639_1 -> Text
requestedFields = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s" requestedFields (Just EN) = "docid,title_s,en_abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s"
requestedFields (Just FR) = "docid,title_s,en_abstract_s,fr_abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s"
requestedFields _ = requestedFields (Just EN)
structFields :: Text structFields :: Text
structFields = "docid,label_s,parentDocid_i" structFields = "docid,label_s,parentDocid_i"
...@@ -80,9 +88,9 @@ runStructureRequest :: Maybe Text -> IO (Either ClientError (Response Struct)) ...@@ -80,9 +88,9 @@ runStructureRequest :: Maybe Text -> IO (Either ClientError (Response Struct))
runStructureRequest rq = runStructureRequest rq =
runHalAPIClient $ structure (Just structFields) rq (Just 10000) runHalAPIClient $ structure (Just structFields) rq (Just 10000)
runSearchRequest :: [Text] -> IO (Either ClientError (Response Corpus)) runSearchRequest :: [Text] -> Maybe ISO639_1 -> IO (Either ClientError (Response Corpus))
runSearchRequest rq = runSearchRequest rq lang =
runHalAPIClient $ search (Just requestedFields) rq Nothing Nothing Nothing runHalAPIClient $ search (Just $ requestedFields lang) rq Nothing Nothing Nothing
generateRequestByStructID :: Text -> [Text] -> Text generateRequestByStructID :: Text -> [Text] -> Text
generateRequestByStructID rq struct_ids = generateRequestByStructID rq struct_ids =
...@@ -94,5 +102,6 @@ generateRequestByStructID rq struct_ids = ...@@ -94,5 +102,6 @@ generateRequestByStructID rq struct_ids =
<> ")" <> ")"
flattenPipe :: [Text] -> Text flattenPipe :: [Text] -> Text
flattenPipe [] = ""
flattenPipe (x:[]) = x flattenPipe (x:[]) = x
flattenPipe (x:xs) = x <> " || " <> flattenPipe xs flattenPipe (x:xs) = x <> " || " <> flattenPipe xs
...@@ -3,18 +3,13 @@ ...@@ -3,18 +3,13 @@
module HAL.Client where module HAL.Client where
import Control.Lens as L (makeLenses)
import Data.Aeson
import Data.Proxy import Data.Proxy
import GHC.Generics import GHC.Generics
import Protolude
import Servant.API import Servant.API
import Servant.Client hiding (Response) import Servant.Client hiding (Response)
import Data.Text
import Data.Map
import Data.Aeson
import qualified Data.ByteString.Lazy as BSL
import qualified Codec.Binary.UTF8.String as UTF
import Control.Lens as L (makeLenses)
type HALAPI doc = Search doc type HALAPI doc = Search doc
:<|> Structure doc :<|> Structure doc
...@@ -62,10 +57,11 @@ data Response doc = Response ...@@ -62,10 +57,11 @@ data Response doc = Response
L.makeLenses ''Response L.makeLenses ''Response
instance FromJSON doc => FromJSON (Response doc) where instance FromJSON doc => FromJSON (Response doc) where
parseJSON (Object o) = Response <$> parseJSON = withObject "Response" $
((o .: "response") >>= (.: "numFound")) \o -> Response
<*> ((o .: "response") >>= (.: "start")) <$> ((o .: "response") >>= (.: "numFound"))
<*> ((o .: "response") >>= (.: "docs")) <*> ((o .: "response") >>= (.: "start"))
<*> ((o .: "response") >>= (.: "docs"))
halAPI :: Proxy (HALAPI doc) halAPI :: Proxy (HALAPI doc)
halAPI = Proxy halAPI = Proxy
......
module HAL.Doc where module HAL.Doc
( module HAL.Doc.EntityTree
, module HAL.Doc.Corpus )
where
import HAL.Doc.EntityTree import HAL.Doc.EntityTree
import HAL.Doc.Corpus import HAL.Doc.Corpus
...@@ -2,14 +2,11 @@ ...@@ -2,14 +2,11 @@
module HAL.Doc.Corpus where module HAL.Doc.Corpus where
import GHC.Generics import Control.Lens qualified as L
import Data.Aeson import Data.Aeson
import Data.Default import Data.Default
import Data.Text (pack, Text) import GHC.Generics
import Control.Applicative ((<|>)) import Protolude
import qualified Control.Lens as L
import Servant.API (ToHttpApiData(..)) import Servant.API (ToHttpApiData(..))
data Corpus = Corpus data Corpus = Corpus
...@@ -29,15 +26,16 @@ instance Default Corpus where ...@@ -29,15 +26,16 @@ instance Default Corpus where
def = Corpus "" def def def def def def def def = Corpus "" def def def def def def def
instance FromJSON Corpus where instance FromJSON Corpus where
parseJSON (Object o) = Corpus <$> parseJSON = withObject "Corpus" $
(o .: "docid") \o -> Corpus
<*> (o .: "title_s" <|> return []) <$> (o .: "docid")
<*> (o .: "abstract_s" <|> return []) <*> (o .: "title_s" <|> return [])
<*> (o .:? "submittedDate_s") <*> (o .: "en_abstract_s" <|> return [])
<*> (o .:? "source_s") <*> (o .:? "submittedDate_s")
<*> (o .: "authFullName_s" <|> return []) <*> (o .:? "source_s")
<*> (o .: "authOrganism_s" <|> return []) <*> (o .: "authFullName_s" <|> return [])
<*> (o .: "structId_i" <|> return []) <*> (o .: "authOrganism_s" <|> return [])
<*> (o .: "structId_i" <|> return [])
instance ToHttpApiData Corpus where instance ToHttpApiData Corpus where
toUrlPiece _ = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s,structId_i" toUrlPiece _ = "docid,title_s,en_abstract_s,fr_abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s,structId_i"
module HAL.Doc.EntityTree where module HAL.Doc.EntityTree where
import GHC.Generics import Data.Aeson ((.:), (.:?), (.!=), FromJSON(..), withObject)
import Data.Aeson ((.:), (.:?), (.!=), Value(..), ToJSON, FromJSON(..), encode)
import Data.Default import Data.Default
import Data.Text (pack, Text) import GHC.Generics
import Protolude hiding (show)
import Protolude.Base (Show(..))
import Servant.API (ToHttpApiData(..)) import Servant.API (ToHttpApiData(..))
...@@ -20,15 +19,13 @@ instance Default EntityTree where ...@@ -20,15 +19,13 @@ instance Default EntityTree where
def = EntityTree "" def def def = EntityTree "" def def
instance FromJSON EntityTree where instance FromJSON EntityTree where
parseJSON (Object o) = parseJSON = withObject "EntityTree" $
EntityTree <$> (o .: "docid") \o -> EntityTree <$> (o .: "docid")
<*> (o .:? "label_s") <*> (o .:? "label_s")
<*> (o .:? "parentEntityTreeid_i" .!= []) <*> (o .:? "parentEntityTreeid_i" .!= [])
instance ToHttpApiData EntityTree where instance ToHttpApiData EntityTree where
toUrlPiece _ = "docid,label_s,parentEntityTreeid_i" toUrlPiece _ = "docid,label_s,parentEntityTreeid_i"
instance Show EntityTree where instance Show EntityTree where
show (EntityTree id label _) = show label show (EntityTree { .. }) = show _label_s <> "(" <> show _docid <> ")"
<> "(" <> show id <> ")"
...@@ -2,15 +2,12 @@ ...@@ -2,15 +2,12 @@
module HAL.Doc.Struct where module HAL.Doc.Struct where
import GHC.Generics
import Data.Aeson import Data.Aeson
import Data.Default import Data.Default
import Data.Text (pack, Text) import GHC.Generics
import Control.Applicative ((<|>)) import Protolude
import qualified Control.Lens as L
import Servant.API (ToHttpApiData(..)) import Servant.API (ToHttpApiData(..))
import qualified Control.Lens as L
data Struct = Struct data Struct = Struct
{ {
...@@ -24,10 +21,10 @@ instance Default Struct where ...@@ -24,10 +21,10 @@ instance Default Struct where
def = Struct def "" def def = Struct def "" def
instance FromJSON Struct where instance FromJSON Struct where
parseJSON (Object o) = Struct <$> parseJSON = withObject "Struct" $
(o .: "docid") \o -> Struct <$> (o .: "docid")
<*> (o .: "label_s") <*> (o .: "label_s")
<*> (o .: "parentDocid_i" <|> return []) <*> (o .: "parentDocid_i" <|> return [])
instance ToHttpApiData Struct where instance ToHttpApiData Struct where
toUrlPiece _ = "docid,label_s,parentDocid_i" toUrlPiece _ = "docid,label_s,parentDocid_i"
module Tree where module Tree where
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client hiding (Response)
import Control.Lens.Getter ((^.)) import Control.Lens.Getter ((^.))
import Data.Map ((!?), (!), insert, empty, Map, fromList, toList)
import qualified Data.Map as M
import Data.Map.Internal (merge, preserveMissing, zipWithMatched)
import Data.Aeson
import Data.List (groupBy, isInfixOf)
import Data.List.Split (chunksOf) import Data.List.Split (chunksOf)
import Data.Either (rights) import Data.Map qualified as Map
import Data.Maybe (fromMaybe) import Data.Map (insert)
import Data.Scientific (Scientific, floatingOrInteger) import Data.Map.Internal (merge, preserveMissing, zipWithMatched)
import qualified Data.Vector as V import Data.Text qualified as T
import Text.Printf import Data.Text.Format (format)
import Data.Text.Lazy qualified as TL
import GHC.Generics import GHC.Generics
import HAL.Client
import HAL import HAL
import HAL.Client
import HAL.Doc.Struct import HAL.Doc.Struct
import Data.Text (Text, pack) import Protolude
--import Text.Printf
formatParentIdRequest :: [Struct] -> Maybe Text formatParentIdRequest :: [Struct] -> Maybe Text
formatParentIdRequest [] = Nothing formatParentIdRequest [] = Nothing
formatParentIdRequest (x:[]) = Just . pack . show $ _struct_docid x formatParentIdRequest (x:[]) = Just . T.pack . show $ _struct_docid x
formatParentIdRequest (x:xs) = formatParentIdRequest (x:xs) =
(Just . pack . show $ _struct_docid x) (Just . T.pack . show $ _struct_docid x)
<> (Just " || ") <> (Just " || ")
<> formatParentIdRequest xs <> formatParentIdRequest xs
...@@ -43,35 +36,42 @@ fetchChildren [] = pure [] ...@@ -43,35 +36,42 @@ fetchChildren [] = pure []
fetchChildren ds = (ds <>) <$> (fetchChildren =<< ds2Child ds) fetchChildren ds = (ds <>) <$> (fetchChildren =<< ds2Child ds)
isChildOf :: Struct -> Struct -> Bool isChildOf :: Struct -> Struct -> Bool
isChildOf (Struct i l p) (Struct i' l' p') = not . null $ filter (\id -> id == (pack $ show i)) p' isChildOf (Struct i _ _) (Struct _ _ p') =
not . null $ filter (\id -> id == (T.pack $ show i)) p'
data DocTree = DocTree Struct Int [DocTree] data DocTree = DocTree Struct Int [DocTree]
deriving (Show, Generic) deriving (Show, Generic)
buildTree :: Int -> [Struct] -> Struct -> DocTree buildTree :: Int -> [Struct] -> Struct -> DocTree
buildTree depth docs id = DocTree id depth (buildTree (depth + 1) docs <$> children) buildTree depth docs' id = DocTree id depth (buildTree (depth + 1) docs' <$> children)
where children = filter (isChildOf id) docs where
children = filter (isChildOf id) docs'
formatTree :: DocTree -> String formatTree :: DocTree -> Text
formatTree tree@(DocTree doc depth children) = formatTree (DocTree doc depth children) =
printf "%s%s\n%s" (addSpace) (show doc) (concat $ formatTree <$> children) TL.toStrict $ format "{}{}\n{}" [ addSpace
where addSpace , show doc
| depth > 0 = "├" <> (replicate (depth * depth + depth) '─') , T.concat $ formatTree <$> children ]
where
addSpace :: Text
addSpace
| depth > 0 = "├" <> (T.replicate (depth * depth + depth) "─")
| otherwise = "🌲" | otherwise = "🌲"
findDeepest :: Map Int Int -> DocTree -> Map Int Int findDeepest :: Map Int Int -> DocTree -> Map Int Int
findDeepest m tree@(DocTree doc@(Struct docid _ _) depth children) = findDeepest m (DocTree (Struct docid _ _) depth children) =
mergeMap map1 maps mergeMap map1 maps
where map1 = insert docid depth m where map1 = insert docid depth m
maps = foldl mergeMap empty $ findDeepest m <$> children maps = foldl mergeMap Map.empty $ findDeepest m <$> children
mergeMap m1 m2 = merge preserveMissing preserveMissing (zipWithMatched whenMatch) m1 m2 mergeMap m1 m2 = merge preserveMissing preserveMissing (zipWithMatched whenMatch) m1 m2
whenMatch k v v' = if v > v' then v else v' whenMatch _k v v' = if v > v' then v else v'
isDeep :: Map Int Int -> DocTree -> Bool isDeep :: Map Int Int -> DocTree -> Bool
isDeep m (DocTree (Struct id _ _) depth _) = depth >= (fromMaybe 0 $ M.lookup id m) isDeep m (DocTree (Struct id _ _) depth _) = depth >= (fromMaybe 0 $ Map.lookup id m)
removeDuplicate :: Map Int Int -> DocTree -> DocTree removeDuplicate :: Map Int Int -> DocTree -> DocTree
removeDuplicate deepMap tree@(DocTree doc depth children) = DocTree doc depth (removeDuplicate deepMap <$> filter (isDeep deepMap) children) removeDuplicate deepMap (DocTree doc depth children) =
DocTree doc depth (removeDuplicate deepMap <$> filter (isDeep deepMap) children)
noDuplicateTree :: DocTree -> DocTree noDuplicateTree :: DocTree -> DocTree
noDuplicateTree tree = removeDuplicate (findDeepest M.empty tree) tree noDuplicateTree tree = removeDuplicate (findDeepest Map.empty tree) tree
# This file was automatically generated by 'stack init'
#
# Some commonly used options have been documented as comments in this file.
# For advanced use and comprehensive documentation of the format, please see:
# https://docs.haskellstack.org/en/stable/yaml_configuration/
# Resolver to choose a 'specific' stackage snapshot or a compiler version.
# A snapshot resolver dictates the compiler version and the set of packages
# to be used for project dependencies. For example:
#
# resolver: lts-3.5
# resolver: nightly-2015-09-21
# resolver: ghc-7.10.2
#
# The location of a snapshot can be provided as a file or url. Stack assumes
# a snapshot provided as a file might change, whereas a url resource does not.
#
# resolver: ./custom-snapshot.yaml
# resolver: https://example.com/snapshots/2018-01-01.yaml
resolver:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/18.yaml
# User packages to be built.
# Various formats can be used as shown in the example below.
#
# packages:
# - some-directory
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
# - location:
# git: https://github.com/commercialhaskell/stack.git
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# subdirs:
# - auto-update
# - wai
packages:
- .
# Dependency packages to be pulled from upstream that are not in the resolver
# using the same syntax as the packages field.
# (e.g., acme-missiles-0.3)
# extra-deps: []
# Override default flag values for local packages and extra-deps
# flags: {}
# Extra package databases containing global packages
# extra-package-dbs: []
# Control whether we use the GHC we find on the path
# system-ghc: true
#
# Require a specific version of stack, using version ranges
# require-stack-version: -any # Default
# require-stack-version: ">=1.9"
#
# Override the architecture used by stack, especially useful on Windows
# arch: i386
# arch: x86_64
#
# Extra directories used by stack for building
# extra-include-dirs: [/path/to/dir]
# extra-lib-dirs: [/path/to/dir]
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment