From f2ec76b9db59c920d593b2998517be3bb334f29e Mon Sep 17 00:00:00 2001
From: Mark Bailey
Date: Sun, 26 Jan 2025 17:48:09 -0500
Subject: [PATCH] feat: v2.1.5

---
 scrape.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/scrape.py b/scrape.py
index afe648b..ea57b31 100644
--- a/scrape.py
+++ b/scrape.py
@@ -5,7 +5,7 @@ author_url: https://git.markbailey.dev/cerbervs
 git_url: https://git.markbailey.dev/cerbervs/scrape.git
 description: Scrapes web with option for recursive scraping.
 requirements: websocket, requests, bs4, pydantic
-version: 2.1.4
+version: 2.1.5
 licence: MIT
 """
 
@@ -110,7 +110,7 @@ class Tools:
         netloc = self.netloc
 
         if path is not None and path != "":
-            path = "/"+path
+            path = "/" + path
 
         if params is not None and params != "":
             params = ";" + params
@@ -170,7 +170,7 @@ class Tools:
         single_request: bool = Field(default=False, description="Single Request")
         pass
 
-    class Valves(BaseModel):
+    class Valves(UserValves):
         request_limit: int = Field(default=5, description="Request Limit")
         pass
 
@@ -190,19 +190,25 @@ class Tools:
         :params url: The URL of the web page to be scraped.
         """
 
-        if self.user_valves.single_request:
+        single_request = self.user_valves.single_request or self.valves.single_request
+
+        if single_request:
             request_limit = 1
-            print("Single Request Mode")
         else:
             request_limit = self.valves.request_limit
-            print(f"Request Limit Mode ({self.valves.request_limit})")
+
+        print(
+            f"Single Request Mode - {single_request} - Request Limit - {request_limit}"
+        )
 
         scraper = self.RecursiveScraper(request_limit)
         data = scraper.scrape_website(url)
         json_s = json.dumps(data)
+        del scraper
 
         return json_s
 
+
 if __name__ == "__main__":
     tools = Tools()
     print(tools.scrape_recursively("https://pkg.go.dev/github.com/go-chi/chi/v5"))
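Note on the change: the substantive part of this patch is the pairing of the two valve models with the new precedence rule. Because Valves now subclasses UserValves, the global valve set inherits its own single_request field, which is what makes the `or` expression in scrape_recursively() well-defined. The following is a minimal sketch, not part of the patch, that reproduces the two models and the precedence logic in isolation; the field definitions are copied from the diff, while the driver code around them is illustrative.

# Illustrative sketch only; assumes nothing beyond pydantic.
from pydantic import BaseModel, Field


class UserValves(BaseModel):
    # Per-user toggle, as defined in the patch's Tools.UserValves.
    single_request: bool = Field(default=False, description="Single Request")


class Valves(UserValves):
    # Subclassing UserValves (the patch's change) means the global valve
    # set inherits single_request alongside its own request_limit.
    request_limit: int = Field(default=5, description="Request Limit")


user_valves = UserValves()            # user leaves the toggle off
valves = Valves(single_request=True)  # operator forces single-request mode

# Same precedence rule as the patched scrape_recursively():
# either valve set can force single-request mode.
single_request = user_valves.single_request or valves.single_request
request_limit = 1 if single_request else valves.request_limit
print(f"Single Request Mode - {single_request} - Request Limit - {request_limit}")
# -> Single Request Mode - True - Request Limit - 1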
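Inheriting the per-user model keeps single_request defined once and gives operators a global override without a second toggle name; the trade-off is that every future per-user valve will surface on the global model as well. The added `del scraper` only unbinds the local name; CPython would reclaim the scraper when the function returns regardless, so it is an explicit cleanup hint rather than a behavioral change.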