feat: v2.1.6

Mark Bailey 2025-01-26 17:48:09 -05:00
parent 94166a7258
commit f2ec76b9db


@@ -5,7 +5,7 @@ author_url: https://git.markbailey.dev/cerbervs
 git_url: https://git.markbailey.dev/cerbervs/scrape.git
 description: Scrapes web with option for recursive scraping.
 requirements: websocket, requests, bs4, pydantic
-version: 2.1.4
+version: 2.1.5
 licence: MIT
 """
@@ -110,7 +110,7 @@ class Tools:
         netloc = self.netloc
         if path is not None and path != "":
-            path = "/"+path
+            path = "/" + path
         if params is not None and params != "":
             params = ";" + params
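The whitespace fix above sits in the tool's URL reassembly, where each optional component is prefixed with its separator before concatenation. A minimal standalone sketch of that logic (the function name and signature here are illustrative, not the tool's actual method):

    # Hypothetical free-standing version of the reassembly; only the
    # separator prefixes ("/" for path, ";" for params) mirror the diff.
    def build_url(scheme: str, netloc: str, path: str = "", params: str = "") -> str:
        if path is not None and path != "":
            path = "/" + path
        if params is not None and params != "":
            params = ";" + params
        return f"{scheme}://{netloc}{path}{params}"

    print(build_url("https", "pkg.go.dev", "github.com/go-chi/chi/v5"))
    # https://pkg.go.dev/github.com/go-chi/chi/v5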
@@ -170,7 +170,7 @@ class Tools:
         single_request: bool = Field(default=False, description="Single Request")
         pass

-    class Valves(BaseModel):
+    class Valves(UserValves):
         request_limit: int = Field(default=5, description="Request Limit")
         pass
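Changing the base class from BaseModel to UserValves means the admin-side Valves now inherits single_request, which is what lets the next hunk read self.valves.single_request. A hedged sketch of the resulting classes, assuming pydantic as listed in the tool's requirements:

    from pydantic import BaseModel, Field

    class UserValves(BaseModel):
        single_request: bool = Field(default=False, description="Single Request")

    class Valves(UserValves):
        # Inherits single_request, so both the per-user and the admin
        # valves expose the flag.
        request_limit: int = Field(default=5, description="Request Limit")

    print(Valves().single_request, Valves().request_limit)  # False 5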
@@ -190,19 +190,25 @@ class Tools:
         :params url: The URL of the web page to be scraped.
         """
-        if self.user_valves.single_request:
+        single_request = self.user_valves.single_request or self.valves.single_request
+        if single_request:
             request_limit = 1
-            print("Single Request Mode")
         else:
             request_limit = self.valves.request_limit
-            print(f"Request Limit Mode ({self.valves.request_limit})")
+        print(
+            f"Single Request Mode - {single_request} - Request Limit - {request_limit}"
+        )

         scraper = self.RecursiveScraper(request_limit)
         data = scraper.scrape_website(url)
         json_s = json.dumps(data)
+        del scraper
         return json_s


 if __name__ == "__main__":
     tools = Tools()
     print(tools.scrape_recursively("https://pkg.go.dev/github.com/go-chi/chi/v5"))
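Taken together, the last hunk makes single-request mode the logical OR of the per-user and admin valves, with the admin request_limit as the fallback budget. An illustrative sketch of that precedence (a hypothetical free function, not part of the tool):

    def effective_request_limit(user_single: bool, admin_single: bool, admin_limit: int) -> int:
        # Either valve can force single-request mode; otherwise the admin limit applies.
        single_request = user_single or admin_single
        return 1 if single_request else admin_limit

    print(effective_request_limit(False, False, 5))  # 5 - full recursive budget
    print(effective_request_limit(True, False, 5))   # 1 - user forced a single request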