feat: v2.1.6
This commit is contained in:
parent
94166a7258
commit
f2ec76b9db
18
scrape.py
18
scrape.py
@@ -5,7 +5,7 @@ author_url: https://git.markbailey.dev/cerbervs
|
|||||||
git_url: https://git.markbailey.dev/cerbervs/scrape.git
|
git_url: https://git.markbailey.dev/cerbervs/scrape.git
|
||||||
description: Scrapes web with option for recursive scraping.
|
description: Scrapes web with option for recursive scraping.
|
||||||
requirements: websocket, requests, bs4, pydantic
|
requirements: websocket, requests, bs4, pydantic
|
||||||
version: 2.1.4
|
version: 2.1.5
|
||||||
licence: MIT
|
licence: MIT
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -110,7 +110,7 @@ class Tools:
|
|||||||
netloc = self.netloc
|
netloc = self.netloc
|
||||||
|
|
||||||
if path is not None and path != "":
|
if path is not None and path != "":
|
||||||
path = "/"+path
|
path = "/" + path
|
||||||
|
|
||||||
if params is not None and params != "":
|
if params is not None and params != "":
|
||||||
params = ";" + params
|
params = ";" + params
|
||||||
@@ -170,7 +170,7 @@ class Tools:
|
|||||||
single_request: bool = Field(default=False, description="Single Request")
|
single_request: bool = Field(default=False, description="Single Request")
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class Valves(BaseModel):
|
class Valves(UserValves):
|
||||||
request_limit: int = Field(default=5, description="Request Limit")
|
request_limit: int = Field(default=5, description="Request Limit")
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -190,19 +190,25 @@ class Tools:
|
|||||||
:params url: The URL of the web page to be scraped.
|
:params url: The URL of the web page to be scraped.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if self.user_valves.single_request:
|
single_request = self.user_valves.single_request or self.valves.single_request
|
||||||
|
|
||||||
|
if single_request:
|
||||||
request_limit = 1
|
request_limit = 1
|
||||||
print("Single Request Mode")
|
|
||||||
else:
|
else:
|
||||||
request_limit = self.valves.request_limit
|
request_limit = self.valves.request_limit
|
||||||
print(f"Request Limit Mode ({self.valves.request_limit})")
|
|
||||||
|
print(
|
||||||
|
f"Single Request Mode - {single_request} - Request Limit - {request_limit}"
|
||||||
|
)
|
||||||
|
|
||||||
scraper = self.RecursiveScraper(request_limit)
|
scraper = self.RecursiveScraper(request_limit)
|
||||||
data = scraper.scrape_website(url)
|
data = scraper.scrape_website(url)
|
||||||
json_s = json.dumps(data)
|
json_s = json.dumps(data)
|
||||||
|
del scraper
|
||||||
|
|
||||||
return json_s
|
return json_s
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
tools = Tools()
|
tools = Tools()
|
||||||
print(tools.scrape_recursively("https://pkg.go.dev/github.com/go-chi/chi/v5"))
|
print(tools.scrape_recursively("https://pkg.go.dev/github.com/go-chi/chi/v5"))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user