feat: v2.1.3

* make text finding recursive
Mark Bailey 2025-01-26 16:37:36 -05:00
parent 08e99b5b09
commit 023760c694


@@ -128,21 +128,20 @@ class Tools:
         }
         # Find all headings (h1 to h6)
-        heading_tag = ["h1", "h2", "h3", "h4", "h5", "h6"]
-        headings = soup.find_all(heading_tag)
+        headings = soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
         if headings:
             data["headings"].extend([tag.get_text() for tag in headings])
         # Find all paragraphs
-        paragraphs = soup.find_all("p")
+        paragraphs = soup.find_all("p", recursive=True)
         if paragraphs:
             data["paragraphs"] = [p.get_text() for p in paragraphs]
-        divs = soup.find_all("div")
+        divs = soup.find_all("div", recursive=True)
         if divs:
             data["divs"] = [div.get_text() for div in divs]
-        lis = soup.find_all("li")
+        lis = soup.find_all("li", recursive=True)
         if lis:
             data["lis"] = [li.get_text() for li in lis]
@@ -185,7 +184,7 @@ class Tools:
         :params url: The URL of the web page to be scraped.
         """
-        if self.user_valves.single_request or self.valves.single_request:
+        if self.user_valves.single_request:
             request_limit = 1
         else:
             request_limit = self.valves.request_limit
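For reference, a hypothetical standalone sketch of the request-limit gating after this change; the Valves/UserValves stand-ins and their defaults are assumptions, not taken from the repository.

from dataclasses import dataclass

@dataclass
class Valves:            # stand-in for the tool-level valves; fields/defaults assumed
    single_request: bool = False
    request_limit: int = 10

@dataclass
class UserValves:        # stand-in for the per-user valves; default assumed
    single_request: bool = False

def effective_request_limit(valves: Valves, user_valves: UserValves) -> int:
    # Mirrors the new condition: only the per-user valve forces a single request;
    # otherwise the tool-level request_limit applies.
    return 1 if user_valves.single_request else valves.request_limit

print(effective_request_limit(Valves(), UserValves(single_request=True)))   # 1
print(effective_request_limit(Valves(request_limit=5), UserValves()))       # 5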
@@ -198,4 +197,4 @@ class Tools:
 if __name__ == "__main__":
     tools = Tools()
-    print(tools.scrape_recursively("https://stackoverflow.com/questions/6486450/compute-list-difference"))
+    print(tools.scrape_recursively("https://en.wikipedia.org/wiki/Shamisen"))