From 7e50259820bdb66677c41f6e78302c43aa2359fb Mon Sep 17 00:00:00 2001 From: Mark Bailey Date: Sun, 26 Jan 2025 16:37:36 -0500 Subject: [PATCH] feat: v2.1.3 * make text finding recursive --- scrape.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/scrape.py b/scrape.py index e882a7a..6695d06 100644 --- a/scrape.py +++ b/scrape.py @@ -128,21 +128,20 @@ class Tools: } # Find all headings (h1 to h6) - heading_tag = ["h1", "h2", "h3", "h4", "h5", "h6"] - headings = soup.find_all(heading_tag) + headings = soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]) if headings: data["headings"].extend([tag.get_text() for tag in headings]) # Find all paragraphs - paragraphs = soup.find_all("p") + paragraphs = soup.find_all("p", recursive=True) if paragraphs: data["paragraphs"] = [p.get_text() for p in paragraphs] - divs = soup.find_all("div") + divs = soup.find_all("div", recursive=True) if divs: data["divs"] = [div.get_text() for div in divs] - lis = soup.find_all("li") + lis = soup.find_all("li", recursive=True) if lis: data["lis"] = [li.get_text() for li in lis] @@ -198,4 +197,4 @@ class Tools: if __name__ == "__main__": tools = Tools() - print(tools.scrape_recursively("https://stackoverflow.com/questions/6486450/compute-list-difference")) + print(tools.scrape_recursively("https://en.wikipedia.org/wiki/Shamisen"))