Update internet.py

This commit is contained in:
H Lohaus 2024-11-15 18:21:22 +01:00 committed by GitHub
parent 9da45f9a63
commit d53db73248
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -67,7 +67,7 @@ def scrape_text(html: str, max_words: int = None) -> str:
if select:
select.extract()
clean_text = ""
-for paragraph in soup.select("p"):
+for paragraph in soup.select("p, h1, h2, h3, h4, h5, h6"):
text = paragraph.get_text()
for line in text.splitlines():
words = []