from bs4 import BeautifulSoup
def split_html_data(html_string, split_by="div"):
    """Split HTML markup into a list of strings, one per matching element.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend, every
    element with the requested tag name is collected with ``find_all``, and
    each one is converted back to a string with ``str()``.

    Args:
        html_string: The HTML markup to split.
        split_by: Name of the tag to split on. Only ``"div"`` (the default)
            and ``"p"`` are supported.

    Returns:
        A list containing the string form of each matching element. The list
        is empty when nothing matches. It is also empty when ``split_by`` is
        not a supported tag or when parsing raises: in those cases an error
        message is printed and no exception reaches the caller.
    """
    supported_tags = ("div", "p")
    try:
        # Validate first so an unsupported tag never costs a parse and always
        # reports the same message regardless of what html_string contains.
        if split_by not in supported_tags:
            raise ValueError("Invalid split_by tag. Choose 'div' or 'p'.")
        soup = BeautifulSoup(html_string, 'html.parser')
        return [str(element) for element in soup.find_all(split_by)]
    except Exception as e:
        # Deliberate best-effort contract: report the problem and hand back an
        # empty list rather than propagating the exception.
        print(f"Error splitting HTML: {e}")
        return []
if __name__ == '__main__':
    # Example usage: sample markup with two <div> elements that together
    # contain three <p> elements.
    html_data = """
<div>
<p>This is the first paragraph.</p>
<p>This is the second paragraph.</p>
</div>
<div>
<p>Another paragraph.</p>
</div>
"""
    # "div" and "p" are the supported tags; "span" exercises the invalid-tag
    # path, where split_html_data prints an error and returns an empty list.
    for tag in ("div", "p", "span"):
        split_data = split_html_data(html_data, split_by=tag)
        print(split_data)
# Add your comment