import html
import re
def encode_html(text, allow_tags=None):
    """
    Escape HTML text, optionally leaving an allow-list of bare tags intact.

    The whole input is first escaped with ``html.escape(text, quote=True)``.
    When ``allow_tags`` is a non-empty list, escaped tags of the exact form
    ``<name>`` or ``</name>`` whose name appears in the list (compared
    case-insensitively) are converted back to real tags. Tags that carry
    attributes, self-closing tags such as ``<br/>``, and tags not in the
    list stay escaped.

    Args:
        text (str): The HTML text to encode.
        allow_tags (list, optional): Tag names (e.g. ``["b", "i"]``) that
            may pass through unescaped. Non-string entries are ignored.
            Defaults to None, meaning every tag is escaped.

    Returns:
        str: The encoded HTML text.

    Raises:
        TypeError: If ``text`` is not a str, or ``allow_tags`` is neither
            a list nor None.
    """
    if not isinstance(text, str):
        raise TypeError("Input must be a string.")
    if allow_tags is not None and not isinstance(allow_tags, list):
        raise TypeError("allow_tags must be a list or None.")

    encoded_text = html.escape(text, quote=True)
    if not allow_tags:
        return encoded_text

    # Build the lookup set once; tag names are matched case-insensitively.
    allowed = {tag.lower() for tag in allow_tags if isinstance(tag, str)}

    def restore_tag(match):
        slash, tag_name = match.group(1), match.group(2)
        if tag_name.lower() in allowed:
            return "<" + slash + tag_name + ">"
        # Not allow-listed: keep the escaped form untouched.
        return match.group(0)

    # The text is already escaped, so tags now look like "&lt;name&gt;".
    # Searching for a literal "<" here could never match. Entities that
    # were present in the input became "&amp;lt;..." and do not match either,
    # so pre-escaped text cannot be turned into live markup.
    return re.sub(r"&lt;(/?)(\w+)&gt;", restore_tag, encoded_text)
Add your comment