1. import html
  2. import re
  3. def encode_html(text, allow_tags=None):
  4. """
  5. Encodes HTML text for internal use with defensive checks.
  6. Args:
  7. text (str): The HTML text to encode.
  8. allow_tags (list, optional): A list of HTML tags to allow. Defaults to None.
  9. Returns:
  10. str: The encoded HTML text.
  11. """
  12. if not isinstance(text, str):
  13. raise TypeError("Input must be a string.")
  14. if allow_tags is not None and not isinstance(allow_tags, list):
  15. raise TypeError("allow_tags must be a list or None.")
  16. encoded_text = html.escape(text, quote=True) # Escape HTML entities
  17. if allow_tags:
  18. #Remove tags if allowed tags are specified.
  19. allowed_tags_regex = re.compile(r'<(/?)(\w+)>')
  20. def replace_tags(match):
  21. closing_tag = match.group(1)
  22. tag_name = match.group(2)
  23. if closing_tag:
  24. return ""
  25. else:
  26. return match.group(0) #Return tag as is if it's an opening tag that is not allowed
  27. encoded_text = allowed_tags_regex.sub(replace_tags, encoded_text)
  28. return encoded_text

Add your comment