1. import html
  2. import re
  3. def encode_html(html_string):
  4. """Encodes HTML for non-production use with limited memory."""
  5. # Use a generator to process the HTML string in chunks.
  6. for line in html_string.splitlines():
  7. if line.strip(): # Skip empty lines
  8. # Encode HTML entities.
  9. encoded_line = html.escape(line)
  10. yield encoded_line + '\n' # Add newline back
  11. def encode_html_file(input_filename, output_filename):
  12. """Encodes an HTML file and writes the encoded output to a new file."""
  13. try:
  14. with open(input_filename, 'r', encoding='utf-8') as infile, \
  15. open(output_filename, 'w', encoding='utf-8') as outfile:
  16. for encoded_line in encode_html(infile.read()):
  17. outfile.write(encoded_line)
  18. except FileNotFoundError:
  19. print(f"Error: File not found: {input_filename}")
  20. except Exception as e:
  21. print(f"An error occurred: {e}")
  22. if __name__ == '__main__':
  23. # Example usage:
  24. # Create a sample HTML file for testing.
  25. sample_html = """
  26. <html>
  27. <head>
  28. <title>Sample HTML</title>
  29. </head>
  30. <body>
  31. <h1>Hello, <b>World</b>!</h1>
  32. <p>This is a <i>test</i>.</p>
  33. <a href="https://www.example.com">Example Link</a>
  34. </body>
  35. </html>
  36. """
  37. with open("sample.html", "w", encoding="utf-8") as f:
  38. f.write(sample_html)
  39. encode_html_file("sample.html", "encoded.html")
  40. print("HTML encoded and saved to encoded.html")

Add your comment