import html
import re
# Matches one maximal run of characters that contains none of the line
# boundaries recognised by str.splitlines() (\n, \r, \v, \f, \x1c-\x1e,
# \x85, \u2028, \u2029). Compiled once so the generator does not rebuild it.
_LINE_RE = re.compile(r'[^\n\r\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029]+')


def encode_html(html_string):
    """Lazily yield the HTML-escaped, non-blank lines of *html_string*.

    The input is scanned one line at a time with a regex iterator instead of
    ``str.splitlines()``, so no list holding a copy of every line is built up
    front; only the line currently being escaped is materialised.

    Args:
        html_string: Text to encode. Lines are delimited by the same
            boundaries ``str.splitlines()`` recognises.

    Yields:
        Each line that contains at least one non-whitespace character, passed
        through ``html.escape`` (which also escapes quotes by default) and
        terminated with a single ``'\\n'``. Empty and whitespace-only lines
        are skipped; leading/trailing whitespace on kept lines is preserved.
    """
    for match in _LINE_RE.finditer(html_string):
        line = match.group()
        # The pattern never produces empty matches, so only whitespace-only
        # lines still need to be filtered out here.
        if line.strip():
            # The original separator is normalised to '\n'.
            yield html.escape(line) + '\n'
def encode_html_file(input_filename, output_filename):
    """Encode an HTML file and write the escaped output to a new file.

    The input is streamed line by line through ``encode_html`` rather than
    being read into memory in one piece, so memory use is bounded by the
    longest line instead of the file size.

    Errors are reported on stdout and not re-raised (best-effort behaviour).

    Args:
        input_filename: Path of the UTF-8 text file to read.
        output_filename: Path of the UTF-8 text file to create or overwrite.
    """
    try:
        with open(input_filename, 'r', encoding='utf-8') as infile, \
                open(output_filename, 'w', encoding='utf-8') as outfile:
            # Text mode already normalises \r\n and \r to \n; encode_html
            # takes care of any remaining line boundaries inside each line.
            for line in infile:
                outfile.writelines(encode_html(line))
    except FileNotFoundError as e:
        # Either open() can raise this (e.g. the output directory does not
        # exist), so report the path the OS actually complained about
        # instead of always blaming the input file.
        missing = e.filename if e.filename is not None else input_filename
        print(f"Error: File not found: {missing}")
    except Exception as e:
        print(f"An error occurred: {e}")
def _run_demo():
    """Write a small sample HTML page to disk, then encode it.

    Creates ``sample.html`` in the current working directory, runs
    ``encode_html_file`` on it to produce ``encoded.html``, and prints a
    completion message.
    """
    source_path = "sample.html"
    target_path = "encoded.html"

    # Sample document used as the demo input.
    demo_markup = """
<html>
<head>
<title>Sample HTML</title>
</head>
<body>
<h1>Hello, <b>World</b>!</h1>
<p>This is a <i>test</i>.</p>
<a href="https://www.example.com">Example Link</a>
</body>
</html>
"""
    with open(source_path, "w", encoding="utf-8") as handle:
        handle.write(demo_markup)

    encode_html_file(source_path, target_path)
    print("HTML encoded and saved to encoded.html")


if __name__ == '__main__':
    # Example usage: only runs when the file is executed as a script.
    _run_demo()
# Add your comment