import requests
from bs4 import BeautifulSoup
import time
import json
def sync_html_resources(url, default_values, timeout=10):
    """
    Fetch an HTML page and overwrite its resource URLs with default values.

    Every ``<a href>``, ``<img src>``, ``<script src>`` and
    ``<link rel="stylesheet" href>`` found in the page has its URL attribute
    replaced by the matching entry of ``default_values``. A key that is
    missing from ``default_values`` results in an empty string being written.

    Args:
        url (str): The URL of the HTML page to sync.
        default_values (dict): Replacement URLs, looked up under the keys
            'link_url', 'image_url', 'script_url' and 'stylesheet_url'.
            Example: {'image_url': 'default_image.png', 'script_url': 'default.js'}
        timeout (float | tuple): Timeout in seconds passed to ``requests.get``
            so that an unresponsive server cannot block the call forever.

    Returns:
        str: The prettified HTML with the resource URLs replaced,
        or None if fetching or processing failed (the error is printed).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

        # Pass the raw bytes so BeautifulSoup performs its own encoding detection.
        soup = BeautifulSoup(response.content, 'html.parser')

        # Look the replacements up once; they do not change per tag.
        link_url = default_values.get('link_url', '')
        image_url = default_values.get('image_url', '')
        script_url = default_values.get('script_url', '')
        stylesheet_url = default_values.get('stylesheet_url', '')

        # Sync links
        for anchor in soup.find_all('a', href=True):
            anchor['href'] = link_url

        # Sync images
        for img in soup.find_all('img', src=True):
            img['src'] = image_url

        # Sync scripts (only external ones, i.e. those carrying a src attribute)
        for script in soup.find_all('script', src=True):
            script['src'] = script_url

        # Sync stylesheets
        for sheet in soup.find_all('link', rel='stylesheet', href=True):
            sheet['href'] = stylesheet_url

        return soup.prettify()  # Return prettified HTML
    except requests.exceptions.RequestException as e:
        # Covers connection errors, timeouts and the HTTPError raised above.
        print(f"Error fetching URL: {e}")
        return None
    except Exception as e:
        # Deliberate catch-all: callers rely on None instead of an exception.
        print(f"Error processing HTML: {e}")
        return None
if __name__ == '__main__':
    # Example usage: fetch a page, replace its resource URLs, save the result.
    url_to_sync = 'https://www.example.com'  # Replace with your target URL
    default_resource_values = {
        'link_url': 'https://www.example.com/default',
        'image_url': 'https://www.example.com/default_image.png',
        'script_url': 'https://www.example.com/default.js',
        'stylesheet_url': 'https://www.example.com/default.css',
    }
    synced_html = sync_html_resources(url_to_sync, default_resource_values)
    # Only None signals failure; an empty document is still a valid result.
    if synced_html is not None:
        # Save to a file. The encoding is explicit so non-ASCII characters in
        # the page do not depend on the platform's default encoding.
        with open('synced_html.html', 'w', encoding='utf-8') as f:
            f.write(synced_html)
        print("HTML synced and saved to synced_html.html")
    else:
        print("HTML sync failed.")
# Add your comment