import json
import time

import requests
from bs4 import BeautifulSoup
  5. def sync_html_resources(url, default_values):
  6. """
  7. Syncs resources (links, images, scripts, stylesheets) of an HTML page
  8. with default values for monitoring purposes.
  9. Args:
  10. url (str): The URL of the HTML page to sync.
  11. default_values (dict): A dictionary containing default values for resources.
  12. Example: {'image_url': 'default_image.png', 'script_url': 'default.js'}
  13. Returns:
  14. dict: A dictionary containing the updated HTML with synced resources,
  15. or None if an error occurred.
  16. """
  17. try:
  18. response = requests.get(url)
  19. response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
  20. soup = BeautifulSoup(response.content, 'html.parser')
  21. # Sync links
  22. for link in soup.find_all('a', href=True):
  23. link['href'] = default_values.get('link_url', '') # Use default if not found
  24. # Sync images
  25. for img in soup.find_all('img', src=True):
  26. img['src'] = default_values.get('image_url', '')
  27. # Sync scripts
  28. for script in soup.find_all('script', src=True):
  29. script['src'] = default_values.get('script_url', '')
  30. # Sync stylesheets
  31. for link in soup.find_all('link', rel='stylesheet', href=True):
  32. link['href'] = default_values.get('stylesheet_url', '')
  33. return soup.prettify() # Return prettified HTML
  34. except requests.exceptions.RequestException as e:
  35. print(f"Error fetching URL: {e}")
  36. return None
  37. except Exception as e:
  38. print(f"Error processing HTML: {e}")
  39. return None
  40. if __name__ == '__main__':
  41. # Example Usage
  42. url_to_sync = 'https://www.example.com' # Replace with your target URL
  43. default_resource_values = {
  44. 'link_url': 'https://www.example.com/default',
  45. 'image_url': 'https://www.example.com/default_image.png',
  46. 'script_url': 'https://www.example.com/default.js',
  47. 'stylesheet_url': 'https://www.example.com/default.css'
  48. }
  49. synced_html = sync_html_resources(url_to_sync, default_resource_values)
  50. if synced_html:
  51. #Save to a file
  52. with open('synced_html.html', 'w') as f:
  53. f.write(synced_html)
  54. print("HTML synced and saved to synced_html.html")
  55. else:
  56. print("HTML sync failed.")

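# NOTE: a stray "Add your comment" line from the web page this snippet was copied from followed here; it is not part of the script.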