1. import argparse
  2. from bs4 import BeautifulSoup
  3. import requests
  4. def check_sandbox(url, sandbox_attributes=None):
  5. """
  6. Checks an HTML document for the presence of the 'sandbox' attribute.
  7. Args:
  8. url (str): The URL of the HTML document to check.
  9. sandbox_attributes (list, optional): A list of sandbox attributes to search for.
  10. Defaults to None (checks for the general 'sandbox' attribute).
  11. Returns:
  12. bool: True if a sandbox attribute is found, False otherwise. Returns None on error.
  13. """
  14. try:
  15. response = requests.get(url)
  16. response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
  17. soup = BeautifulSoup(response.content, 'html.parser')
  18. if sandbox_attributes is None:
  19. # Check for the general 'sandbox' attribute
  20. for tag in soup.find_all():
  21. if 'sandbox' in tag.attrs:
  22. return True
  23. else:
  24. # Check for specified sandbox attributes
  25. for tag in soup.find_all():
  26. if 'sandbox' in tag.attrs and any(attr in tag.attrs['sandbox'] for attr in sandbox_attributes):
  27. return True
  28. return False
  29. except requests.exceptions.RequestException as e:
  30. print(f"Error fetching URL: {e}")
  31. return None # Indicate an error
  32. except Exception as e:
  33. print(f"Error parsing HTML: {e}")
  34. return None #Indicate an error
  35. if __name__ == '__main__':
  36. parser = argparse.ArgumentParser(description='Check HTML documents for sandbox usage.')
  37. parser.add_argument('url', help='The URL of the HTML document.')
  38. parser.add_argument('--sandbox', nargs='+', help='Specific sandbox attributes to search for (e.g., allow-scripts allow-forms).')
  39. args = parser.parse_args()
  40. result = check_sandbox(args.url, args.sandbox)
  41. if result is not None:
  42. if result:
  43. print("Sandbox attribute found.")
  44. else:
  45. print("No sandbox attribute found.")

Add your comment