import argparse
from bs4 import BeautifulSoup
import requests
def check_sandbox(url, sandbox_attributes=None, timeout=10):
    """
    Check an HTML document for the presence of the 'sandbox' attribute.

    Args:
        url (str): The URL of the HTML document to check.
        sandbox_attributes (list, optional): Sandbox tokens to search for
            (e.g. 'allow-scripts'). Defaults to None, which matches any
            element carrying a 'sandbox' attribute regardless of its value.
            Tokens are compared case-insensitively and as whole tokens.
        timeout (float, optional): Seconds to wait for the server before
            giving up. Defaults to 10.

    Returns:
        bool: True if a matching sandbox attribute is found, False
        otherwise. Returns None on error (the error is printed).
    """
    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        soup = BeautifulSoup(response.content, 'html.parser')

        if sandbox_attributes is None:
            wanted = None
        else:
            wanted = {str(token).lower() for token in sandbox_attributes}

        for tag in soup.find_all():
            if 'sandbox' not in tag.attrs:
                continue
            if wanted is None:
                # Any 'sandbox' attribute counts, even an empty one.
                return True

            value = tag.attrs['sandbox']
            # The parser may hand back either a list of tokens or a raw
            # string depending on the tag; split strings so that matching
            # is always per whole token rather than per substring
            # (otherwise 'allow-popups' would match
            # 'allow-popups-to-escape-sandbox').
            if isinstance(value, str):
                tokens = value.split()
            else:
                tokens = value or []
            if any(str(token).lower() in wanted for token in tokens):
                return True
        return False
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None  # Indicate an error
    except Exception as e:
        # Top-level boundary: callers rely on None rather than an exception.
        print(f"Error parsing HTML: {e}")
        return None  # Indicate an error
if __name__ == '__main__':
    # Command-line entry point: read the target URL and optional sandbox
    # tokens, run the check, and report the outcome on stdout.
    cli = argparse.ArgumentParser(
        description='Check HTML documents for sandbox usage.'
    )
    cli.add_argument('url', help='The URL of the HTML document.')
    cli.add_argument(
        '--sandbox',
        nargs='+',
        help='Specific sandbox attributes to search for (e.g., allow-scripts allow-forms).',
    )
    options = cli.parse_args()

    found = check_sandbox(options.url, options.sandbox)
    # None signals that check_sandbox hit an error and already printed it,
    # so nothing further is reported in that case.
    if found is not None:
        print("Sandbox attribute found." if found else "No sandbox attribute found.")
# Add your comment