import requests
from urllib.parse import urlparse
from collections import defaultdict
def remove_duplicate_requests(request_list, keys=('url', 'method')):
    """Remove duplicate HTTP requests from a list, preserving first-seen order.

    Two requests are considered duplicates when they agree on every field
    named in *keys* (by default the URL and the HTTP method).

    Args:
        request_list: A list of dicts, each representing an HTTP request and
            containing at least the fields named in *keys*.
        keys: Iterable of dict keys that together identify a request.
            Defaults to ``('url', 'method')`` for backward compatibility.

    Returns:
        A new list containing only the first occurrence of each request.

    Raises:
        KeyError: If a request dict is missing one of the fields in *keys*.
    """
    seen = set()  # hashed identity tuples — O(1) membership checks
    unique_requests = []
    for request in request_list:
        # Tuple of the identifying fields is hashable, so it can live in a set.
        identity = tuple(request[k] for k in keys)
        if identity not in seen:
            seen.add(identity)
            unique_requests.append(request)
    return unique_requests
if __name__ == '__main__':
    # Demo: the second and fifth entries repeat earlier (url, method) pairs
    # and should be dropped; the POST variants are distinct and kept.
    sample_requests = [
        {'url': 'https://www.example.com', 'method': 'GET'},
        {'url': 'https://www.example.com', 'method': 'GET'},  # Duplicate
        {'url': 'https://www.google.com', 'method': 'GET'},
        {'url': 'https://www.example.com/path', 'method': 'POST'},
        {'url': 'https://www.google.com', 'method': 'GET'},  # Duplicate
        {'url': 'https://www.example.com', 'method': 'POST'},
    ]
    for deduped in remove_duplicate_requests(sample_requests):
        print(deduped)
# NOTE(review): stray editor placeholder ("Add your comment") converted to a
# comment — as bare text it was a SyntaxError that prevented the file from running.