# Standard library
import asyncio
import logging
import queue
import time

# Third-party
import aiohttp

# Configure module-wide logging: timestamp, severity, message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  8. class HTTPTask:
  9. def __init__(self, url, task_id):
  10. self.url = url
  11. self.task_id = task_id
  12. class TaskQueue:
  13. def __init__(self, max_size=100):
  14. self.queue = queue.Queue(maxsize=max_size)
  15. def enqueue(self, task):
  16. self.queue.put(task)
  17. def dequeue(self):
  18. try:
  19. return self.queue.get(timeout=1) # Timeout to prevent indefinite blocking
  20. except queue.Empty:
  21. return None
  22. def size(self):
  23. return self.queue.qsize()
  24. class HTTPWorker:
  25. def __init__(self, session, task_queue):
  26. self.session = session
  27. self.task_queue = task_queue
  28. async def worker(self):
  29. while True:
  30. task = self.task_queue.dequeue()
  31. if task is None:
  32. break # Exit if queue is empty and timeout occurs
  33. try:
  34. start_time = time.time()
  35. async with self.session.get(task.url) as response:
  36. response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
  37. end_time = time.time()
  38. duration = end_time - start_time
  39. logging.info(f"Task {task.task_id} completed. URL: {task.url}, Status: {response.status}, Duration: {duration:.2f}s")
  40. # Process response data here if needed
  41. except aiohttp.ClientError as e:
  42. logging.error(f"Error processing task {task.task_id} for URL {task.url}: {e}")
  43. finally:
  44. self.task_queue.task_done() # Signal that the task is complete
  45. async def main(urls, num_workers=5):
  46. task_queue = TaskQueue()
  47. tasks = []
  48. for i, url in enumerate(urls):
  49. task = HTTPTask(url, i)
  50. task_queue.enqueue(task)
  51. tasks.append(task)
  52. async with aiohttp.ClientSession() as session:
  53. workers = [HTTPWorker(session, task_queue) for _ in range(num_workers)]
  54. await asyncio.gather(*[w.worker() for w in workers]) # Run workers concurrently
  55. if __name__ == "__main__":
  56. urls = [
  57. "https://www.example.com",
  58. "https://www.google.com",
  59. "https://www.python.org",
  60. "https://httpstat.us/200",
  61. "https://httpstat.us/404",
  62. "https://httpstat.us/500"
  63. ]
  64. asyncio.run(main(urls))