1. import asyncio
  2. from bs4 import BeautifulSoup
  3. import requests
  class TaskQueue:
      """FIFO queue of web-scraping tasks that are executed concurrently.

      Each task is a dict with the keys ``'url'``, ``'selector'`` (a CSS
      selector) and ``'action'`` (``'extract_text'`` or ``'click'``).  Once a
      task has been processed, its outcome is stored under ``task['result']``.
      """

      # Seconds to wait on the server before giving up; without a timeout
      # requests.get() may block its worker thread indefinitely.
      REQUEST_TIMEOUT = 10

      # Actions that _run_task knows how to perform.
      SUPPORTED_ACTIONS = ('extract_text', 'click')

      def __init__(self):
          self.queue = []       # pending task dicts, oldest first
          self.running = False  # True while run() is processing tasks

      def enqueue(self, task):
          """Adds a task to the queue."""
          self.queue.append(task)

      def _fetch(self, url):
          """Downloads ``url`` and returns the raw response body (blocking).

          Raises:
              requests.exceptions.RequestException: on connection errors,
                  timeouts and 4xx/5xx responses.
          """
          response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
          response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
          return response.content

      async def _run_task(self, task):
          """Executes a single task and records its outcome in ``task['result']``.

          Result values:
              * ``'extract_text'``: the stripped text of the first matching
                element, or ``None`` when nothing matches.
              * ``'click'``: ``True`` when the element exists, else ``False``.
              * missing field, unknown action, request failure or any other
                error: ``None``.
          """
          # Validate the task before any I/O so that a malformed task never
          # costs a network round-trip.
          try:
              url = task['url']
              selector = task['selector']
              action = task['action']  # e.g., 'extract_text', 'click'
          except KeyError as e:
              print(f"Error processing task: {e}")
              task['result'] = None
              return

          if action not in self.SUPPORTED_ACTIONS:
              print(f"Unknown action: {action}")
              task['result'] = None
              return

          try:
              # requests is synchronous; run it in the default executor so the
              # event loop stays free to make progress on the other tasks.
              loop = asyncio.get_running_loop()
              content = await loop.run_in_executor(None, self._fetch, url)

              soup = BeautifulSoup(content, 'html.parser')
              element = soup.select_one(selector)

              if element is None:
                  print(f"Element not found with selector: {selector}")
                  task['result'] = None if action == 'extract_text' else False
              elif action == 'extract_text':
                  result = element.get_text(strip=True)
                  print(f"Extracted text from {selector}: {result}")
                  task['result'] = result  # store result for later use if needed
              else:  # action == 'click'
                  print(f"Simulating click on {selector}")
                  # In a full automation system, you'd use a library like Selenium here
                  task['result'] = True  # Indicate successful click
          except requests.exceptions.RequestException as e:
              print(f"Request error: {e}")
              task['result'] = None
          except Exception as e:
              # Boundary handler: one failing task must not take down the rest.
              print(f"Error processing task: {e}")
              task['result'] = None

      async def run(self):
          """Starts the task queue and processes tasks.

          Queued tasks are drained in batches; the tasks of a batch run
          concurrently and are awaited before this coroutine continues, so
          every processed task has its ``'result'`` set when ``run()`` returns.
          Tasks enqueued while a batch is in flight are picked up by the next
          batch.  Setting ``self.running`` to ``False`` stops processing after
          the current batch.
          """
          self.running = True
          try:
              while self.running and self.queue:
                  # Snapshot and empty the queue in FIFO order.
                  batch = self.queue[:]
                  self.queue.clear()
                  outcomes = await asyncio.gather(
                      *(self._run_task(task) for task in batch),
                      return_exceptions=True,
                  )
                  # _run_task handles its own errors; anything that still
                  # escaped (e.g. a task object that is not a dict) is reported
                  # here instead of aborting the remaining tasks.
                  for outcome in outcomes:
                      if isinstance(outcome, Exception):
                          print(f"Error processing task: {outcome}")
          finally:
              self.running = False
          print("All tasks completed.")
  async def main():
      """Build the demo queue of three scraping tasks and run it."""
      demo_queue = TaskQueue()

      # (url, selector, action) for each demo task, in execution order.
      job_specs = (
          ('https://www.example.com', 'h1', 'extract_text'),
          ('https://www.example.com', '#my-button', 'click'),
          ('https://www.google.com', 'input[name="q"]', 'extract_text'),
      )

      # Turn every spec into a task dict and hand it to the queue.
      for page_url, css_selector, action_name in job_specs:
          demo_queue.enqueue(
              {'url': page_url, 'selector': css_selector, 'action': action_name}
          )

      # Process everything that was queued.
      await demo_queue.run()
  if __name__ == "__main__":
      # Script entry point: drive the async main() on a fresh event loop.
      entry_coroutine = main()
      asyncio.run(entry_coroutine)

Add your comment