1. import pandas as pd
  2. import requests
  3. import time
  4. import logging
  5. logging.basicConfig(level=logging.INFO)
  6. def load_dataset(url, max_retries=3, retry_delay=5):
  7. """
  8. Loads a dataset from a URL with retry logic and fallback.
  9. Args:
  10. url (str): The URL of the dataset.
  11. max_retries (int): The maximum number of retries.
  12. retry_delay (int): The delay in seconds between retries.
  13. Returns:
  14. pandas.DataFrame: The loaded DataFrame, or None if loading fails after all retries.
  15. """
  16. for attempt in range(max_retries):
  17. try:
  18. # Attempt to load the dataset
  19. df = pd.read_csv(url) # Adjust function based on data format (e.g., read_excel, etc.)
  20. logging.info(f"Successfully loaded dataset from {url} on attempt {attempt + 1}")
  21. return df
  22. except requests.exceptions.RequestException as e:
  23. logging.warning(f"Attempt {attempt + 1} failed: {e}")
  24. if attempt < max_retries - 1:
  25. logging.info(f"Retrying in {retry_delay} seconds...")
  26. time.sleep(retry_delay)
  27. else:
  28. logging.error(f"Failed to load dataset from {url} after {max_retries} attempts.")
  29. return None
  30. except pd.errors.ParserError as e:
  31. logging.warning(f"Attempt {attempt + 1} failed parsing the dataset: {e}")
  32. if attempt < max_retries - 1:
  33. logging.info(f"Retrying in {retry_delay} seconds...")
  34. time.sleep(retry_delay)
  35. else:
  36. logging.error(f"Failed to load/parse dataset from {url} after {max_retries} attempts.")
  37. return None
  38. except Exception as e:
  39. logging.error(f"An unexpected error occurred: {e}")
  40. return None
  41. if __name__ == '__main__':
  42. # Example Usage
  43. url = "https://raw.githubusercontent.com/plotly/datasets/master/iris.csv" # Replace with your dataset URL
  44. df = load_dataset(url)
  45. if df is not None:
  46. print("Dataset loaded successfully:")
  47. print(df.head())
  48. else:
  49. print("Failed to load dataset.")

Add your comment