1. import os
  2. import re
  3. import time
  4. def clean_filepaths(filepaths, retry_interval=1):
  5. """
  6. Cleans a list of filepaths for manual execution, handling potential issues
  7. like invalid characters and long paths.
  8. Args:
  9. filepaths (list): A list of filepaths to clean.
  10. retry_interval (int): The number of seconds to wait before retrying
  11. if a path cleaning fails.
  12. Returns:
  13. list: A list of cleaned filepaths. Returns the original filepath if cleaning fails after multiple attempts.
  14. """
  15. cleaned_paths = []
  16. for filepath in filepaths:
  17. cleaned_path = filepath # Initialize with the original path
  18. max_retries = 3 # Limit the number of retry attempts
  19. for attempt in range(max_retries):
  20. try:
  21. # 1. Remove invalid characters
  22. cleaned_path = re.sub(r'[\\/*?:"<>|]', '', cleaned_path)
  23. # 2. Limit path length (Windows limit: 260 characters)
  24. max_length = 260
  25. if len(cleaned_path) > max_length:
  26. # Find the last backslash and truncate
  27. last_backslash = cleaned_path.rfind('\\')
  28. if last_backslash != -1:
  29. cleaned_path = cleaned_path[:last_backslash + 1] # include the backslash
  30. else:
  31. cleaned_path = cleaned_path[:max_length] #truncate if no backslashes
  32. # 3. Normalize path (e.g., convert to forward slashes on Windows)
  33. cleaned_path = cleaned_path.replace('\\', '/') #cross-platform
  34. # Check if the path is valid (basic check - can be expanded)
  35. if os.path.exists(cleaned_path):
  36. break # Path is valid, exit retry loop
  37. else:
  38. print(f"Attempt {attempt + 1}/{max_retries}: Path '{filepath}' is invalid. Retrying...")
  39. time.sleep(retry_interval)
  40. except Exception as e:
  41. print(f"Error cleaning path '{filepath}' (attempt {attempt + 1}/{max_retries}): {e}. Retrying...")
  42. time.sleep(retry_interval)
  43. cleaned_paths.append(cleaned_path) # Add the cleaned path to the list
  44. return cleaned_paths

Add your comment