import os
import re
import time
# Characters Windows forbids inside a single path component. Path separators
# are deliberately absent so the directory structure survives cleaning.
_INVALID_COMPONENT_CHARS = re.compile(r'[*?:"<>|]')
# A leading drive specifier such as "C:" whose colon must be preserved.
_DRIVE_PREFIX = re.compile(r'^[A-Za-z]:(?=/|$)')
_MAX_PATH_LENGTH = 260  # Classic Windows path-length limit.
_MAX_RETRIES = 3  # Number of existence checks performed per path.


def _sanitize_path(path):
    """Return *path* with forward slashes, invalid characters removed, length capped."""
    # Normalise separators first so the drive check and the output both use
    # forward slashes regardless of how the input was written.
    normalized = path.replace('\\', '/')
    drive_match = _DRIVE_PREFIX.match(normalized)
    drive = drive_match.group(0) if drive_match else ''
    remainder = _INVALID_COMPONENT_CHARS.sub('', normalized[len(drive):])
    return (drive + remainder)[:_MAX_PATH_LENGTH]


def clean_filepaths(filepaths, retry_interval=1):
    """
    Clean a list of filepaths for manual execution.

    Each path has its backslashes converted to forward slashes, the
    characters ``* ? : " < > |`` removed (a leading drive specifier such as
    ``C:`` keeps its colon), and is truncated to 260 characters. Path
    separators are preserved so the directory structure stays intact.

    After cleaning, the function checks up to three times whether the cleaned
    path exists, sleeping ``retry_interval`` seconds between checks and
    printing a message for every failed check. The cleaned path is returned
    whether or not it exists.

    Args:
        filepaths (list): A list of filepaths to clean.
        retry_interval (int): The number of seconds to wait between
            existence checks of a cleaned path.

    Returns:
        list: One entry per input, in order. String entries are returned
        cleaned; entries that are not strings cannot be cleaned and are
        returned unchanged after a message is printed.
    """
    cleaned_paths = []
    for filepath in filepaths:
        if not isinstance(filepath, str):
            # Cleaning can never succeed for non-string input, so retrying
            # would only waste time; keep the original value instead.
            print(f"Error cleaning path '{filepath}': expected str, got {type(filepath).__name__}.")
            cleaned_paths.append(filepath)
            continue

        # Cleaning is deterministic, so it is done once; only the existence
        # check below can change between attempts.
        cleaned_path = _sanitize_path(filepath)

        for attempt in range(1, _MAX_RETRIES + 1):
            if os.path.exists(cleaned_path):
                break  # Path is valid, exit retry loop
            if attempt < _MAX_RETRIES:
                print(f"Attempt {attempt}/{_MAX_RETRIES}: Path '{filepath}' is invalid. Retrying...")
                time.sleep(retry_interval)
            else:
                # No sleep after the final check: nothing follows it.
                print(f"Attempt {attempt}/{_MAX_RETRIES}: Path '{filepath}' is invalid. Giving up.")

        cleaned_paths.append(cleaned_path)
    return cleaned_paths
# Add your comment