1. import os
  2. def sanitize_binary_file(filepath):
  3. """
  4. Sanitizes a binary file by checking for basic validity and potential issues.
  5. Prints error messages if problems are found.
  6. """
  7. try:
  8. with open(filepath, 'rb') as f:
  9. data = f.read() # Read the entire file content
  10. except FileNotFoundError:
  11. print(f"Error: File not found at {filepath}")
  12. return False # Indicate failure
  13. if not data:
  14. print(f"Error: File {filepath} is empty.")
  15. return False
  16. # Basic size check (optional - adjust as needed)
  17. if len(data) > 1024 * 1024 * 10: #limit to 10MB
  18. print(f"Warning: File {filepath} is large ({len(data) / (1024*1024*10):.2f} MB). Consider reducing size.")
  19. # Check for invalid characters (example: control characters)
  20. for byte in data:
  21. if byte < 0x20 or byte > 0x7f: # Check for printable ASCII characters
  22. print(f"Warning: File {filepath} contains non-printable character: {byte}")
  23. # Check for potential header issues (example: magic number)
  24. magic_number = data[:4]
  25. if magic_number == b'\x4D\x5A\x90\x00': #Example: JPEG magic number
  26. print(f"Warning: File {filepath} may be a JPEG file. Consider appropriate handling.")
  27. return True # Indicate success
  28. if __name__ == '__main__':
  29. # Example usage:
  30. test_file = "test.bin" #Replace with your test file
  31. #Create a dummy file for testing
  32. with open(test_file, "wb") as f:
  33. f.write(b"\x4D\x5A\x90\x00") #jpeg magic number
  34. f.write(b"This is a test")
  35. if sanitize_binary_file(test_file):
  36. print(f"File {test_file} is considered sanitized.")
  37. else:
  38. print(f"File {test_file} requires attention.")

Add your comment