import os
import zipfile

import pandas as pd
  4. def archive_dataset(data, filename, compression='zip'):
  5. """
  6. Archives a pandas DataFrame to a file for backward compatibility.
  7. Args:
  8. data (pd.DataFrame): The DataFrame to archive.
  9. filename (str): The name of the archive file (e.g., 'my_dataset.csv').
  10. compression (str): The compression type ('zip' or None). Defaults to 'zip'.
  11. """
  12. try:
  13. if compression == 'zip':
  14. # Save to a zip file
  15. data.to_csv(filename, index=False, compression='zip')
  16. elif compression is None:
  17. # Save to a csv file
  18. data.to_csv(filename, index=False)
  19. else:
  20. raise ValueError("Invalid compression type. Choose 'zip' or None.")
  21. print(f"Dataset archived to {filename}")
  22. except Exception as e:
  23. print(f"Error archiving dataset: {e}")
  24. if __name__ == '__main__':
  25. # Example Usage
  26. # Create a sample DataFrame
  27. data = {'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']}
  28. df = pd.DataFrame(data)
  29. # Archive to a zip file
  30. archive_dataset(df, 'my_dataset.zip', compression='zip')
  31. # Archive to a csv file
  32. archive_dataset(df, 'my_dataset.csv', compression=None)

# Add your comment