import pandas as pd
import os
import zipfile
def archive_dataset(data, filename, compression='zip'):
    """
    Archive a pandas DataFrame to a CSV file, optionally zip-compressed.

    The DataFrame index is not written. This function is best-effort: any
    failure (including an unsupported ``compression`` value) is reported on
    stdout instead of being raised to the caller.

    Args:
        data (pd.DataFrame): The DataFrame to archive.
        filename (str): The name of the archive file
            (e.g., 'my_dataset.zip' or 'my_dataset.csv').
        compression (str): The compression type ('zip' or None).
            Defaults to 'zip'. None always produces an uncompressed CSV,
            regardless of the extension of ``filename``.

    Returns:
        None
    """
    try:
        if compression not in ('zip', None):
            raise ValueError("Invalid compression type. Choose 'zip' or None.")
        # Forward the validated value explicitly. Omitting the argument would
        # leave pandas on its default of 'infer', which compresses based on
        # the file extension (e.g. '.gz', '.zip') even when the caller asked
        # for no compression.
        data.to_csv(filename, index=False, compression=compression)
        print(f"Dataset archived to {filename}")
    except Exception as e:
        # Deliberately broad: archiving must not crash the caller.
        print(f"Error archiving dataset: {e}")
if __name__ == '__main__':
    # Example usage: build a small sample DataFrame and archive it once per
    # supported mode (zip-compressed, then plain CSV).
    sample_frame = pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']})
    demo_targets = (
        ('my_dataset.zip', 'zip'),
        ('my_dataset.csv', None),
    )
    for target_path, mode in demo_targets:
        archive_dataset(sample_frame, target_path, compression=mode)
# Add your comment