1. import csv
  2. def split_cookie_data(input_file, output_prefix, chunk_size=10000):
  3. """
  4. Splits a cookie data CSV file into smaller files to reduce memory usage.
  5. Args:
  6. input_file (str): Path to the input CSV file.
  7. output_prefix (str): Prefix for the output file names.
  8. chunk_size (int): Number of rows per output file.
  9. """
  10. try:
  11. with open(input_file, 'r', newline='') as infile:
  12. reader = csv.reader(infile)
  13. header = next(reader, None) # Read the header row
  14. chunk_num = 1
  15. row_count = 0
  16. outfile = None
  17. writer = None
  18. for row in reader:
  19. if row_count % chunk_size == 0:
  20. if outfile:
  21. outfile.close()
  22. output_file = f"{output_prefix}_part{chunk_num}.csv"
  23. outfile = open(output_file, 'w', newline='')
  24. writer = csv.writer(outfile)
  25. if header:
  26. writer.writerow(header)
  27. chunk_num += 1
  28. writer.writerow(row)
  29. row_count += 1
  30. if outfile:
  31. outfile.close()
  32. except FileNotFoundError:
  33. print(f"Error: Input file '{input_file}' not found.")
  34. except Exception as e:
  35. print(f"An error occurred: {e}")
  36. if __name__ == '__main__':
  37. # Example usage:
  38. split_cookie_data('cookies.csv', 'cookie_data', chunk_size=5000)

Add your comment