1. import pandas as pd
  2. import json
  3. import io
  4. def merge_session_cookies(input_files, output_file):
  5. """
  6. Merges session cookies from multiple files efficiently, handling large datasets.
  7. """
  8. all_cookies = []
  9. for file in input_files:
  10. try:
  11. with open(file, 'r') as f:
  12. # Assuming each file contains a list of cookie strings, one per line.
  13. cookies = [line.strip() for line in f]
  14. all_cookies.extend(cookies)
  15. except FileNotFoundError:
  16. print(f"Warning: File not found: {file}")
  17. continue # Skip to the next file
  18. # Use pandas for efficient data manipulation, even with large lists
  19. df = pd.DataFrame({'cookie': all_cookies})
  20. # Remove duplicate cookies
  21. df = df.drop_duplicates()
  22. # Save to a new file (e.g., CSV or JSON)
  23. df.to_csv(output_file, index=False) # Or df.to_json(output_file, orient='records')
  24. print(f"Merged cookies saved to {output_file}")
  25. if __name__ == '__main__':
  26. # Example Usage:
  27. input_files = ['cookies1.txt', 'cookies2.txt', 'cookies3.txt'] # Replace with your file names
  28. output_file = 'merged_cookies.csv'
  29. #Create dummy files for testing
  30. with open('cookies1.txt', 'w') as f:
  31. f.write('cookie1\ncookie2\ncookie3\n')
  32. with open('cookies2.txt', 'w') as f:
  33. f.write('cookie2\ncookie4\ncookie5\n')
  34. with open('cookies3.txt', 'w') as f:
  35. f.write('cookie1\ncookie6\ncookie7\n')
  36. merge_session_cookies(input_files, output_file)

Add your comment