import pandas as pd
import json
import io
def merge_session_cookies(input_files, output_file):
    """Merge cookie lines from several text files into one de-duplicated CSV.

    Each input file is read as plain text with one cookie per line.
    Surrounding whitespace is stripped from every line, and lines that are
    empty after stripping are ignored so they do not end up as bogus empty
    cookie records. Duplicates are removed, keeping the first occurrence, so
    the output preserves the order in which cookies were first seen.

    Args:
        input_files: Iterable of paths to the text files to merge. Paths that
            do not exist are reported with a warning on stdout and skipped.
        output_file: Path of the CSV file to write. It contains a single
            ``cookie`` column with a header row and no index column.

    Returns:
        None. The result is written to ``output_file``.
    """
    all_cookies = []
    for file in input_files:
        try:
            with open(file, 'r') as f:
                stripped_lines = (line.strip() for line in f)
                # Skip blank / whitespace-only lines: they are not cookies.
                all_cookies.extend(cookie for cookie in stripped_lines if cookie)
        except FileNotFoundError:
            # Best effort: a missing input must not abort the whole merge.
            print(f"Warning: File not found: {file}")

    # drop_duplicates() keeps the first occurrence, preserving input order.
    df = pd.DataFrame({'cookie': all_cookies}).drop_duplicates()
    df.to_csv(output_file, index=False)
    print(f"Merged cookies saved to {output_file}")
if __name__ == '__main__':
    # Demo run: create three small sample cookie files in the current
    # directory, then merge them into a single CSV.
    # Replace the file names below with your own to merge real data.
    sample_contents = {
        'cookies1.txt': 'cookie1\ncookie2\ncookie3\n',
        'cookies2.txt': 'cookie2\ncookie4\ncookie5\n',
        'cookies3.txt': 'cookie1\ncookie6\ncookie7\n',
    }
    for sample_path, sample_text in sample_contents.items():
        with open(sample_path, 'w') as handle:
            handle.write(sample_text)

    # Dict iteration order is insertion order, so this matches the file list.
    input_files = list(sample_contents)
    output_file = 'merged_cookies.csv'
    merge_session_cookies(input_files, output_file)
# NOTE: stray "Add your comment" text from the source page; not part of the script.