import json
import difflib
def diff_json_datasets(dataset1_path: str, dataset2_path: str, max_lines: int = 100) -> None:
    """Print a unified diff of two JSON files, capped at ``max_lines`` lines.

    Both files are parsed and re-serialized with sorted keys and a 4-space
    indent, so differences in key order or whitespace in the input files do
    not show up in the diff.

    Args:
        dataset1_path: Path to the first JSON dataset file.
        dataset2_path: Path to the second JSON dataset file.
        max_lines: Maximum number of diff lines to print (headers included).
            A value of zero or less prints nothing.

    Returns:
        None. The diff is written to standard output. If a file is missing
        or contains invalid JSON, an error message is printed instead.
    """
    try:
        with open(dataset1_path, 'r', encoding='utf-8') as f1:
            data1 = json.load(f1)
        with open(dataset2_path, 'r', encoding='utf-8') as f2:
            data2 = json.load(f2)
    except FileNotFoundError as e:
        print(f"Error: File not found: {e}")
        return
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON: {e}")
        return

    # Normalize both datasets to a canonical text form before diffing.
    str1 = json.dumps(data1, indent=4, sort_keys=True)
    str2 = json.dumps(data2, indent=4, sort_keys=True)

    # lineterm="" because the inputs come from splitlines() and carry no
    # trailing newlines; the default would make print() emit blank lines
    # after every header and hunk marker.
    diff = difflib.unified_diff(
        str1.splitlines(),
        str2.splitlines(),
        fromfile=dataset1_path,
        tofile=dataset2_path,
        lineterm="",
    )

    # Cap the NUMBER of lines printed. The limit is a line count, not a
    # per-line character length.
    for printed, line in enumerate(diff):
        if printed >= max_lines:
            break
        print(line)
if __name__ == "__main__":
    # Demo invocation when run as a script: substitute your own JSON file
    # paths; at most 50 diff lines are requested.
    diff_json_datasets("dataset1.json", "dataset2.json", max_lines=50)
# NOTE: stray web-page text ("Add your comment") was here; it is not part of the script.