1. import json
  2. import difflib
  3. def diff_json_datasets(dataset1_path, dataset2_path, max_lines=100):
  4. """
  5. Diffs two JSON datasets, limiting the number of lines shown.
  6. Args:
  7. dataset1_path (str): Path to the first JSON dataset file.
  8. dataset2_path (str): Path to the second JSON dataset file.
  9. max_lines (int): Maximum number of lines to display in the diff.
  10. """
  11. try:
  12. with open(dataset1_path, 'r') as f1:
  13. data1 = json.load(f1)
  14. with open(dataset2_path, 'r') as f2:
  15. data2 = json.load(f2)
  16. except FileNotFoundError as e:
  17. print(f"Error: File not found: {e}")
  18. return
  19. except json.JSONDecodeError as e:
  20. print(f"Error: Invalid JSON: {e}")
  21. return
  22. # Convert data to strings for diffing
  23. str1 = json.dumps(data1, indent=4, sort_keys=True)
  24. str2 = json.dumps(data2, indent=4, sort_keys=True)
  25. # Use difflib to find differences
  26. diff = difflib.unified_diff(str1.splitlines(), str2.splitlines(), fromfile=dataset1_path, tofile=dataset2_path)
  27. # Limit the number of lines displayed
  28. for line in diff:
  29. if len(line) > max_lines:
  30. break
  31. print(line)
  32. if __name__ == '__main__':
  33. # Example usage: Replace with your file paths
  34. diff_json_datasets('dataset1.json', 'dataset2.json', max_lines=50)

Add your comment