1. import json
  2. import heapq
  3. def sort_json_records(json_file_path, key_path, sort_order='asc', chunk_size=1000):
  4. """
  5. Sorts records of JSON objects from a file with limited memory usage.
  6. Args:
  7. json_file_path (str): Path to the JSON file.
  8. key_path (str): Path to the key used for sorting (e.g., "field1.field2").
  9. sort_order (str): 'asc' for ascending, 'desc' for descending.
  10. chunk_size (int): Number of records to process in each chunk.
  11. Returns:
  12. list: Sorted list of JSON objects.
  13. """
  14. results = []
  15. with open(json_file_path, 'r') as f:
  16. for chunk in read_in_chunks(f, chunk_size):
  17. for record in chunk:
  18. try:
  19. record_dict = json.loads(record)
  20. value = get_value_from_path(record_dict, key_path)
  21. if sort_order == 'asc':
  22. heapq.heappush(results, (value, record_dict))
  23. elif sort_order == 'desc':
  24. heapq.heappush(results, (-value, record_dict)) #Negate for descending order
  25. else:
  26. raise ValueError("Invalid sort_order. Use 'asc' or 'desc'.")
  27. except (json.JSONDecodeError, KeyError, TypeError) as e:
  28. print(f"Skipping invalid record: {record}. Error: {e}")
  29. continue
  30. # Extract sorted records from the heap
  31. sorted_records = [record for _, record in heapq.nlargest(len(results), results)]
  32. return sorted_records
  33. def read_in_chunks(file_object, chunk_size):
  34. """
  35. Helper function to read a file in chunks.
  36. """
  37. while True:
  38. chunk = []
  39. for _ in range(chunk_size):
  40. line = file_object.readline()
  41. if not line:
  42. break
  43. chunk.append(line)
  44. if not chunk:
  45. break
  46. yield chunk
  47. def get_value_from_path(data, key_path):
  48. """
  49. Helper function to get a value from a nested dictionary using a path.
  50. """
  51. keys = key_path.split('.')
  52. value = data
  53. for key in keys:
  54. if isinstance(value, dict) and key in value:
  55. value = value[key]
  56. else:
  57. raise KeyError(f"Key '{key}' not found in data.")
  58. return value

Add your comment