import json
import random
import time
  4. def generate_metadata(num_records=1000):
  5. """Generates sample metadata records."""
  6. metadata = []
  7. for i in range(num_records):
  8. metadata.append({
  9. "id": i,
  10. "name": f"Item {i}",
  11. "description": "A sample item",
  12. "category": random.choice(["Electronics", "Clothing", "Books"]),
  13. "price": round(random.uniform(10, 1000), 2),
  14. "valid": True # Initially assume valid
  15. })
  16. return metadata
  17. def validate_metadata(metadata, validation_rules):
  18. """Validates metadata against defined rules."""
  19. invalid_records = []
  20. for record in metadata:
  21. is_valid = True
  22. for rule_name, rule_value in validation_rules.items():
  23. if rule_name == "price_min":
  24. if record["price"] < rule_value:
  25. is_valid = False
  26. break
  27. elif rule_name == "category_allowed":
  28. if record["category"] not in rule_value:
  29. is_valid = False
  30. break
  31. # Add more validation rules here
  32. if not is_valid:
  33. invalid_records.append(record)
  34. return invalid_records
  35. def apply_manual_overrides(metadata, overrides):
  36. """Applies manual overrides to metadata."""
  37. for record_id, override_data in overrides.items():
  38. for key, value in override_data.items():
  39. if key in metadata[record_id]:
  40. metadata[record_id][key] = value
  41. return metadata
  42. def measure_performance(metadata, validation_rules, overrides):
  43. """Measures the performance of metadata validation with overrides."""
  44. start_time = time.time()
  45. # Validation
  46. invalid_records = validate_metadata(metadata, validation_rules)
  47. validation_time = time.time() - start_time
  48. # Apply overrides
  49. updated_metadata = apply_manual_overrides(metadata, overrides)
  50. override_time = time.time() - start_time
  51. return validation_time, override_time, invalid_records, updated_metadata
  52. if __name__ == "__main__":
  53. # Define validation rules
  54. validation_rules = {
  55. "price_min": 50,
  56. "category_allowed": ["Electronics", "Clothing"]
  57. }
  58. # Define manual overrides
  59. overrides = {
  60. 0: {"price": 150, "description": "Updated description"}
  61. }
  62. # Generate sample metadata
  63. metadata = generate_metadata(1000)
  64. # Measure performance
  65. validation_time, override_time, invalid_records, updated_metadata = measure_performance(
  66. metadata, validation_rules, overrides
  67. )
  68. print(f"Validation Time: {validation_time:.4f} seconds")
  69. print(f"Override Time: {override_time:.4f} seconds")
  70. print(f"Number of Invalid Records: {len(invalid_records)}")
  71. # Optionally, save the updated metadata to a JSON file
  72. with open("updated_metadata.json", "w") as f:
  73. json.dump(updated_metadata, f, indent=4)

# Add your comment