import time
import random
import json
def generate_metadata(
    num_records=1000,
    *,
    categories=("Electronics", "Clothing", "Books"),
    rng=None,
):
    """Generate sample metadata records.

    Args:
        num_records: Number of records to create. Zero or a negative count
            yields an empty list.
        categories: Non-empty collection of category names to draw from.
            Defaults to the three sample categories.
        rng: Optional object exposing ``choice`` and ``uniform`` (for
            example a seeded ``random.Random``). When omitted, the
            module-level ``random`` generator is used.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``description``,
        ``category``, ``price`` and ``valid``.
    """
    source = random if rng is None else rng
    # Materialise once so any iterable of names works with ``choice``.
    options = tuple(categories)
    return [
        {
            "id": i,
            "name": f"Item {i}",
            "description": "A sample item",
            "category": source.choice(options),
            "price": round(source.uniform(10, 1000), 2),
            "valid": True,  # Initially assume valid
        }
        for i in range(num_records)
    ]
def validate_metadata(metadata, validation_rules):
    """Validate metadata records against the supplied rules.

    Supported rules (any other rule name is ignored):
        ``price_min``: a record's ``price`` must not be below this value.
        ``category_allowed``: a record's ``category`` must be contained in
            this collection.

    A record that lacks the field a rule inspects (or whose price is
    ``None``) cannot satisfy that rule and is reported as invalid rather
    than raising ``KeyError``.

    Args:
        metadata: Iterable of record dicts.
        validation_rules: Mapping of rule name to rule value.

    Returns:
        A list of the records that failed at least one rule, in input
        order. The records themselves are not modified.
    """
    # Resolve which rules are active once instead of once per record.
    check_price = "price_min" in validation_rules
    price_min = validation_rules["price_min"] if check_price else None
    check_category = "category_allowed" in validation_rules
    allowed = validation_rules["category_allowed"] if check_category else ()

    def violates(record):
        if check_price:
            price = record.get("price")
            if price is None or price < price_min:
                return True
        if check_category:
            if "category" not in record or record["category"] not in allowed:
                return True
        return False

    return [record for record in metadata if violates(record)]
def apply_manual_overrides(metadata, overrides):
    """Apply manual field overrides to the matching metadata records.

    Records are matched by their ``"id"`` field, not by their position in
    ``metadata``, so overrides still reach the right record after the list
    has been filtered or reordered. A record without an ``"id"`` key is
    addressable by its list position instead. If several records share an
    id, the first one is updated.

    Override ids that match no record are skipped, and override keys a
    record does not already have are ignored; new fields are never added.

    Args:
        metadata: List of record dicts. Updated in place.
        overrides: Mapping of record id to a dict of ``{field: new_value}``.

    Returns:
        The same ``metadata`` object that was passed in.
    """
    # Index once so every override is a constant-time lookup.
    records_by_id = {}
    for position, record in enumerate(metadata):
        records_by_id.setdefault(record.get("id", position), record)

    for record_id, override_data in overrides.items():
        record = records_by_id.get(record_id)
        if record is None:
            continue
        for key, value in override_data.items():
            if key in record:
                record[key] = value
    return metadata
def measure_performance(metadata, validation_rules, overrides):
    """Time metadata validation and override application separately.

    Validation runs first, so ``invalid_records`` is computed from the
    records as they were before any override was applied.

    Args:
        metadata: Records passed to ``validate_metadata`` and then to
            ``apply_manual_overrides``.
        validation_rules: Rules forwarded to ``validate_metadata``.
        overrides: Overrides forwarded to ``apply_manual_overrides``.

    Returns:
        A tuple ``(validation_time, override_time, invalid_records,
        updated_metadata)``. Both times are elapsed seconds for their own
        step only.
    """
    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which follows the wall clock and may jump.
    validation_start = time.perf_counter()
    invalid_records = validate_metadata(metadata, validation_rules)
    validation_time = time.perf_counter() - validation_start

    # Restart the clock so the override figure excludes validation time.
    override_start = time.perf_counter()
    updated_metadata = apply_manual_overrides(metadata, overrides)
    override_time = time.perf_counter() - override_start

    return validation_time, override_time, invalid_records, updated_metadata
if __name__ == "__main__":
    # Sample data set to run the benchmark against.
    records = generate_metadata(1000)

    # Rules every record is checked against.
    rules = {
        "price_min": 50,
        "category_allowed": ["Electronics", "Clothing"],
    }

    # Hand-written corrections, keyed by record id.
    manual_overrides = {
        0: {"price": 150, "description": "Updated description"},
    }

    # Run validation and overrides, collecting timings and results.
    outcome = measure_performance(records, rules, manual_overrides)
    validation_secs, override_secs, rejected, final_records = outcome

    print(f"Validation Time: {validation_secs:.4f} seconds")
    print(f"Override Time: {override_secs:.4f} seconds")
    print(f"Number of Invalid Records: {len(rejected)}")

    # Persist the post-override records as pretty-printed JSON.
    serialized = json.dumps(final_records, indent=4)
    with open("updated_metadata.json", "w") as out_file:
        out_file.write(serialized)
# Add your comment