def normalize_user_data(data, scaling_method='minmax'):
    """
    Normalize the per-user feature values in a list of user records.

    The features to normalize are the keys of the first record; keys that
    appear only in later records are ignored. Statistics for each feature
    (min/max or mean/standard deviation) are computed once per feature,
    using only the values that are present and not None.

    Args:
        data (list of dict): A list of dictionaries, where each dictionary
            represents one user's data. The input is not modified.
        scaling_method (str): The scaling method to use. Options are
            'minmax' (rescale to [0, 1]), 'zscore' (subtract the mean and
            divide by the population standard deviation), or 'none'
            (values are copied unchanged). Defaults to 'minmax'. Any other
            value prints a warning per feature and copies values unchanged.

    Returns:
        list of dict: One new dictionary per input record, holding the
            normalized value for every feature. A feature that is missing
            or None in a record is None in the output. A feature whose
            present values are all identical normalizes to 0.0. Returns an
            empty list if the input is not a list, is empty, contains a
            non-dictionary item, or the first record has no keys.
    """
    # Local import keeps the function self-contained regardless of the
    # module's top-level imports.
    from collections.abc import Mapping

    if not isinstance(data, list):
        print("Error: Input data must be a list.")
        return []
    if not data:
        return []
    if not all(isinstance(user, Mapping) for user in data):
        print("Error: Each item in data must be a dictionary.")
        return []

    # Determine the keys (features) to normalize from the first user's data.
    keys = list(data[0].keys())
    if not keys:
        print("Error: No keys found in the data.")
        return []

    # Per-feature (offset, scale) pairs, computed once instead of once per
    # user. A scale of None marks a degenerate feature (all values equal),
    # which normalizes to 0.0 to avoid division by zero. Features absent
    # from this mapping are passed through unchanged.
    stats = {}
    for key in keys:
        # Missing and None values must not take part in the statistics.
        present = [
            value
            for value in (user.get(key) for user in data)
            if value is not None
        ]
        if not present:
            continue  # every record yields None for this feature

        if scaling_method == 'minmax':
            min_val = min(present)
            max_val = max(present)
            scale = None if max_val == min_val else max_val - min_val
            stats[key] = (min_val, scale)
        elif scaling_method == 'zscore':
            mean = sum(present) / len(present)
            # Population standard deviation (divides by N, not N - 1).
            std = (sum((value - mean) ** 2 for value in present)
                   / len(present)) ** 0.5
            stats[key] = (mean, None if std == 0 else std)
        elif scaling_method != 'none':
            # Warn once per feature rather than once per user per feature.
            print(f"Warning: Unknown scaling method '{scaling_method}'. Skipping normalization for {key}")

    normalized_data = []
    for user in data:
        normalized_user = {}
        for key in keys:
            value = user.get(key)
            if value is None or key not in stats:
                # Missing value, 'none', or unknown method: pass through.
                normalized_user[key] = value
                continue
            offset, scale = stats[key]
            if scale is None:
                normalized_user[key] = 0.0  # Avoid division by zero
            else:
                normalized_user[key] = (value - offset) / scale
        normalized_data.append(normalized_user)
    return normalized_data
Add your comment