def _offset_and_scale(values, scaling_method):
    """Return the ``(offset, scale)`` pair used to normalize one feature.

    ``values`` holds the non-missing values of a single key and
    ``scaling_method`` is either ``'minmax'`` or ``'zscore'``. A scale of 0
    marks a constant (or empty) column, which the caller maps to ``0.0``
    instead of dividing by zero.
    """
    if not values:
        return 0, 0
    if scaling_method == 'minmax':
        low, high = min(values), max(values)
        # Compare before subtracting so a constant column never divides by 0.
        return (low, 0) if high == low else (low, high - low)
    # 'zscore': population standard deviation (divide by N, not N - 1).
    mean = sum(values) / len(values)
    std = (sum((v - mean) ** 2 for v in values) / len(values)) ** 0.5
    return mean, std


def normalize_user_data(data, scaling_method='minmax'):
    """Normalize the numeric features of a list of user records.

    The features (keys) to normalize are taken from the first record; keys
    that appear only in later records are ignored. Statistics for each key
    are computed once, over the records that actually have a non-``None``
    value for it, so missing values neither crash the computation nor skew
    the result. The input list and its dictionaries are not modified.

    Args:
        data (list of dict): One dictionary per user. Values of the
            normalized keys are expected to be numeric or ``None``.
        scaling_method (str): ``'minmax'`` rescales each feature to
            ``[0, 1]``; ``'zscore'`` subtracts the mean and divides by the
            population standard deviation; ``'none'`` copies the values
            unchanged. Any other value prints a warning once per key and
            behaves like ``'none'``. Defaults to ``'minmax'``.

    Returns:
        list of dict: One new dictionary per input record, containing the
        keys of the first record. A missing or ``None`` value is reported as
        ``None``; a feature whose values are all equal is reported as
        ``0.0``. An empty list is returned (after printing an error) when
        ``data`` is not a list, contains a non-dict item, or the first
        record has no keys; an empty ``data`` also yields an empty list.
    """
    if not isinstance(data, list):
        print("Error: Input data must be a list.")
        return []
    if not data:
        return []
    if not all(isinstance(user, dict) for user in data):
        print("Error: Each item in data must be a dictionary.")
        return []

    # Determine the keys (features) to normalize from the first user's data.
    keys = list(data[0])
    if not keys:
        print("Error: No keys found in the data.")
        return []

    if scaling_method not in ('minmax', 'zscore'):
        if scaling_method != 'none':
            # Warn once per key rather than once per user per key.
            for key in keys:
                print(f"Warning: Unknown scaling method '{scaling_method}'. Skipping normalization for {key}")
        return [{key: user.get(key) for key in keys} for user in data]

    # Column statistics do not depend on the current user, so compute them
    # once per key instead of once per (user, key) pair.
    scalers = {}
    for key in keys:
        present = [user[key] for user in data if user.get(key) is not None]
        scalers[key] = _offset_and_scale(present, scaling_method)

    normalized_data = []
    for user in data:
        normalized_user = {}
        for key in keys:
            value = user.get(key)
            if value is None:
                normalized_user[key] = None  # Preserve missing values.
                continue
            offset, scale = scalers[key]
            if scale == 0:
                normalized_user[key] = 0.0  # Constant feature.
            else:
                normalized_user[key] = (value - offset) / scale
        normalized_data.append(normalized_user)
    return normalized_data

Add your comment