1. import pandas as pd
  2. import numpy as np
  3. def flag_cookie_anomalies(df, threshold=3, dry_run=True):
  4. """
  5. Flags anomalous cookies in a DataFrame.
  6. Args:
  7. df (pd.DataFrame): DataFrame containing cookie data. Must have columns like 'cookie_name', 'value', 'timestamp'.
  8. threshold (float): Number of standard deviations from the mean to consider a cookie anomalous.
  9. dry_run (bool): If True, only prints flagged cookies; doesn't modify the DataFrame.
  10. Returns:
  11. pd.DataFrame: DataFrame with an added 'is_anomaly' column indicating whether a cookie is anomalous.
  12. """
  13. if not isinstance(df, pd.DataFrame):
  14. raise TypeError("df must be a pandas DataFrame")
  15. required_columns = ['cookie_name', 'value', 'timestamp']
  16. for col in required_columns:
  17. if col not in df.columns:
  18. raise ValueError(f"DataFrame must contain column '{col}'")
  19. # Calculate statistics for cookie values
  20. cookie_values = df['value'].values
  21. mean = np.mean(cookie_values)
  22. std = np.std(cookie_values)
  23. # Identify anomalous cookies
  24. df['is_anomaly'] = np.isclose(cookie_values, mean, atol=threshold * std)
  25. if not dry_run:
  26. # Filter and print anomalous cookies
  27. anomalous_cookies = df[df['is_anomaly']]
  28. print("Anomalous Cookies:")
  29. print(anomalous_cookies)
  30. else:
  31. print("Dry run mode: No changes made to the DataFrame.")
  32. return df

Add your comment