import pandas as pd
import numpy as np
def flag_cookie_anomalies(df, threshold=3, dry_run=True):
    """
    Flag cookies whose value lies unusually far from the mean value.

    A row is anomalous when ``abs(value - mean) > threshold * std``, where
    ``mean`` and ``std`` (population standard deviation, ddof=0) are computed
    over all non-missing entries of the 'value' column. Missing values are
    ignored when computing the statistics and are never flagged.

    Args:
        df (pd.DataFrame): DataFrame containing cookie data. Must have the
            columns 'cookie_name', 'value' and 'timestamp'; 'value' must be
            convertible to float.
        threshold (float): Number of standard deviations from the mean beyond
            which a cookie is considered anomalous.
        dry_run (bool): If True, the flagged cookies are printed and ``df`` is
            left untouched (the result is a copy). If False, the
            'is_anomaly' column is added to ``df`` in place.

    Returns:
        pd.DataFrame: DataFrame with an added boolean 'is_anomaly' column.
        This is a copy of ``df`` when ``dry_run`` is True and ``df`` itself
        otherwise.

    Raises:
        TypeError: If ``df`` is not a pandas DataFrame.
        ValueError: If a required column is missing.
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError("df must be a pandas DataFrame")

    required_columns = ['cookie_name', 'value', 'timestamp']
    for col in required_columns:
        if col not in df.columns:
            raise ValueError(f"DataFrame must contain column '{col}'")

    # Work on a float view of the values; missing entries become NaN.
    cookie_values = df['value'].to_numpy(dtype=float, na_value=np.nan)

    # Default to "not anomalous" so empty / all-missing input needs no
    # special-casing and never triggers mean-of-empty warnings.
    is_anomaly = np.zeros(cookie_values.shape, dtype=bool)
    observed_mask = ~np.isnan(cookie_values)
    if observed_mask.any():
        observed = cookie_values[observed_mask]
        mean = observed.mean()
        std = observed.std()
        # Strict '>' means a zero-variance column flags nothing.
        is_anomaly[observed_mask] = np.abs(observed - mean) > threshold * std

    # A dry run must not touch the caller's DataFrame, so annotate a copy.
    result = df.copy() if dry_run else df
    result['is_anomaly'] = is_anomaly

    print("Anomalous Cookies:")
    print(result[result['is_anomaly']])
    if dry_run:
        print("Dry run mode: No changes made to the DataFrame.")
    return result
# Add your comment