import re
from collections import defaultdict
# Compiled once at import time so the per-submission loop does not rebuild them.
# NOTE(review): the last two patterns only match those literal words appearing
# in the text; they do not measure string length or detect upper-case input.
_SUSPICIOUS_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"password.*123456",  # "password" followed later by "123456"
        r"email.*@example\.com",  # "email" followed later by "@example.com"
        r"extremely.*long.*string",  # the literal words, in this order
        r"all.*caps",  # the literal words, in this order
    )
)


def _form_text(submission):
    """Return the submission's ``form_data`` as text that can be regex-searched."""
    form_data = submission.get("form_data")
    if form_data is None:
        return ""
    # Non-string payloads (e.g. a dict of fields) are searched via their
    # string representation instead of raising TypeError inside ``re``.
    return form_data if isinstance(form_data, str) else str(form_data)


def flag_anomalous_submissions(submissions, known_good_users=None, *, max_attempts=5):
    """
    Flags potentially anomalous form submissions based on simple heuristics.

    A submission is flagged when any of the following holds:

    * its ``form_data`` text matches one of the suspicious patterns;
    * it pushes its user's running submission count above ``max_attempts``
      (the count for that user is then reset to zero);
    * ``known_good_users`` is non-empty and the submission's ``user_id`` is
      not in it.

    Each submission appears at most once in the result, even when several
    heuristics match it. Submissions flagged by the pattern/attempt checks come
    first (in input order), followed by the remaining submissions flagged only
    by the known-good check (in input order).

    Args:
        submissions (iterable of dict): Form submissions. The keys read are
            ``"user_id"`` and ``"form_data"``; both are optional. ``None`` is
            treated as an empty input. The iterable is consumed once, so
            generators are supported.
        known_good_users (set, optional): User IDs considered trustworthy.
            When ``None`` or empty, the known-good check is skipped entirely.
        max_attempts (int, optional): Number of submissions a user may make
            before the next one is flagged. Defaults to 5.

    Returns:
        list of dict: The flagged submissions (the same dict objects that were
        passed in), without duplicates.
    """
    submissions = list(submissions or ())

    flagged = []
    flagged_positions = set()

    def flag(position):
        # De-duplicate by input position: dicts are unhashable, and two equal
        # dicts at different positions are still distinct submissions.
        if position not in flagged_positions:
            flagged_positions.add(position)
            flagged.append(submissions[position])

    user_attempts = defaultdict(int)
    for position, submission in enumerate(submissions):
        # A falsy user_id (missing, None, "", 0) is treated as "no user".
        user_id = submission.get("user_id")
        if user_id:
            user_attempts[user_id] += 1

        # Check for suspicious patterns.
        text = _form_text(submission)
        if any(pattern.search(text) for pattern in _SUSPICIOUS_PATTERNS):
            flag(position)

        # Check for excessive attempts; the counter restarts after a flag so
        # the user is flagged again only after another max_attempts + 1 tries.
        if user_id and user_attempts[user_id] > max_attempts:
            flag(position)
            user_attempts[user_id] = 0

    if known_good_users:
        for position, submission in enumerate(submissions):
            user_id = submission.get("user_id")
            if user_id and user_id not in known_good_users:
                flag(position)

    return flagged
Add your comment