import re
from collections import defaultdict

  3. def flag_anomalous_submissions(submissions, known_good_users=None):
  4. """
  5. Flags potentially anomalous form submissions based on simple heuristics.
  6. Args:
  7. submissions (list of dict): A list of dictionaries, where each dictionary represents a form submission.
  8. known_good_users (set, optional): A set of user IDs considered trustworthy. Defaults to None.
  9. Returns:
  10. list of dict: A list of flagged submissions.
  11. """
  12. flagged_submissions = []
  13. suspicious_patterns = [
  14. r"password.*123456", # Simple password pattern
  15. r"email.*@example\.com", #Suspicious domain
  16. r"extremely.*long.*string", #Detecting very long strings
  17. r"all.*caps", #Detecting all caps
  18. ]
  19. user_attempts = defaultdict(int)
  20. for submission in submissions:
  21. user_id = submission.get("user_id")
  22. if user_id:
  23. user_attempts[user_id] += 1
  24. #Check for suspicious patterns
  25. for pattern in suspicious_patterns:
  26. if re.search(pattern, submission.get("form_data", "")):
  27. flagged_submissions.append(submission)
  28. break #Only flag once per submission
  29. #Check for excessive attempts
  30. if user_id and user_attempts[user_id] > 5: #Flag if user exceeds attempt limit
  31. flagged_submissions.append(submission)
  32. user_attempts[user_id] = 0 #reset attempt count
  33. if known_good_users:
  34. for submission in submissions:
  35. user_id = submission.get("user_id")
  36. if user_id and user_id not in known_good_users:
  37. flagged_submissions.append(submission)
  38. return flagged_submissions

Add your comment