1. import re
  2. def tokenize_message_queue(message):
  3. """
  4. Tokenizes a message queue string.
  5. Args:
  6. message (str): The message queue string.
  7. Returns:
  8. list: A list of tokens.
  9. """
  10. # Define regex patterns for different tokens
  11. patterns = [
  12. r"([a-zA-Z0-9]+)", # Alphanumeric tokens
  13. r"(\d+)", # Numeric tokens
  14. r"([._-]+)", # Dot, underscore or hyphen separated tokens
  15. r"(\s+)", # Whitespace
  16. r"([`~@#$%^&*()_+=\[\]{};':\",./<>?\|])" # Special characters
  17. ]
  18. tokens = []
  19. for pattern in patterns:
  20. tokens.extend(re.findall(pattern, message))
  21. #Remove empty strings
  22. tokens = [token for token in tokens if token]
  23. return tokens
  24. if __name__ == '__main__':
  25. # Example usage
  26. message = "message.queue_name@example.com 123 abc ~def"
  27. tokens = tokenize_message_queue(message)
  28. print(tokens)

Add your comment