1. import pandas as pd
  2. import numpy as np
  3. def filter_time_data_with_timeout(df, time_column, timeout_seconds):
  4. """
  5. Filters a DataFrame based on time values, considering a timeout.
  6. Args:
  7. df (pd.DataFrame): The input DataFrame.
  8. time_column (str): The name of the column containing time values.
  9. timeout_seconds (int): The timeout in seconds. Rows with time differences
  10. greater than this will be filtered out.
  11. Returns:
  12. pd.DataFrame: A DataFrame containing the filtered data.
  13. """
  14. if time_column not in df.columns:
  15. raise ValueError(f"Time column '{time_column}' not found in DataFrame.")
  16. # Calculate time differences between consecutive rows
  17. df['time_diff'] = df[time_column].diff()
  18. # Filter out rows where the time difference exceeds the timeout
  19. filtered_df = df[abs(df['time_diff']) <= pd.Timedelta(seconds=timeout_seconds)]
  20. # Remove the temporary time_diff column
  21. filtered_df = filtered_df.drop('time_diff', axis=1)
  22. return filtered_df
  23. if __name__ == '__main__':
  24. #Example usage
  25. data = {'timestamp': pd.to_datetime(['2023-10-26 10:00:00', '2023-10-26 10:00:01', '2023-10-26 10:00:05', '2023-10-26 10:00:10', '2023-10-26 10:00:15']),
  26. 'value': [1, 2, 3, 4, 5]}
  27. df = pd.DataFrame(data)
  28. timeout = 5 # seconds
  29. filtered_data = filter_time_data_with_timeout(df.copy(), 'timestamp', timeout) #use copy to avoid modifying original df
  30. print(filtered_data)

Add your comment