import pandas as pd
import numpy as np
def filter_time_data_with_timeout(df, time_column, timeout_seconds):
    """
    Keep the rows of a DataFrame whose gap to the preceding row is within a timeout.

    The gap of each row is the absolute difference between its value in
    ``time_column`` and the value in the row directly above it in ``df``
    (the original row order, not the previous *kept* row).

    The input DataFrame is never modified: the gaps are held in a local
    Series rather than in a temporary column, so an existing column named
    ``time_diff`` in ``df`` is neither overwritten nor removed.

    Args:
        df (pd.DataFrame): The input DataFrame.
        time_column (str): The name of the column containing time values.
            Consecutive differences must be comparable with a
            ``pd.Timedelta`` (e.g. a datetime column).
        timeout_seconds (int): The timeout in seconds. Rows whose time
            difference to the previous row is greater than this are
            filtered out.

    Returns:
        pd.DataFrame: A new DataFrame with the rows that passed the filter,
        with the same columns and index labels as ``df``. The first row is
        always kept, since it has no predecessor and therefore no gap that
        could exceed the timeout. Any other row whose gap is missing
        (NaT) does not compare as within the timeout and is dropped.

    Raises:
        ValueError: If ``time_column`` is not a column of ``df``.
    """
    if time_column not in df.columns:
        raise ValueError(f"Time column '{time_column}' not found in DataFrame.")

    # Absolute gap between each row and the one before it; kept local so the
    # caller's DataFrame is left untouched.
    gaps = df[time_column].diff().abs()

    # Comparisons against NaT evaluate to False, so rows with an undefined
    # gap start out excluded.
    within_timeout = gaps <= pd.Timedelta(seconds=timeout_seconds)

    # The first row's gap is undefined only because nothing precedes it;
    # that is not a timeout violation, so keep it.
    if not within_timeout.empty:
        within_timeout.iloc[0] = True

    # Boolean indexing returns a new DataFrame.
    return df[within_timeout]
if __name__ == '__main__':
    # Demo run: five timestamped readings filtered with a 5-second timeout.
    data = {
        'timestamp': pd.to_datetime([
            '2023-10-26 10:00:00',
            '2023-10-26 10:00:01',
            '2023-10-26 10:00:05',
            '2023-10-26 10:00:10',
            '2023-10-26 10:00:15',
        ]),
        'value': [1, 2, 3, 4, 5],
    }
    df = pd.DataFrame(data)
    timeout = 5  # seconds

    # Hand over a copy so the sample frame stays exactly as built above,
    # whatever the function does with its argument.
    filtered_data = filter_time_data_with_timeout(df.copy(), 'timestamp', timeout)
    print(filtered_data)
# Add your comment  (stray non-code text in the original; commented out so the file parses)