import io
import json

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
  5. def deserialize_dataset(data, data_type="json"):
  6. """
  7. Deserializes dataset input based on the specified data type.
  8. Args:
  9. data (str): The input data as a string.
  10. data_type (str): The type of data (e.g., "json", "parquet").
  11. Returns:
  12. pandas.DataFrame or pyarrow.Table or None: The deserialized dataset
  13. as a pandas DataFrame or
  14. pyarrow Table, or None if
  15. deserialization fails.
  16. """
  17. try:
  18. if data_type == "json":
  19. # Deserialize JSON data to pandas DataFrame
  20. df = pd.read_json(data)
  21. return df
  22. elif data_type == "parquet":
  23. # Deserialize Parquet data to pyarrow Table
  24. table = pq.read_table(data)
  25. return table
  26. else:
  27. print(f"Error: Unsupported data type: {data_type}")
  28. return None # Return None for unsupported types
  29. except json.JSONDecodeError as e:
  30. print(f"Error decoding JSON: {e}")
  31. return None
  32. except Exception as e:
  33. print(f"Error deserializing dataset: {e}")
  34. return None
  35. if __name__ == '__main__':
  36. # Example Usage (for testing)
  37. # JSON Example
  38. json_data = '{"name": ["Alice", "Bob"], "age": [30, 25]}'
  39. df = deserialize_dataset(json_data, "json")
  40. if df is not None:
  41. print("JSON Deserialized DataFrame:")
  42. print(df)
  43. # Parquet Example
  44. parquet_data = "data=your_parquet_file.parquet" #replace with path to your parquet file
  45. table = deserialize_dataset(parquet_data, "parquet")
  46. if table is not None:
  47. print("\nParquet Deserialized Table:")
  48. print(table)
  49. print(table.schema)
  50. #Example of error handling
  51. invalid_json = '{"name": "Alice", "age": 30' #missing closing bracket
  52. df = deserialize_dataset(invalid_json, "json")
  53. if df is None:
  54. print("\nJSON deserialization failed as expected.")

Add your comment