1. <?php
  /**
   * Reads a file line by line and collects the lines accepted by cleanLine().
   *
   * Every line read from the file is passed to cleanLine(); lines for which it
   * returns false are skipped. The file handle is closed on every exit path,
   * including when cleaning a line throws.
   *
   * @param string $filePath The path to the file to clean.
   * @return array|false The cleaned lines in file order, or false if the file
   *                     could not be opened, reading stopped before end of
   *                     file, or an Exception was thrown while processing.
   */
  function cleanData(string $filePath): array|false
  {
      $cleanedData = [];
      $fileHandle = false;

      try {
          // Kept inside the try block so that an error handler which converts
          // warnings into exceptions is still caught and reported below.
          $fileHandle = fopen($filePath, 'r');
          if ($fileHandle === false) {
              error_log("Error opening file: $filePath");
              return false;
          }

          while (($line = fgets($fileHandle)) !== false) {
              $cleanedLine = cleanLine($line);
              if ($cleanedLine !== false) {
                  $cleanedData[] = $cleanedLine;
              }
          }

          // fgets() returns false both at end of file and on a read error;
          // only the former means the whole file was processed.
          if (!feof($fileHandle)) {
              error_log("Error reading file: $filePath");
              return false;
          }

          return $cleanedData;
      } catch (Exception $e) {
          error_log("An unexpected error occurred: " . $e->getMessage());
          return false;
      } finally {
          // Runs on normal return, on the early returns above, and when an
          // exception or error propagates, so the handle is never leaked.
          if (is_resource($fileHandle)) {
              fclose($fileHandle);
          }
      }
  }
  /**
   * Cleans a single line of data.
   *
   * Steps, applied in this order:
   *  1. Keep only the first 256 bytes of the line.
   *  2. Trim leading and trailing whitespace.
   *  3. Remove every character that is not an ASCII letter, a digit, or
   *     whitespace.
   *  4. Replace each run of more than 100 whitespace characters with a single
   *     space.
   *  5. Remove any remaining non-ASCII bytes.
   *
   * @param string $line The line of data to clean.
   * @return string|false The cleaned line, or false if nothing is left after
   *                      cleaning or a regular expression fails.
   */
  function cleanLine(string $line): string|false
  {
      // substr() returns the input unchanged when it is already within the
      // limit, so no explicit length check is needed before truncating.
      $line = trim(substr($line, 0, 256));

      // Patterns are single-quoted so that PCRE, not PHP, interprets the
      // escapes. A double-quoted "\x00" would embed a raw NUL byte in the
      // pattern, which PHP versions before 8.2 reject.
      $rules = [
          '/[^a-zA-Z0-9\s]/' => '',  // keep alphanumerics and whitespace only
          '/\s{101,}/'       => ' ', // collapse runs of more than 100 whitespace chars
          '/[^\x00-\x7F]/'   => '',  // drop anything outside the ASCII range
      ];

      foreach ($rules as $pattern => $replacement) {
          $line = preg_replace($pattern, $replacement, $line);
          if ($line === null) {
              // preg_replace() signals failure with null; report it rather
              // than passing null on to the next step.
              error_log('Error cleaning line: ' . preg_last_error_msg());
              return false;
          }
      }

      // Compare against '' explicitly: empty() would also reject the valid
      // line "0".
      if ($line === '') {
          return false;
      }

      return $line;
  }
  // Example usage:
  // $filePath = 'data.txt';
  // $cleanedData = cleanData($filePath);
  // if ($cleanedData !== false) {
  //     print_r($cleanedData);
  // } else {
  //     echo "Data cleaning failed.\n";
  // }
  63. ?>

Add your comment