<?php
/**
 * Reads a file line by line and collects every line accepted by cleanLine().
 *
 * Each line read with fgets() is passed to cleanLine(); lines for which it
 * returns false are skipped. Failures are reported through error_log() and
 * signalled to the caller with a false return value.
 *
 * @param string $filePath The path to the file to clean.
 * @return array|false A list of cleaned lines (possibly empty), or false on error.
 */
function cleanData(string $filePath): array|false
{
    $fileHandle = false;
    try {
        $fileHandle = fopen($filePath, 'r');
        if ($fileHandle === false) {
            error_log("Error opening file: $filePath");
            return false;
        }

        $cleanedData = [];
        while (($line = fgets($fileHandle)) !== false) {
            $cleanedLine = cleanLine($line);
            if ($cleanedLine !== false) {
                $cleanedData[] = $cleanedLine;
            }
        }

        // fgets() returns false both at end of file and on a read error;
        // only the former means the whole file was processed.
        if (!feof($fileHandle)) {
            error_log("Error reading file: $filePath");
            return false;
        }

        return $cleanedData;
    } catch (Exception $e) {
        error_log("An unexpected error occurred: " . $e->getMessage());
        return false;
    } finally {
        // Runs on every exit path (including the early returns above) so the
        // handle is never leaked when something in the loop throws.
        if (is_resource($fileHandle)) {
            fclose($fileHandle);
        }
    }
}
/**
 * Cleans a single line of data.
 *
 * Steps, in order: keep at most the first 256 bytes, trim surrounding
 * whitespace, remove every character that is not an ASCII letter, digit or
 * whitespace, collapse any run of more than 100 whitespace characters into a
 * single space, and strip any remaining non-ASCII bytes.
 *
 * @param string $line The line of data to clean.
 * @return string|false The cleaned line, or false if nothing is left after
 *                      cleaning or a regex step fails.
 */
function cleanLine(string $line): string|false
{
    // Limit line length to 256 bytes (substr() leaves shorter input unchanged),
    // then remove leading and trailing whitespace.
    $line = trim(substr($line, 0, 256));

    // With array arguments preg_replace() applies the patterns one after the
    // other, each on the result of the previous one.
    $line = preg_replace(
        [
            '/[^a-zA-Z0-9\s]/', // keep only alphanumerics and whitespace
            '/\s{101,}/',       // collapse runs of more than 100 whitespace characters
            // Single-quoted so PCRE (not PHP) interprets \x00: a literal NUL
            // byte inside the pattern is rejected by PHP versions before 8.2.
            '/[^\x00-\x7F]/',   // limit character set to ASCII
        ],
        ['', ' ', ''],
        $line
    );

    // preg_replace() returns null on failure; treat that as an invalid line.
    if ($line === null) {
        return false;
    }

    // Compare against '' explicitly: empty() would also reject the valid line "0".
    if ($line === '') {
        return false;
    }

    return $line;
}
// Example usage:
//$filePath = 'data.txt';
//$cleanedData = cleanData($filePath);
//if ($cleanedData !== false) {
// print_r($cleanedData);
//} else {
// echo "Data cleaning failed.\n";
//}
// End of file: the closing PHP tag is omitted so that no trailing text is emitted as output.