<?php
/**
 * Reads a file and reduces its content to a single line of plain text.
 *
 * Processing order (the order matters):
 *  1. Remove <script>/<style> elements together with their content, and HTML
 *     comments. This must happen before strip_tags(), because strip_tags()
 *     removes only the tags and would leave the JavaScript/CSS source behind
 *     as ordinary text.
 *  2. Strip all remaining HTML/PHP tags.
 *  3. Remove backticks and single quotes.
 *  4. Remove non-whitespace ASCII control characters, then collapse every run
 *     of whitespace into one space and trim the ends. Control characters are
 *     removed first so that they cannot leave stray or doubled spaces behind.
 *
 * Failures are reported through error_log() and signalled by returning false.
 *
 * @param string $filePath The path to the file to clean.
 * @return string|false The cleaned data as a string, or false if the path is
 *                      not a regular file, cannot be read, or a regular
 *                      expression fails while cleaning.
 */
function cleanFileData(string $filePath): string|false
{
    // is_file() rather than file_exists(): a directory "exists" but cannot be cleaned.
    if (!is_file($filePath)) {
        error_log("File not found: $filePath");
        return false;
    }

    $fileContent = file_get_contents($filePath);
    if ($fileContent === false) {
        error_log("Failed to read file: $filePath");
        return false;
    }

    // Drop whole script/style elements and comments while the markup is still intact.
    // "s" lets "." span newlines; "i" matches upper-case tag names.
    $fileContent = preg_replace(
        [
            '/<script\b[^>]*>.*?<\/script\s*>/is',
            '/<style\b[^>]*>.*?<\/style\s*>/is',
            '/<!--.*?-->/s',
        ],
        '',
        $fileContent,
    );
    if ($fileContent === null) {
        // preg_replace() yields null on PCRE errors (e.g. backtrack limit exceeded).
        error_log("Failed to clean file: $filePath (" . preg_last_error_msg() . ')');
        return false;
    }

    // Remove the remaining tags, keeping their text content.
    $fileContent = strip_tags($fileContent);

    // Remove potentially harmful characters (backticks, single quotes).
    $fileContent = str_replace(['`', "'"], '', $fileContent);

    // First delete control characters that are not whitespace, then collapse
    // whitespace runs (tabs, newlines, ...) into single spaces.
    $fileContent = preg_replace(
        ['/[\x00-\x08\x0E-\x1F\x7F]/', '/\s+/'],
        ['', ' '],
        $fileContent,
    );
    if ($fileContent === null) {
        error_log("Failed to clean file: $filePath (" . preg_last_error_msg() . ')');
        return false;
    }

    return trim($fileContent);
}
// Example usage:
//$cleanedData = cleanFileData('path/to/your/file.txt');
//if ($cleanedData !== false) {
// echo $cleanedData;
//} else {
// echo "Error cleaning file.";
//}
?>
Add your comment