<?php
/**
 * Deduplicates the lines of a text file and writes the unique lines to another file.
 *
 * The first occurrence of each record is kept and the original line order is
 * preserved. Lines that are empty or contain only whitespace are skipped.
 * Output lines are joined with "\n" and no trailing newline is appended.
 *
 * When $fields is empty, two lines are duplicates if they are byte-identical.
 * When $fields is non-empty, each line is parsed as a delimited record
 * (CSV-style quoting with '"') and two lines are duplicates if the selected
 * columns are equal. A column index that does not exist on a line is treated
 * as null for comparison purposes.
 *
 * Failures are reported through error_log() and signalled by returning false.
 *
 * @param string          $inputFile  Path to the input file.
 * @param string          $outputFile Path to the output file.
 * @param array<int, int> $fields     Zero-based column indexes used to build the
 *                                    deduplication key. Defaults to the whole line.
 * @param string          $delimiter  Single-character column delimiter; only used
 *                                    when $fields is non-empty.
 * @return bool True on success, false on failure.
 */
function deduplicateFile(string $inputFile, string $outputFile, array $fields = [], string $delimiter = ','): bool
{
    if (!file_exists($inputFile)) {
        error_log("Input file not found: " . $inputFile);
        return false;
    }

    // $fields is always an array (enforced by the type hint), so the only
    // meaningful question is whether the caller actually selected columns.
    $useFields = $fields !== [];

    // str_getcsv() only accepts a one-character delimiter; reject anything else
    // up front so the failure follows this function's "return false" contract.
    if ($useFields && strlen($delimiter) !== 1) {
        error_log("Delimiter must be a single character: " . $delimiter);
        return false;
    }

    $lines = file($inputFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($lines === false) {
        error_log("Failed to read input file: " . $inputFile);
        return false;
    }

    $seen = [];
    $deduplicatedLines = [];

    foreach ($lines as $line) {
        // Compare against '' explicitly: empty() would also discard a line
        // consisting of the single character "0".
        if (trim($line) === '') {
            continue;
        }

        if ($useFields) {
            // The escape character is passed explicitly because relying on its
            // default value is deprecated in recent PHP versions.
            $columns = str_getcsv($line, $delimiter, '"', '\\');

            $selected = [];
            foreach ($fields as $field) {
                $selected[] = $columns[$field] ?? null;
            }

            // serialize() yields a string key that cannot collide between
            // different value combinations (unlike a plain implode()).
            $key = serialize($selected);
        } else {
            $key = $line;
        }

        if (!isset($seen[$key])) {
            $seen[$key] = true;
            $deduplicatedLines[] = $line;
        }
    }

    if (file_put_contents($outputFile, implode("\n", $deduplicatedLines)) === false) {
        error_log("Failed to write to output file: " . $outputFile);
        return false;
    }

    return true;
}
Add your comment