<?php
  /**
   * Deduplicates the records (lines) of a text file and writes the unique ones to another file.
   *
   * Blank and whitespace-only lines are dropped. The first occurrence of each record is kept,
   * in its original order and with its original text. Output records are joined with "\n"
   * (no trailing newline). Failures are reported through error_log() and a false return.
   *
   * When $fields is empty, the whole line is the deduplication key. Otherwise every line is
   * parsed as a delimited record with str_getcsv() and the key is built from the listed
   * columns only; a column missing from a short line counts as null.
   *
   * NOTE(review): the previous implementation never defined what a "field" is (it passed the
   * raw line to array_column(), which cannot work on a string). Zero-based column indexes of a
   * delimited line is the interpretation chosen here - confirm against the callers.
   *
   * @param string $inputFile  Path to the input file.
   * @param string $outputFile Path to the output file.
   * @param array  $fields     Zero-based column indexes to use for deduplication.
   *                           Defaults to the whole line.
   * @param string $delimiter  Single-character column delimiter; only used when $fields is
   *                           not empty.
   * @return bool True on success, false on failure.
   */
  function deduplicateFile(
      string $inputFile,
      string $outputFile,
      array $fields = [],
      string $delimiter = ','
  ): bool {
      if (!file_exists($inputFile)) {
          error_log("Input file not found: " . $inputFile);
          return false;
      }

      // Validate the column selection up front so a bad specifier fails the whole call
      // instead of silently collapsing every record onto the same key.
      $columnIndexes = [];
      if ($fields !== []) {
          if (strlen($delimiter) !== 1) {
              error_log("Delimiter must be a single character: " . var_export($delimiter, true));
              return false;
          }
          foreach ($fields as $field) {
              $index = filter_var($field, FILTER_VALIDATE_INT, ['options' => ['min_range' => 0]]);
              if ($index === false) {
                  error_log("Invalid deduplication field: " . var_export($field, true));
                  return false;
              }
              $columnIndexes[] = $index;
          }
      }

      $lines = file($inputFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
      if ($lines === false) {
          error_log("Failed to read input file: " . $inputFile);
          return false;
      }

      $seen = [];
      $deduplicatedLines = [];
      foreach ($lines as $line) {
          // Strict comparison on purpose: empty() would also discard the valid record "0".
          if (trim($line) === '') {
              continue;
          }

          if ($columnIndexes === []) {
              // No fields requested: the entire line identifies the record.
              $key = $line;
          } else {
              // Enclosure and escape are passed explicitly (the historical defaults).
              $columns = str_getcsv($line, $delimiter, '"', '\\');
              $selected = [];
              foreach ($columnIndexes as $index) {
                  $selected[] = $columns[$index] ?? null;
              }
              // serialize() gives an unambiguous string key, whatever bytes the values hold.
              $key = serialize($selected);
          }

          if (!isset($seen[$key])) {
              $seen[$key] = true;
              $deduplicatedLines[] = $line;
          }
      }

      if (file_put_contents($outputFile, implode("\n", $deduplicatedLines)) === false) {
          error_log("Failed to write to output file: " . $outputFile);
          return false;
      }

      return true;
  }

Add your comment