1. import java.util.HashSet;
  2. import java.util.List;
  3. import java.util.Set;
  4. import java.util.ArrayList;
  /**
   * Utility for removing duplicate records from a list.
   *
   * <p>Two records are treated as duplicates when the caller-supplied
   * {@link FieldExtractor} produces the same key for both of them.
   */
  public class RecordDeduplicator {

      /**
       * Returns the records whose key has not already been produced by an earlier
       * record in the list.
       *
       * <p>The first record seen for each key is kept and the encounter order of the
       * input is preserved. Records for which the extractor returns {@code null} are
       * skipped and do not appear in the result. The input list is never modified,
       * which makes the method usable in dry-run scenarios.
       *
       * @param records the records to deduplicate; {@code null} is treated as an empty list
       * @param fieldExtractor computes the uniqueness key of a record
       * @return a new, mutable list holding the first record seen for each distinct key
       * @throws NullPointerException if {@code fieldExtractor} is {@code null}
       */
      public static List<Object> deduplicate(List<?> records, FieldExtractor fieldExtractor) {
          if (fieldExtractor == null) {
              throw new NullPointerException("fieldExtractor must not be null");
          }

          List<Object> uniqueRecords = new ArrayList<>();
          if (records == null) {
              return uniqueRecords;
          }

          Set<String> seenKeys = new HashSet<>();
          for (Object record : records) {
              String key = fieldExtractor.extractKey(record);
              // A null key means the extractor could not identify the record; such
              // records are dropped. Set.add returns false when the key was already
              // present, so a single call both tests for and records the key.
              if (key != null && seenKeys.add(key)) {
                  uniqueRecords.add(record);
              }
          }
          return uniqueRecords;
      }

      /**
       * Extracts the uniqueness key from a record.
       *
       * <p>Returning {@code null} tells {@link #deduplicate(List, FieldExtractor)} to
       * skip the record.
       */
      @FunctionalInterface
      public interface FieldExtractor {
          /**
           * @param record the record to inspect
           * @return the key identifying the record, or {@code null} if none can be derived
           */
          String extractKey(Object record);
      }

      /**
       * Small demonstration: builds a list with repeated ids and prints it before and
       * after deduplication by id.
       *
       * @param args unused
       */
      public static void main(String[] args) {
          List<Object> data = new ArrayList<>();
          data.add(new Record("1", "Alice", "123"));
          data.add(new Record("2", "Bob", "456"));
          data.add(new Record("1", "Alice", "123")); // Duplicate
          data.add(new Record("3", "Charlie", "789"));
          data.add(new Record("2", "Bob", "456")); // Duplicate

          // Uniqueness is decided by the id field alone.
          FieldExtractor extractor = record -> ((Record) record).id;

          List<Object> deduplicatedData = deduplicate(data, extractor);
          System.out.println("Original Data: " + data);
          System.out.println("Deduplicated Data: " + deduplicatedData);
      }

      /** Simple record class used by the demonstration in {@link #main(String[])}. */
      static class Record {
          String id;
          String name;
          String value;

          public Record(String id, String name, String value) {
              this.id = id;
              this.name = name;
              this.value = value;
          }

          /** Renders all three fields so printed lists are readable. */
          @Override
          public String toString() {
              return "Record{id=" + id + ", name=" + name + ", value=" + value + "}";
          }
      }
  }

Add your comment