# filters/dedup.py
def remove_duplicates(input_filename, output_filename):
    """
    Removes duplicate lines from a file while preserving order.

    Only the first occurrence of each line is written to the output;
    later repeats are skipped. Errors are reported on stdout rather
    than raised, so this function never propagates file errors.

    Args:
        input_filename (str): Path to the input file
        output_filename (str): Path to the output file

    Returns:
        None
    """
    seen_lines = set()  # Lines already written to the output (O(1) membership test)
    try:
        # Context managers guarantee both files are closed even on error.
        with open(input_filename, 'r') as infile, \
                open(output_filename, 'w') as outfile:
            for line in infile:
                # NOTE: lines keep their trailing newline, so a final line
                # without one is treated as distinct from the same text
                # followed by "\n".
                if line not in seen_lines:
                    outfile.write(line)
                    seen_lines.add(line)
        print(f"Successfully processed {input_filename}")
    except FileNotFoundError:
        print(f"Error: Could not find input file '{input_filename}'")
    except PermissionError:
        # Plain literal: the original f-string had no placeholders (F541).
        print("Error: Permission denied for accessing files")
    except Exception as e:
        print(f"An error occurred: {e}")
if __name__ == "__main__":
    # Interactive entry point: prompt for both paths, then deduplicate.
    source_path = input("Input file: ")
    destination_path = input("Output file: ")
    remove_duplicates(source_path, destination_path)