diff --git a/dedup.py b/dedup.py index 6b50d56..810f431 100644 --- a/dedup.py +++ b/dedup.py @@ -1,35 +1,19 @@ def remove_duplicates(input_filename, output_filename): - """ - Removes duplicate lines from a file while preserving order. - - Args: - input_filename (str): Path to the input file - output_filename (str): Path to the output file - """ - seen_lines = set() # Holds lines we've already seen - + seen_lines = set() try: - # Use context managers for proper file handling with open(input_filename, 'r') as infile, \ open(output_filename, 'w') as outfile: - - # Process each line in the input file for line in infile: - # Check if we haven't seen this line before if line not in seen_lines: - # Write the line to output and mark it as seen outfile.write(line) seen_lines.add(line) - print(f"Successfully processed {input_filename}") - except FileNotFoundError: print(f"Error: Could not find input file '{input_filename}'") except PermissionError: print(f"Error: Permission denied for accessing files") except Exception as e: print(f"An error occurred: {e}") - if __name__ == "__main__": input_file = input("Input file: ") output_file = input("Output file: ") diff --git a/file b/file deleted file mode 100644 index 0c74c9e..0000000 --- a/file +++ /dev/null @@ -1,279 +0,0 @@ -||windows.net^ -||microsoftcasualgames.com^ -||microsoftcasualgames.net^ -||microsoftcasualgames.org^ -||azure.com^ -||outlook.com^ -||hotmail.com^ -||live.com^ -||office365.com^ -||office.net^ -||microsoft.ch^ -||microsoft.sucks^ -||github.com^ -||github.io^ -||gh.io^ -||a-msedge.net^ -||acompli.net^ -||adnexus.com^ -||adnexus.net^ -||adnxs.com^ -||adnxs.net^ -||adnxs-simple.com^ -||appcenter.ms^ -||applicationinsights.io^ -||appnexus.com^ -||appnexus.net^ -||apps.mil^ -||apxns.com^ -||aspnetcdn.com^ -||azure.cn^ -||azure.us^ -||azure-dns.com^ -||azure-dns.info^ -||azure-dns.net^ -||azure-dns.org^ -||azureedge.net^ -||azurefd.net^ -||azurewebsites.net^ -||b-msedge.net^ -||battle.net^ -||bing.com^ -||bing.net^ -||bingapistatistics.com^ -||binguxlivesite.net^ -||bizographics.com -||blizzard.com^ -||blz-contentstack.com^ -||clarity.ms^ -||cortana.ai^ -||dual-s-msedge.net^ -||e-msedge.net^ -||footprint.net^ -||footprintdns.com^ -||footprintpredict.com^ -||gearsofwar.com^ -||gfx.ms^ -||githubapp.com^ -||githubassets.com^ -||githubusercontent.com^ -||halo2.com^ -||halo3.com^ -||halo4.com^ -||halo5.com^ -||hololens.com^ -||l-msedge.net^ -||licdn.com^ -||linkedin.com^ -||live.net^ -||m365copilot.com^ -||microsoft.com^ -||microsoft.us^ -||microsoft-falcon.net^ -||microsoft-hohm.com^ -||microsoft365.com^ -||microsoftonline.com^ -||microsoftonline.us^ -||microsoftonline-p.com^ -||minecraft.com^ -||minecraft.net^ -||mojang.com^ -||msads.net^ -||msauth.net^ -||msauthimages.net^ -||msauthimages.us^ -||msecnd.net^ -||msedge.net^ -||msftauth.net^ -||msftauthimages.net^ -||msftauthimages.us^ -||msgamesresearch.com^ -||msn.cn^ -||msn.co.uk^ -||msn.com^ -||nelreports.net^ -||nsatc.net^ -||office.com^ -||office365.us^ -||office365-net.us^ -||officeppe.net^ -||onedrive.com^ -||onestore.ms^ -||onmicrosoft.com^ -||onmicrosoft.us^ -||passport.net^ -||powerappsportals.com^ -||s-microsoft.com^ -||s-msedge.net^ -||seaofthieves.com^ -||sharepoint.com^ -||sharepointonline.com^ -||skydrive.com^ -||skype.com^ -||t-microsoft.com^ -||t-msedge.net^ -||tagdelivery.com^ -||trafficmanager.net^ -||usgovtrafficmanager.net^ -||v0cdn.net^ -||virtualearth.net^ -||visualstudio.com^ -||windows.com^ -||windows.us^ -||windows7.com^ -||windows8.com^ -||windowsupdate.com^ -||xandr.com^ -||xbox.com^ -||xboxlive.com^ -||xboxservices.com^ -||microsoft.be^ -||microsoft.pl^ -||microsoft.fi^ -||microsoft.cat^ -||microsoft.is^ -||microsoft.ge^ -||microsoft.eu^ -||microsoft.az^ -||microsoft.uz^ -||microsoft.md^ -||microsoft.ru^ -||microsoft.cz^ -||microsoft.pt^ -||microsoft.hu^ -||microsoft.lu^ -||microsoft.ca^ -||microsoft.cl^ -||microsoft.fr^ -||microsoft.vn^ -||microsoft.es^ -||microsoft.se^ -||microsoft.ua^ -||microsoft.ro^ -||microsoft.rs^ -||microsoft.it^ -||microsoft.th^ -||microsoft.nl^ -||microsoft.jp^ -||microsoft.no^ -||microsoft.dk^ -||microsoft.by^ -||microsoft.ee^ -||microsoft.si^ -||microsoft.lt^ -||aka.ms^ -||b-gat.es^ -||github.blog^ -||github.careers^ -||thegithubshop.com^ -||office365.net^ -||microsoft365.net^ -||skypeassets.com^ -||msdn.com^ -||microsoft.org^ -||microsoft.net^ -||microsoft.co.uk^ -||microsoft.co^ -||outlook.dk^ -||skype.dk^ -||live.dk^ -||hotmail.dk^ -||hotmail.se^ -||live.se^ -||outlook.se^ -||skype.se^ -||outlook.fr^ -||skype.fr^ -||live.fr^ -||hotmail.fr^ -||outlook.it^ -||skype.it^ -||live.it^ -||hotmail.it^ -||microsoft.de^ -||outlook.de^ -||skype.de^ -||live.de^ -||hotmail.de^ -||outlook.com.au^ -||askapolitician.net^ -||bing.co.uk^ -||bing.co^ -||bingbar.com^ -||bingbar.net^ -||bingtoolbar.com^ -||hotmail.co.uk^ -||hotmail.eu^ -||hotmail.co^ -||hotmail.net^ -||hotmail.org^ -||live.co.uk^ -||live.co^ -||live.org^ -||live.eu^ -||internetexplorer.com^ -||internetexplorer.co^ -||ie8.co^ -||ie9.com^ -||ie10.com^ -||ie11.com^ -||mepn.com^ -||microsoftsilverlight.com^ -||microsoftsilverlight.org^ -||microsoftsilverlight.net^ -||microsoftsqlserver.com^ -||sqlserver.net^ -||microsoftvisualstudio.com^ -||microsoftvisualstudio.net^ -||visualstudio.co.uk^ -||visualstudio.net^ -||visualstudio.eu^ -||visualstudio.co^ -||skydrive.co^ -||onedrive.co.uk^ -||onedrive.net^ -||onedrive.org^ -||onedrive.co^ -||onedrive.eu^ -||1drv.ms^ -||skype.org^ -||octocaptcha.com^ -||skype.net^ -||skype.co.uk^ -||skype.co^ -||skype.eu^ -||outlook.org^ -||outlook.co^ -||outlook.eu^ -||windowsmobile.com^ -||windowsmobile.co.uk^ -||windowsmobile.org^ -||windowsmobile.co^ -||windowsmobile.eu^ -||windowsphone.com^ -||windowsphone.co.uk^ -||windowsphone.org^ -||windowsphone.co^ -||windowsphone.net^ -||zune.com^ -||zune.co.uk^ -||zune.org^ -||zune.net^ -||zune.co^ -||zune.eu^ -||xbox.co.uk^ -||xbox.org^ -||xbox.co^ -||xbox.eu^ -||xbox360.com^ -||xbox360.co.uk^ -||xbox360.co^ -||xbox360.eu^ -||xbox360.org^ -||xboxone.com^ -||xboxone.co.uk^ -||xboxone.co^ -||xboxone.eu^ -*.microsoft/* -*.xbox/* -*.windows/* \ No newline at end of file