anonymizer/bs.py
Ireneusz Bachanowicz d20cf39e4a first commit
2025-07-14 17:12:50 +02:00

25 lines
810 B
Python

import sys
from bs4 import BeautifulSoup
def remove_html_tags(input_file, output_file):
    """Strip HTML markup from a file and save the remaining text.

    The whole of ``input_file`` is read as UTF-8, parsed with BeautifulSoup's
    ``html.parser`` backend, and the result of ``get_text()`` is written to
    ``output_file`` as UTF-8, replacing any existing content.

    Args:
        input_file: Path of the HTML file to read.
        output_file: Path of the text file to write.
    """
    with open(input_file, 'r', encoding='utf-8') as source:
        markup = source.read()

    # Parse the markup and reduce it to its text in a single expression.
    plain_text = BeautifulSoup(markup, 'html.parser').get_text()

    with open(output_file, 'w', encoding='utf-8') as sink:
        sink.write(plain_text)
if __name__ == "__main__":
    # Exactly two positional arguments are required: input and output paths.
    if len(sys.argv) != 3:
        # Report misuse on stderr and exit non-zero so calling shells and
        # scripts can detect the failure. The script name is taken from
        # argv[0] so the usage line matches however the file was invoked.
        print(
            f"Usage: python {sys.argv[0]} <input_file> <output_file>",
            file=sys.stderr,
        )
        sys.exit(2)

    input_file, output_file = sys.argv[1:]
    remove_html_tags(input_file, output_file)
    print(f"HTML tags removed. Output written to {output_file}")