25 lines
810 B
Python
25 lines
810 B
Python
import sys
|
|
from bs4 import BeautifulSoup
|
|
|
|
def remove_html_tags(input_file, output_file):
    """Strip the markup from an HTML file and save the remaining text.

    Args:
        input_file: Path of the HTML document to read (decoded as UTF-8).
        output_file: Path the extracted text is written to (encoded as
            UTF-8); opened in write mode, so existing content is replaced.
    """
    # Load the entire document into memory before parsing.
    with open(input_file, mode='r', encoding='utf-8') as source:
        markup = source.read()

    # Parse with the 'html.parser' backend and collect the document's text.
    plain_text = BeautifulSoup(markup, 'html.parser').get_text()

    # Persist the extracted text.
    with open(output_file, mode='w', encoding='utf-8') as sink:
        sink.write(plain_text)
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: expects exactly two positional arguments,
    # the input HTML path and the output text path.
    if len(sys.argv) != 3:
        # Passing a string to sys.exit() prints it to stderr and exits with
        # status 1, so shells and calling scripts can detect the misuse
        # (previously the message went to stdout and the exit status was 0).
        sys.exit("Usage: python remove_html_tags.py <input_file> <output_file>")

    input_file = sys.argv[1]
    output_file = sys.argv[2]
    remove_html_tags(input_file, output_file)
    print(f"HTML tags removed. Output written to {output_file}")
|