39 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			39 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import sys
 | |
| import json
 | |
| from markdownify import markdownify as md
 | |
| 
 | |
def convert_html_to_markdown(input_file, output_file):
    """Convert a JSON export of HTML pages into one Markdown document.

    The input file is expected to hold a JSON array of objects. For every
    object a header is written from its ``title``, ``pageID`` and
    ``pageLink`` values (``"N/A"`` when a key is absent), followed by the
    ``content`` HTML converted to Markdown and a horizontal-rule separator.

    Args:
        input_file: Path of the UTF-8 encoded JSON file to read.
        output_file: Path of the UTF-8 encoded Markdown file to write. The
            file is opened in ``'w'`` mode, so existing content is replaced.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    # Read the JSON content from the input file
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    markdown_output = []
    for item in data:
        title = item.get("title", "N/A")
        page_id = item.get("pageID", "N/A")
        page_link = item.get("pageLink", "N/A")

        # ``content`` may be missing or an explicit JSON null; ``.get`` with a
        # default only covers the first case, so normalise both to "".
        html_content = item.get("content") or ""

        # Convert HTML content to Markdown (nothing to convert when empty)
        markdown_content = md(html_content) if html_content else ""

        # Prepend other attributes
        markdown_output.append(f"# {title}\n\n")
        markdown_output.append(f"**Page ID:** {page_id}\n")
        markdown_output.append(f"**Page Link:** {page_link}\n\n")

        # A `---` line directly below text is read as a setext heading
        # underline, not a horizontal rule. Normalise the trailing newlines
        # so the separator is always preceded by a blank line.
        body = markdown_content.rstrip("\n")
        if body:
            markdown_output.append(f"{body}\n\n")
        markdown_output.append("---\n\n")  # Separator between entries

    # Write the Markdown content to the output file
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("".join(markdown_output))
 | |
| 
 | |
if __name__ == "__main__":
    # Command-line entry point: expects exactly two arguments, the input
    # JSON path and the output Markdown path.
    if len(sys.argv) != 3:
        # Report misuse on stderr and exit non-zero so calling shells and
        # scripts can tell the conversion did not run.
        print("Usage: python bs.py <input_json_file> <output_md_file>", file=sys.stderr)
        sys.exit(2)

    input_file = sys.argv[1]
    output_file = sys.argv[2]
    convert_html_to_markdown(input_file, output_file)
    print(f"HTML content converted to Markdown. Output written to {output_file}")
 |