From 5eb793846fd6c07b0b20f00938261cb2d83f25af Mon Sep 17 00:00:00 2001
From: Ireneusz Bachanowicz
Date: Sun, 2 Mar 2025 19:21:30 +0100
Subject: [PATCH] Debug, mongodb, colors in layout

---
 .env                                   |   3 +
 my-app/app/api/upload-cv/route.ts      |  85 ++++------
 my-app/app/page.tsx                    | 202 ++++++++++++-----------
 my-app/components/CvSummaryPanel.tsx   |  51 +++++-
 my-app/data/cv_summary_history.json    |  26 +++
 my-app/data/cv_summary_processing.json |  28 ++++
 my-app/utils/prompt.txt                |  84 +++++++++-
 my-app/utils/resume_analysis.py        | 211 ++++++++++++++++++++-----
 visuals                                |   1 -
 9 files changed, 499 insertions(+), 192 deletions(-)
 create mode 100644 .env
 create mode 100644 my-app/data/cv_summary_history.json
 create mode 100644 my-app/data/cv_summary_processing.json
 delete mode 160000 visuals

diff --git a/.env b/.env
new file mode 100644
index 0000000..f1301bf
--- /dev/null
+++ b/.env
@@ -0,0 +1,3 @@
+MONGODB_URI=mongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000
+MONGODB_DATABASE=cv_summary_db
+MODEL_NAME=gpt-4

diff --git a/my-app/app/api/upload-cv/route.ts b/my-app/app/api/upload-cv/route.ts
index cab1136..f287bfe 100644
--- a/my-app/app/api/upload-cv/route.ts
+++ b/my-app/app/api/upload-cv/route.ts
@@ -73,55 +73,18 @@ export async function POST(req: Request) {
     const { spawn } = require('child_process');
     const pythonProcess = spawn('python3', [path.join(process.cwd(), 'utils', 'resume_analysis.py'), "-f", extractedTextFilePath]);
 
-    let summary = '';
-    pythonProcess.stdout.on('data', (data: Buffer) => {
-      summary += data.toString();
-    });
-
-    pythonProcess.stderr.on('data', (data: Buffer) => {
-      console.error(`stderr: ${data}`);
-    });
-
+    let rawOutput = '';
     let pythonProcessError = false;
-    let input_tokens = 0;
-    let output_tokens = 0;
-    let total_tokens = 0;
-    let cost = 0;
-    let rawOutput = "";
-    let openaiOutputFilePath = "";
+    let summary: any = null; // Holds the parsed JSON analysis from the Python script
+    let openaiOutputFilePath = path.join(uploadDir, "openai_raw_output.txt"); // Raw-output log path
+
     pythonProcess.stdout.on('data', (data: Buffer) => {
       const output = data.toString();
       rawOutput += output;
-    });
-
-    pythonProcess.on('close', (code: number) => {
-      console.log(`child process exited with code ${code}`);
-      if (code !== 0) {
-        summary = "Error generating summary";
-        pythonProcessError = true;
-      } else {
-        summary = rawOutput.split("Summary: ")[1]?.split("\n--- Usage Information ---")[0] || "Error generating summary";
-        try {
-          input_tokens = parseInt(rawOutput.split("Input tokens: ")[1]?.split("\n")[0] || "0");
-          output_tokens = parseInt(rawOutput.split("Output tokens: ")[1]?.split("\n")[0] || "0");
-          total_tokens = parseInt(rawOutput.split("Total tokens: ")[1]?.split("\n")[0] || "0");
-          cost = parseFloat(rawOutput.split("Cost: $")[1]?.split("\n")[0] || "0");
-
-          // Create OpenAI output file path
-          openaiOutputFilePath = newFilePath.replace(/\.pdf$/i, "_openai.txt");
-          fs.writeFileSync(openaiOutputFilePath, rawOutput);
-          console.log(`OpenAI output saved to: ${openaiOutputFilePath}`);
-
-        } catch (e) {
-          console.error("Error parsing token information", e);
-        }
-      }
-      console.log(`--- Usage Information ---`);
-      console.log(`Input tokens: ${input_tokens}`);
-      console.log(`Output tokens: ${output_tokens}`);
-      console.log(`Total tokens: ${total_tokens}`);
-      console.log(`Cost: $${cost}`);
+      const lines = output.trim().split('\n'); // Split output into lines
+      const jsonOutputLine = lines[lines.length - 1]; // Take the last line as JSON output
+      fs.writeFileSync(openaiOutputFilePath, jsonOutputLine); // Save last line to file
     });
 
     pythonProcess.stderr.on('data', (data: Buffer) => {
@@ -131,28 +94,44 @@ export async function POST(req: Request) {
     pythonProcess.on('close', (code: number) => {
       console.log(`child process exited with code ${code}`);
       if (code !== 0) {
-        summary = "Error generating summary";
+        summary = { error: "Error generating summary" };
         pythonProcessError = true;
+      } else {
+        try {
+          // Parse JSON from the last line of the output
+          const lines = rawOutput.trim().split('\n');
+          const jsonOutputLine = lines[lines.length - 1];
+          summary = JSON.parse(jsonOutputLine);
+        } catch (error: any) { // Typed so error.message compiles under strict catch typing
+          console.error("Failed to parse JSON from python script:", error);
+          summary = { error: "Failed to parse JSON from python script" };
+          pythonProcessError = true;
+          // Log raw output to file for debugging
+          const errorLogPath = path.join(uploadDir, "openai_raw_output.txt");
+          const timestamp = new Date().toISOString();
+          try {
+            fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: ${error.message}\n`);
+            console.log(`Raw Python output logged to ${errorLogPath}`);
+          } catch (logError: any) { // Explicitly type logError as any
+            console.error("Error logging raw output:", logError);
+          }
+        }
       }
-      console.log(`--- Usage Information ---`);
-      console.log(`Input tokens: ${input_tokens}`);
-      console.log(`Output tokens: ${output_tokens}`);
-      console.log(`Total tokens: ${total_tokens}`);
-      console.log(`Cost: $${cost}`);
     });
 
     // Add a timeout to the python process
     const timeout = setTimeout(() => {
       console.error("Python process timed out");
       pythonProcess.kill();
-      summary = "Error generating summary: Timeout";
+      summary = { error: "Error generating summary: Timeout" };
       pythonProcessError = true;
     }, 10000); // 10 seconds
 
     return new Promise((resolve) => {
-      pythonProcess.on('close', (code: number) => {
+      pythonProcess.on('close', () => {
         clearTimeout(timeout);
-        resolve(NextResponse.json({ summary: summary }, { status: pythonProcessError ? 500 : 200 }));
+        const status = pythonProcessError ? 500 : 200;
+        resolve(NextResponse.json(summary, { status }));
       });
     });
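The contract this file now relies on is that `resume_analysis.py` prints exactly one JSON document as its final stdout line. A minimal sketch of that parsing step in isolation (the helper name is illustrative, not part of this patch):

```ts
// Hypothetical helper showing the last-line JSON contract between
// route.ts and resume_analysis.py; parseLastJsonLine is not in the patch.
function parseLastJsonLine(rawOutput: string): unknown {
  const lines = rawOutput.trim().split('\n');
  const lastLine = lines[lines.length - 1]; // the script prints its JSON result last
  try {
    return JSON.parse(lastLine);
  } catch {
    return { error: "Failed to parse JSON from python script" };
  }
}
```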
diff --git a/my-app/app/page.tsx b/my-app/app/page.tsx
index 3848c87..b156da5 100644
--- a/my-app/app/page.tsx
+++ b/my-app/app/page.tsx
@@ -3,20 +3,47 @@
 import Image from "next/image";
 import { FaBriefcase, FaUserGraduate, FaTools, FaFileUpload } from "react-icons/fa";
 import { useState } from "react";
-import CvSummaryPanel from "@/components/CvSummaryPanel"; // Import the new component
+import CvSummaryPanel from "@/components/CvSummaryPanel";
+
+interface SectionData {
+  score: number;
+  suggestions: string[];
+  summary: string;
+  keywords: { [key: string]: number };
+}
+
+interface OpenAiStats {
+  input_tokens: number;
+  output_tokens: number;
+  total_tokens: number;
+  cost: number;
+}
+
+interface SummaryData {
+  sections: {
+    Summary?: SectionData;
+    "Work Experience"?: SectionData;
+    Education?: SectionData;
+    Skills?: SectionData;
+    Certifications?: SectionData;
+    Projects?: SectionData;
+  };
+  openai_stats?: OpenAiStats;
+  error?: string;
+}
 
 export default function Home() {
   const [file, setFile] = useState<File | null>(null);
-  const [summary, setSummary] = useState<string | null>(null);
+  const [summaryData, setSummaryData] = useState<SummaryData | null>(null);
   const [loading, setLoading] = useState(false);
-  const [isSummaryVisible, setIsSummaryVisible] = useState(false); // State for panel visibility
-
+  const [isSummaryVisible, setIsSummaryVisible] = useState(false);
+  const [showDebug, setShowDebug] = useState(false);
 
   const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
     if (event.target.files) {
       setFile(event.target.files[0]);
-      setSummary(null); // Clear previous summary when file changes
-      setIsSummaryVisible(false); // Hide summary panel on new file upload
+      setSummaryData(null);
+      setIsSummaryVisible(false);
     }
   };
@@ -24,11 +51,9 @@ export default function Home() {
     event.preventDefault();
     if (!file) return;
 
-    console.log("handleSubmit: Start"); // ADDED LOGGING
-
     setLoading(true);
-    setSummary(null);
-    setIsSummaryVisible(false); // Hide summary panel while loading
+    setSummaryData(null);
+    setIsSummaryVisible(false);
 
     const formData = new FormData();
     formData.append("cv", file);
@@ -40,34 +65,9 @@ export default function Home() {
       });
 
       if (response.ok) {
-        const stream = response.body;
-        if (!stream) {
-          console.error("No response stream");
-          setLoading(false);
-          return;
-        }
-
-        const reader = stream.getReader();
-        let chunks = '';
-
-        while (true) {
-          const { done, value } = await reader.read();
-          if (done) {
-            break;
-          }
-          chunks += new TextDecoder().decode(value);
-        }
-        const parsed = JSON.parse(chunks);
-
-        console.log("handleSubmit: Parsed response:", parsed); // ADDED LOGGING
-        console.log("handleSubmit: Before setSummary - summary:", summary, "isSummaryVisible:", isSummaryVisible); // ADDED LOGGING
-
-        setSummary(parsed.summary);
-        setIsSummaryVisible(true); // Show summary panel after successful upload
-        console.log("Summary state updated:", parsed.summary);
-        console.log("handleSubmit: After setSummary - summary:", summary, "isSummaryVisible:", isSummaryVisible); // ADDED LOGGING
-
+        const parsed: SummaryData = await response.json();
+        setSummaryData(parsed);
+        setIsSummaryVisible(true);
       } else {
         alert("CV summary failed.");
       }
@@ -76,9 +76,11 @@ export default function Home() {
       alert("An error occurred while summarizing the CV.");
     } finally {
       setLoading(false);
-      console.log("handleSubmit: Finally block - loading:", loading); // ADDED LOGGING
     }
-    console.log("handleSubmit: End"); // ADDED LOGGING
+  };
+
+  const toggleDebug = () => {
+    setShowDebug(!showDebug);
   };
 
   return (
@@ -113,9 +115,9 @@ export default function Home() {
             className="hidden"
             id="cv-upload"
           />
+          <button type="button" onClick={toggleDebug}>Debug</button>
+          {loading ? (
+            <div>
+              <div />
+              <div />
+              <div />
+            </div>
+          ) : summaryData ? (
+            <>
+              {summaryData.error ? (
+                <div>{summaryData.error}</div>
+              ) : (
+                <CvSummaryPanel summary={null} analysisData={summaryData} />
+              )}
+              {summaryData.openai_stats && showDebug && (
+                <div>
+                  <h3>OpenAI Stats</h3>
+                  <p>Input Tokens: {summaryData.openai_stats.input_tokens}</p>
+                  <p>Output Tokens: {summaryData.openai_stats.output_tokens}</p>
+                  <p>Total Tokens: {summaryData.openai_stats.total_tokens}</p>
+                  <p>Cost: ${summaryData.openai_stats.cost}</p>
+                </div>
+              )}
+            </>
+          ) : null}
         </div>
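For orientation, here is a hand-written payload conforming to the `SummaryData` interface declared above; every value is invented for demonstration:

```ts
// Invented sample payload matching the SummaryData shape from page.tsx.
const samplePayload = {
  sections: {
    Skills: {
      score: 6,
      suggestions: ["Group tools by category", "Add proficiency levels"],
      summary: "Broad toolset, but few depth indicators.",
      keywords: { python: 3, typescript: 2 },
    },
  },
  openai_stats: { input_tokens: 1200, output_tokens: 480, total_tokens: 1680, cost: 0.00168 },
};
```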
diff --git a/my-app/components/CvSummaryPanel.tsx b/my-app/components/CvSummaryPanel.tsx
index d797327..96eeb98 100644
--- a/my-app/components/CvSummaryPanel.tsx
+++ b/my-app/components/CvSummaryPanel.tsx
@@ -2,19 +2,60 @@ import React from 'react';
 
 interface CvSummaryPanelProps {
   summary: string | null;
+  analysisData: any | null;
 }
 
-const CvSummaryPanel: React.FC<CvSummaryPanelProps> = ({ summary }) => {
-  if (!summary) {
+const CvSummaryPanel: React.FC<CvSummaryPanelProps> = ({ summary, analysisData }) => {
+  if (!analysisData) {
     return <div>No summary available yet. Upload your CV to see the summary.</div>;
   }
 
+  const sectionColors = {
+    "Summary": "bg-blue-500",
+    "Work Experience": "bg-green-500",
+    "Education": "bg-yellow-500",
+    "Skills": "bg-red-500",
+    "Certifications": "bg-purple-500",
+    "Projects": "bg-teal-500",
+  };
+
   return (
     <div>
-      <h2>CV Summary</h2>
-      <div>
-        {summary}
+      <h2>CV Section Scores</h2>
+      <div>
+        {Object.entries(analysisData.sections).map(([sectionName, sectionData]: [string, any]) => (
+          <div key={sectionName}>
+            <div>
+              <div>{sectionName}</div>
+              <div className={sectionColors[sectionName as keyof typeof sectionColors]}>
+                {sectionData.score}
+              </div>
+            </div>
+            {sectionData.suggestions && sectionData.suggestions.length > 0 && (
+              <ul>
+                {sectionData.suggestions.map((suggestion: string, index: number) => (
+                  <li key={index}>• {suggestion}</li>
+                ))}
+              </ul>
+            )}
+          </div>
+        ))}
       </div>
+      {summary && (
+        <>
+          <h2>CV Summary</h2>
+          <div>
+            {summary}
+          </div>
+        </>
+      )}
     </div>
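One design note on the `sectionColors` map above: an indexed lookup by arbitrary section name can miss. A hedged sketch of a fallback helper — the gray fallback is an assumption, not something in this patch:

```ts
// Illustrative helper mirroring the component's sectionColors map,
// with an assumed fallback for unknown section names.
const sectionColors: Record<string, string> = {
  "Summary": "bg-blue-500",
  "Work Experience": "bg-green-500",
  "Education": "bg-yellow-500",
  "Skills": "bg-red-500",
  "Certifications": "bg-purple-500",
  "Projects": "bg-teal-500",
};

function sectionColor(name: string): string {
  return sectionColors[name] ?? "bg-gray-500"; // fallback color is an assumption
}
```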
   );
 };

diff --git a/my-app/data/cv_summary_history.json b/my-app/data/cv_summary_history.json
new file mode 100644
index 0000000..5eb0ba0
--- /dev/null
+++ b/my-app/data/cv_summary_history.json
@@ -0,0 +1,26 @@
+[
+  {
+    "input_text": "Completed CV text",
+    "output_summary": "Completed summary",
+    "tokens_sent": 120,
+    "tokens_received": 60,
+    "model_used": "GPT-3.5",
+    "timestamp": "2025-03-01T10:00:00Z",
+    "cost": 0.012,
+    "client_id": "client456",
+    "document_id": "doc789",
+    "original_filename": "cv_processed.pdf"
+  },
+  {
+    "input_text": "Another completed CV text",
+    "output_summary": "Another completed summary",
+    "tokens_sent": 180,
+    "tokens_received": 90,
+    "model_used": "GPT-4",
+    "timestamp": "2025-03-01T11:00:00Z",
+    "cost": 0.018,
+    "client_id": "client112",
+    "document_id": "doc131",
+    "original_filename": "resume_processed.docx"
+  }
+]

diff --git a/my-app/data/cv_summary_processing.json b/my-app/data/cv_summary_processing.json
new file mode 100644
index 0000000..fe0e2a3
--- /dev/null
+++ b/my-app/data/cv_summary_processing.json
@@ -0,0 +1,28 @@
+[
+  {
+    "input_text": "Example CV text",
+    "output_summary": "Example summary",
+    "tokens_sent": 100,
+    "tokens_received": 50,
+    "model_used": "GPT-3",
+    "timestamp": "2025-03-02T16:50:00Z",
+    "cost": 0.01,
+    "client_id": "client123",
+    "document_id": "doc456",
+    "original_filename": "cv.pdf",
+    "processing_status": "pending"
+  },
+  {
+    "input_text": "Another example CV text",
+    "output_summary": "Another example summary",
+    "tokens_sent": 150,
+    "tokens_received": 75,
+    "model_used": "GPT-4",
+    "timestamp": "2025-03-02T17:00:00Z",
+    "cost": 0.015,
+    "client_id": "client789",
+    "document_id": "doc101",
+    "original_filename": "resume.docx",
+    "processing_status": "processing"
+  }
+]
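The two fixtures above imply a common record shape. An illustrative TypeScript mirror of that shape — the `ProcessingRecord` name is invented, the field names come from the fixtures:

```ts
// Illustrative type for the records in cv_summary_history.json and
// cv_summary_processing.json; ProcessingRecord is not a name from the patch.
interface ProcessingRecord {
  input_text: string;
  output_summary: string;
  tokens_sent: number;
  tokens_received: number;
  model_used: string;
  timestamp: string;           // ISO-8601
  cost: number;                // USD
  client_id: string;
  document_id: string;
  original_filename: string;
  processing_status?: string;  // present only in the processing fixture
}
```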
} + }, + "Projects": { + "score": , + "suggestions": [, ...], + "summary": , + "keywords": { "": , ... } + } + }, + "openai_stats": { + "input_tokens": , + "output_tokens": , + "total_tokens": , + "cost": + } +} +``` + +**Important: Only output the JSON object. Do not include any additional text, explanations, or conversational elements outside the JSON object in your response.** +You are an expert CV analyzer specialized in Applicant Tracking System (ATS) evaluations. I will provide you with the text of a CV. Your tasks are as follows: + +1. Identify and extract the following CV sections: + - Summary + - Work Experience + - Education + - Skills + - Certifications + - Projects + +2. For each section, perform an ATS analysis by: + - Calculating a score on a scale from 1 to 10 that reflects the completeness, clarity, and relevance of the information. + - Listing specific improvement suggestions for any section that scores below 7. + - Identifying and counting common ATS-related keywords in each section. + - Providing a concise summary of the section, highlighting key strengths and weaknesses. \ No newline at end of file diff --git a/my-app/utils/resume_analysis.py b/my-app/utils/resume_analysis.py index 0e0f8cf..0bfb90c 100644 --- a/my-app/utils/resume_analysis.py +++ b/my-app/utils/resume_analysis.py @@ -3,59 +3,188 @@ import sys import os import argparse import io +import json from dotenv import load_dotenv +load_dotenv() from openai import OpenAI from pdfminer.high_level import extract_text +import pymongo # Import pymongo +from datetime import datetime, timezone # Import datetime and timezone +import uuid -# Load environment variables from .env file -load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), '.env')) +# Directly access environment variables +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") -client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) +client = OpenAI(api_key=OPENAI_API_KEY) + +# MongoDB Connection Details from .env +mongo_uri = os.environ.get("MONGODB_URI") +mongo_db_name = os.environ.get("MONGODB_DATABASE") +mongo_collection_name = "cv_processing_collection" # You can configure this in .env if needed + +# Initialize MongoDB client +mongo_client = pymongo.MongoClient(mongo_uri) +db = mongo_client[mongo_db_name] +cv_collection = db[mongo_collection_name] + +# Configuration +COMPONENT_NAME = "resume_analysis.py" + +# Get log level from environment variable, default to WARN +LOG_LEVEL = os.environ.get("LOG_LEVEL", "WARN").upper() + +# Function for logging +def logger(level, message): + if LOG_LEVEL == "DEBUG": + log_levels = {"DEBUG": 0, "WARN": 1, "ERROR": 2} + elif LOG_LEVEL == "WARN": + log_levels = {"WARN": 0, "ERROR": 1} + elif LOG_LEVEL == "ERROR": + log_levels = {"ERROR": 0} + else: + log_levels = {"WARN": 0, "ERROR": 1} # Default + + if level in log_levels: + timestamp = datetime.now().isoformat() + log_message = f"[{timestamp}] [{COMPONENT_NAME}] [{level}] {message}" + print(log_message) def analyze_resume(text): - response = client.chat.completions.create( - model=os.getenv("MODEL_NAME"), - messages=[{ - "role": "system", - "content": open(os.path.join(os.path.dirname(__file__), "prompt.txt"), "r").read() - }, - {"role": "user", "content": text}], - max_tokens=int(os.getenv("MAX_TOKENS")) - ) - return response + logger("DEBUG", "Starting analyze_resume function") + try: + response = client.chat.completions.create( + model=os.getenv("MODEL_NAME"), + messages=[{ + "role": "system", + "content": open(os.path.join(os.path.dirname(__file__), 
"prompt.txt"), "r").read() + }, + {"role": "user", "content": text}], + max_tokens=int(os.getenv("MAX_TOKENS")) + ) + logger("DEBUG", "analyze_resume function completed successfully") + return response + except Exception as e: + logger("ERROR", f"Error in analyze_resume: {e}") + raise + +def insert_processing_data(text_content, summary, response, args, processing_id): # New function to insert data to MongoDB + logger("DEBUG", "Starting insert_processing_data function") + try: + input_tokens = response.usage.prompt_tokens + output_tokens = response.usage.completion_tokens + total_tokens = response.usage.total_tokens + cost = total_tokens * 0.000001 # rough estimate + + document_data = { + "processing_id": processing_id, + "input_text": text_content, + "output_summary": summary, + "tokens_sent": input_tokens, + "tokens_received": output_tokens, + "model_used": os.getenv("MODEL_NAME"), + "timestamp": datetime.now(timezone.utc).isoformat(), # Current timestamp in UTC + "cost": cost, + "client_id": "client_unknown", # You might want to make these dynamic + "document_id": "doc_unknown", # You might want to make these dynamic + "original_filename": args.file if args.file else "command_line_input", + "processing_status": { + "status": "NEW", + "date": datetime.now(timezone.utc).isoformat() + }, + "openai_stats": { + "input_tokens": input_tokens, + "output_tokens": output_tokens, + "total_tokens": total_tokens, + "cost": cost + } + } + cv_collection.insert_one(document_data) + logger("DEBUG", "Data inserted into MongoDB.") + except Exception as e: + logger("ERROR", f"Error in insert_processing_data: {e}") + raise if __name__ == "__main__": parser = argparse.ArgumentParser(description="Analyze resume text using OpenAI.") parser.add_argument("-f", "--file", help="Path to the file containing the resume text.") args = parser.parse_args() - if args.file: - try: - with open(args.file, "r", encoding="latin-1") as f: - text_content = f.read() - except FileNotFoundError: - print(f"Error: File not found: {args.file}") + try: + if args.file: + try: + with open(args.file, "r", encoding="latin-1") as f: + text_content = f.read() + except FileNotFoundError as e: + logger("ERROR", f"File not found: {args.file} - {e}") + sys.exit(1) + elif len(sys.argv) > 1: + text_content = sys.argv[1] + else: + parser.print_help() sys.exit(1) - elif len(sys.argv) > 1: - text_content = sys.argv[1] - else: - parser.print_help() + + # Generate a unique processing ID + processing_id = str(uuid.uuid4()) + + # Update processing status to PROCESSING + if args.file: + filename = args.file + else: + filename = "command_line_input" + + # Find the document in MongoDB + document = cv_collection.find_one({"original_filename": filename}) + + if document: + document_id = document["_id"] + cv_collection.update_one( + {"_id": document_id}, + {"$set": {"processing_status.status": "PROCESSING", "processing_status.date": datetime.now(timezone.utc).isoformat(), "processing_id": processing_id}} + ) + logger("DEBUG", f"Updated processing status to PROCESSING for document with filename: {filename} and processing_id: {processing_id}") + else: + logger("WARN", f"No document found with filename: {filename}. 
Creating a new document with processing_id: {processing_id}") + + response = analyze_resume(text_content) + try: + content = response.choices[0].message.content + if content.startswith("```json"): + content = content[7:-4] # Remove ```json and ``` + summary = json.loads(content) + except json.JSONDecodeError as e: + logger("WARN", f"Failed to decode JSON from OpenAI response: {e}") + summary = {"error": "Failed to decode JSON from OpenAI"} + error_log_path = "my-app/uploads/cv/openai_raw_output.txt" + try: + with open(error_log_path, "a") as error_file: + error_file.write(f"Processing ID: {processing_id}\n") + error_file.write(f"Error: {e}\n") + error_file.write(f"Raw Response Content:\n{response.choices[0].message.content}\n") + error_file.write("-" * 40 + "\n") # Separator for readability + logger("DEBUG", f"Raw OpenAI response logged to {error_log_path}") + except Exception as log_e: + logger("ERROR", f"Failed to log raw response to {error_log_path}: {log_e}") + + insert_processing_data(text_content, summary, response, args, processing_id) + + # Update processing status to COMPLETED + if document: + cv_collection.update_one( + {"_id": document_id}, + {"$set": {"processing_status.status": "COMPLETED", "processing_status.date": datetime.now(timezone.utc).isoformat()}} + ) + logger("DEBUG", f"Updated processing status to COMPLETED for document with filename: {filename}") + + logger("DEBUG", f"OpenAI > Total tokens used: {response.usage.total_tokens}") + print(json.dumps(summary)) # Ensure JSON output + + except Exception as e: + logger("ERROR", f"An error occurred during processing: {e}") + # Update processing status to FAILED + if document: + cv_collection.update_one( + {"_id": document_id}, + {"$set": {"processing_status.status": "FAILED", "processing_status.date": datetime.now(timezone.utc).isoformat()}} + ) + logger("ERROR", f"Updated processing status to FAILED for document with filename: {filename}") sys.exit(1) - - response = analyze_resume(text_content) - summary = response.choices[0].message.content - - # Print usage information - input_tokens = response.usage.prompt_tokens - output_tokens = response.usage.completion_tokens - total_tokens = response.usage.total_tokens - - print(f"Summary: {summary}") - print(f"\n--- Usage Information ---") - print(f"Input tokens: {input_tokens}") - print(f"Output tokens: {output_tokens}") - print(f"Total tokens: {total_tokens}") - print(f"Cost: ${total_tokens * 0.000001:.6f}") # rough estimate - - print("\n--- Summary from OpenAI ---") - print(f"Total tokens used: {total_tokens}") diff --git a/visuals b/visuals deleted file mode 160000 index c4bc0ae..0000000 --- a/visuals +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c4bc0ae48a812e7601ed2ac462b95e67fb0e322b
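The Python side drives a NEW → PROCESSING → COMPLETED/FAILED lifecycle in MongoDB. A hedged sketch of reading that status from the Node side with the official `mongodb` driver — the function name and fallback values are invented, the collection and field names mirror the script above:

```ts
// Illustrative (not part of the patch): read a record's processing status
// from Node, mirroring resume_analysis.py's status lifecycle.
import { MongoClient } from "mongodb";

type ProcessingStatus = "NEW" | "PROCESSING" | "COMPLETED" | "FAILED";

async function getProcessingStatus(filename: string): Promise<ProcessingStatus | null> {
  const client = new MongoClient(process.env.MONGODB_URI ?? "mongodb://127.0.0.1:27017");
  try {
    await client.connect();
    const doc = await client
      .db(process.env.MONGODB_DATABASE ?? "cv_summary_db")
      .collection("cv_processing_collection")
      .findOne({ original_filename: filename });
    return (doc?.processing_status?.status as ProcessingStatus) ?? null;
  } finally {
    await client.close();
  }
}
```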