Fixed resume script. Cost is probably handled incorrectly. Integration with the UI is probably broken.

This commit is contained in:
Ireneusz Bachanowicz 2025-03-04 00:22:17 +01:00
parent f40b895749
commit aadf1fe94c
5 changed files with 249 additions and 182 deletions

3
.env
View File

@ -1,3 +0,0 @@
MONGODB_URI=mongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000
MONGODB_DATABASE=cv_summary_db
MODEL_NAME=gpt-4

View File

@ -100,24 +100,30 @@ export async function POST(req: Request): Promise<NextResponse> {
// Parse JSON from the last line of the output // Parse JSON from the last line of the output
const lines = rawOutput.trim().split('\n'); const lines = rawOutput.trim().split('\n');
const jsonOutputLine = lines[lines.length - 1]; const jsonOutputLine = lines[lines.length - 1];
summary = JSON.parse(jsonOutputLine); console.log("Attempting to parse JSON:", jsonOutputLine); // Log raw JSON string
} catch (error) {
console.error("Failed to parse JSON from python script:", error);
summary = { error: "Failed to parse JSON from python script" };
pythonProcessError = true;
// Log raw output to file for debugging
const errorLogPath = path.join(uploadDir, "openai_raw_output.txt");
const timestamp = new Date().toISOString();
try { try {
if (error instanceof Error) { summary = JSON.parse(jsonOutputLine);
fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: ${error.message}\n`); } catch (error) {
} else { console.error("Failed to parse JSON from python script:", error);
fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: Unknown error\n`); console.error("Raw JSON string that failed to parse:", jsonOutputLine); // Log the raw JSON string that failed
summary = { error: "Failed to parse JSON from python script" };
pythonProcessError = true;
// Log raw output to file for debugging
const errorLogPath = path.join(uploadDir, "openai_raw_output.txt");
const timestamp = new Date().toISOString();
try {
if (error instanceof Error) {
fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: ${error.message}\nFailed JSON String:\n${jsonOutputLine}\n`); // Include failed JSON string in log
} else {
fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: Unknown error\nFailed JSON String:\n${jsonOutputLine}\n`); // Include failed JSON string in log
}
console.log(`Raw Python output logged to ${errorLogPath}`);
} catch (logError: any) { // Explicitly type logError as any
console.error("Error logging raw output:", logError);
} }
console.log(`Raw Python output logged to ${errorLogPath}`);
} catch (logError: any) { // Explicitly type logError as any
console.error("Error logging raw output:", logError);
} }
} catch (outerError) { // Correctly placed catch block for the outer try
console.error("Outer try block error:", outerError);
} }
} }
}); });
@ -128,7 +134,7 @@ export async function POST(req: Request): Promise<NextResponse> {
pythonProcess.kill(); pythonProcess.kill();
summary = { error: "Error generating summary: Timeout" }; summary = { error: "Error generating summary: Timeout" };
pythonProcessError = true; pythonProcessError = true;
}, 10000); // 10 seconds }, 30000); // 30 seconds
return new Promise<NextResponse>((resolve) => { return new Promise<NextResponse>((resolve) => {
pythonProcess.on('close', () => { pythonProcess.on('close', () => {

View File

@ -0,0 +1,20 @@
{
"choices": [
{
"message": {
"content": "Mockup analysis result",
"role": "assistant"
}
}
],
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50,
"total_tokens": 150
},
"openai_stats": {
"prompt_tokens": 100,
"completion_tokens": 50,
"total_tokens": 150
}
}

View File

@ -78,6 +78,6 @@ You are an expert CV analyzer specialized in Applicant Tracking System (ATS) eva
2. For each section, perform an ATS analysis by: 2. For each section, perform an ATS analysis by:
- Calculating a score on a scale from 1 to 10 that reflects the completeness, clarity, and relevance of the information. - Calculating a score on a scale from 1 to 10 that reflects the completeness, clarity, and relevance of the information.
- Listing specific improvement suggestions for any section that scores below 7. - Listing specific improvement suggestions for any section that scores below 9.
- Identifying and counting common ATS-related keywords in each section. - Identifying and counting common ATS-related keywords in each section.
- Providing a concise summary of the section, highlighting key strengths and weaknesses. - Providing a concise summary of the section, highlighting key strengths and weaknesses.

368
my-app/utils/resume_analysis.py Normal file → Executable file
View File

@ -2,189 +2,233 @@
import sys import sys
import os import os
import argparse import argparse
import io
import json import json
from dotenv import load_dotenv import logging
load_dotenv() from datetime import datetime, timezone
from openai import OpenAI
from pdfminer.high_level import extract_text
import pymongo # Import pymongo
from datetime import datetime, timezone # Import datetime and timezone
import uuid import uuid
from typing import Optional, Any
import time
# Directly access environment variables from dotenv import load_dotenv
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") import pymongo
import openai
from pdfminer.high_level import extract_text
client = OpenAI(api_key=OPENAI_API_KEY) # Load environment variables
load_dotenv()
# MongoDB Connection Details from .env
mongo_uri = os.environ.get("MONGODB_URI")
mongo_db_name = os.environ.get("MONGODB_DATABASE")
mongo_collection_name = "cv_processing_collection" # You can configure this in .env if needed
# Initialize MongoDB client
mongo_client = pymongo.MongoClient(mongo_uri)
db = mongo_client[mongo_db_name]
cv_collection = db[mongo_collection_name]
# Configuration # Configuration
COMPONENT_NAME = "resume_analysis.py" OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
MODEL_NAME = os.getenv("MODEL_NAME")
MAX_TOKENS = int(os.getenv("MAX_TOKENS", 500))
USE_MOCKUP = os.getenv("USE_MOCKUP", "false").lower() == "true"
MOCKUP_FILE_PATH = os.getenv("MOCKUP_FILE_PATH")
MONGODB_URI = os.getenv("MONGODB_URI")
MONGODB_DATABASE = os.getenv("MONGODB_DATABASE")
# Get log level from environment variable, default to WARN MONGO_COLLECTION_NAME = "cv_processing_collection"
LOG_LEVEL = os.environ.get("LOG_LEVEL", "WARN").upper()
# Function for logging # Initialize OpenAI client
def logger(level, message): openai.api_key = OPENAI_API_KEY
if LOG_LEVEL == "DEBUG":
log_levels = {"DEBUG": 0, "WARN": 1, "ERROR": 2}
elif LOG_LEVEL == "WARN":
log_levels = {"WARN": 0, "ERROR": 1}
elif LOG_LEVEL == "ERROR":
log_levels = {"ERROR": 0}
else:
log_levels = {"WARN": 0, "ERROR": 1} # Default
if level in log_levels: # Logging setup
timestamp = datetime.now().isoformat() LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper()
log_message = f"[{timestamp}] [{COMPONENT_NAME}] [{level}] {message}"
print(log_message)
def analyze_resume(text): logging.basicConfig(
logger("DEBUG", "Starting analyze_resume function") level=LOG_LEVEL,
try: format='[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s',
response = client.chat.completions.create( datefmt='%Y-%m-%dT%H:%M:%S%z'
model=os.getenv("MODEL_NAME"), )
messages=[{
"role": "system",
"content": open(os.path.join(os.path.dirname(__file__), "prompt.txt"), "r").read()
},
{"role": "user", "content": text}],
max_tokens=int(os.getenv("MAX_TOKENS"))
)
logger("DEBUG", "analyze_resume function completed successfully")
return response
except Exception as e:
logger("ERROR", f"Error in analyze_resume: {e}")
raise
def insert_processing_data(text_content, summary, response, args, processing_id): # New function to insert data to MongoDB def get_mongo_collection():
logger("DEBUG", "Starting insert_processing_data function") """Initialize and return MongoDB collection."""
try: mongo_client = pymongo.MongoClient(MONGODB_URI)
input_tokens = response.usage.prompt_tokens db = mongo_client[MONGODB_DATABASE]
output_tokens = response.usage.completion_tokens return db[MONGO_COLLECTION_NAME]
total_tokens = response.usage.total_tokens logger = logging.getLogger(__name__)
cost = total_tokens * 0.000001 # rough estimate
document_data = { def main():
"processing_id": processing_id, """Main function to process the resume."""
"input_text": text_content, parser = argparse.ArgumentParser(
"output_summary": summary, formatter_class=argparse.RawDescriptionHelpFormatter,
"tokens_sent": input_tokens, description="""This tool analyzes resumes using OpenAI's API. Parameters are required to run the analysis.
"tokens_received": output_tokens,
"model_used": os.getenv("MODEL_NAME"), Required Environment Variables:
"timestamp": datetime.now(timezone.utc).isoformat(), # Current timestamp in UTC - OPENAI_API_KEY: Your OpenAI API key
"cost": cost, - MODEL_NAME: OpenAI model to use (e.g. gpt-3.5-turbo)
"client_id": "client_unknown", # You might want to make these dynamic - MONGODB_URI: MongoDB connection string (optional for mockup mode)""",
"document_id": "doc_unknown", # You might want to make these dynamic usage="resume_analysis.py [-h] [-f FILE] [-m]",
"original_filename": args.file if args.file else "command_line_input", epilog="""Examples:
"processing_status": { Analyze a resume: resume_analysis.py -f my_resume.pdf
"status": "NEW", Test with mockup data: resume_analysis.py -f test.pdf -m"""
"date": datetime.now(timezone.utc).isoformat() )
}, parser.add_argument('-f', '--file', help='Path to the resume file to analyze (PDF or text)')
"openai_stats": { parser.add_argument('-m', '--mockup', action='store_true', help='Use mockup response instead of calling OpenAI API')
"input_tokens": input_tokens,
"output_tokens": output_tokens, # If no arguments provided, show help and exit
"total_tokens": total_tokens, if len(sys.argv) == 1:
"cost": cost parser.print_help()
} sys.exit(1)
}
cv_collection.insert_one(document_data)
logger("DEBUG", "Data inserted into MongoDB.")
except Exception as e:
logger("ERROR", f"Error in insert_processing_data: {e}")
raise
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Analyze resume text using OpenAI.")
parser.add_argument("-f", "--file", help="Path to the file containing the resume text.")
args = parser.parse_args() args = parser.parse_args()
try: # Determine whether to use mockup based on the -m flag, overriding USE_MOCKUP
if args.file: use_mockup = args.mockup
try:
with open(args.file, "r", encoding="latin-1") as f: # Load the resume text from the provided file or use mockup
text_content = f.read() if use_mockup:
except FileNotFoundError as e: resume_text = "Mockup resume text"
logger("ERROR", f"File not found: {args.file} - {e}") else:
sys.exit(1) if not os.path.exists(args.file):
elif len(sys.argv) > 1: logger.error(f"File not found: {args.file}")
text_content = sys.argv[1]
else:
parser.print_help()
sys.exit(1) sys.exit(1)
# Generate a unique processing ID start_file_read_time = time.time()
processing_id = str(uuid.uuid4()) with open(args.file, 'r') as f:
resume_text = f.read()
file_read_time = time.time() - start_file_read_time
logger.debug(f"File read time: {file_read_time:.2f} seconds")
# Update processing status to PROCESSING # Call the OpenAI API with the resume text
if args.file: start_time = time.time()
filename = args.file response = call_openai_api(resume_text, use_mockup)
else: openai_api_time = time.time() - start_time
filename = "command_line_input" logger.debug(f"OpenAI API call time: {openai_api_time:.2f} seconds")
# Initialize MongoDB collection only when needed
cv_collection = get_mongo_collection()
# Find the document in MongoDB # Measure MongoDB insertion time
document = cv_collection.find_one({"original_filename": filename}) start_mongo_time = time.time()
cost = insert_processing_data(resume_text, {}, response, args, str(uuid.uuid4()), use_mockup, cv_collection)
mongo_insert_time = time.time() - start_mongo_time
logger.debug(f"MongoDB insert time: {mongo_insert_time:.2f} seconds")
write_openai_response(response, use_mockup, args.file, cost)
if document: def load_mockup_response(mockup_file_path: str) -> dict:
document_id = document["_id"] """Load mockup response from a JSON file."""
cv_collection.update_one( logger.debug(f"Loading mockup response from: {mockup_file_path}")
{"_id": document_id}, if not os.path.exists(mockup_file_path):
{"$set": {"processing_status.status": "PROCESSING", "processing_status.date": datetime.now(timezone.utc).isoformat(), "processing_id": processing_id}} raise FileNotFoundError(f"Mockup file not found at: {mockup_file_path}")
) with open(mockup_file_path, "r") as f:
logger("DEBUG", f"Updated processing status to PROCESSING for document with filename: {filename} and processing_id: {processing_id}") response = json.load(f)
else: response.setdefault("openai_stats", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
logger("WARN", f"No document found with filename: {filename}. Creating a new document with processing_id: {processing_id}") return response
response = analyze_resume(text_content) def call_openai_api(text: str, use_mockup: bool) -> Optional[Any]:
try: """Call OpenAI API to analyze resume text."""
content = response.choices[0].message.content logger.debug("Calling OpenAI API.")
if content.startswith("```json"): try:
content = content[7:-4] # Remove ```json and ``` if use_mockup:
summary = json.loads(content) return load_mockup_response(MOCKUP_FILE_PATH)
except json.JSONDecodeError as e:
logger("WARN", f"Failed to decode JSON from OpenAI response: {e}")
summary = {"error": "Failed to decode JSON from OpenAI"}
error_log_path = "my-app/uploads/cv/openai_raw_output.txt"
try:
with open(error_log_path, "a") as error_file:
error_file.write(f"Processing ID: {processing_id}\n")
error_file.write(f"Error: {e}\n")
error_file.write(f"Raw Response Content:\n{response.choices[0].message.content}\n")
error_file.write("-" * 40 + "\n") # Separator for readability
logger("DEBUG", f"Raw OpenAI response logged to {error_log_path}")
except Exception as log_e:
logger("ERROR", f"Failed to log raw response to {error_log_path}: {log_e}")
insert_processing_data(text_content, summary, response, args, processing_id) with open(os.path.join(os.path.dirname(__file__), "prompt.txt"), "r") as prompt_file:
system_content = prompt_file.read()
# Update processing status to COMPLETED
if document:
cv_collection.update_one(
{"_id": document_id},
{"$set": {"processing_status.status": "COMPLETED", "processing_status.date": datetime.now(timezone.utc).isoformat()}}
)
logger("DEBUG", f"Updated processing status to COMPLETED for document with filename: {filename}")
logger("DEBUG", f"OpenAI > Total tokens used: {response.usage.total_tokens}")
print(json.dumps(summary)) # Ensure JSON output
response = openai.chat.completions.create(
model=MODEL_NAME,
messages=[
{"role": "system", "content": system_content},
{"role": "user", "content": text}
],
max_tokens=MAX_TOKENS
)
logger.debug(f"OpenAI API response: {response}")
return response
except Exception as e: except Exception as e:
logger("ERROR", f"An error occurred during processing: {e}") logger.error(f"Error during OpenAI API call: {e}", exc_info=True)
# Update processing status to FAILED return None
if document:
cv_collection.update_one( def write_openai_response(response: Any, use_mockup: bool, input_file_path: str = None, cost: float = 0) -> None: # Add cost argument
{"_id": document_id}, """Write raw OpenAI response to a file."""
{"$set": {"processing_status.status": "FAILED", "processing_status.date": datetime.now(timezone.utc).isoformat()}} if use_mockup:
) logger.debug("Using mockup response; no OpenAI message to write.")
logger("ERROR", f"Updated processing status to FAILED for document with filename: {filename}") return
sys.exit(1) if response and response.choices: # Changed from hasattr to direct attribute access
message_content = response.choices[0].message.content
logger.debug(f"Raw OpenAI message content: {message_content}")
output_dir = os.path.dirname(input_file_path) if input_file_path else '.'
base_filename = os.path.splitext(os.path.basename(input_file_path))[0] if input_file_path else "default"
processing_id = str(uuid.uuid4())
file_path = os.path.join(output_dir, f"{base_filename}_openai_response_{processing_id}") + ".json"
try:
serializable_response = { # Create a serializable dictionary
"choices": [
{
"message": {
"content": choice.message.content,
"role": choice.message.role
},
"finish_reason": choice.finish_reason,
"index": choice.index
} for choice in response.choices
],
"openai_stats": {
"prompt_tokens": response.usage.prompt_tokens,
"completion_tokens": response.usage.completion_tokens,
"total_tokens": response.usage.total_tokens
},
"cost": cost, # Include cost in the output JSON
"model": response.model
}
with open(file_path, "w") as f:
json.dump(serializable_response, f, indent=2) # Dump the serializable dictionary
logger.debug(f"OpenAI response written to {file_path}")
except IOError as e:
logger.error(f"Failed to write OpenAI response to file: {e}")
else:
logger.warning("No choices in OpenAI response to extract message from.")
logger.debug(f"Response object: {response}")
def insert_processing_data(text_content: str, summary: dict, response: Any, args: argparse.Namespace, processing_id: str, use_mockup: bool, cv_collection) -> float:
    """Store one processing record in MongoDB and return the reported cost.

    Args:
        text_content: Resume text that was analysed; stored under ``text_content``.
        summary: Summary payload, stored unchanged under ``summary``.
        response: OpenAI chat-completion response object, or ``None`` when the
            API call failed.
        args: Parsed command-line arguments. Not read by this function; kept so
            existing callers do not need to change.
        processing_id: Identifier stored with the record.
        use_mockup: When true, nothing is written and 0 is returned.
        cv_collection: Object exposing ``insert_one(document)``.

    Returns:
        The numeric ``cost`` found in the ``openai_stats`` object of the model's
        JSON reply, or 0 when the reply is missing, is not a JSON object, or
        carries no numeric cost. The value is returned even when the MongoDB
        insert fails, so callers never receive ``None``.
    """
    logger.debug("Inserting processing data into MongoDB.")
    if use_mockup:
        logger.debug("Using mockup; skipping MongoDB insertion.")
        return 0  # Return 0 for mockup mode

    # Defaults used whenever a piece of the response is missing or malformed.
    input_tokens = output_tokens = total_tokens = 0
    cost = 0
    openai_stats = {}

    if response and response.choices:
        message_content = response.choices[0].message.content
        try:
            parsed_content = json.loads(message_content)
        except (json.JSONDecodeError, TypeError):
            # TypeError covers a reply whose content is None rather than text.
            logger.error("Failed to decode JSON from message content for openai_stats.")
            parsed_content = None

        # Only a JSON object can carry an "openai_stats" member; lists, strings
        # and numbers are valid JSON but have no .get().
        if isinstance(parsed_content, dict):
            reported_stats = parsed_content.get("openai_stats", {})
            if isinstance(reported_stats, dict):
                openai_stats = reported_stats
                reported_cost = reported_stats.get("cost", 0)
                # bool is a subclass of int, so it is excluded explicitly.
                if isinstance(reported_cost, (int, float)) and not isinstance(reported_cost, bool):
                    cost = reported_cost

        usage = getattr(response, "usage", None)
        if usage is not None:
            input_tokens = usage.prompt_tokens
            output_tokens = usage.completion_tokens
            total_tokens = usage.total_tokens
    else:
        logger.error("Invalid response format or missing usage data.")

    # NOTE(review): the openai_stats_* fields read "input_tokens"/"output_tokens",
    # while other parts of this script write "prompt_tokens"/"completion_tokens"
    # under "openai_stats" - confirm which key names the prompt asks the model for.
    processing_data = {
        "processing_id": processing_id,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "text_content": text_content,
        "summary": summary,
        "usage_prompt_tokens": input_tokens,  # Renamed to avoid collision
        "usage_completion_tokens": output_tokens,  # Renamed to avoid collision
        "usage_total_tokens": total_tokens,  # Renamed to avoid collision
        "openai_stats_input_tokens": openai_stats.get("input_tokens"),
        "openai_stats_output_tokens": openai_stats.get("output_tokens"),
        "openai_stats_total_tokens": openai_stats.get("total_tokens"),
        "cost": cost
    }

    try:
        cv_collection.insert_one(processing_data)
        logger.debug(f"Inserted processing data for ID: {processing_id}")
    except Exception as e:
        # Persistence is best-effort: log the failure and still report the cost.
        logger.error(f"Failed to insert processing data into MongoDB: {e}", exc_info=True)
    return cost
if __name__ == "__main__":
main()