diff --git a/.env b/.env deleted file mode 100644 index f1301bf..0000000 --- a/.env +++ /dev/null @@ -1,3 +0,0 @@ -MONGODB_URI=mongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000 -MONGODB_DATABASE=cv_summary_db -MODEL_NAME=gpt-4 diff --git a/my-app/app/api/upload-cv/route.ts b/my-app/app/api/upload-cv/route.ts index b68d6fc..28012e8 100644 --- a/my-app/app/api/upload-cv/route.ts +++ b/my-app/app/api/upload-cv/route.ts @@ -100,24 +100,30 @@ export async function POST(req: Request): Promise { // Parse JSON from the last line of the output const lines = rawOutput.trim().split('\n'); const jsonOutputLine = lines[lines.length - 1]; - summary = JSON.parse(jsonOutputLine); - } catch (error) { - console.error("Failed to parse JSON from python script:", error); - summary = { error: "Failed to parse JSON from python script" }; - pythonProcessError = true; - // Log raw output to file for debugging - const errorLogPath = path.join(uploadDir, "openai_raw_output.txt"); - const timestamp = new Date().toISOString(); + console.log("Attempting to parse JSON:", jsonOutputLine); // Log raw JSON string try { - if (error instanceof Error) { - fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: ${error.message}\n`); - } else { - fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: Unknown error\n`); + summary = JSON.parse(jsonOutputLine); + } catch (error) { + console.error("Failed to parse JSON from python script:", error); + console.error("Raw JSON string that failed to parse:", jsonOutputLine); // Log the raw JSON string that failed + summary = { error: "Failed to parse JSON from python script" }; + pythonProcessError = true; + // Log raw output to file for debugging + const errorLogPath = path.join(uploadDir, "openai_raw_output.txt"); + const timestamp = new Date().toISOString(); + try { + if (error instanceof Error) { + fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: ${error.message}\nFailed JSON String:\n${jsonOutputLine}\n`); // Include failed JSON string in log + } else { + fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: Unknown error\nFailed JSON String:\n${jsonOutputLine}\n`); // Include failed JSON string in log + } + console.log(`Raw Python output logged to ${errorLogPath}`); + } catch (logError: any) { // Explicitly type logError as any + console.error("Error logging raw output:", logError); } - console.log(`Raw Python output logged to ${errorLogPath}`); - } catch (logError: any) { // Explicitly type logError as any - console.error("Error logging raw output:", logError); } + } catch (outerError) { // Correctly placed catch block for the outer try + console.error("Outer try block error:", outerError); } } }); @@ -128,7 +134,7 @@ export async function POST(req: Request): Promise { pythonProcess.kill(); summary = { error: "Error generating summary: Timeout" }; pythonProcessError = true; - }, 10000); // 10 seconds + }, 30000); // 30 seconds return new Promise((resolve) => { pythonProcess.on('close', () => { diff --git a/my-app/utils/mockup_response.json b/my-app/utils/mockup_response.json new file mode 100644 index 0000000..fc21be7 --- /dev/null +++ b/my-app/utils/mockup_response.json @@ -0,0 +1,20 @@ +{ + "choices": [ + { + "message": { + "content": "Mockup analysis result", + "role": "assistant" + } + } + ], + "usage": { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150 + }, + "openai_stats": { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150 + } +} diff --git a/my-app/utils/prompt.txt b/my-app/utils/prompt.txt index 18c15ef..88a7292 100644 --- a/my-app/utils/prompt.txt +++ b/my-app/utils/prompt.txt @@ -78,6 +78,6 @@ You are an expert CV analyzer specialized in Applicant Tracking System (ATS) eva 2. For each section, perform an ATS analysis by: - Calculating a score on a scale from 1 to 10 that reflects the completeness, clarity, and relevance of the information. - - Listing specific improvement suggestions for any section that scores below 7. + - Listing specific improvement suggestions for any section that scores below 9. - Identifying and counting common ATS-related keywords in each section. - Providing a concise summary of the section, highlighting key strengths and weaknesses. \ No newline at end of file diff --git a/my-app/utils/resume_analysis.py b/my-app/utils/resume_analysis.py old mode 100644 new mode 100755 index 0bfb90c..0b4f0ce --- a/my-app/utils/resume_analysis.py +++ b/my-app/utils/resume_analysis.py @@ -2,189 +2,233 @@ import sys import os import argparse -import io import json -from dotenv import load_dotenv -load_dotenv() -from openai import OpenAI -from pdfminer.high_level import extract_text -import pymongo # Import pymongo -from datetime import datetime, timezone # Import datetime and timezone +import logging +from datetime import datetime, timezone import uuid +from typing import Optional, Any +import time -# Directly access environment variables -OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") +from dotenv import load_dotenv +import pymongo +import openai +from pdfminer.high_level import extract_text -client = OpenAI(api_key=OPENAI_API_KEY) - -# MongoDB Connection Details from .env -mongo_uri = os.environ.get("MONGODB_URI") -mongo_db_name = os.environ.get("MONGODB_DATABASE") -mongo_collection_name = "cv_processing_collection" # You can configure this in .env if needed - -# Initialize MongoDB client -mongo_client = pymongo.MongoClient(mongo_uri) -db = mongo_client[mongo_db_name] -cv_collection = db[mongo_collection_name] +# Load environment variables +load_dotenv() # Configuration -COMPONENT_NAME = "resume_analysis.py" +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") +MODEL_NAME = os.getenv("MODEL_NAME") +MAX_TOKENS = int(os.getenv("MAX_TOKENS", 500)) +USE_MOCKUP = os.getenv("USE_MOCKUP", "false").lower() == "true" +MOCKUP_FILE_PATH = os.getenv("MOCKUP_FILE_PATH") +MONGODB_URI = os.getenv("MONGODB_URI") +MONGODB_DATABASE = os.getenv("MONGODB_DATABASE") -# Get log level from environment variable, default to WARN -LOG_LEVEL = os.environ.get("LOG_LEVEL", "WARN").upper() +MONGO_COLLECTION_NAME = "cv_processing_collection" -# Function for logging -def logger(level, message): - if LOG_LEVEL == "DEBUG": - log_levels = {"DEBUG": 0, "WARN": 1, "ERROR": 2} - elif LOG_LEVEL == "WARN": - log_levels = {"WARN": 0, "ERROR": 1} - elif LOG_LEVEL == "ERROR": - log_levels = {"ERROR": 0} - else: - log_levels = {"WARN": 0, "ERROR": 1} # Default +# Initialize OpenAI client +openai.api_key = OPENAI_API_KEY - if level in log_levels: - timestamp = datetime.now().isoformat() - log_message = f"[{timestamp}] [{COMPONENT_NAME}] [{level}] {message}" - print(log_message) +# Logging setup +LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper() -def analyze_resume(text): - logger("DEBUG", "Starting analyze_resume function") - try: - response = client.chat.completions.create( - model=os.getenv("MODEL_NAME"), - messages=[{ - "role": "system", - "content": open(os.path.join(os.path.dirname(__file__), "prompt.txt"), "r").read() - }, - {"role": "user", "content": text}], - max_tokens=int(os.getenv("MAX_TOKENS")) - ) - logger("DEBUG", "analyze_resume function completed successfully") - return response - except Exception as e: - logger("ERROR", f"Error in analyze_resume: {e}") - raise +logging.basicConfig( + level=LOG_LEVEL, + format='[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s', + datefmt='%Y-%m-%dT%H:%M:%S%z' +) -def insert_processing_data(text_content, summary, response, args, processing_id): # New function to insert data to MongoDB - logger("DEBUG", "Starting insert_processing_data function") - try: - input_tokens = response.usage.prompt_tokens - output_tokens = response.usage.completion_tokens - total_tokens = response.usage.total_tokens - cost = total_tokens * 0.000001 # rough estimate +def get_mongo_collection(): + """Initialize and return MongoDB collection.""" + mongo_client = pymongo.MongoClient(MONGODB_URI) + db = mongo_client[MONGODB_DATABASE] + return db[MONGO_COLLECTION_NAME] +logger = logging.getLogger(__name__) - document_data = { - "processing_id": processing_id, - "input_text": text_content, - "output_summary": summary, - "tokens_sent": input_tokens, - "tokens_received": output_tokens, - "model_used": os.getenv("MODEL_NAME"), - "timestamp": datetime.now(timezone.utc).isoformat(), # Current timestamp in UTC - "cost": cost, - "client_id": "client_unknown", # You might want to make these dynamic - "document_id": "doc_unknown", # You might want to make these dynamic - "original_filename": args.file if args.file else "command_line_input", - "processing_status": { - "status": "NEW", - "date": datetime.now(timezone.utc).isoformat() - }, - "openai_stats": { - "input_tokens": input_tokens, - "output_tokens": output_tokens, - "total_tokens": total_tokens, - "cost": cost - } - } - cv_collection.insert_one(document_data) - logger("DEBUG", "Data inserted into MongoDB.") - except Exception as e: - logger("ERROR", f"Error in insert_processing_data: {e}") - raise +def main(): + """Main function to process the resume.""" + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description="""This tool analyzes resumes using OpenAI's API. Parameters are required to run the analysis. + +Required Environment Variables: +- OPENAI_API_KEY: Your OpenAI API key +- MODEL_NAME: OpenAI model to use (e.g. gpt-3.5-turbo) +- MONGODB_URI: MongoDB connection string (optional for mockup mode)""", + usage="resume_analysis.py [-h] [-f FILE] [-m]", + epilog="""Examples: + Analyze a resume: resume_analysis.py -f my_resume.pdf + Test with mockup data: resume_analysis.py -f test.pdf -m""" + ) + parser.add_argument('-f', '--file', help='Path to the resume file to analyze (PDF or text)') + parser.add_argument('-m', '--mockup', action='store_true', help='Use mockup response instead of calling OpenAI API') + + # If no arguments provided, show help and exit + if len(sys.argv) == 1: + parser.print_help() + sys.exit(1) -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Analyze resume text using OpenAI.") - parser.add_argument("-f", "--file", help="Path to the file containing the resume text.") args = parser.parse_args() - try: - if args.file: - try: - with open(args.file, "r", encoding="latin-1") as f: - text_content = f.read() - except FileNotFoundError as e: - logger("ERROR", f"File not found: {args.file} - {e}") - sys.exit(1) - elif len(sys.argv) > 1: - text_content = sys.argv[1] - else: - parser.print_help() + # Determine whether to use mockup based on the -m flag, overriding USE_MOCKUP + use_mockup = args.mockup + + # Load the resume text from the provided file or use mockup + if use_mockup: + resume_text = "Mockup resume text" + else: + if not os.path.exists(args.file): + logger.error(f"File not found: {args.file}") sys.exit(1) - # Generate a unique processing ID - processing_id = str(uuid.uuid4()) + start_file_read_time = time.time() + with open(args.file, 'r') as f: + resume_text = f.read() + file_read_time = time.time() - start_file_read_time + logger.debug(f"File read time: {file_read_time:.2f} seconds") - # Update processing status to PROCESSING - if args.file: - filename = args.file - else: - filename = "command_line_input" + # Call the OpenAI API with the resume text + start_time = time.time() + response = call_openai_api(resume_text, use_mockup) + openai_api_time = time.time() - start_time + logger.debug(f"OpenAI API call time: {openai_api_time:.2f} seconds") + # Initialize MongoDB collection only when needed + cv_collection = get_mongo_collection() - # Find the document in MongoDB - document = cv_collection.find_one({"original_filename": filename}) + # Measure MongoDB insertion time + start_mongo_time = time.time() + cost = insert_processing_data(resume_text, {}, response, args, str(uuid.uuid4()), use_mockup, cv_collection) + mongo_insert_time = time.time() - start_mongo_time + logger.debug(f"MongoDB insert time: {mongo_insert_time:.2f} seconds") + write_openai_response(response, use_mockup, args.file, cost) - if document: - document_id = document["_id"] - cv_collection.update_one( - {"_id": document_id}, - {"$set": {"processing_status.status": "PROCESSING", "processing_status.date": datetime.now(timezone.utc).isoformat(), "processing_id": processing_id}} - ) - logger("DEBUG", f"Updated processing status to PROCESSING for document with filename: {filename} and processing_id: {processing_id}") - else: - logger("WARN", f"No document found with filename: {filename}. Creating a new document with processing_id: {processing_id}") +def load_mockup_response(mockup_file_path: str) -> dict: + """Load mockup response from a JSON file.""" + logger.debug(f"Loading mockup response from: {mockup_file_path}") + if not os.path.exists(mockup_file_path): + raise FileNotFoundError(f"Mockup file not found at: {mockup_file_path}") + with open(mockup_file_path, "r") as f: + response = json.load(f) + response.setdefault("openai_stats", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}) + return response - response = analyze_resume(text_content) - try: - content = response.choices[0].message.content - if content.startswith("```json"): - content = content[7:-4] # Remove ```json and ``` - summary = json.loads(content) - except json.JSONDecodeError as e: - logger("WARN", f"Failed to decode JSON from OpenAI response: {e}") - summary = {"error": "Failed to decode JSON from OpenAI"} - error_log_path = "my-app/uploads/cv/openai_raw_output.txt" - try: - with open(error_log_path, "a") as error_file: - error_file.write(f"Processing ID: {processing_id}\n") - error_file.write(f"Error: {e}\n") - error_file.write(f"Raw Response Content:\n{response.choices[0].message.content}\n") - error_file.write("-" * 40 + "\n") # Separator for readability - logger("DEBUG", f"Raw OpenAI response logged to {error_log_path}") - except Exception as log_e: - logger("ERROR", f"Failed to log raw response to {error_log_path}: {log_e}") +def call_openai_api(text: str, use_mockup: bool) -> Optional[Any]: + """Call OpenAI API to analyze resume text.""" + logger.debug("Calling OpenAI API.") + try: + if use_mockup: + return load_mockup_response(MOCKUP_FILE_PATH) - insert_processing_data(text_content, summary, response, args, processing_id) - - # Update processing status to COMPLETED - if document: - cv_collection.update_one( - {"_id": document_id}, - {"$set": {"processing_status.status": "COMPLETED", "processing_status.date": datetime.now(timezone.utc).isoformat()}} - ) - logger("DEBUG", f"Updated processing status to COMPLETED for document with filename: {filename}") - - logger("DEBUG", f"OpenAI > Total tokens used: {response.usage.total_tokens}") - print(json.dumps(summary)) # Ensure JSON output + with open(os.path.join(os.path.dirname(__file__), "prompt.txt"), "r") as prompt_file: + system_content = prompt_file.read() + response = openai.chat.completions.create( + model=MODEL_NAME, + messages=[ + {"role": "system", "content": system_content}, + {"role": "user", "content": text} + ], + max_tokens=MAX_TOKENS + ) + logger.debug(f"OpenAI API response: {response}") + return response except Exception as e: - logger("ERROR", f"An error occurred during processing: {e}") - # Update processing status to FAILED - if document: - cv_collection.update_one( - {"_id": document_id}, - {"$set": {"processing_status.status": "FAILED", "processing_status.date": datetime.now(timezone.utc).isoformat()}} - ) - logger("ERROR", f"Updated processing status to FAILED for document with filename: {filename}") - sys.exit(1) + logger.error(f"Error during OpenAI API call: {e}", exc_info=True) + return None + +def write_openai_response(response: Any, use_mockup: bool, input_file_path: str = None, cost: float = 0) -> None: # Add cost argument + """Write raw OpenAI response to a file.""" + if use_mockup: + logger.debug("Using mockup response; no OpenAI message to write.") + return + if response and response.choices: # Changed from hasattr to direct attribute access + message_content = response.choices[0].message.content + logger.debug(f"Raw OpenAI message content: {message_content}") + output_dir = os.path.dirname(input_file_path) if input_file_path else '.' + base_filename = os.path.splitext(os.path.basename(input_file_path))[0] if input_file_path else "default" + processing_id = str(uuid.uuid4()) + file_path = os.path.join(output_dir, f"{base_filename}_openai_response_{processing_id}") + ".json" + try: + serializable_response = { # Create a serializable dictionary + "choices": [ + { + "message": { + "content": choice.message.content, + "role": choice.message.role + }, + "finish_reason": choice.finish_reason, + "index": choice.index + } for choice in response.choices + ], + "openai_stats": { + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "total_tokens": response.usage.total_tokens + }, + "cost": cost, # Include cost in the output JSON + "model": response.model + } + with open(file_path, "w") as f: + json.dump(serializable_response, f, indent=2) # Dump the serializable dictionary + logger.debug(f"OpenAI response written to {file_path}") + except IOError as e: + logger.error(f"Failed to write OpenAI response to file: {e}") + else: + logger.warning("No choices in OpenAI response to extract message from.") + logger.debug(f"Response object: {response}") + +def insert_processing_data(text_content: str, summary: dict, response: Any, args: argparse.Namespace, processing_id: str, use_mockup: bool, cv_collection) -> None: + """Insert processing data into MongoDB.""" + logger.debug("Inserting processing data into MongoDB.") + if not use_mockup: + if response and response.choices: + message_content = response.choices[0].message.content + try: + openai_stats_content = json.loads(message_content) + openai_stats = openai_stats_content.get("openai_stats", {}) + cost = openai_stats.get("cost", 0) + except json.JSONDecodeError: + logger.error("Failed to decode JSON from message content for openai_stats.") + openai_stats = {} + cost = 0 + + usage = response.usage + input_tokens = usage.prompt_tokens + output_tokens = usage.completion_tokens + total_tokens = usage.total_tokens + else: + logger.error("Invalid response format or missing usage data.") + input_tokens = output_tokens = total_tokens = 0 + cost = 0 + openai_stats = {} + usage = {} + + + processing_data = { + "processing_id": processing_id, + "timestamp": datetime.now(timezone.utc).isoformat(), + "text_content": text_content, + "summary": summary, + "usage_prompt_tokens": input_tokens, # Renamed to avoid collision + "usage_completion_tokens": output_tokens, # Renamed to avoid collision + "usage_total_tokens": total_tokens, # Renamed to avoid collision + "openai_stats_input_tokens": openai_stats.get("input_tokens"), + "openai_stats_output_tokens": openai_stats.get("output_tokens"), + "openai_stats_total_tokens": openai_stats.get("total_tokens"), + "cost": cost + } + + try: + cv_collection.insert_one(processing_data) + logger.debug(f"Inserted processing data for ID: {processing_id}") + return cost # Return the cost + except Exception as e: + logger.error(f"Failed to insert processing data into MongoDB: {e}", exc_info=True) + else: + logger.debug("Using mockup; skipping MongoDB insertion.") + return 0 # Return 0 for mockup mode + +if __name__ == "__main__": + main()