Fixed resume script. Cost is probably handled incorrectly. Integration with the UI is probably broken.

2025-03-04 00:22:17 +01:00 · 2025-03-04 00:22:17 +01:00 · aadf1fe94c
commit aadf1fe94c
parent f40b895749
5 changed files with 249 additions and 182 deletions
--- a/.env
+++ b/.env
@ -1,3 +0,0 @@
 MONGODB_URI=mongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000
 MONGODB_DATABASE=cv_summary_db
 MODEL_NAME=gpt-4
--- a/my-app/app/api/upload-cv/route.ts
+++ b/my-app/app/api/upload-cv/route.ts
@ -100,24 +100,30 @@ export async function POST(req: Request): Promise<NextResponse> {
          // Parse JSON from the last line of the output
          const lines = rawOutput.trim().split('\n');
          const jsonOutputLine = lines[lines.length - 1];
-          summary = JSON.parse(jsonOutputLine);
+          console.log("Attempting to parse JSON:", jsonOutputLine); // Log raw JSON string
        } catch (error) {
          console.error("Failed to parse JSON from python script:", error);
          summary = { error: "Failed to parse JSON from python script" };
          pythonProcessError = true;
          // Log raw output to file for debugging
          const errorLogPath = path.join(uploadDir, "openai_raw_output.txt");
          const timestamp = new Date().toISOString();
          try {
-            if (error instanceof Error) {
+            summary = JSON.parse(jsonOutputLine);
-              fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: ${error.message}\n`);
+          } catch (error) {
-            } else {
+            console.error("Failed to parse JSON from python script:", error);
-              fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: Unknown error\n`);
+            console.error("Raw JSON string that failed to parse:", jsonOutputLine); // Log the raw JSON string that failed
            summary = { error: "Failed to parse JSON from python script" };
            pythonProcessError = true;
            // Log raw output to file for debugging
            const errorLogPath = path.join(uploadDir, "openai_raw_output.txt");
            const timestamp = new Date().toISOString();
            try {
              if (error instanceof Error) {
                fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: ${error.message}\nFailed JSON String:\n${jsonOutputLine}\n`); // Include failed JSON string in log
              } else {
                fs.appendFileSync(errorLogPath, `\n--- JSON Parse Error ---\nTimestamp: ${timestamp}\nRaw Output:\n${rawOutput}\nError: Unknown error\nFailed JSON String:\n${jsonOutputLine}\n`); // Include failed JSON string in log
              }
              console.log(`Raw Python output logged to ${errorLogPath}`);
            } catch (logError: any) { // Explicitly type logError as any
              console.error("Error logging raw output:", logError);
            }
            console.log(`Raw Python output logged to ${errorLogPath}`);
          } catch (logError: any) { // Explicitly type logError as any
            console.error("Error logging raw output:", logError);
          }
        } catch (outerError) { // Correctly placed catch block for the outer try
          console.error("Outer try block error:", outerError);
        }
      }
    });
@ -128,7 +134,7 @@ export async function POST(req: Request): Promise<NextResponse> {
      pythonProcess.kill();
      summary = { error: "Error generating summary: Timeout" };
      pythonProcessError = true;
-    }, 10000); // 10 seconds
+    }, 30000); // 30 seconds
    return new Promise<NextResponse>((resolve) => {
      pythonProcess.on('close', () => {
--- a/my-app/utils/mockup_response.json
+++ b/my-app/utils/mockup_response.json
@ -0,0 +1,20 @@
 {
    "choices": [
        {
            "message": {
                "content": "Mockup analysis result",
                "role": "assistant"
            }
        }
    ],
    "usage": {
        "prompt_tokens": 100,
        "completion_tokens": 50,
        "total_tokens": 150
    },
    "openai_stats": {
        "prompt_tokens": 100,
        "completion_tokens": 50,
        "total_tokens": 150
    }
 }
--- a/my-app/utils/prompt.txt
+++ b/my-app/utils/prompt.txt
@ -78,6 +78,6 @@ You are an expert CV analyzer specialized in Applicant Tracking System (ATS) eva
 2. For each section, perform an ATS analysis by:
   - Calculating a score on a scale from 1 to 10 that reflects the completeness, clarity, and relevance of the information.
-   - Listing specific improvement suggestions for any section that scores below 7.
+   - Listing specific improvement suggestions for any section that scores below 9.
   - Identifying and counting common ATS-related keywords in each section.
   - Providing a concise summary of the section, highlighting key strengths and weaknesses.
--- a/my-app/utils/resume_analysis.py
+++ b/my-app/utils/resume_analysis.py
@ -2,189 +2,233 @@
 import sys
 import os
 import argparse
 import io
 import json
-from dotenv import load_dotenv
+import logging
-load_dotenv()
+from datetime import datetime, timezone
 from openai import OpenAI
 from pdfminer.high_level import extract_text
 import pymongo  # Import pymongo
 from datetime import datetime, timezone # Import datetime and timezone
 import uuid
 from typing import Optional, Any
 import time
-# Directly access environment variables
+from dotenv import load_dotenv
-OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+import pymongo
 import openai
 from pdfminer.high_level import extract_text
-client = OpenAI(api_key=OPENAI_API_KEY)
+# Load environment variables
-
+load_dotenv()
 # MongoDB Connection Details from .env
 mongo_uri = os.environ.get("MONGODB_URI")
 mongo_db_name = os.environ.get("MONGODB_DATABASE")
 mongo_collection_name = "cv_processing_collection" # You can configure this in .env if needed
 # Initialize MongoDB client
 mongo_client = pymongo.MongoClient(mongo_uri)
 db = mongo_client[mongo_db_name]
 cv_collection = db[mongo_collection_name]
 # Configuration
-COMPONENT_NAME = "resume_analysis.py"
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 MODEL_NAME = os.getenv("MODEL_NAME")
 MAX_TOKENS = int(os.getenv("MAX_TOKENS", 500))
 USE_MOCKUP = os.getenv("USE_MOCKUP", "false").lower() == "true"
 MOCKUP_FILE_PATH = os.getenv("MOCKUP_FILE_PATH")
 MONGODB_URI = os.getenv("MONGODB_URI")
 MONGODB_DATABASE = os.getenv("MONGODB_DATABASE")
-# Get log level from environment variable, default to WARN
+MONGO_COLLECTION_NAME = "cv_processing_collection"
 LOG_LEVEL = os.environ.get("LOG_LEVEL", "WARN").upper()
-# Function for logging
+# Initialize OpenAI client
-def logger(level, message):
+openai.api_key = OPENAI_API_KEY
    if LOG_LEVEL == "DEBUG":
        log_levels = {"DEBUG": 0, "WARN": 1, "ERROR": 2}
    elif LOG_LEVEL == "WARN":
        log_levels = {"WARN": 0, "ERROR": 1}
    elif LOG_LEVEL == "ERROR":
        log_levels = {"ERROR": 0}
    else:
        log_levels = {"WARN": 0, "ERROR": 1} # Default
-    if level in log_levels:
+# Logging setup
-        timestamp = datetime.now().isoformat()
+LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper()
        log_message = f"[{timestamp}] [{COMPONENT_NAME}] [{level}] {message}"
        print(log_message)
-def analyze_resume(text):
+logging.basicConfig(
-    logger("DEBUG", "Starting analyze_resume function")
+    level=LOG_LEVEL,
-    try:
+    format='[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s',
-        response = client.chat.completions.create(
+    datefmt='%Y-%m-%dT%H:%M:%S%z'
-            model=os.getenv("MODEL_NAME"),
+)
            messages=[{
                "role": "system",
                "content": open(os.path.join(os.path.dirname(__file__), "prompt.txt"), "r").read()
            },
            {"role": "user", "content": text}],
            max_tokens=int(os.getenv("MAX_TOKENS"))
        )
        logger("DEBUG", "analyze_resume function completed successfully")
        return response
    except Exception as e:
        logger("ERROR", f"Error in analyze_resume: {e}")
        raise
-def insert_processing_data(text_content, summary, response, args, processing_id): # New function to insert data to MongoDB
+def get_mongo_collection():
-    logger("DEBUG", "Starting insert_processing_data function")
+    """Initialize and return MongoDB collection."""
-    try:
+    mongo_client = pymongo.MongoClient(MONGODB_URI)
-        input_tokens = response.usage.prompt_tokens
+    db = mongo_client[MONGODB_DATABASE]
-        output_tokens = response.usage.completion_tokens
+    return db[MONGO_COLLECTION_NAME]
-        total_tokens = response.usage.total_tokens
+logger = logging.getLogger(__name__)
        cost = total_tokens * 0.000001 # rough estimate
-        document_data = {
+def main():
-            "processing_id": processing_id,
+    """Main function to process the resume."""
-            "input_text": text_content,
+    parser = argparse.ArgumentParser(
-            "output_summary": summary,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
-            "tokens_sent": input_tokens,
+        description="""This tool analyzes resumes using OpenAI's API. Parameters are required to run the analysis.
-            "tokens_received": output_tokens,
+
-            "model_used": os.getenv("MODEL_NAME"),
+Required Environment Variables:
-            "timestamp": datetime.now(timezone.utc).isoformat(), # Current timestamp in UTC
+- OPENAI_API_KEY: Your OpenAI API key
-            "cost": cost,
+- MODEL_NAME: OpenAI model to use (e.g. gpt-3.5-turbo)
-            "client_id": "client_unknown", # You might want to make these dynamic
+- MONGODB_URI: MongoDB connection string (optional for mockup mode)""",
-            "document_id": "doc_unknown", # You might want to make these dynamic
+        usage="resume_analysis.py [-h] [-f FILE] [-m]",
-            "original_filename": args.file if args.file else "command_line_input",
+        epilog="""Examples:
-            "processing_status": {
+  Analyze a resume:        resume_analysis.py -f my_resume.pdf
-                "status": "NEW",
+  Test with mockup data:   resume_analysis.py -f test.pdf -m"""
-                "date": datetime.now(timezone.utc).isoformat()
+    )
-            },
+    parser.add_argument('-f', '--file', help='Path to the resume file to analyze (PDF or text)')
-            "openai_stats": {
+    parser.add_argument('-m', '--mockup', action='store_true', help='Use mockup response instead of calling OpenAI API')
-                "input_tokens": input_tokens,
+
-                "output_tokens": output_tokens,
+    # If no arguments provided, show help and exit
-                "total_tokens": total_tokens,
+    if len(sys.argv) == 1:
-                "cost": cost
+        parser.print_help()
-            }
+        sys.exit(1)
        }
        cv_collection.insert_one(document_data)
        logger("DEBUG", "Data inserted into MongoDB.")
    except Exception as e:
        logger("ERROR", f"Error in insert_processing_data: {e}")
        raise
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Analyze resume text using OpenAI.")
    parser.add_argument("-f", "--file", help="Path to the file containing the resume text.")
    args = parser.parse_args()
-    try:
+    # Determine whether to use mockup based on the -m flag, overriding USE_MOCKUP
-        if args.file:
+    use_mockup = args.mockup
-            try:
+
-                with open(args.file, "r", encoding="latin-1") as f:
+    # Load the resume text from the provided file or use mockup
-                    text_content = f.read()
+    if use_mockup:
-            except FileNotFoundError as e:
+        resume_text = "Mockup resume text"
-                logger("ERROR", f"File not found: {args.file} - {e}")
+    else:
-                sys.exit(1)
+        if not os.path.exists(args.file):
-        elif len(sys.argv) > 1:
+            logger.error(f"File not found: {args.file}")
            text_content = sys.argv[1]
        else:
            parser.print_help()
            sys.exit(1)
-        # Generate a unique processing ID
+        start_file_read_time = time.time()
-        processing_id = str(uuid.uuid4())
+        with open(args.file, 'r') as f:
            resume_text = f.read()
        file_read_time = time.time() - start_file_read_time
        logger.debug(f"File read time: {file_read_time:.2f} seconds")
-        # Update processing status to PROCESSING
+    # Call the OpenAI API with the resume text
-        if args.file:
+    start_time = time.time()
-            filename = args.file
+    response = call_openai_api(resume_text, use_mockup)
-        else:
+    openai_api_time = time.time() - start_time
-            filename = "command_line_input"
+    logger.debug(f"OpenAI API call time: {openai_api_time:.2f} seconds")
    # Initialize MongoDB collection only when needed
    cv_collection = get_mongo_collection()
-        # Find the document in MongoDB
+    # Measure MongoDB insertion time
-        document = cv_collection.find_one({"original_filename": filename})
+    start_mongo_time = time.time()
    cost = insert_processing_data(resume_text, {}, response, args, str(uuid.uuid4()), use_mockup, cv_collection)
    mongo_insert_time = time.time() - start_mongo_time
    logger.debug(f"MongoDB insert time: {mongo_insert_time:.2f} seconds")
    write_openai_response(response, use_mockup, args.file, cost)
-        if document:
+def load_mockup_response(mockup_file_path: str) -> dict:
-            document_id = document["_id"]
+    """Load mockup response from a JSON file."""
-            cv_collection.update_one(
+    logger.debug(f"Loading mockup response from: {mockup_file_path}")
-                {"_id": document_id},
+    if not os.path.exists(mockup_file_path):
-                {"$set": {"processing_status.status": "PROCESSING", "processing_status.date": datetime.now(timezone.utc).isoformat(), "processing_id": processing_id}}
+        raise FileNotFoundError(f"Mockup file not found at: {mockup_file_path}")
-            )
+    with open(mockup_file_path, "r") as f:
-            logger("DEBUG", f"Updated processing status to PROCESSING for document with filename: {filename} and processing_id: {processing_id}")
+        response = json.load(f)
-        else:
+    response.setdefault("openai_stats", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
-            logger("WARN", f"No document found with filename: {filename}. Creating a new document with processing_id: {processing_id}")
+    return response
-        response = analyze_resume(text_content)
+def call_openai_api(text: str, use_mockup: bool) -> Optional[Any]:
-        try:
+    """Call OpenAI API to analyze resume text."""
-            content = response.choices[0].message.content
+    logger.debug("Calling OpenAI API.")
-            if content.startswith("```json"):
+    try:
-                content = content[7:-4] # Remove ```json and ```
+        if use_mockup:
-            summary = json.loads(content)
+            return load_mockup_response(MOCKUP_FILE_PATH)
        except json.JSONDecodeError as e:
            logger("WARN", f"Failed to decode JSON from OpenAI response: {e}")
            summary = {"error": "Failed to decode JSON from OpenAI"}
            error_log_path = "my-app/uploads/cv/openai_raw_output.txt"
            try:
                with open(error_log_path, "a") as error_file:
                    error_file.write(f"Processing ID: {processing_id}\n")
                    error_file.write(f"Error: {e}\n")
                    error_file.write(f"Raw Response Content:\n{response.choices[0].message.content}\n")
                    error_file.write("-" * 40 + "\n")  # Separator for readability
                logger("DEBUG", f"Raw OpenAI response logged to {error_log_path}")
            except Exception as log_e:
                logger("ERROR", f"Failed to log raw response to {error_log_path}: {log_e}")
-        insert_processing_data(text_content, summary, response, args, processing_id)
+        with open(os.path.join(os.path.dirname(__file__), "prompt.txt"), "r") as prompt_file:
-
+            system_content = prompt_file.read()
        # Update processing status to COMPLETED
        if document:
            cv_collection.update_one(
                {"_id": document_id},
                {"$set": {"processing_status.status": "COMPLETED", "processing_status.date": datetime.now(timezone.utc).isoformat()}}
            )
            logger("DEBUG", f"Updated processing status to COMPLETED for document with filename: {filename}")
        logger("DEBUG", f"OpenAI > Total tokens used: {response.usage.total_tokens}")
        print(json.dumps(summary)) # Ensure JSON output
        response = openai.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system_content},
                {"role": "user", "content": text}
            ],
            max_tokens=MAX_TOKENS
        )
        logger.debug(f"OpenAI API response: {response}")
        return response
    except Exception as e:
-        logger("ERROR", f"An error occurred during processing: {e}")
+        logger.error(f"Error during OpenAI API call: {e}", exc_info=True)
-        # Update processing status to FAILED
+        return None
-        if document:
+
-            cv_collection.update_one(
+def write_openai_response(response: Any, use_mockup: bool, input_file_path: str = None, cost: float = 0) -> None: # Add cost argument
-                {"_id": document_id},
+    """Write raw OpenAI response to a file."""
-                {"$set": {"processing_status.status": "FAILED", "processing_status.date": datetime.now(timezone.utc).isoformat()}}
+    if use_mockup:
-            )
+        logger.debug("Using mockup response; no OpenAI message to write.")
-            logger("ERROR", f"Updated processing status to FAILED for document with filename: {filename}")
+        return
-        sys.exit(1)
+    if response and response.choices: # Changed from hasattr to direct attribute access
        message_content = response.choices[0].message.content
        logger.debug(f"Raw OpenAI message content: {message_content}")
        output_dir = os.path.dirname(input_file_path) if input_file_path else '.'
        base_filename = os.path.splitext(os.path.basename(input_file_path))[0] if input_file_path else "default"
        processing_id = str(uuid.uuid4())
        file_path = os.path.join(output_dir, f"{base_filename}_openai_response_{processing_id}") + ".json"
        try:
            serializable_response = { # Create a serializable dictionary
                "choices": [
                    {
                        "message": {
                            "content": choice.message.content,
                            "role": choice.message.role
                        },
                        "finish_reason": choice.finish_reason,
                        "index": choice.index
                    } for choice in response.choices
                ],
                "openai_stats": {
                    "prompt_tokens": response.usage.prompt_tokens,
                    "completion_tokens": response.usage.completion_tokens,
                    "total_tokens": response.usage.total_tokens
                 },
                "cost": cost, # Include cost in the output JSON
                "model": response.model
            }
            with open(file_path, "w") as f:
                json.dump(serializable_response, f, indent=2) # Dump the serializable dictionary
            logger.debug(f"OpenAI response written to {file_path}")
        except IOError as e:
            logger.error(f"Failed to write OpenAI response to file: {e}")
    else:
        logger.warning("No choices in OpenAI response to extract message from.")
        logger.debug(f"Response object: {response}")
 def insert_processing_data(text_content: str, summary: dict, response: Any, args: argparse.Namespace, processing_id: str, use_mockup: bool, cv_collection) -> float:
    """Persist one resume-processing record to MongoDB and return its cost.

    In mockup mode nothing is written and 0 is returned. Otherwise a document
    holding the input text, the summary, the token counts read from
    ``response.usage`` and the ``openai_stats`` object found in the model's
    JSON reply is inserted into ``cv_collection``.

    Args:
        text_content: Resume text stored under ``text_content``.
        summary: Summary stored under ``summary``.
        response: Chat-completion response object, or a falsy value when the
            API call failed.
        args: Parsed command-line arguments (not read by this function).
        processing_id: Identifier stored with the record.
        use_mockup: When True, skip the insert entirely.
        cv_collection: Object exposing ``insert_one(document)``.

    Returns:
        The ``openai_stats["cost"]`` value from the model's JSON reply, or 0
        when it is absent, the reply is not a JSON object, the response is
        unusable, or mockup mode is active. The cost is returned even when
        the MongoDB insert fails, so the caller can still record it.
    """
    logger.debug("Inserting processing data into MongoDB.")
    if use_mockup:
        logger.debug("Using mockup; skipping MongoDB insertion.")
        return 0

    # Defaults used whenever the response is missing or incomplete.
    input_tokens = output_tokens = total_tokens = 0
    cost = 0
    openai_stats = {}

    if response and response.choices:
        message_content = response.choices[0].message.content
        try:
            reply = json.loads(message_content)
        except (json.JSONDecodeError, TypeError):
            # TypeError covers a reply whose content is None rather than text.
            logger.error("Failed to decode JSON from message content for openai_stats.")
            reply = None
        # Only a JSON object can carry openai_stats; lists/scalars are ignored.
        if isinstance(reply, dict) and isinstance(reply.get("openai_stats"), dict):
            openai_stats = reply["openai_stats"]
            # NOTE(review): cost is whatever the model wrote into its reply;
            # nothing here derives it from the token counts.
            cost = openai_stats.get("cost", 0)

        usage = getattr(response, "usage", None)
        if usage is not None:
            input_tokens = usage.prompt_tokens
            output_tokens = usage.completion_tokens
            total_tokens = usage.total_tokens
    else:
        logger.error("Invalid response format or missing usage data.")

    processing_data = {
        "processing_id": processing_id,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "text_content": text_content,
        "summary": summary,
        # "usage_*" keys hold API-reported counts; "openai_stats_*" keys hold
        # the values the model reported inside its own reply.
        "usage_prompt_tokens": input_tokens,
        "usage_completion_tokens": output_tokens,
        "usage_total_tokens": total_tokens,
        "openai_stats_input_tokens": openai_stats.get("input_tokens"),
        "openai_stats_output_tokens": openai_stats.get("output_tokens"),
        "openai_stats_total_tokens": openai_stats.get("total_tokens"),
        "cost": cost
    }
    try:
        cv_collection.insert_one(processing_data)
        logger.debug(f"Inserted processing data for ID: {processing_id}")
    except Exception as e:
        # Best effort: a storage failure is logged but must not hide the cost.
        logger.error(f"Failed to insert processing data into MongoDB: {e}", exc_info=True)
    return cost
 # Script entry point: call main() only when this file is executed directly.
 if __name__ == "__main__":
    main()