#!/usr/bin/env python3
import sys
import os
import argparse
import json
import logging
from datetime import datetime, timezone
import uuid
from typing import Optional, Any
import time

from dotenv import load_dotenv
import pymongo
import openai
from pdfminer.high_level import extract_text

# Load environment variables
load_dotenv()

# Configuration
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
MODEL_NAME = os.getenv("MODEL_NAME")
MAX_TOKENS = int(os.getenv("MAX_TOKENS", 500))
USE_MOCKUP = os.getenv("USE_MOCKUP", "false").lower() == "true"
MOCKUP_FILE_PATH = os.getenv("MOCKUP_FILE_PATH")
MONGODB_URI = os.getenv("MONGODB_URI")
MONGODB_DATABASE = os.getenv("MONGODB_DATABASE")
MONGO_COLLECTION_NAME = "cv_processing_collection"

# Initialize OpenAI client
openai.api_key = OPENAI_API_KEY

# Logging setup
LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper()
logging.basicConfig(
    level=LOG_LEVEL,
    format='[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s',
    datefmt='%Y-%m-%dT%H:%M:%S%z'
)

logger = logging.getLogger(__name__)
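
# Illustrative .env sketch for the variables read above (placeholder values only,
# not real credentials; adjust to your own environment):
#
#   OPENAI_API_KEY=sk-...
#   MODEL_NAME=gpt-3.5-turbo
#   MAX_TOKENS=500
#   USE_MOCKUP=false
#   MOCKUP_FILE_PATH=tests/mockup_response.json
#   MONGODB_URI=mongodb://localhost:27017
#   MONGODB_DATABASE=cv_processing
#   LOG_LEVEL=DEBUG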


def get_mongo_collection():
    """Initialize and return the MongoDB collection used for CV processing."""
    mongo_client = pymongo.MongoClient(MONGODB_URI)
    db = mongo_client[MONGODB_DATABASE]
    return db[MONGO_COLLECTION_NAME]


def main():
    """Parse arguments, load the resume text, analyze it, and store the results."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""This tool analyzes resumes using OpenAI's API.
At least one input file is required to run an analysis.

Required environment variables:
  - OPENAI_API_KEY: Your OpenAI API key
  - MODEL_NAME: OpenAI model to use (e.g. gpt-3.5-turbo)
  - MONGODB_URI: MongoDB connection string (optional in mockup mode)""",
        usage="resume_analysis.py [-h] [-f FILE] [-p PDF] [-m]",
        epilog="""Examples:
  Analyze a TXT resume:    resume_analysis.py -f my_resume.txt
  Analyze a PDF resume:    resume_analysis.py -p my_resume.pdf
  Test with mockup data:   resume_analysis.py -f test.txt -m"""
    )
    parser.add_argument('-f', '--file', help='Path to the resume file to analyze (TXT)')
    parser.add_argument('-p', '--pdf', help='Path to the resume file to analyze (PDF)')
    parser.add_argument('-m', '--mockup', action='store_true',
                        help='Use a mockup response instead of calling the OpenAI API')

    # If no arguments are provided, show help and exit
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()

    # The -m flag decides mockup mode, overriding the USE_MOCKUP environment variable
    use_mockup = args.mockup

    # Load the resume text from the provided file, or use placeholder text in mockup mode
    if use_mockup:
        resume_text = "Mockup resume text"
    else:
        if args.pdf:
            if not os.path.exists(args.pdf):
                logger.error(f"PDF file not found: {args.pdf}")
                sys.exit(1)
            start_file_read_time = time.time()
            try:
                resume_text = extract_text(args.pdf)
            except Exception as e:
                logger.error(f"Error extracting text from PDF: {e}", exc_info=True)
                sys.exit(1)
            file_read_time = time.time() - start_file_read_time
            logger.debug(f"PDF file read time: {file_read_time:.2f} seconds")

            # Save the extracted text next to the source PDF
            pdf_filename = os.path.splitext(os.path.basename(args.pdf))[0]
            text_file_path = os.path.join(os.path.dirname(args.pdf), f"{pdf_filename}_text.txt")
            with open(text_file_path, "w", encoding="utf-8") as text_file:
                text_file.write(resume_text)
            logger.debug(f"Extracted text saved to: {text_file_path}")
        elif args.file:
            if not os.path.exists(args.file):
                logger.error(f"File not found: {args.file}")
                sys.exit(1)
            start_file_read_time = time.time()
            with open(args.file, 'r', encoding='latin-1') as f:
                resume_text = f.read()
            file_read_time = time.time() - start_file_read_time
            logger.debug(f"File read time: {file_read_time:.2f} seconds")
        else:
            parser.print_help()
            sys.exit(1)

    # Call the OpenAI API with the resume text
    start_time = time.time()
    try:
        response = call_openai_api(resume_text, use_mockup)
        openai_api_time = time.time() - start_time
        logger.debug(f"OpenAI API call time: {openai_api_time:.2f} seconds")
    except Exception as e:
        logger.error(f"Error during OpenAI API call: {e}", exc_info=True)
        response = None

    # Initialize the MongoDB collection only when it is actually needed
    cv_collection = get_mongo_collection() if not use_mockup else None

    # Measure MongoDB insertion time
    start_mongo_time = time.time()
    cost = insert_processing_data(resume_text, {}, response, args, str(uuid.uuid4()),
                                  use_mockup, cv_collection)
    mongo_insert_time = time.time() - start_mongo_time
    logger.debug(f"MongoDB insert time: {mongo_insert_time:.2f} seconds")

    # Use whichever input path was supplied so the output files land next to the source file
    write_openai_response(response, use_mockup, args.file or args.pdf, cost)


def load_mockup_response(mockup_file_path: str) -> dict:
    """Load a mockup response from a JSON file."""
    logger.debug(f"Loading mockup response from: {mockup_file_path}")
    if not os.path.exists(mockup_file_path):
        raise FileNotFoundError(f"Mockup file not found at: {mockup_file_path}")
    with open(mockup_file_path, "r") as f:
        response = json.load(f)
    return response


def call_openai_api(text: str, use_mockup: bool) -> Optional[Any]:
    """Call the OpenAI API (or load a mockup response) to analyze resume text."""
    logger.debug("Calling OpenAI API.")
    try:
        if use_mockup:
            # Prefer the configured mockup path; fall back to the bundled test fixture
            mockup_path = MOCKUP_FILE_PATH or os.path.join(
                os.path.dirname(__file__), 'tests', 'mockup_response.json')
            return load_mockup_response(mockup_path)

        with open(os.path.join(os.path.dirname(__file__), "prompt.txt"), "r") as prompt_file:
            system_content = prompt_file.read()

        response = openai.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system_content},
                {"role": "user", "content": text}
            ],
            max_tokens=MAX_TOKENS
        )
        logger.debug(f"OpenAI API response: {response}")
        return response
    except Exception as e:
        logger.error(f"Error during OpenAI API call: {e}", exc_info=True)
        return None
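
# Note: in mockup mode call_openai_api() returns the plain dict parsed from the mockup
# JSON file rather than an OpenAI ChatCompletion object. The functions below therefore
# check use_mockup first and skip the response/usage handling in that case.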


def write_openai_response(response: Any, use_mockup: bool, input_file_path: str = None,
                          cost: float = 0) -> None:
    """Write the raw OpenAI response to disk as a text file and a JSON summary."""
    if use_mockup:
        logger.debug("Using mockup response; no OpenAI message to write.")
        return

    if response and response.choices:
        message_content = response.choices[0].message.content
        logger.debug(f"Raw OpenAI message content: {message_content}")

        if input_file_path:
            output_dir = os.path.dirname(input_file_path)
            base_filename = os.path.splitext(os.path.basename(input_file_path))[0]
        else:
            logger.warning("Input file path not provided. Using default output directory and filename.")
            output_dir = os.path.dirname(__file__)  # Default to the script's directory
            base_filename = "default"  # Default filename

        processing_id = str(uuid.uuid4())
        file_path = os.path.join(output_dir, f"{base_filename}_openai_response_{processing_id}.json")
        openai_file_path = os.path.join(output_dir, f"{base_filename}_openai.txt")

        try:
            with open(openai_file_path, "w", encoding="utf-8") as openai_file:
                openai_file.write(message_content)
            logger.debug(f"OpenAI response written to {openai_file_path}")

            serializable_response = {
                "choices": [
                    {
                        "message": {
                            "content": choice.message.content,
                            "role": choice.message.role
                        },
                        "finish_reason": choice.finish_reason,
                        "index": choice.index
                    }
                    for choice in response.choices
                ],
                "usage": {
                    "prompt_tokens": response.usage.prompt_tokens,
                    "completion_tokens": response.usage.completion_tokens,
                    "total_tokens": response.usage.total_tokens
                },
                "cost": cost,  # Include the extracted cost in the output JSON
                "model": response.model
            }
            with open(file_path, "w") as f:
                json.dump(serializable_response, f, indent=2, ensure_ascii=False)
            logger.debug(f"OpenAI response written to {file_path}")
        except IOError as e:
            logger.error(f"Failed to write OpenAI response to file: {e}")
    else:
        logger.warning("No choices in OpenAI response to extract a message from.")
        logger.debug(f"Response object: {response}")
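
# The cost extraction below assumes that prompt.txt instructs the model to reply with
# JSON containing an "openai_stats" object, roughly of this shape (illustrative sketch
# only; the actual schema is defined by the prompt, not by the OpenAI API):
#
#   {"openai_stats": {"cost": 0.0012}, ...}
#
# If the reply is not valid JSON or carries no "openai_stats", the cost falls back to 0.0.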

def insert_processing_data(text_content: str, summary: dict, response: Any,
                           args: argparse.Namespace, processing_id: str, use_mockup: bool,
                           cv_collection) -> float:
    """Insert processing data into MongoDB and return the extracted cost."""
    logger.debug("Inserting processing data into MongoDB.")
    cost = 0.0  # Default when no cost can be extracted

    if not use_mockup:
        if response and response.choices:
            message_content = response.choices[0].message.content
            openai_stats = {}
            try:
                # Attempt to decode the message as JSON and pull the model-reported cost
                openai_stats_content = json.loads(message_content.encode('utf-8').decode('unicode_escape'))
                openai_stats = openai_stats_content.get("openai_stats", {})
                cost = openai_stats.get("cost", 0.0)
            except json.JSONDecodeError as e:
                logger.error(f"JSONDecodeError in message_content: {e}", exc_info=True)
                cost = 0.0
            except AttributeError as e:
                logger.error(f"AttributeError accessing openai_stats: {e}", exc_info=True)
                cost = 0.0
            except Exception as e:
                logger.error(f"Unexpected error extracting cost: {e}", exc_info=True)
                cost = 0.0

            try:
                usage = response.usage
                input_tokens = usage.prompt_tokens
                output_tokens = usage.completion_tokens
                total_tokens = usage.total_tokens
            except Exception as e:
                logger.error(f"Error extracting usage data: {e}", exc_info=True)
                input_tokens = output_tokens = total_tokens = 0
        else:
            logger.error("Invalid response format or missing usage data.")
            input_tokens = output_tokens = total_tokens = 0
            cost = 0.0

        processing_data = {
            "processing_id": processing_id,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "text_content": text_content,
            "summary": summary,
            "usage_prompt_tokens": input_tokens,       # Renamed to avoid key collisions
            "usage_completion_tokens": output_tokens,  # Renamed to avoid key collisions
            "usage_total_tokens": total_tokens,        # Renamed to avoid key collisions
            "cost": cost
        }
        try:
            cv_collection.insert_one(processing_data)
            logger.debug(f"Inserted processing data for ID: {processing_id}")
            return cost
        except Exception as e:
            logger.error(f"Failed to insert processing data into MongoDB: {e}", exc_info=True)
    else:
        logger.debug("Using mockup; skipping MongoDB insertion.")

    return cost  # 0.0 in mockup mode or when extraction/insertion fails


if __name__ == "__main__":
    main()