#!/usr/bin/env python3
import sys
import os
import argparse
import io
import json

from dotenv import load_dotenv

load_dotenv()

from openai import OpenAI
from pdfminer.high_level import extract_text
import pymongo
from datetime import datetime, timezone
import uuid

# Directly access environment variables
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)

# MongoDB connection details from .env
mongo_uri = os.environ.get("MONGODB_URI")
mongo_db_name = os.environ.get("MONGODB_DATABASE")
mongo_collection_name = "cv_processing_collection"  # You can configure this in .env if needed

# Initialize MongoDB client
mongo_client = pymongo.MongoClient(mongo_uri)
db = mongo_client[mongo_db_name]
cv_collection = db[mongo_collection_name]

# Configuration
COMPONENT_NAME = "resume_analysis.py"

# Get log level from environment variable, default to WARN
LOG_LEVEL = os.environ.get("LOG_LEVEL", "WARN").upper()


def logger(level, message):
    """Print a timestamped log line if `level` is enabled for the configured LOG_LEVEL."""
    if LOG_LEVEL == "DEBUG":
        log_levels = {"DEBUG": 0, "WARN": 1, "ERROR": 2}
    elif LOG_LEVEL == "WARN":
        log_levels = {"WARN": 0, "ERROR": 1}
    elif LOG_LEVEL == "ERROR":
        log_levels = {"ERROR": 0}
    else:
        log_levels = {"WARN": 0, "ERROR": 1}  # Default

    if level in log_levels:
        timestamp = datetime.now().isoformat()
        log_message = f"[{timestamp}] [{COMPONENT_NAME}] [{level}] {message}"
        print(log_message)


def analyze_resume(text):
    """Send the resume text to the OpenAI chat completions API and return the raw response."""
    logger("DEBUG", "Starting analyze_resume function")
    try:
        # Read the system prompt with a context manager so the file handle is closed.
        with open(os.path.join(os.path.dirname(__file__), "prompt.txt"), "r") as prompt_file:
            system_prompt = prompt_file.read()
        response = client.chat.completions.create(
            model=os.getenv("MODEL_NAME"),
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text},
            ],
            max_tokens=int(os.getenv("MAX_TOKENS")),
        )
        logger("DEBUG", "analyze_resume function completed successfully")
        return response
    except Exception as e:
        logger("ERROR", f"Error in analyze_resume: {e}")
        raise


def insert_processing_data(text_content, summary, response, args, processing_id):
    """Insert the processing results and OpenAI usage statistics into MongoDB."""
    logger("DEBUG", "Starting insert_processing_data function")
    try:
        input_tokens = response.usage.prompt_tokens
        output_tokens = response.usage.completion_tokens
        total_tokens = response.usage.total_tokens
        cost = total_tokens * 0.000001  # Rough estimate

        document_data = {
            "processing_id": processing_id,
            "input_text": text_content,
            "output_summary": summary,
            "tokens_sent": input_tokens,
            "tokens_received": output_tokens,
            "model_used": os.getenv("MODEL_NAME"),
            "timestamp": datetime.now(timezone.utc).isoformat(),  # Current timestamp in UTC
            "cost": cost,
            "client_id": "client_unknown",  # You might want to make these dynamic
            "document_id": "doc_unknown",   # You might want to make these dynamic
            "original_filename": args.file if args.file else "command_line_input",
            "processing_status": {
                "status": "NEW",
                "date": datetime.now(timezone.utc).isoformat(),
            },
            "openai_stats": {
                "input_tokens": input_tokens,
                "output_tokens": output_tokens,
                "total_tokens": total_tokens,
                "cost": cost,
            },
        }
        cv_collection.insert_one(document_data)
        logger("DEBUG", "Data inserted into MongoDB.")
    except Exception as e:
        logger("ERROR", f"Error in insert_processing_data: {e}")
        raise


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Analyze resume text using OpenAI.")
    parser.add_argument("-f", "--file", help="Path to the file containing the resume text.")
    args = parser.parse_args()

    # Initialized up front so the failure handler below can reference it safely
    # even if an error occurs before the MongoDB lookup.
    document = None

    try:
        if args.file:
            try:
                with open(args.file, "r", encoding="latin-1") as f:
                    text_content = f.read()
            except FileNotFoundError as e:
                logger("ERROR", f"File not found: {args.file} - {e}")
                sys.exit(1)
        elif len(sys.argv) > 1:
            text_content = sys.argv[1]
        else:
            parser.print_help()
            sys.exit(1)

        # Generate a unique processing ID
        processing_id = str(uuid.uuid4())

        # Update processing status to PROCESSING
        if args.file:
            filename = args.file
        else:
            filename = "command_line_input"

        # Find the document in MongoDB
        document = cv_collection.find_one({"original_filename": filename})
        if document:
            document_id = document["_id"]
            cv_collection.update_one(
                {"_id": document_id},
                {"$set": {
                    "processing_status.status": "PROCESSING",
                    "processing_status.date": datetime.now(timezone.utc).isoformat(),
                    "processing_id": processing_id,
                }},
            )
            logger("DEBUG", f"Updated processing status to PROCESSING for document with filename: {filename} and processing_id: {processing_id}")
        else:
            logger("WARN", f"No document found with filename: {filename}. Creating a new document with processing_id: {processing_id}")

        response = analyze_resume(text_content)

        try:
            content = response.choices[0].message.content
            if content.startswith("```json"):
                content = content[7:-4]  # Remove the leading ```json and trailing ``` fence
            summary = json.loads(content)
        except json.JSONDecodeError as e:
            logger("WARN", f"Failed to decode JSON from OpenAI response: {e}")
            summary = {"error": "Failed to decode JSON from OpenAI"}
            error_log_path = "my-app/uploads/cv/openai_raw_output.txt"
            try:
                with open(error_log_path, "a") as error_file:
                    error_file.write(f"Processing ID: {processing_id}\n")
                    error_file.write(f"Error: {e}\n")
                    error_file.write(f"Raw Response Content:\n{response.choices[0].message.content}\n")
                    error_file.write("-" * 40 + "\n")  # Separator for readability
                logger("DEBUG", f"Raw OpenAI response logged to {error_log_path}")
            except Exception as log_e:
                logger("ERROR", f"Failed to log raw response to {error_log_path}: {log_e}")

        insert_processing_data(text_content, summary, response, args, processing_id)

        # Update processing status to COMPLETED
        if document:
            cv_collection.update_one(
                {"_id": document_id},
                {"$set": {
                    "processing_status.status": "COMPLETED",
                    "processing_status.date": datetime.now(timezone.utc).isoformat(),
                }},
            )
            logger("DEBUG", f"Updated processing status to COMPLETED for document with filename: {filename}")

        logger("DEBUG", f"OpenAI > Total tokens used: {response.usage.total_tokens}")
        print(json.dumps(summary))  # Ensure JSON output

    except Exception as e:
        logger("ERROR", f"An error occurred during processing: {e}")
        # Update processing status to FAILED
        if document:
            cv_collection.update_one(
                {"_id": document_id},
                {"$set": {
                    "processing_status.status": "FAILED",
                    "processing_status.date": datetime.now(timezone.utc).isoformat(),
                }},
            )
            logger("ERROR", f"Updated processing status to FAILED for document with filename: {filename}")
        sys.exit(1)
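
# Example invocations (a sketch; the file path below is a placeholder, and the
# script assumes OPENAI_API_KEY, MONGODB_URI, MONGODB_DATABASE, MODEL_NAME and
# MAX_TOKENS are available via .env or the environment):
#
#   python resume_analysis.py --file path/to/resume.txt
#   python resume_analysis.py "Plain resume text passed directly as an argument"
#
# On success the script prints the parsed summary as JSON on stdout and records
# token usage plus processing status in the cv_processing_collection collection.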