#!/usr/bin/env python3
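"""Analyze resume text with OpenAI and record processing metadata in MongoDB.

Usage:
    resume_analysis.py -f path/to/resume.txt
    resume_analysis.py "resume text passed directly on the command line"

Expects OPENAI_API_KEY, MONGODB_URI, MONGODB_DATABASE, MODEL_NAME, MAX_TOKENS
and optionally LOG_LEVEL in the environment (a .env file is loaded via
python-dotenv).
"""
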
import argparse
import json
import os
import sys
import uuid
from datetime import datetime, timezone

import pymongo
from dotenv import load_dotenv
from openai import OpenAI

# Load variables from .env before any of the environment lookups below.
load_dotenv()

# Read the OpenAI API key directly from the environment
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=OPENAI_API_KEY)

# MongoDB connection details from .env
mongo_uri = os.environ.get("MONGODB_URI")
mongo_db_name = os.environ.get("MONGODB_DATABASE")
mongo_collection_name = "cv_processing_collection"  # Could be moved to .env if it needs to be configurable

# Initialize the MongoDB client, database, and collection handles
mongo_client = pymongo.MongoClient(mongo_uri)
db = mongo_client[mongo_db_name]
cv_collection = db[mongo_collection_name]

# Configuration
COMPONENT_NAME = "resume_analysis.py"

# Log level from the environment, defaulting to WARN
LOG_LEVEL = os.environ.get("LOG_LEVEL", "WARN").upper()

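# Example .env for local development (all values below are illustrative
# placeholders, not real credentials or required model names):
#   OPENAI_API_KEY=sk-...
#   MONGODB_URI=mongodb://localhost:27017
#   MONGODB_DATABASE=cv_db
#   MODEL_NAME=gpt-4o-mini
#   MAX_TOKENS=1024
#   LOG_LEVEL=WARN
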
# Minimal logger: print the message only when its level is enabled by the
# configured LOG_LEVEL (DEBUG shows everything, WARN shows WARN and ERROR,
# ERROR shows only ERROR; unknown values fall back to WARN behaviour).
def logger(level, message):
    allowed = {
        "DEBUG": {"DEBUG", "WARN", "ERROR"},
        "WARN": {"WARN", "ERROR"},
        "ERROR": {"ERROR"},
    }.get(LOG_LEVEL, {"WARN", "ERROR"})
    if level in allowed:
        timestamp = datetime.now().isoformat()
        print(f"[{timestamp}] [{COMPONENT_NAME}] [{level}] {message}")

def analyze_resume(text):
    logger("DEBUG", "Starting analyze_resume function")
    try:
        # Read the system prompt from prompt.txt next to this script.
        prompt_path = os.path.join(os.path.dirname(__file__), "prompt.txt")
        with open(prompt_path, "r") as prompt_file:
            system_prompt = prompt_file.read()
        response = client.chat.completions.create(
            model=os.getenv("MODEL_NAME"),
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text},
            ],
            # 1024 is an assumed fallback for when MAX_TOKENS is unset.
            max_tokens=int(os.getenv("MAX_TOKENS", "1024")),
        )
        logger("DEBUG", "analyze_resume function completed successfully")
        return response
    except Exception as e:
        logger("ERROR", f"Error in analyze_resume: {e}")
        raise

# Persist the input text, the model output, and usage statistics to MongoDB.
def insert_processing_data(text_content, summary, response, args, processing_id):
    logger("DEBUG", "Starting insert_processing_data function")
    try:
        input_tokens = response.usage.prompt_tokens
        output_tokens = response.usage.completion_tokens
        total_tokens = response.usage.total_tokens
        cost = total_tokens * 0.000001  # Rough flat-rate estimate, not actual model pricing

        document_data = {
            "processing_id": processing_id,
            "input_text": text_content,
            "output_summary": summary,
            "tokens_sent": input_tokens,
            "tokens_received": output_tokens,
            "model_used": os.getenv("MODEL_NAME"),
            "timestamp": datetime.now(timezone.utc).isoformat(),  # Current timestamp in UTC
            "cost": cost,
            "client_id": "client_unknown",  # Placeholder; could be made dynamic
            "document_id": "doc_unknown",  # Placeholder; could be made dynamic
            "original_filename": args.file if args.file else "command_line_input",
            "processing_status": {
                "status": "NEW",
                "date": datetime.now(timezone.utc).isoformat(),
            },
            "openai_stats": {
                "input_tokens": input_tokens,
                "output_tokens": output_tokens,
                "total_tokens": total_tokens,
                "cost": cost,
            },
        }
        cv_collection.insert_one(document_data)
        logger("DEBUG", "Data inserted into MongoDB.")
    except Exception as e:
        logger("ERROR", f"Error in insert_processing_data: {e}")
        raise

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Analyze resume text using OpenAI.")
    parser.add_argument("-f", "--file", help="Path to the file containing the resume text.")
    # Declared as an optional positional so argparse accepts raw text without -f.
    parser.add_argument("text", nargs="?", help="Resume text passed directly on the command line.")
    args = parser.parse_args()

    # Initialized up front so the exception handler at the bottom can
    # reference them even when an error occurs before they are assigned.
    document = None
    filename = None

    try:
        if args.file:
            try:
                with open(args.file, "r", encoding="latin-1") as f:
                    text_content = f.read()
            except FileNotFoundError as e:
                logger("ERROR", f"File not found: {args.file} - {e}")
                sys.exit(1)
        elif args.text:
            text_content = args.text
        else:
            parser.print_help()
            sys.exit(1)

        # Generate a unique processing ID
        processing_id = str(uuid.uuid4())

        if args.file:
            filename = args.file
        else:
            filename = "command_line_input"

        # Find an existing document for this filename and mark it PROCESSING
        document = cv_collection.find_one({"original_filename": filename})

        if document:
            document_id = document["_id"]
            cv_collection.update_one(
                {"_id": document_id},
                {"$set": {
                    "processing_status.status": "PROCESSING",
                    "processing_status.date": datetime.now(timezone.utc).isoformat(),
                    "processing_id": processing_id,
                }},
            )
            logger("DEBUG", f"Updated processing status to PROCESSING for document with filename: {filename} and processing_id: {processing_id}")
        else:
            logger("WARN", f"No document found with filename: {filename}. A new one will be inserted with processing_id: {processing_id}")

        response = analyze_resume(text_content)
        try:
            content = response.choices[0].message.content.strip()
            # Strip a Markdown code fence (```json ... ```) if the model
            # wrapped its output in one.
            if content.startswith("```json"):
                content = content.removeprefix("```json")
            elif content.startswith("```"):
                content = content.removeprefix("```")
            content = content.removesuffix("```").strip()
            summary = json.loads(content)
        except json.JSONDecodeError as e:
            logger("WARN", f"Failed to decode JSON from OpenAI response: {e}")
            summary = {"error": "Failed to decode JSON from OpenAI"}
            error_log_path = "my-app/uploads/cv/openai_raw_output.txt"
            try:
                os.makedirs(os.path.dirname(error_log_path), exist_ok=True)
                with open(error_log_path, "a") as error_file:
                    error_file.write(f"Processing ID: {processing_id}\n")
                    error_file.write(f"Error: {e}\n")
                    error_file.write(f"Raw Response Content:\n{response.choices[0].message.content}\n")
                    error_file.write("-" * 40 + "\n")  # Separator for readability
                logger("DEBUG", f"Raw OpenAI response logged to {error_log_path}")
            except Exception as log_e:
                logger("ERROR", f"Failed to log raw response to {error_log_path}: {log_e}")

        insert_processing_data(text_content, summary, response, args, processing_id)

        # Update processing status to COMPLETED
        if document:
            cv_collection.update_one(
                {"_id": document["_id"]},
                {"$set": {
                    "processing_status.status": "COMPLETED",
                    "processing_status.date": datetime.now(timezone.utc).isoformat(),
                }},
            )
            logger("DEBUG", f"Updated processing status to COMPLETED for document with filename: {filename}")

        logger("DEBUG", f"OpenAI > Total tokens used: {response.usage.total_tokens}")
        print(json.dumps(summary))  # Emit the summary as JSON on stdout

    except Exception as e:
        logger("ERROR", f"An error occurred during processing: {e}")
        # Update processing status to FAILED
        if document:
            cv_collection.update_one(
                {"_id": document["_id"]},
                {"$set": {
                    "processing_status.status": "FAILED",
                    "processing_status.date": datetime.now(timezone.utc).isoformat(),
                }},
            )
            logger("ERROR", f"Updated processing status to FAILED for document with filename: {filename}")
        sys.exit(1)
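
# Example run (the output below is hypothetical; the JSON shape depends
# entirely on the system prompt in prompt.txt):
#   $ python resume_analysis.py -f resumes/jane_doe.txt
#   {"name": "Jane Doe", "skills": ["Python", "MongoDB"]}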