diff --git a/my-app/docs/openrouter-refactoring-plan.md b/my-app/docs/openrouter-refactoring-plan.md
new file mode 100644
index 0000000..2e48759
--- /dev/null
+++ b/my-app/docs/openrouter-refactoring-plan.md
@@ -0,0 +1,160 @@
+# OpenRouter integration refactoring plan
+
+## Goal
+Refactor the code in `resume_analysis.py` to eliminate all dependencies on the OpenAI API and use the OpenRouter API exclusively, while improving the current OpenRouter connection implementation.
+
+## Change flow diagram
+```mermaid
+graph TD
+    A[Current implementation] --> B[Phase 1: Remove OpenAI dependencies]
+    B --> C[Phase 2: Refactor the OpenRouter client]
+    C --> D[Phase 3: Optimize response handling]
+    D --> E[Phase 4: Tests and validation]
+
+    subgraph "Phase 1: Remove OpenAI dependencies"
+        B1[Remove OpenAI imports]
+        B2[Remove OpenAI configuration variables]
+        B3[Remove client selection logic]
+    end
+
+    subgraph "Phase 2: Refactor the OpenRouter client"
+        C1[Create a dedicated OpenRouterClient class]
+        C2[Implement correct header configuration]
+        C3[Add support for different models]
+    end
+
+    subgraph "Phase 3: Optimize response handling"
+        D1[Unify the response format]
+        D2[Implement better error handling]
+        D3[Add response validation]
+    end
+
+    subgraph "Phase 4: Tests and validation"
+        E1[Unit tests]
+        E2[Integration tests]
+        E3[Document the changes]
+    end
+```
+
+## Detailed implementation
+
+### 1. Dedicated OpenRouterClient class
+
+```python
+class OpenRouterClient:
+    def __init__(self, api_key: str, model_name: str):
+        self.api_key = api_key
+        self.model_name = model_name
+        self.base_url = "https://openrouter.ai/api/v1"
+        self.session = requests.Session()
+        self.session.headers.update({
+            "Authorization": f"Bearer {api_key}",
+            "HTTP-Referer": "https://github.com/OpenRouterTeam/openrouter-examples",
+            "X-Title": "CV Analysis Tool"
+        })
+
+    def create_chat_completion(self, messages: list, max_tokens: int = None):
+        endpoint = f"{self.base_url}/chat/completions"
+        payload = {
+            "model": self.model_name,
+            "messages": messages,
+            "max_tokens": max_tokens
+        }
+
+        response = self.session.post(endpoint, json=payload)
+        response.raise_for_status()
+        return response.json()
+
+    def get_available_models(self):
+        endpoint = f"{self.base_url}/models"
+        response = self.session.get(endpoint)
+        response.raise_for_status()
+        return response.json()
+```
+
+### 2. Configuration and initialization
+
+```python
+def initialize_openrouter_client():
+    if not OPENROUTER_API_KEY:
+        raise ValueError("OPENROUTER_API_KEY is required")
+
+    client = OpenRouterClient(
+        api_key=OPENROUTER_API_KEY,
+        model_name=OPENROUTER_MODEL_NAME
+    )
+
+    # Verify connection and model availability
+    # (the /models endpoint returns {"data": [...]}, so iterate over the "data" list)
+    try:
+        models = client.get_available_models().get("data", [])
+        if not any(model["id"] == OPENROUTER_MODEL_NAME for model in models):
+            raise ValueError(f"Model {OPENROUTER_MODEL_NAME} not available")
+        logger.debug(f"Successfully connected to OpenRouter. Available models: {models}")
+        return client
+    except Exception as e:
+        logger.error(f"Failed to initialize OpenRouter client: {e}")
+        raise
+```
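+
+For orientation, the snippet below is a minimal usage sketch of the planned client, not part of the implementation itself. It assumes the module-level configuration shown above and that the model answers with a JSON document, as the existing prompt requests; the sample messages are placeholders.
+
+```python
+import json
+
+# Initialize once (module level) and reuse the client for every resume.
+client = initialize_openrouter_client()
+
+messages = [
+    {"role": "system", "content": "Summarize the resume as JSON."},  # placeholder prompt
+    {"role": "user", "content": "John Doe, 5 years of Python experience..."},  # placeholder CV text
+]
+
+# The planned create_chat_completion() returns the decoded JSON payload.
+raw = client.create_chat_completion(messages=messages, max_tokens=500)
+summary = json.loads(raw["choices"][0]["message"]["content"])
+print(summary)
+```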
+
+### 3. Response handling
+
+```python
+class OpenRouterResponse:
+    def __init__(self, raw_response: dict):
+        self.raw_response = raw_response
+        self.choices = self._parse_choices()
+        self.usage = self._parse_usage()
+        self.model = raw_response.get("model")
+
+    def _parse_choices(self):
+        choices = self.raw_response.get("choices", [])
+        return [
+            {
+                "message": choice.get("message", {}),
+                "finish_reason": choice.get("finish_reason"),
+                "index": choice.get("index")
+            }
+            for choice in choices
+        ]
+
+    def _parse_usage(self):
+        usage = self.raw_response.get("usage", {})
+        return {
+            "prompt_tokens": usage.get("prompt_tokens", 0),
+            "completion_tokens": usage.get("completion_tokens", 0),
+            "total_tokens": usage.get("total_tokens", 0)
+        }
+```
+
+### 4. Error handling
+
+```python
+class OpenRouterError(Exception):
+    def __init__(self, message: str, status_code: int = None, response: dict = None):
+        super().__init__(message)
+        self.status_code = status_code
+        self.response = response
+
+def handle_openrouter_error(error: Exception) -> OpenRouterError:
+    if isinstance(error, requests.exceptions.RequestException):
+        if error.response is not None:
+            try:
+                error_data = error.response.json()
+                message = error_data.get("error", {}).get("message", str(error))
+                return OpenRouterError(
+                    message=message,
+                    status_code=error.response.status_code,
+                    response=error_data
+                )
+            except ValueError:
+                pass
+    return OpenRouterError(str(error))
+```
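+
+To make the intended interplay concrete, here is a short, non-normative sketch of how the response wrapper and error handler above could be combined (it assumes `import requests` and the classes defined in this plan; the `analyze` helper and its single-message prompt are illustrative only):
+
+```python
+def analyze(client: OpenRouterClient, resume_text: str) -> OpenRouterResponse:
+    # Convert any transport-level failure into the single OpenRouterError type.
+    try:
+        raw = client.create_chat_completion(
+            messages=[{"role": "user", "content": resume_text}],
+            max_tokens=500,
+        )
+    except requests.exceptions.RequestException as exc:
+        raise handle_openrouter_error(exc) from exc
+    return OpenRouterResponse(raw)
+```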
+
+## Next steps
+
+1. Implement the classes and functions above
+2. Remove all OpenAI dependencies
+3. Update the existing code to use the new client
+4. Add unit and integration tests
+5. Update the documentation
\ No newline at end of file
diff --git a/my-app/utils/__pycache__/openrouter_client.cpython-312.pyc b/my-app/utils/__pycache__/openrouter_client.cpython-312.pyc
new file mode 100644
index 0000000..1fc3e06
Binary files /dev/null and b/my-app/utils/__pycache__/openrouter_client.cpython-312.pyc differ
diff --git a/my-app/utils/openrouter_client.py b/my-app/utils/openrouter_client.py
new file mode 100644
index 0000000..7bcb47e
--- /dev/null
+++ b/my-app/utils/openrouter_client.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+import logging
+import requests
+from typing import Optional, Dict, List, Any
+
+logger = logging.getLogger(__name__)
+
+class OpenRouterError(Exception):
+    """Custom exception for OpenRouter API errors."""
+    def __init__(self, message: str, status_code: int = None, response: dict = None):
+        super().__init__(message)
+        self.status_code = status_code
+        self.response = response
+
+class OpenRouterResponse:
+    """Wrapper for OpenRouter API responses."""
+    def __init__(self, raw_response: dict):
+        self.raw_response = raw_response
+        self.choices = self._parse_choices()
+        self.usage = self._parse_usage()
+        self.model = raw_response.get("model")
+
+    def _parse_choices(self) -> List[Dict[str, Any]]:
+        choices = self.raw_response.get("choices", [])
+        return [
+            {
+                "message": choice.get("message", {}),
+                "finish_reason": choice.get("finish_reason"),
+                "index": choice.get("index")
+            }
+            for choice in choices
+        ]
+
+    def _parse_usage(self) -> Dict[str, int]:
+        usage = self.raw_response.get("usage", {})
+        return {
+            "prompt_tokens": usage.get("prompt_tokens", 0),
+            "completion_tokens": usage.get("completion_tokens", 0),
+            "total_tokens": usage.get("total_tokens", 0)
+        }
+
+class OpenRouterClient:
+    """Client for interacting with the OpenRouter API."""
+    def __init__(self, api_key: str, model_name: str):
+        if not api_key:
+            raise ValueError("OpenRouter API key is required")
+        if not model_name:
+            raise ValueError("Model name is required")
+
+        self.api_key = api_key
+        self.model_name = model_name
+        self.base_url = "https://openrouter.ai/api/v1"
+        self.session = requests.Session()
+        self.session.headers.update({
+            "Authorization": f"Bearer {api_key}",
+            "HTTP-Referer": "https://github.com/OpenRouterTeam/openrouter-examples",
+            "X-Title": "CV Analysis Tool",
+            "Content-Type": "application/json"
+        })
+
+    def create_chat_completion(
+        self,
+        messages: List[Dict[str, str]],
+        max_tokens: Optional[int] = None
+    ) -> OpenRouterResponse:
+        """
+        Create a chat completion using the OpenRouter API.
+
+        Args:
+            messages: List of message dictionaries with 'role' and 'content' keys
+            max_tokens: Maximum number of tokens to generate
+
+        Returns:
+            OpenRouterResponse object containing the API response
+
+        Raises:
+            OpenRouterError: If the API request fails
+        """
+        endpoint = f"{self.base_url}/chat/completions"
+        payload = {
+            "model": self.model_name,
+            "messages": messages
+        }
+
+        if max_tokens is not None:
+            payload["max_tokens"] = max_tokens
+
+        try:
+            response = self.session.post(endpoint, json=payload)
+            response.raise_for_status()
+            return OpenRouterResponse(response.json())
+        except requests.exceptions.RequestException as e:
+            raise self._handle_request_error(e)
+
+    def get_available_models(self) -> Dict[str, Any]:
+        """
+        Get the raw models listing from the OpenRouter API.
+
+        Returns:
+            Response dictionary in the form {"data": [<model info dicts>, ...]}
+
+        Raises:
+            OpenRouterError: If the API request fails or the response format is invalid
+        """
+        endpoint = f"{self.base_url}/models"
+
+        try:
+            logger.debug(f"Fetching available models from: {endpoint}")
+            response = self.session.get(endpoint)
+            response.raise_for_status()
+
+            data = response.json()
+            logger.debug(f"Raw API response: {data}")
+
+            if not isinstance(data, dict) or "data" not in data:
+                raise OpenRouterError(
+                    message="Invalid response format from OpenRouter API",
+                    response=data
+                )
+
+            return data
+        except requests.exceptions.RequestException as e:
+            raise self._handle_request_error(e)
+
+    def verify_model_availability(self) -> bool:
+        """
+        Verify if the configured model is available.
+
+        Returns:
+            True if model is available, False otherwise
+        """
+        try:
+            response = self.get_available_models()
+            # The OpenRouter API returns the model list in the format:
+            # {"data": [{"id": "model_name", ...}, ...]}
+            models = response.get("data", [])
+            logger.debug(f"Available models: {[model.get('id') for model in models]}")
+            return any(model.get("id") == self.model_name for model in models)
+        except OpenRouterError as e:
+            logger.error(f"Failed to verify model availability: {e}")
+            return False
+        except Exception as e:
+            logger.error(f"Unexpected error while verifying model availability: {e}")
+            return False
+
+    def _handle_request_error(self, error: requests.exceptions.RequestException) -> OpenRouterError:
+        """Convert requests exceptions to OpenRouterError."""
+        if error.response is not None:
+            try:
+                error_data = error.response.json()
+                message = error_data.get("error", {}).get("message", str(error))
+                return OpenRouterError(
+                    message=message,
+                    status_code=error.response.status_code,
+                    response=error_data
+                )
+            except ValueError:
+                pass
+        return OpenRouterError(str(error))
+
+def initialize_openrouter_client(api_key: str, model_name: str) -> OpenRouterClient:
+    """
+    Initialize and verify OpenRouter client.
+ + Args: + api_key: OpenRouter API key + model_name: Name of the model to use + + Returns: + Initialized OpenRouterClient + + Raises: + ValueError: If client initialization or verification fails + """ + try: + client = OpenRouterClient(api_key=api_key, model_name=model_name) + + # Verify connection and model availability + if not client.verify_model_availability(): + raise ValueError(f"Model {model_name} not available") + + logger.debug(f"Successfully initialized OpenRouter client with model: {model_name}") + return client + except Exception as e: + logger.error(f"Failed to initialize OpenRouter client: {e}") + raise \ No newline at end of file diff --git a/my-app/utils/resume_analysis.py b/my-app/utils/resume_analysis.py index b35439a..73db7ed 100755 --- a/my-app/utils/resume_analysis.py +++ b/my-app/utils/resume_analysis.py @@ -6,20 +6,27 @@ import json import logging from datetime import datetime, timezone import uuid -from typing import Optional, Any +from typing import Optional, Any, Dict import time from dotenv import load_dotenv import pymongo -import openai from pdfminer.high_level import extract_text +from openrouter_client import initialize_openrouter_client, OpenRouterError, OpenRouterResponse + # Load environment variables load_dotenv() # Configuration -OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") -MODEL_NAME = os.getenv("MODEL_NAME") +OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") +if not OPENROUTER_API_KEY: + raise ValueError("OPENROUTER_API_KEY environment variable is required") + +OPENROUTER_MODEL_NAME = os.getenv("OPENROUTER_MODEL_NAME") +if not OPENROUTER_MODEL_NAME: + raise ValueError("OPENROUTER_MODEL_NAME environment variable is required") + MAX_TOKENS = int(os.getenv("MAX_TOKENS", 500)) USE_MOCKUP = os.getenv("USE_MOCKUP", "false").lower() == "true" MOCKUP_FILE_PATH = os.getenv("MOCKUP_FILE_PATH") @@ -28,9 +35,6 @@ MONGODB_DATABASE = os.getenv("MONGODB_DATABASE") MONGO_COLLECTION_NAME = "cv_processing_collection" -# Initialize OpenAI client -openai.api_key = OPENAI_API_KEY - # Logging setup LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper() @@ -40,6 +44,25 @@ logging.basicConfig( datefmt="%Y-%m-%dT%H:%M:%S%z", ) +logger = logging.getLogger(__name__) + +# Initialize OpenRouter client +logger.info("Initializing OpenRouter client...") +logger.debug(f"Using model: {OPENROUTER_MODEL_NAME}") +logger.debug("API Key present and valid format: %s", bool(OPENROUTER_API_KEY and OPENROUTER_API_KEY.startswith("sk-or-v1-"))) + +try: + llm_client = initialize_openrouter_client( + api_key=OPENROUTER_API_KEY, + model_name=OPENROUTER_MODEL_NAME + ) + logger.info(f"Successfully initialized OpenRouter client with model: {OPENROUTER_MODEL_NAME}") +except ValueError as e: + logger.error(f"Configuration error: {e}") + sys.exit(1) +except Exception as e: + logger.error(f"Failed to initialize OpenRouter client: {e}", exc_info=True) + sys.exit(1) def get_mongo_collection(): """Initialize and return MongoDB collection.""" @@ -51,39 +74,39 @@ def get_mongo_collection(): logger = logging.getLogger(__name__) -def main(): - """Main function to process the resume.""" +def parse_arguments(): + """Parses command line arguments.""" parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, - description="""This tool analyzes resumes using OpenAI's API. Parameters are required to run the analysis. + description="""This tool analyzes resumes using the OpenRouter API. Parameters are required to run the analysis. 
Required Environment Variables: -- OPENAI_API_KEY: Your OpenAI API key -- MODEL_NAME: OpenAI model to use (e.g. gpt-3.5-turbo) -- MONGODB_URI: MongoDB connection string (optional for mockup mode)""", +- OPENROUTER_API_KEY: Your OpenRouter API key +- OPENROUTER_MODEL_NAME: OpenRouter model to use (e.g. google/gemma-7b-it) +- MONGODB_URI: MongoDB connection string (optional for mockup mode) +- MAX_TOKENS: Maximum tokens for response (default: 500)""", usage="resume_analysis.py [-h] [-f FILE] [-m]", epilog="""Examples: Analyze a resume: resume_analysis.py -f my_resume.pdf - Test with mockup data: resume_analysis.py -f test.pdf -m""", + Test with mockup data: resume_analysis.py -f test.pdf -m + +Note: Make sure your OpenRouter API key and model name are properly configured in the .env file.""", ) parser.add_argument( "-f", "--file", help="Path to the resume file to analyze (PDF or text)" ) parser.add_argument( - "-m", "--mockup", action="store_true", help="Use mockup response instead of calling OpenAI API" + "-m", "--mockup", action="store_true", help="Use mockup response instead of calling LLM API" ) - - # If no arguments provided, show help and exit if len(sys.argv) == 1: parser.print_help() - sys.exit(1) + return None + return parser.parse_args() - args = parser.parse_args() - # Determine whether to use mockup based on the -m flag, overriding USE_MOCKUP +def load_resume_text(args): + """Loads resume text from a file or uses mockup text.""" use_mockup = args.mockup - - # Load the resume text from the provided file or use mockup if use_mockup: resume_text = "Mockup resume text" else: @@ -102,39 +125,75 @@ Required Environment Variables: resume_text = f.read() file_read_time = time.time() - start_file_read_time logger.debug(f"File read time: {file_read_time:.2f} seconds") + return resume_text - # Call the OpenAI API with the resume text + +def analyze_resume_with_llm(resume_text, use_mockup): + """Analyzes resume text using OpenRouter API.""" start_time = time.time() - response = call_openai_api(resume_text, use_mockup) - openai_api_time = time.time() - start_time - logger.debug(f"OpenAI API call time: {openai_api_time:.2f} seconds") + response = call_llm_api(resume_text, use_mockup) + llm_api_time = time.time() - start_time + logger.debug(f"LLM API call time: {llm_api_time:.2f} seconds") + return response - # Initialize MongoDB collection only when needed - cv_collection = get_mongo_collection() - # Measure MongoDB insertion time - start_mongo_time = time.time() - if response and response.choices: - message_content = response.choices[0].message.content - try: - summary = json.loads(message_content) - except json.JSONDecodeError as e: - logger.error(f"Failed to parse OpenAI response: {e}") - summary = {"error": "Invalid JSON response from OpenAI"} - else: - summary = {"error": "No response from OpenAI"} +def store_llm_response(response, use_mockup, input_file_path): + """Writes raw LLM response to a file.""" + write_llm_response(response, use_mockup, input_file_path) + + +def save_processing_data(resume_text, summary, response, args, processing_id, use_mockup, cv_collection): + """Saves processing data to MongoDB.""" insert_processing_data( resume_text, summary, response, args, - str(uuid.uuid4()), + processing_id, use_mockup, cv_collection, ) - mongo_insert_time = time.time() - start_mongo_time - logger.debug(f"MongoDB insert time: {mongo_insert_time:.2f} seconds") - write_openai_response(response, use_mockup, args.file) + + +def get_cv_summary_from_response(response): + """Extracts CV 
summary from LLM response.""" + if response and getattr(response, "choices", None): + message_content = response.choices[0]["message"].get("content", "") + try: + summary = json.loads(message_content) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse LLM response: {e}") + summary = {"error": "Invalid JSON response from LLM"} + else: + summary = {"error": "No response from LLM"} + return summary + + +def main(): + """Main function to process the resume.""" + args = parse_arguments() + if args is None: + return + use_mockup = args.mockup # Decide whether to use the mockup based on the -m flag + + try: + resume_text = load_resume_text(args) + except FileNotFoundError as e: + logger.error(f"File error: {e}") + sys.exit(1) + except Exception as e: + logger.error(f"Error loading resume text: {e}") + sys.exit(1) + + response = analyze_resume_with_llm(resume_text, use_mockup) + store_llm_response(response, use_mockup, args.file) + + cv_collection = get_mongo_collection() + processing_id = str(uuid.uuid4()) + summary = get_cv_summary_from_response(response) + save_processing_data(resume_text, summary, response, args, processing_id, use_mockup, cv_collection) + + logger.info(f"Resume analysis completed. Processing ID: {processing_id}") def load_mockup_response(mockup_file_path: str) -> dict: @@ -145,130 +204,179 @@ def load_mockup_response(mockup_file_path: str) -> dict: with open(mockup_file_path, "r") as f: response = json.load(f) response.setdefault( - "openai_stats", {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0} + "llm_stats", {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0} ) return response -def call_openai_api(text: str, use_mockup: bool) -> Optional[Any]: - """Call OpenAI API to analyze resume text.""" - logger.debug("Calling OpenAI API.") +def call_llm_api(text: str, use_mockup: bool) -> Optional[OpenRouterResponse]: + """Call OpenRouter API to analyze resume text.""" + if use_mockup: + logger.debug("Using mockup response.") + return load_mockup_response(MOCKUP_FILE_PATH) + + prompt_path = os.path.join(os.path.dirname(__file__), "prompt.txt") + logger.debug(f"Loading system prompt from: {prompt_path}") + try: - if use_mockup: - return load_mockup_response(MOCKUP_FILE_PATH) - - with open(os.path.join(os.path.dirname(__file__), "prompt.txt"), "r") as prompt_file: + # Load system prompt + if not os.path.exists(prompt_path): + raise FileNotFoundError(f"System prompt file not found: {prompt_path}") + + with open(prompt_path, "r") as prompt_file: system_content = prompt_file.read() + + if not system_content.strip(): + raise ValueError("System prompt file is empty") - response = openai.chat.completions.create( - model=MODEL_NAME, - messages=[ - {"role": "system", "content": system_content}, - {"role": "user", "content": text}, - ], - max_tokens=MAX_TOKENS, + # Prepare messages + messages = [ + {"role": "system", "content": system_content}, + {"role": "user", "content": text} + ] + + logger.debug("Prepared messages for API call:") + logger.debug(f"System message length: {len(system_content)} chars") + logger.debug(f"User message length: {len(text)} chars") + + # Call OpenRouter API + logger.info(f"Calling OpenRouter API with model: {OPENROUTER_MODEL_NAME}") + logger.debug(f"Max tokens set to: {MAX_TOKENS}") + + response = llm_client.create_chat_completion( + messages=messages, + max_tokens=MAX_TOKENS ) - logger.debug(f"OpenAI API response: {response}") + + # Validate response + if not response.choices: + logger.warning("API response contains no choices") + return None + + # Log
response details + logger.info("Successfully received API response") + logger.debug(f"Response model: {response.model}") + logger.debug(f"Token usage: {response.usage}") + logger.debug(f"Number of choices: {len(response.choices)}") + return response + + except FileNotFoundError as e: + logger.error(f"File error: {e}") + return None + except OpenRouterError as e: + logger.error(f"OpenRouter API error: {e}", exc_info=True) + if hasattr(e, 'response'): + logger.error(f"Error response: {e.response}") + return None except Exception as e: - logger.error(f"Error during OpenAI API call: {e}", exc_info=True) + logger.error(f"Unexpected error during API call: {e}", exc_info=True) return None -def write_openai_response( - response: Any, use_mockup: bool, input_file_path: str = None -) -> None: - """Write raw OpenAI response to a file.""" +def write_llm_response( + response: Optional[OpenRouterResponse], use_mockup: bool, input_file_path: str = None +) -> None: + """Write raw LLM response to a file.""" if use_mockup: - logger.debug("Using mockup response; no OpenAI message to write.") + logger.debug("Using mockup response; no LLM message to write.") return - if response and response.choices: # Changed from hasattr to direct attribute access - message_content = response.choices[0].message.content - logger.debug(f"Raw OpenAI message content: {message_content}") + + if response is None: + logger.warning("No response to write") + return + + if not response.choices: + logger.warning("No choices in LLM response") + logger.debug(f"Response object: {response.raw_response}") + return + + try: + # Get output directory and base filename output_dir = os.path.dirname(input_file_path) if input_file_path else "." base_filename = ( os.path.splitext(os.path.basename(input_file_path))[0] if input_file_path else "default" ) + + # Generate unique file path processing_id = str(uuid.uuid4()) file_path = os.path.join( - output_dir, f"{base_filename}_openai_response_{processing_id}" + output_dir, f"{base_filename}_llm_response_{processing_id}" ) + ".json" - try: - serializable_response = { # Create a serializable dictionary - "choices": [ - { - "message": { - "content": choice.message.content, - "role": choice.message.role, - }, - "finish_reason": choice.finish_reason, - "index": choice.index, - } - for choice in response.choices - ], - "openai_stats": { - "input_tokens": response.usage.prompt_tokens, - "output_tokens": response.usage.completion_tokens, - "total_tokens": response.usage.total_tokens, - }, - "model": response.model, - } - with open(file_path, "w") as f: - json.dump(serializable_response, f, indent=2) # Dump the serializable dictionary - logger.debug(f"OpenAI response written to {file_path}") - except IOError as e: - logger.error(f"Failed to write OpenAI response to file: {e}") - else: - logger.warning("No choices in OpenAI response to extract message from.") - logger.debug(f"Response object: {response}") + + # Prepare serializable response + serializable_response = { + "choices": response.choices, + "usage": response.usage, + "model": response.model, + "raw_response": response.raw_response + } + + # Write response to file + with open(file_path, "w") as f: + json.dump(serializable_response, f, indent=2) + logger.debug(f"LLM response written to {file_path}") + + except IOError as e: + logger.error(f"Failed to write LLM response to file: {e}") + except Exception as e: + logger.error(f"Unexpected error while writing response: {e}", exc_info=True) + def insert_processing_data( text_content: str, summary: dict, - 
response: Any, + response: Optional[OpenRouterResponse], args: argparse.Namespace, processing_id: str, use_mockup: bool, cv_collection, ) -> None: """Insert processing data into MongoDB.""" - logger.debug("Inserting processing data into MongoDB.") - if not use_mockup: - if response and response.choices: - message_content = response.choices[0].message.content - openai_stats = summary.get("openai_stats", {}) - usage = response.usage - input_tokens = usage.prompt_tokens - output_tokens = usage.completion_tokens - total_tokens = usage.total_tokens - else: - logger.error("Invalid response format or missing usage data.") - input_tokens = output_tokens = total_tokens = 0 - openai_stats = {} - usage = {} + if use_mockup: + logger.debug("Using mockup; skipping MongoDB insertion.") + return - processing_data = { - "processing_id": processing_id, - "timestamp": datetime.now(timezone.utc).isoformat(), - "text_content": text_content, - "summary": summary, - "input_tokens": input_tokens, - "output_tokens": output_tokens, - "total_tokens": total_tokens, + logger.debug("Preparing processing data for MongoDB insertion.") + + # Initialize default values + usage_data = { + "input_tokens": 0, + "output_tokens": 0, + "total_tokens": 0 + } + + # Extract usage data if available + if response and response.usage: + usage_data = { + "input_tokens": response.usage.get("prompt_tokens", 0), + "output_tokens": response.usage.get("completion_tokens", 0), + "total_tokens": response.usage.get("total_tokens", 0) } - try: - cv_collection.insert_one(processing_data) - logger.debug(f"Inserted processing data for ID: {processing_id}") - except Exception as e: - logger.error( - f"Failed to insert processing data into MongoDB: {e}", exc_info=True - ) - else: - logger.debug("Using mockup; skipping MongoDB insertion.") + # Prepare processing data + processing_data = { + "processing_id": processing_id, + "timestamp": datetime.now(timezone.utc).isoformat(), + "text_content": text_content, + "summary": summary, + "model": response.model if response else None, + **usage_data, + "raw_response": response.raw_response if response else None + } + + # Insert into MongoDB + try: + cv_collection.insert_one(processing_data) + logger.debug(f"Successfully inserted processing data for ID: {processing_id}") + logger.debug(f"Token usage - Input: {usage_data['input_tokens']}, " + f"Output: {usage_data['output_tokens']}, " + f"Total: {usage_data['total_tokens']}") + except Exception as e: + logger.error(f"Failed to insert processing data into MongoDB: {e}", exc_info=True) if __name__ == "__main__":