Fix by Sonnet
This commit is contained in:
		
							parent
							
								
									159f78ccb5
								
							
						
					
					
						commit
						c93212508a
					
				
							
								
								
									
										160
									
								
								my-app/docs/openrouter-refactoring-plan.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										160
									
								
								my-app/docs/openrouter-refactoring-plan.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,160 @@ | |||||||
|  | # Plan refaktoryzacji integracji OpenRouter | ||||||
|  | 
 | ||||||
|  | ## Cel | ||||||
|  | Refaktoryzacja kodu w `resume_analysis.py` w celu eliminacji wszystkich zależności od OpenAI API i wykorzystania wyłącznie OpenRouter API, z poprawą obecnej implementacji połączenia z OpenRouter. | ||||||
|  | 
 | ||||||
|  | ## Diagram przepływu zmian | ||||||
|  | ```mermaid | ||||||
|  | graph TD | ||||||
|  |     A[Obecna implementacja] --> B[Faza 1: Usunięcie zależności OpenAI] | ||||||
|  |     B --> C[Faza 2: Refaktoryzacja klienta OpenRouter] | ||||||
|  |     C --> D[Faza 3: Optymalizacja obsługi odpowiedzi] | ||||||
|  |     D --> E[Faza 4: Testy i walidacja] | ||||||
|  | 
 | ||||||
|  |     subgraph "Faza 1: Usunięcie zależności OpenAI" | ||||||
|  |         B1[Usuń importy OpenAI] | ||||||
|  |         B2[Usuń zmienne konfiguracyjne OpenAI] | ||||||
|  |         B3[Usuń logikę wyboru klienta] | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     subgraph "Faza 2: Refaktoryzacja klienta OpenRouter" | ||||||
|  |         C1[Stwórz dedykowaną klasę OpenRouterClient] | ||||||
|  |         C2[Implementuj prawidłową konfigurację nagłówków] | ||||||
|  |         C3[Dodaj obsługę różnych modeli] | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     subgraph "Faza 3: Optymalizacja obsługi odpowiedzi" | ||||||
|  |         D1[Ujednolicenie formatu odpowiedzi] | ||||||
|  |         D2[Implementacja lepszej obsługi błędów] | ||||||
|  |         D3[Dodanie walidacji odpowiedzi] | ||||||
|  |     end | ||||||
|  | 
 | ||||||
|  |     subgraph "Faza 4: Testy i walidacja" | ||||||
|  |         E1[Testy jednostkowe] | ||||||
|  |         E2[Testy integracyjne] | ||||||
|  |         E3[Dokumentacja zmian] | ||||||
|  |     end | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ## Szczegółowa implementacja | ||||||
|  | 
 | ||||||
|  | ### 1. Dedykowana klasa OpenRouterClient | ||||||
|  | 
 | ||||||
|  | ```python | ||||||
|  | class OpenRouterClient: | ||||||
|  |     def __init__(self, api_key: str, model_name: str): | ||||||
|  |         self.api_key = api_key | ||||||
|  |         self.model_name = model_name | ||||||
|  |         self.base_url = "https://openrouter.ai/api/v1" | ||||||
|  |         self.session = requests.Session() | ||||||
|  |         self.session.headers.update({ | ||||||
|  |             "Authorization": f"Bearer {api_key}", | ||||||
|  |             "HTTP-Referer": "https://github.com/OpenRouterTeam/openrouter-examples", | ||||||
|  |             "X-Title": "CV Analysis Tool" | ||||||
|  |         }) | ||||||
|  | 
 | ||||||
|  |     def create_chat_completion(self, messages: list, max_tokens: int = None): | ||||||
|  |         endpoint = f"{self.base_url}/chat/completions" | ||||||
|  |         payload = { | ||||||
|  |             "model": self.model_name, | ||||||
|  |             "messages": messages, | ||||||
|  |             "max_tokens": max_tokens | ||||||
|  |         } | ||||||
|  |          | ||||||
|  |         response = self.session.post(endpoint, json=payload) | ||||||
|  |         response.raise_for_status() | ||||||
|  |         return response.json() | ||||||
|  | 
 | ||||||
|  |     def get_available_models(self): | ||||||
|  |         endpoint = f"{self.base_url}/models" | ||||||
|  |         response = self.session.get(endpoint) | ||||||
|  |         response.raise_for_status() | ||||||
|  |         return response.json() | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ### 2. Konfiguracja i inicjalizacja | ||||||
|  | 
 | ||||||
|  | ```python | ||||||
|  | def initialize_openrouter_client(): | ||||||
|  |     if not OPENROUTER_API_KEY: | ||||||
|  |         raise ValueError("OPENROUTER_API_KEY is required") | ||||||
|  |      | ||||||
|  |     client = OpenRouterClient( | ||||||
|  |         api_key=OPENROUTER_API_KEY, | ||||||
|  |         model_name=OPENROUTER_MODEL_NAME | ||||||
|  |     ) | ||||||
|  |      | ||||||
|  |     # Verify connection and model availability | ||||||
|  |     try: | ||||||
|  |         models = client.get_available_models() | ||||||
|  |         if not any(model["id"] == OPENROUTER_MODEL_NAME for model in models): | ||||||
|  |             raise ValueError(f"Model {OPENROUTER_MODEL_NAME} not available") | ||||||
|  |         logger.debug(f"Successfully connected to OpenRouter. Available models: {models}") | ||||||
|  |         return client | ||||||
|  |     except Exception as e: | ||||||
|  |         logger.error(f"Failed to initialize OpenRouter client: {e}") | ||||||
|  |         raise | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ### 3. Obsługa odpowiedzi | ||||||
|  | 
 | ||||||
|  | ```python | ||||||
|  | class OpenRouterResponse: | ||||||
|  |     def __init__(self, raw_response: dict): | ||||||
|  |         self.raw_response = raw_response | ||||||
|  |         self.choices = self._parse_choices() | ||||||
|  |         self.usage = self._parse_usage() | ||||||
|  |         self.model = raw_response.get("model") | ||||||
|  | 
 | ||||||
|  |     def _parse_choices(self): | ||||||
|  |         choices = self.raw_response.get("choices", []) | ||||||
|  |         return [ | ||||||
|  |             { | ||||||
|  |                 "message": choice.get("message", {}), | ||||||
|  |                 "finish_reason": choice.get("finish_reason"), | ||||||
|  |                 "index": choice.get("index") | ||||||
|  |             } | ||||||
|  |             for choice in choices | ||||||
|  |         ] | ||||||
|  | 
 | ||||||
|  |     def _parse_usage(self): | ||||||
|  |         usage = self.raw_response.get("usage", {}) | ||||||
|  |         return { | ||||||
|  |             "prompt_tokens": usage.get("prompt_tokens", 0), | ||||||
|  |             "completion_tokens": usage.get("completion_tokens", 0), | ||||||
|  |             "total_tokens": usage.get("total_tokens", 0) | ||||||
|  |         } | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ### 4. Obsługa błędów | ||||||
|  | 
 | ||||||
|  | ```python | ||||||
|  | class OpenRouterError(Exception): | ||||||
|  |     def __init__(self, message: str, status_code: int = None, response: dict = None): | ||||||
|  |         super().__init__(message) | ||||||
|  |         self.status_code = status_code | ||||||
|  |         self.response = response | ||||||
|  | 
 | ||||||
|  | def handle_openrouter_error(error: Exception) -> OpenRouterError: | ||||||
|  |     if isinstance(error, requests.exceptions.RequestException): | ||||||
|  |         if error.response is not None: | ||||||
|  |             try: | ||||||
|  |                 error_data = error.response.json() | ||||||
|  |                 message = error_data.get("error", {}).get("message", str(error)) | ||||||
|  |                 return OpenRouterError( | ||||||
|  |                     message=message, | ||||||
|  |                     status_code=error.response.status_code, | ||||||
|  |                     response=error_data | ||||||
|  |                 ) | ||||||
|  |             except ValueError: | ||||||
|  |                 pass | ||||||
|  |     return OpenRouterError(str(error)) | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | ## Kolejne kroki | ||||||
|  | 
 | ||||||
|  | 1. Implementacja powyższych klas i funkcji | ||||||
|  | 2. Usunięcie wszystkich zależności OpenAI | ||||||
|  | 3. Aktualizacja istniejącego kodu do korzystania z nowego klienta | ||||||
|  | 4. Dodanie testów jednostkowych i integracyjnych | ||||||
|  | 5. Aktualizacja dokumentacji | ||||||
							
								
								
									
										
											BIN
										
									
								
								my-app/utils/__pycache__/openrouter_client.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								my-app/utils/__pycache__/openrouter_client.cpython-312.pyc
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										186
									
								
								my-app/utils/openrouter_client.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										186
									
								
								my-app/utils/openrouter_client.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,186 @@ | |||||||
|  | #!/usr/bin/env python3 | ||||||
|  | import logging | ||||||
|  | import requests | ||||||
|  | from typing import Optional, Dict, List, Any | ||||||
|  | 
 | ||||||
|  | logger = logging.getLogger(__name__) | ||||||
|  | 
 | ||||||
|  | class OpenRouterError(Exception): | ||||||
|  |     """Custom exception for OpenRouter API errors.""" | ||||||
|  |     def __init__(self, message: str, status_code: int = None, response: dict = None): | ||||||
|  |         super().__init__(message) | ||||||
|  |         self.status_code = status_code | ||||||
|  |         self.response = response | ||||||
|  | 
 | ||||||
|  | class OpenRouterResponse: | ||||||
|  |     """Wrapper for OpenRouter API responses.""" | ||||||
|  |     def __init__(self, raw_response: dict): | ||||||
|  |         self.raw_response = raw_response | ||||||
|  |         self.choices = self._parse_choices() | ||||||
|  |         self.usage = self._parse_usage() | ||||||
|  |         self.model = raw_response.get("model") | ||||||
|  | 
 | ||||||
|  |     def _parse_choices(self) -> List[Dict[str, Any]]: | ||||||
|  |         choices = self.raw_response.get("choices", []) | ||||||
|  |         return [ | ||||||
|  |             { | ||||||
|  |                 "message": choice.get("message", {}), | ||||||
|  |                 "finish_reason": choice.get("finish_reason"), | ||||||
|  |                 "index": choice.get("index") | ||||||
|  |             } | ||||||
|  |             for choice in choices | ||||||
|  |         ] | ||||||
|  | 
 | ||||||
|  |     def _parse_usage(self) -> Dict[str, int]: | ||||||
|  |         usage = self.raw_response.get("usage", {}) | ||||||
|  |         return { | ||||||
|  |             "prompt_tokens": usage.get("prompt_tokens", 0), | ||||||
|  |             "completion_tokens": usage.get("completion_tokens", 0), | ||||||
|  |             "total_tokens": usage.get("total_tokens", 0) | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  | class OpenRouterClient: | ||||||
|  |     """Client for interacting with the OpenRouter API.""" | ||||||
|  |     def __init__(self, api_key: str, model_name: str): | ||||||
|  |         if not api_key: | ||||||
|  |             raise ValueError("OpenRouter API key is required") | ||||||
|  |         if not model_name: | ||||||
|  |             raise ValueError("Model name is required") | ||||||
|  | 
 | ||||||
|  |         self.api_key = api_key | ||||||
|  |         self.model_name = model_name | ||||||
|  |         self.base_url = "https://openrouter.ai/api/v1" | ||||||
|  |         self.session = requests.Session() | ||||||
|  |         self.session.headers.update({ | ||||||
|  |             "Authorization": f"Bearer {api_key}", | ||||||
|  |             "HTTP-Referer": "https://github.com/OpenRouterTeam/openrouter-examples", | ||||||
|  |             "X-Title": "CV Analysis Tool", | ||||||
|  |             "Content-Type": "application/json" | ||||||
|  |         }) | ||||||
|  | 
 | ||||||
|  |     def create_chat_completion( | ||||||
|  |         self,  | ||||||
|  |         messages: List[Dict[str, str]],  | ||||||
|  |         max_tokens: Optional[int] = None | ||||||
|  |     ) -> OpenRouterResponse: | ||||||
|  |         """ | ||||||
|  |         Create a chat completion using the OpenRouter API. | ||||||
|  |          | ||||||
|  |         Args: | ||||||
|  |             messages: List of message dictionaries with 'role' and 'content' keys | ||||||
|  |             max_tokens: Maximum number of tokens to generate | ||||||
|  |              | ||||||
|  |         Returns: | ||||||
|  |             OpenRouterResponse object containing the API response | ||||||
|  |              | ||||||
|  |         Raises: | ||||||
|  |             OpenRouterError: If the API request fails | ||||||
|  |         """ | ||||||
|  |         endpoint = f"{self.base_url}/chat/completions" | ||||||
|  |         payload = { | ||||||
|  |             "model": self.model_name, | ||||||
|  |             "messages": messages | ||||||
|  |         } | ||||||
|  |          | ||||||
|  |         if max_tokens is not None: | ||||||
|  |             payload["max_tokens"] = max_tokens | ||||||
|  | 
 | ||||||
|  |         try: | ||||||
|  |             response = self.session.post(endpoint, json=payload) | ||||||
|  |             response.raise_for_status() | ||||||
|  |             return OpenRouterResponse(response.json()) | ||||||
|  |         except requests.exceptions.RequestException as e: | ||||||
|  |             raise self._handle_request_error(e) | ||||||
|  | 
 | ||||||
|  |     def get_available_models(self) -> List[Dict[str, Any]]: | ||||||
|  |         """ | ||||||
|  |         Get list of available models from OpenRouter API. | ||||||
|  |          | ||||||
|  |         Returns: | ||||||
|  |             Raw response dictionary whose "data" key holds the list of model information dictionaries | ||||||
|  |              | ||||||
|  |         Raises: | ||||||
|  |             OpenRouterError: If the API request fails | ||||||
|  |         """ | ||||||
|  |         endpoint = f"{self.base_url}/models" | ||||||
|  |          | ||||||
|  |         try: | ||||||
|  |             logger.debug(f"Fetching available models from: {endpoint}") | ||||||
|  |             response = self.session.get(endpoint) | ||||||
|  |             response.raise_for_status() | ||||||
|  |              | ||||||
|  |             data = response.json() | ||||||
|  |             logger.debug(f"Raw API response: {data}") | ||||||
|  |              | ||||||
|  |             if not isinstance(data, dict) or "data" not in data: | ||||||
|  |                 raise OpenRouterError( | ||||||
|  |                     message="Invalid response format from OpenRouter API", | ||||||
|  |                     response=data | ||||||
|  |                 ) | ||||||
|  |                  | ||||||
|  |             return data | ||||||
|  |         except requests.exceptions.RequestException as e: | ||||||
|  |             raise self._handle_request_error(e) | ||||||
|  | 
 | ||||||
|  |     def verify_model_availability(self) -> bool: | ||||||
|  |         """ | ||||||
|  |         Verify if the configured model is available. | ||||||
|  |          | ||||||
|  |         Returns: | ||||||
|  |             True if the configured model is listed; False if it is not listed or the model list could not be fetched (the error is logged) | ||||||
|  |         """ | ||||||
|  |         try: | ||||||
|  |             response = self.get_available_models() | ||||||
|  |             # The OpenRouter API returns the model list in the format: | ||||||
|  |             # {"data": [{"id": "model_name", ...}, ...]} | ||||||
|  |             models = response.get("data", []) | ||||||
|  |             logger.debug(f"Available models: {[model.get('id') for model in models]}") | ||||||
|  |             return any(model.get("id") == self.model_name for model in models) | ||||||
|  |         except OpenRouterError as e: | ||||||
|  |             logger.error(f"Failed to verify model availability: {e}") | ||||||
|  |             return False | ||||||
|  |         except Exception as e: | ||||||
|  |             logger.error(f"Unexpected error while verifying model availability: {e}") | ||||||
|  |             return False | ||||||
|  | 
 | ||||||
|  |     def _handle_request_error(self, error: requests.exceptions.RequestException) -> OpenRouterError: | ||||||
|  |         """Convert requests exceptions to OpenRouterError.""" | ||||||
|  |         if error.response is not None: | ||||||
|  |             try: | ||||||
|  |                 error_data = error.response.json() | ||||||
|  |                 message = error_data.get("error", {}).get("message", str(error)) | ||||||
|  |                 return OpenRouterError( | ||||||
|  |                     message=message, | ||||||
|  |                     status_code=error.response.status_code, | ||||||
|  |                     response=error_data | ||||||
|  |                 ) | ||||||
|  |             except ValueError: | ||||||
|  |                 pass | ||||||
|  |         return OpenRouterError(str(error)) | ||||||
|  | 
 | ||||||
|  | def initialize_openrouter_client(api_key: str, model_name: str) -> OpenRouterClient: | ||||||
|  |     """ | ||||||
|  |     Initialize and verify OpenRouter client. | ||||||
|  |      | ||||||
|  |     Args: | ||||||
|  |         api_key: OpenRouter API key | ||||||
|  |         model_name: Name of the model to use | ||||||
|  |          | ||||||
|  |     Returns: | ||||||
|  |         Initialized OpenRouterClient | ||||||
|  |          | ||||||
|  |     Raises: | ||||||
|  |         ValueError: If client initialization or verification fails | ||||||
|  |     """ | ||||||
|  |     try: | ||||||
|  |         client = OpenRouterClient(api_key=api_key, model_name=model_name) | ||||||
|  |          | ||||||
|  |         # Verify connection and model availability | ||||||
|  |         if not client.verify_model_availability(): | ||||||
|  |             raise ValueError(f"Model {model_name} not available") | ||||||
|  |              | ||||||
|  |         logger.debug(f"Successfully initialized OpenRouter client with model: {model_name}") | ||||||
|  |         return client | ||||||
|  |     except Exception as e: | ||||||
|  |         logger.error(f"Failed to initialize OpenRouter client: {e}") | ||||||
|  |         raise | ||||||
| @ -6,20 +6,27 @@ import json | |||||||
| import logging | import logging | ||||||
| from datetime import datetime, timezone | from datetime import datetime, timezone | ||||||
| import uuid | import uuid | ||||||
| from typing import Optional, Any | from typing import Optional, Any, Dict | ||||||
| import time | import time | ||||||
| 
 | 
 | ||||||
| from dotenv import load_dotenv | from dotenv import load_dotenv | ||||||
| import pymongo | import pymongo | ||||||
| import openai |  | ||||||
| from pdfminer.high_level import extract_text | from pdfminer.high_level import extract_text | ||||||
| 
 | 
 | ||||||
|  | from openrouter_client import initialize_openrouter_client, OpenRouterError, OpenRouterResponse | ||||||
|  | 
 | ||||||
| # Load environment variables | # Load environment variables | ||||||
| load_dotenv() | load_dotenv() | ||||||
| 
 | 
 | ||||||
| # Configuration | # Configuration | ||||||
| OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") | OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") | ||||||
| MODEL_NAME = os.getenv("MODEL_NAME") | if not OPENROUTER_API_KEY: | ||||||
|  |     raise ValueError("OPENROUTER_API_KEY environment variable is required") | ||||||
|  | 
 | ||||||
|  | OPENROUTER_MODEL_NAME = os.getenv("OPENROUTER_MODEL_NAME") | ||||||
|  | if not OPENROUTER_MODEL_NAME: | ||||||
|  |     raise ValueError("OPENROUTER_MODEL_NAME environment variable is required") | ||||||
|  | 
 | ||||||
| MAX_TOKENS = int(os.getenv("MAX_TOKENS", 500)) | MAX_TOKENS = int(os.getenv("MAX_TOKENS", 500)) | ||||||
| USE_MOCKUP = os.getenv("USE_MOCKUP", "false").lower() == "true" | USE_MOCKUP = os.getenv("USE_MOCKUP", "false").lower() == "true" | ||||||
| MOCKUP_FILE_PATH = os.getenv("MOCKUP_FILE_PATH") | MOCKUP_FILE_PATH = os.getenv("MOCKUP_FILE_PATH") | ||||||
| @ -28,9 +35,6 @@ MONGODB_DATABASE = os.getenv("MONGODB_DATABASE") | |||||||
| 
 | 
 | ||||||
| MONGO_COLLECTION_NAME = "cv_processing_collection" | MONGO_COLLECTION_NAME = "cv_processing_collection" | ||||||
| 
 | 
 | ||||||
| # Initialize OpenAI client |  | ||||||
| openai.api_key = OPENAI_API_KEY |  | ||||||
| 
 |  | ||||||
| # Logging setup | # Logging setup | ||||||
| LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper() | LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper() | ||||||
| 
 | 
 | ||||||
| @ -40,6 +44,25 @@ logging.basicConfig( | |||||||
|     datefmt="%Y-%m-%dT%H:%M:%S%z", |     datefmt="%Y-%m-%dT%H:%M:%S%z", | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | logger = logging.getLogger(__name__) | ||||||
|  | 
 | ||||||
|  | # Initialize OpenRouter client | ||||||
|  | logger.info("Initializing OpenRouter client...") | ||||||
|  | logger.debug(f"Using model: {OPENROUTER_MODEL_NAME}") | ||||||
|  | logger.debug("API Key present and valid format: %s", bool(OPENROUTER_API_KEY and OPENROUTER_API_KEY.startswith("sk-or-v1-"))) | ||||||
|  | 
 | ||||||
|  | try: | ||||||
|  |     llm_client = initialize_openrouter_client( | ||||||
|  |         api_key=OPENROUTER_API_KEY, | ||||||
|  |         model_name=OPENROUTER_MODEL_NAME | ||||||
|  |     ) | ||||||
|  |     logger.info(f"Successfully initialized OpenRouter client with model: {OPENROUTER_MODEL_NAME}") | ||||||
|  | except ValueError as e: | ||||||
|  |     logger.error(f"Configuration error: {e}") | ||||||
|  |     sys.exit(1) | ||||||
|  | except Exception as e: | ||||||
|  |     logger.error(f"Failed to initialize OpenRouter client: {e}", exc_info=True) | ||||||
|  |     sys.exit(1) | ||||||
| 
 | 
 | ||||||
| def get_mongo_collection(): | def get_mongo_collection(): | ||||||
|     """Initialize and return MongoDB collection.""" |     """Initialize and return MongoDB collection.""" | ||||||
| @ -51,39 +74,39 @@ def get_mongo_collection(): | |||||||
| logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def main(): | def parse_arguments(): | ||||||
|     """Main function to process the resume.""" |     """Parses command line arguments.""" | ||||||
|     parser = argparse.ArgumentParser( |     parser = argparse.ArgumentParser( | ||||||
|         formatter_class=argparse.RawDescriptionHelpFormatter, |         formatter_class=argparse.RawDescriptionHelpFormatter, | ||||||
|         description="""This tool analyzes resumes using OpenAI's API. Parameters are required to run the analysis. |         description="""This tool analyzes resumes using the OpenRouter API. Parameters are required to run the analysis. | ||||||
| 
 | 
 | ||||||
| Required Environment Variables: | Required Environment Variables: | ||||||
| - OPENAI_API_KEY: Your OpenAI API key | - OPENROUTER_API_KEY: Your OpenRouter API key | ||||||
| - MODEL_NAME: OpenAI model to use (e.g. gpt-3.5-turbo) | - OPENROUTER_MODEL_NAME: OpenRouter model to use (e.g. google/gemma-7b-it) | ||||||
| - MONGODB_URI: MongoDB connection string (optional for mockup mode)""", | - MONGODB_URI: MongoDB connection string (optional for mockup mode) | ||||||
|  | - MAX_TOKENS: Maximum tokens for response (default: 500)""", | ||||||
|         usage="resume_analysis.py [-h] [-f FILE] [-m]", |         usage="resume_analysis.py [-h] [-f FILE] [-m]", | ||||||
|         epilog="""Examples: |         epilog="""Examples: | ||||||
|   Analyze a resume:        resume_analysis.py -f my_resume.pdf |   Analyze a resume:        resume_analysis.py -f my_resume.pdf | ||||||
|   Test with mockup data:   resume_analysis.py -f test.pdf -m""", |   Test with mockup data:   resume_analysis.py -f test.pdf -m | ||||||
|  |    | ||||||
|  | Note: Make sure your OpenRouter API key and model name are properly configured in the .env file.""", | ||||||
|     ) |     ) | ||||||
|     parser.add_argument( |     parser.add_argument( | ||||||
|         "-f", "--file", help="Path to the resume file to analyze (PDF or text)" |         "-f", "--file", help="Path to the resume file to analyze (PDF or text)" | ||||||
|     ) |     ) | ||||||
|     parser.add_argument( |     parser.add_argument( | ||||||
|         "-m", "--mockup", action="store_true", help="Use mockup response instead of calling OpenAI API" |         "-m", "--mockup", action="store_true", help="Use mockup response instead of calling LLM API" | ||||||
|     ) |     ) | ||||||
| 
 |  | ||||||
|     # If no arguments provided, show help and exit |  | ||||||
|     if len(sys.argv) == 1: |     if len(sys.argv) == 1: | ||||||
|         parser.print_help() |         parser.print_help() | ||||||
|         sys.exit(1) |         return None | ||||||
|  |     return parser.parse_args() | ||||||
| 
 | 
 | ||||||
|     args = parser.parse_args() |  | ||||||
| 
 | 
 | ||||||
|     # Determine whether to use mockup based on the -m flag, overriding USE_MOCKUP | def load_resume_text(args): | ||||||
|  |     """Loads resume text from a file or uses mockup text.""" | ||||||
|     use_mockup = args.mockup |     use_mockup = args.mockup | ||||||
| 
 |  | ||||||
|     # Load the resume text from the provided file or use mockup |  | ||||||
|     if use_mockup: |     if use_mockup: | ||||||
|         resume_text = "Mockup resume text" |         resume_text = "Mockup resume text" | ||||||
|     else: |     else: | ||||||
| @ -102,39 +125,75 @@ Required Environment Variables: | |||||||
|                 resume_text = f.read() |                 resume_text = f.read() | ||||||
|         file_read_time = time.time() - start_file_read_time |         file_read_time = time.time() - start_file_read_time | ||||||
|         logger.debug(f"File read time: {file_read_time:.2f} seconds") |         logger.debug(f"File read time: {file_read_time:.2f} seconds") | ||||||
|  |     return resume_text | ||||||
| 
 | 
 | ||||||
|     # Call the OpenAI API with the resume text | 
 | ||||||
|  | def analyze_resume_with_llm(resume_text, use_mockup): | ||||||
|  |     """Analyzes resume text using OpenRouter API.""" | ||||||
|     start_time = time.time() |     start_time = time.time() | ||||||
|     response = call_openai_api(resume_text, use_mockup) |     response = call_llm_api(resume_text, use_mockup) | ||||||
|     openai_api_time = time.time() - start_time |     llm_api_time = time.time() - start_time | ||||||
|     logger.debug(f"OpenAI API call time: {openai_api_time:.2f} seconds") |     logger.debug(f"LLM API call time: {llm_api_time:.2f} seconds") | ||||||
|  |     return response | ||||||
| 
 | 
 | ||||||
|     # Initialize MongoDB collection only when needed |  | ||||||
|     cv_collection = get_mongo_collection() |  | ||||||
| 
 | 
 | ||||||
|     # Measure MongoDB insertion time | def store_llm_response(response, use_mockup, input_file_path): | ||||||
|     start_mongo_time = time.time() |     """Writes raw LLM response to a file.""" | ||||||
|     if response and response.choices: |     write_llm_response(response, use_mockup, input_file_path) | ||||||
|         message_content = response.choices[0].message.content | 
 | ||||||
|         try: | 
 | ||||||
|             summary = json.loads(message_content) | def save_processing_data(resume_text, summary, response, args, processing_id, use_mockup, cv_collection): | ||||||
|         except json.JSONDecodeError as e: |     """Saves processing data to MongoDB.""" | ||||||
|             logger.error(f"Failed to parse OpenAI response: {e}") |  | ||||||
|             summary = {"error": "Invalid JSON response from OpenAI"} |  | ||||||
|     else: |  | ||||||
|         summary = {"error": "No response from OpenAI"} |  | ||||||
|     insert_processing_data( |     insert_processing_data( | ||||||
|         resume_text, |         resume_text, | ||||||
|         summary, |         summary, | ||||||
|         response, |         response, | ||||||
|         args, |         args, | ||||||
|         str(uuid.uuid4()), |         processing_id, | ||||||
|         use_mockup, |         use_mockup, | ||||||
|         cv_collection, |         cv_collection, | ||||||
|     ) |     ) | ||||||
|     mongo_insert_time = time.time() - start_mongo_time | 
 | ||||||
|     logger.debug(f"MongoDB insert time: {mongo_insert_time:.2f} seconds") | 
 | ||||||
|     write_openai_response(response, use_mockup, args.file) | def get_cv_summary_from_response(response): | ||||||
|  |     """Extracts CV summary from LLM response.""" | ||||||
|  |     if response and hasattr(response, "choices"): | ||||||
|  |         message_content = response.choices[0].message.content | ||||||
|  |         try: | ||||||
|  |             summary = json.loads(message_content) | ||||||
|  |         except json.JSONDecodeError as e: | ||||||
|  |             logger.error(f"Failed to parse LLM response: {e}") | ||||||
|  |             summary = {"error": "Invalid JSON response from LLM"} | ||||||
|  |     else: | ||||||
|  |         summary = {"error": "No response from LLM"} | ||||||
|  |     return summary | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def main(): | ||||||
|  |     """Main function to process the resume.""" | ||||||
|  |     args = parse_arguments() | ||||||
|  |     if args is None: | ||||||
|  |         return | ||||||
|  |     use_mockup = args.mockup  # Decide whether to use the mockup based on the -m flag | ||||||
|  | 
 | ||||||
|  |     try: | ||||||
|  |         resume_text = load_resume_text(args) | ||||||
|  |     except FileNotFoundError as e: | ||||||
|  |         logger.error(f"File error: {e}") | ||||||
|  |         sys.exit(1) | ||||||
|  |     except Exception as e: | ||||||
|  |         logger.error(f"Error loading resume text: {e}") | ||||||
|  |         sys.exit(1) | ||||||
|  | 
 | ||||||
|  |     response = analyze_resume_with_llm(resume_text, use_mockup) | ||||||
|  |     store_llm_response(response, use_mockup, args.file) | ||||||
|  | 
 | ||||||
|  |     cv_collection = get_mongo_collection() | ||||||
|  |     processing_id = str(uuid.uuid4()) | ||||||
|  |     summary = get_cv_summary_from_response(response) | ||||||
|  |     save_processing_data(resume_text, summary, response, args, processing_id, use_mockup, cv_collection) | ||||||
|  | 
 | ||||||
|  |     logger.info(f"Resume analysis completed. Processing ID: {processing_id}") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def load_mockup_response(mockup_file_path: str) -> dict: | def load_mockup_response(mockup_file_path: str) -> dict: | ||||||
| @ -145,130 +204,179 @@ def load_mockup_response(mockup_file_path: str) -> dict: | |||||||
|     with open(mockup_file_path, "r") as f: |     with open(mockup_file_path, "r") as f: | ||||||
|         response = json.load(f) |         response = json.load(f) | ||||||
|     response.setdefault( |     response.setdefault( | ||||||
|         "openai_stats", {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0} |         "llm_stats", {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0} | ||||||
|     ) |     ) | ||||||
|     return response |     return response | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def call_openai_api(text: str, use_mockup: bool) -> Optional[Any]: | def call_llm_api(text: str, use_mockup: bool) -> Optional[OpenRouterResponse]: | ||||||
|     """Call OpenAI API to analyze resume text.""" |     """Call OpenRouter API to analyze resume text.""" | ||||||
|     logger.debug("Calling OpenAI API.") |  | ||||||
|     try: |  | ||||||
|     if use_mockup: |     if use_mockup: | ||||||
|  |         logger.debug("Using mockup response.") | ||||||
|         return load_mockup_response(MOCKUP_FILE_PATH) |         return load_mockup_response(MOCKUP_FILE_PATH) | ||||||
| 
 | 
 | ||||||
|         with open(os.path.join(os.path.dirname(__file__), "prompt.txt"), "r") as prompt_file: |     prompt_path = os.path.join(os.path.dirname(__file__), "prompt.txt") | ||||||
|  |     logger.debug(f"Loading system prompt from: {prompt_path}") | ||||||
|  | 
 | ||||||
|  |     try: | ||||||
|  |         # Load system prompt | ||||||
|  |         if not os.path.exists(prompt_path): | ||||||
|  |             raise FileNotFoundError(f"System prompt file not found: {prompt_path}") | ||||||
|  |              | ||||||
|  |         with open(prompt_path, "r") as prompt_file: | ||||||
|             system_content = prompt_file.read() |             system_content = prompt_file.read() | ||||||
|              |              | ||||||
|         response = openai.chat.completions.create( |         if not system_content.strip(): | ||||||
|             model=MODEL_NAME, |             raise ValueError("System prompt file is empty") | ||||||
|             messages=[ | 
 | ||||||
|  |         # Prepare messages | ||||||
|  |         messages = [ | ||||||
|             {"role": "system", "content": system_content}, |             {"role": "system", "content": system_content}, | ||||||
|                 {"role": "user", "content": text}, |             {"role": "user", "content": text} | ||||||
|             ], |         ] | ||||||
|             max_tokens=MAX_TOKENS, |          | ||||||
|  |         logger.debug("Prepared messages for API call:") | ||||||
|  |         logger.debug(f"System message length: {len(system_content)} chars") | ||||||
|  |         logger.debug(f"User message length: {len(text)} chars") | ||||||
|  | 
 | ||||||
|  |         # Call OpenRouter API | ||||||
|  |         logger.info(f"Calling OpenRouter API with model: {OPENROUTER_MODEL_NAME}") | ||||||
|  |         logger.debug(f"Max tokens set to: {MAX_TOKENS}") | ||||||
|  |          | ||||||
|  |         response = llm_client.create_chat_completion( | ||||||
|  |             messages=messages, | ||||||
|  |             max_tokens=MAX_TOKENS | ||||||
|         ) |         ) | ||||||
|         logger.debug(f"OpenAI API response: {response}") |          | ||||||
|  |         # Validate response | ||||||
|  |         if not response.choices: | ||||||
|  |             logger.warning("API response contains no choices") | ||||||
|  |             return None | ||||||
|  |              | ||||||
|  |         # Log response details | ||||||
|  |         logger.info("Successfully received API response") | ||||||
|  |         logger.debug(f"Response model: {response.model}") | ||||||
|  |         logger.debug(f"Token usage: {response.usage}") | ||||||
|  |         logger.debug(f"Number of choices: {len(response.choices)}") | ||||||
|  |          | ||||||
|         return response |         return response | ||||||
|  | 
 | ||||||
|  |     except FileNotFoundError as e: | ||||||
|  |         logger.error(f"File error: {e}") | ||||||
|  |         return None | ||||||
|  |     except OpenRouterError as e: | ||||||
|  |         logger.error(f"OpenRouter API error: {e}", exc_info=True) | ||||||
|  |         if hasattr(e, 'response'): | ||||||
|  |             logger.error(f"Error response: {e.response}") | ||||||
|  |         return None | ||||||
|     except Exception as e: |     except Exception as e: | ||||||
|         logger.error(f"Error during OpenAI API call: {e}", exc_info=True) |         logger.error(f"Unexpected error during API call: {e}", exc_info=True) | ||||||
|         return None |         return None | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def write_openai_response( | def write_llm_response( | ||||||
|     response: Any, use_mockup: bool, input_file_path: str = None |     response: Optional[OpenRouterResponse], use_mockup: bool, input_file_path: str = None | ||||||
| ) -> None: | ) -> None: | ||||||
|     """Write raw OpenAI response to a file.""" |     """Write raw LLM response to a file.""" | ||||||
|     if use_mockup: |     if use_mockup: | ||||||
|         logger.debug("Using mockup response; no OpenAI message to write.") |         logger.debug("Using mockup response; no LLM message to write.") | ||||||
|         return |         return | ||||||
|     if response and response.choices:  # Changed from hasattr to direct attribute access | 
 | ||||||
|         message_content = response.choices[0].message.content |     if response is None: | ||||||
|         logger.debug(f"Raw OpenAI message content: {message_content}") |         logger.warning("No response to write") | ||||||
|  |         return | ||||||
|  | 
 | ||||||
|  |     if not response.choices: | ||||||
|  |         logger.warning("No choices in LLM response") | ||||||
|  |         logger.debug(f"Response object: {response.raw_response}") | ||||||
|  |         return | ||||||
|  | 
 | ||||||
|  |     try: | ||||||
|  |         # Get output directory and base filename | ||||||
|         output_dir = os.path.dirname(input_file_path) if input_file_path else "." |         output_dir = os.path.dirname(input_file_path) if input_file_path else "." | ||||||
|         base_filename = ( |         base_filename = ( | ||||||
|             os.path.splitext(os.path.basename(input_file_path))[0] |             os.path.splitext(os.path.basename(input_file_path))[0] | ||||||
|             if input_file_path |             if input_file_path | ||||||
|             else "default" |             else "default" | ||||||
|         ) |         ) | ||||||
|  |          | ||||||
|  |         # Generate unique file path | ||||||
|         processing_id = str(uuid.uuid4()) |         processing_id = str(uuid.uuid4()) | ||||||
|         file_path = os.path.join( |         file_path = os.path.join( | ||||||
|             output_dir, f"{base_filename}_openai_response_{processing_id}" |             output_dir, f"{base_filename}_llm_response_{processing_id}" | ||||||
|         ) + ".json" |         ) + ".json" | ||||||
|         try: | 
 | ||||||
|             serializable_response = {  # Create a serializable dictionary |         # Prepare serializable response | ||||||
|                 "choices": [ |         serializable_response = { | ||||||
|                     { |             "choices": response.choices, | ||||||
|                         "message": { |             "usage": response.usage, | ||||||
|                             "content": choice.message.content, |  | ||||||
|                             "role": choice.message.role, |  | ||||||
|                         }, |  | ||||||
|                         "finish_reason": choice.finish_reason, |  | ||||||
|                         "index": choice.index, |  | ||||||
|                     } |  | ||||||
|                     for choice in response.choices |  | ||||||
|                 ], |  | ||||||
|                 "openai_stats": { |  | ||||||
|                     "input_tokens": response.usage.prompt_tokens, |  | ||||||
|                     "output_tokens": response.usage.completion_tokens, |  | ||||||
|                     "total_tokens": response.usage.total_tokens, |  | ||||||
|                 }, |  | ||||||
|             "model": response.model, |             "model": response.model, | ||||||
|  |             "raw_response": response.raw_response | ||||||
|         } |         } | ||||||
|  | 
 | ||||||
|  |         # Write response to file | ||||||
|         with open(file_path, "w") as f: |         with open(file_path, "w") as f: | ||||||
|                 json.dump(serializable_response, f, indent=2)  # Dump the serializable dictionary |             json.dump(serializable_response, f, indent=2) | ||||||
|             logger.debug(f"OpenAI response written to {file_path}") |         logger.debug(f"LLM response written to {file_path}") | ||||||
|  |          | ||||||
|     except IOError as e: |     except IOError as e: | ||||||
|             logger.error(f"Failed to write OpenAI response to file: {e}") |         logger.error(f"Failed to write LLM response to file: {e}") | ||||||
|     else: |     except Exception as e: | ||||||
|         logger.warning("No choices in OpenAI response to extract message from.") |         logger.error(f"Unexpected error while writing response: {e}", exc_info=True) | ||||||
|         logger.debug(f"Response object: {response}") | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def insert_processing_data( | def insert_processing_data( | ||||||
|     text_content: str, |     text_content: str, | ||||||
|     summary: dict, |     summary: dict, | ||||||
|     response: Any, |     response: Optional[OpenRouterResponse], | ||||||
|     args: argparse.Namespace, |     args: argparse.Namespace, | ||||||
|     processing_id: str, |     processing_id: str, | ||||||
|     use_mockup: bool, |     use_mockup: bool, | ||||||
|     cv_collection, |     cv_collection, | ||||||
| ) -> None: | ) -> None: | ||||||
|     """Insert processing data into MongoDB.""" |     """Insert processing data into MongoDB.""" | ||||||
|     logger.debug("Inserting processing data into MongoDB.") |     if use_mockup: | ||||||
|     if not use_mockup: |         logger.debug("Using mockup; skipping MongoDB insertion.") | ||||||
|         if response and response.choices: |         return | ||||||
|             message_content = response.choices[0].message.content |  | ||||||
|             openai_stats = summary.get("openai_stats", {}) |  | ||||||
|             usage = response.usage |  | ||||||
|             input_tokens = usage.prompt_tokens |  | ||||||
|             output_tokens = usage.completion_tokens |  | ||||||
|             total_tokens = usage.total_tokens |  | ||||||
|         else: |  | ||||||
|             logger.error("Invalid response format or missing usage data.") |  | ||||||
|             input_tokens = output_tokens = total_tokens = 0 |  | ||||||
|             openai_stats = {} |  | ||||||
|             usage = {} |  | ||||||
| 
 | 
 | ||||||
|  |     logger.debug("Preparing processing data for MongoDB insertion.") | ||||||
|  |      | ||||||
|  |     # Initialize default values | ||||||
|  |     usage_data = { | ||||||
|  |         "input_tokens": 0, | ||||||
|  |         "output_tokens": 0, | ||||||
|  |         "total_tokens": 0 | ||||||
|  |     } | ||||||
|  |      | ||||||
|  |     # Extract usage data if available | ||||||
|  |     if response and response.usage: | ||||||
|  |         usage_data = { | ||||||
|  |             "input_tokens": response.usage.get("prompt_tokens", 0), | ||||||
|  |             "output_tokens": response.usage.get("completion_tokens", 0), | ||||||
|  |             "total_tokens": response.usage.get("total_tokens", 0) | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |     # Prepare processing data | ||||||
|     processing_data = { |     processing_data = { | ||||||
|         "processing_id": processing_id, |         "processing_id": processing_id, | ||||||
|         "timestamp": datetime.now(timezone.utc).isoformat(), |         "timestamp": datetime.now(timezone.utc).isoformat(), | ||||||
|         "text_content": text_content, |         "text_content": text_content, | ||||||
|         "summary": summary, |         "summary": summary, | ||||||
|             "input_tokens": input_tokens, |         "model": response.model if response else None, | ||||||
|             "output_tokens": output_tokens, |         **usage_data, | ||||||
|             "total_tokens": total_tokens, |         "raw_response": response.raw_response if response else None | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     # Insert into MongoDB | ||||||
|     try: |     try: | ||||||
|         cv_collection.insert_one(processing_data) |         cv_collection.insert_one(processing_data) | ||||||
|             logger.debug(f"Inserted processing data for ID: {processing_id}") |         logger.debug(f"Successfully inserted processing data for ID: {processing_id}") | ||||||
|  |         logger.debug(f"Token usage - Input: {usage_data['input_tokens']}, " | ||||||
|  |                     f"Output: {usage_data['output_tokens']}, " | ||||||
|  |                     f"Total: {usage_data['total_tokens']}") | ||||||
|     except Exception as e: |     except Exception as e: | ||||||
|             logger.error( |         logger.error(f"Failed to insert processing data into MongoDB: {e}", exc_info=True) | ||||||
|                 f"Failed to insert processing data into MongoDB: {e}", exc_info=True |  | ||||||
|             ) |  | ||||||
|     else: |  | ||||||
|         logger.debug("Using mockup; skipping MongoDB insertion.") |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user