diff --git a/.env b/.env
index 676b715..ab10f8b 100644
--- a/.env
+++ b/.env
@@ -1,20 +1,21 @@
 # Ollama configuration
-LLM_OLLAMA_BASE_URL=http://192.168.0.140:11434
+# LLM_OLLAMA_BASE_URL=http://192.168.0.140:11434
 # LLM_OLLAMA_BASE_URL=http://192.168.0.122:11434
-# LLM_OLLAMA_BASE_URL="https://api-amer-sandbox-gbl-mdm-hub.pfizer.com/ollama"
+LLM_OLLAMA_BASE_URL="https://api-amer-sandbox-gbl-mdm-hub.pfizer.com/ollama"
 LLM_OLLAMA_MODEL=phi4-mini:latest
 # LLM_OLLAMA_MODEL=smollm:360m
 # LLM_OLLAMA_MODEL=qwen3:0.6b
 # LLM_OLLAMA_MODEL=qwen3:1.7b
+# LLM_OLLAMA_MODEL=qwen3:8b
 # Logging configuration
 LOG_LEVEL=DEBUG
 # Ollama API Key (required when using Ollama mode)
 # Langfuse configuration
-LANGFUSE_ENABLED=true
-LANGFUSE_PUBLIC_KEY="pk-lf-17dfde63-93e2-4983-8aa7-2673d3ecaab8"
-LANGFUSE_SECRET_KEY="sk-lf-ba41a266-6fe5-4c90-a483-bec8a7aaa321"
+LANGFUSE_ENABLED=false
+LANGFUSE_PUBLIC_KEY="pk-lf-"
+LANGFUSE_SECRET_KEY="sk-lf-"
 LANGFUSE_HOST="https://cloud.langfuse.com"
 # Gemini configuration
-LLM_GEMINI_API_KEY="AIzaSyDl12gxyTf2xCaTbT6OMJg0I-Rc82Ib77c"
+LLM_GEMINI_API_KEY=""
 LLM_GEMINI_MODEL="gemini-2.5-flash"
-LLM_MODE=gemini
+LLM_MODE=ollama
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 1725a8a..1d9f9c8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -42,11 +42,11 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
 COPY config ./config
 
 # Copy your application source code.
-COPY jira_webhook_llm.py .
+COPY main.py .
 COPY config.py .
 
 # Expose the port your application listens on.
 EXPOSE 8000
 
 # Define the command to run your application.
-CMD ["uvicorn", "jira-webhook-llm:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
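Reviewer note: the Dockerfile fix is more than a rename. The old CMD pointed uvicorn at `jira-webhook-llm:app`, but hyphens are illegal in Python module names (and the copied file was `jira_webhook_llm.py` anyway), so the old entrypoint could never import. Renaming to `main.py` and `main:app` resolves both mismatches. Below is a minimal sketch of the module shape `uvicorn main:app` expects; the `/health` route is a hypothetical example, not the repo's actual handlers:

```python
# main.py -- minimal sketch of what `uvicorn main:app` needs to import.
# Only the module-level `app` object is required; the route below is an
# assumed example, not code from this repository.
from fastapi import FastAPI

app = FastAPI(title="jira-webhook-llm")

@app.get("/health")
def health() -> dict:
    # Hypothetical liveness endpoint for illustration.
    return {"status": "ok"}
```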
diff --git a/config/application.yml b/config/application.yml
index 3d56b38..7285191 100644
--- a/config/application.yml
+++ b/config/application.yml
@@ -3,7 +3,7 @@ llm:
   # The mode to run the application in.
   # Can be 'openai' or 'ollama'.
   # This can be overridden by the LLM_MODE environment variable.
-  mode: gemini # Change mode to gemini
+  mode: ollama
 
   # Settings for OpenAI-compatible APIs (like OpenRouter)
   openai:
@@ -26,7 +26,7 @@ llm:
     # It's HIGHLY recommended to set this via an environment variable
     # instead of saving it in this file.
     # Can be overridden by GEMINI_API_KEY
-    api_key: "AIzaSyDl12gxyTf2xCaTbT6OMJg0I-Rc82Ib77c" # Move from openai
+    api_key: ""
     # Can be overridden by GEMINI_MODEL
     # model: "gemini-2.5-flash"
@@ -38,7 +38,7 @@ llm:
   # Settings for Ollama
   ollama:
     # Can be overridden by OLLAMA_BASE_URL
-    #base_url: "http://192.168.0.140:11434"
+    # base_url: "http://192.168.0.122:11434"
     base_url: "https://api-amer-sandbox-gbl-mdm-hub.pfizer.com/ollama"
@@ -47,17 +47,18 @@ llm:
     # model: "qwen3:1.7b"
     # model: "smollm:360m"
     # model: "qwen3:0.6b"
+    # model: "qwen3:8b"
 
 # Langfuse configuration for observability and analytics
 langfuse:
   # Enable or disable Langfuse integration
   # Can be overridden by LANGFUSE_ENABLED environment variable
-  enabled: true
+  enabled: false
   # Langfuse API credentials
   # It's HIGHLY recommended to set these via environment variables
   # instead of saving them in this file
-  public_key: "pk-lf-17dfde63-93e2-4983-8aa7-2673d3ecaab8"
-  secret_key: "sk-lf-ba41a266-6fe5-4c90-a483-bec8a7aaa321"
+  public_key: "pk-lf-"
+  secret_key: "sk-lf-"
   host: "https://cloud.langfuse.com"
 
 # Processor configuration
@@ -68,7 +69,7 @@ processor:
   # Maximum number of retries for failed Jira analysis requests
   # Can be overridden by PROCESSOR_MAX_RETRIES environment variable
-  max_retries: 5
+  max_retries: 0
 
   # Initial delay in seconds before the first retry attempt (exponential backoff)
   # Can be overridden by PROCESSOR_INITIAL_RETRY_DELAY_SECONDS environment variable
diff --git a/docker-compose.yml b/docker-compose.yml
index 7bb3e97..3420c05 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,7 +8,7 @@ services:
 
   # Service for your FastAPI application
   jira-webhook-llm:
-    image: artifactory.pfizer.com/mdmhub-docker-dev/mdmtools/ollama/jira-webhook-llm:0.1.8
+    image: artifactory.pfizer.com/mdmhub-docker-dev/mdmtools/ollama/jira-webhook-llm:0.2.0
     ports:
       - "8000:8000"
     environment:
@@ -30,4 +30,4 @@ services:
     # Command to run your FastAPI application using Uvicorn
     # --host 0.0.0.0 is crucial for the app to be accessible from outside the container
     # --reload is good for development; remove for production
-    command: uvicorn jira-webhook-llm:app --host 0.0.0.0 --port 8000
\ No newline at end of file
+    command: uvicorn main:app --host 0.0.0.0 --port 8000
\ No newline at end of file
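Reviewer note: most values changed above (LLM_MODE, LANGFUSE_ENABLED, the Ollama base URL) exist in both `.env` and `application.yml`, and the YAML comments state that the environment variable wins. A minimal sketch of that precedence under those assumptions; `load_llm_mode` is illustrative, not the repo's actual settings code:

```python
# Sketch of the override order described by the comments in application.yml:
# an environment variable such as LLM_MODE beats the value in the YAML file.
import os

import yaml  # PyYAML

def load_llm_mode(path: str = "config/application.yml") -> str:
    with open(path) as fh:
        config = yaml.safe_load(fh)
    # The env var, when set, takes precedence over the file value.
    return os.getenv("LLM_MODE", config["llm"]["mode"])

# With LLM_MODE=gemini exported, this returns "gemini" even though the
# file now says `mode: ollama`.
```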
diff --git a/llm/chains.py b/llm/chains.py
index 3eee15e..002e02e 100644
--- a/llm/chains.py
+++ b/llm/chains.py
@@ -32,7 +32,6 @@ if settings.llm.mode == 'openai':
     llm = ChatOpenAI(
         model=settings.llm.openai_model if settings.llm.openai_model else "", # Ensure model is str
         temperature=0.7,
-        max_tokens=2000,
         api_key=settings.llm.openai_api_key, # type: ignore # Suppress Pylance error due to SecretStr type mismatch
         base_url=settings.llm.openai_api_base_url
     )
@@ -52,7 +51,8 @@ elif settings.llm.mode == 'ollama':
     llm = OllamaLLM(
         model=settings.llm.ollama_model,
-        base_url=base_url
+        base_url=base_url,
+        num_ctx=32000 # Removed streaming, timeout, max_retries as they are not valid parameters for OllamaLLM
     )
@@ -92,7 +92,6 @@ elif settings.llm.mode == 'gemini': # New: Add Gemini initialization
     llm = ChatGoogleGenerativeAI(
         model=settings.llm.gemini_model,
         temperature=0.7,
-        max_tokens=2000,
         google_api_key=settings.llm.gemini_api_key
     )
@@ -131,7 +130,7 @@ if llm is None:
 llm_runnable: Runnable = llm # type: ignore
 
 # Set up Output Parser for structured JSON
-parser = JsonOutputParser(pydantic_object=AnalysisFlags)
+parser = JsonOutputParser()
 
 # Load prompt template from file
 def load_prompt_template(version="v1.2.0"):
@@ -222,8 +221,8 @@ def validate_response(response: Union[dict, str], issue_key: str = "N/A") -> bool:
         AnalysisFlags.model_validate(response)
         return True
     except Exception as e:
-        logger.error(f"[{issue_key}] Pydantic validation error: {e}. Invalid response: {response}")
-        return False
+        logger.warning(f"[{issue_key}] Pydantic validation failed: {e}. Continuing with raw response: {response}")
+        return True # Allow processing even if validation fails
     except Exception as e:
         logger.error(f"[{issue_key}] Unexpected error during response validation: {e}. Response: {response}")
         return False
\ No newline at end of file
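Reviewer note: two behavioural changes here. Dropping `pydantic_object` from `JsonOutputParser` means `{format_instructions}` should now render as generic "return a JSON object" guidance, so the schema has to live entirely in the prompt text (which the prompt rewrite below provides). And `validate_response` now returns `True` on a Pydantic failure, which also leaves the second `except Exception` branch unreachable, since the first one already catches everything. A sketch of the resulting lenient flow, assuming `langchain_core` and a simplified stand-in for the `AnalysisFlags` model:

```python
# Sketch of the relaxed validation flow after this change: the parser yields a
# plain dict and Pydantic validation becomes advisory rather than gating.
# `soft_validate` and the simplified model are illustrative, not repo code.
import logging

from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel

logger = logging.getLogger(__name__)
parser = JsonOutputParser()  # no pydantic_object: generic format instructions

class AnalysisFlags(BaseModel):  # stand-in for the model in llm/models.py
    issueCategory: str
    isEscalated: bool

def soft_validate(response: dict, issue_key: str = "N/A") -> dict:
    try:
        AnalysisFlags.model_validate(response)
    except Exception as exc:
        # Failure is logged but the raw response is still used downstream.
        logger.warning("[%s] validation failed: %s", issue_key, exc)
    return response
```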
diff --git a/llm/jira_analysis_v1.2.0.txt b/llm/jira_analysis_v1.2.0.txt
index 7ea8e3b..99407d3 100644
--- a/llm/jira_analysis_v1.2.0.txt
+++ b/llm/jira_analysis_v1.2.0.txt
@@ -1,60 +1,58 @@
 SYSTEM:
-You are a precise AI assistant that analyzes Jira tickets and outputs a JSON object.
-Your task is to analyze the provided Jira ticket data and generate a JSON object based on the rules below.
-Your output MUST be ONLY the JSON object, with no additional text or explanations.
+You are an expert AI assistant that analyzes Jira tickets and outputs a concise summary in a valid JSON format.
+Your output MUST be a single JSON object and nothing else.
 
 ## JSON Output Schema
 {format_instructions}
 
 ## Field-by-Field Instructions
 
-### `hasMultipleEscalations` (boolean)
-- Set to `true` ONLY if the user has made multiple requests for help from the "MDM HUB team" without getting a response.
-- A normal back-and-forth conversation is NOT an escalation.
+1. **`issueCategory` (string)**
+   * Classify the core problem. Choose ONE: "technical_issue", "data_request", "access_problem", "general_question", "other".
 
-### `customerSentiment` (string: "neutral", "frustrated", "calm")
-- Set to `"frustrated"` if the user mentions blockers, deadlines, or uses urgent language (e.g., "urgent", "asap", "blocked").
-- Set to `"calm"` if the language is polite and patient.
-- Set to `"neutral"` otherwise.
+2. **`area` (string)**
+   * Classify the technical domain. Choose the BEST fit from the following options:
+     * `"Direct Channel"`
+     * `"Streaming Channel"`
+     * `"Java Batch Channel"`
+     * `"ETL Batch Channel"`
+     * `"DCR Service"`
+     * `"API Gateway"`
+     * `"Callback Service"`
+     * `"Publisher"`
+     * `"Reconciliation"`
+     * `"Snowflake"`
+     * `"Authentication"`
+     * `"Other"`
 
-### `issueCategory` (string: "technical_issue", "data_request", "access_problem", "general_question", "other")
-- `"technical_issue"`: Errors, bugs, system failures, API problems.
-- `"data_request"`: Asking for data exports, reports, or information retrieval.
-- `"access_problem"`: User cannot log in, has permission issues.
-- `"general_question"`: "How do I..." or other general inquiries.
-- `"other"`: If it doesn't fit any other category.
+3. **`customerSentiment` (string)**
+   * Analyze the user's tone.
+   * `"frustrated"`: User mentions blockers, deadlines, or uses urgent language ("ASAP", "urgent", "blocked").
+   * `"neutral"`: Default, non-emotional tone.
 
-### `area` (string)
-- Classify the ticket into ONE of the following areas based on keywords:
-- `"Direct Channel"`: "REST API", "API Gateway", "Create/Update HCP/HCO"
-- `"Streaming Channel"`: "Kafka", "SQS", "Reltio events", "Snowflake"
-- `"Java Batch Channel"`: "Batch", "File loader", "Airflow"
-- `"ETL Batch Channel"`: "ETL", "Informatica"
-- `"DCR Service"`: "DCR", "PforceRx", "OneKey", "Veeva"
-- `"API Gateway"`: "Kong", "authentication", "routing"
-- `"Callback Service"`: "Callback", "HCO names", "ranking"
-- `"Publisher"`: "Publisher", "routing rules"
-- `"Reconciliation"`: "Reconciliation", "sync"
-- `"Snowflake"`: "Snowflake", "Data Mart", "SQL"
-- `"Authentication"`: "PingFederate", "OAuth2", "Key-Auth"
-- `"Other"`: If it doesn't fit any other category.
+4. **`isEscalated` (boolean)**
+   * Set to `true` if the user explicitly states they are escalating, mentions previous unanswered requests, or if the tone is highly frustrated and urgent.
+   * Set to `false` otherwise.
+
+5. **`oneSentenceSummary` (string)**
+   * A single sentence in concise English that summarizes the discussion.
 
 ## Example
 
 ### Input:
 - Summary: "DCR Rejected by OneKey"
-- Description: "Our DCR for PforceRx was rejected by OneKey. Can the MDM HUB team investigate?"
+- Description: "Our DCR for PforceRx was rejected by OneKey. Can the MDM HUB team investigate? This is blocking our weekly report."
 - Comment: ""
 
 ### Output:
 ```json
 {{
-  "Hasmultipleescalations": false,
-  "CustomerSentiment": "neutral",
-  "IssueCategory": "technical_issue",
-  "Area": "DCR Service"
+  "issueCategory": "technical_issue",
+  "area": "DCR Service",
+  "customerSentiment": "frustrated",
+  "isEscalated": false,
+  "oneSentenceSummary": "A DCR for PforceRx was rejected by OneKey, which is blocking a weekly report."
 }}
 ```
 
 USER:
 Analyze the following Jira ticket:
diff --git a/llm/models.py b/llm/models.py
index 089535d..1430bb1 100644
--- a/llm/models.py
+++ b/llm/models.py
@@ -57,11 +57,13 @@ class JiraWebhookPayload(BaseModel):
     updated: Optional[str] = None
 
 class AnalysisFlags(BaseModel):
-    hasMultipleEscalations: bool = Field(alias="Hasmultipleescalations", description="Is there evidence of multiple escalation attempts?")
-    customerSentiment: Optional[CustomerSentiment] = Field(alias="CustomerSentiment", description="Overall customer sentiment (e.g., 'neutral', 'frustrated', 'calm').")
-    # New: Add category and area fields
-    issueCategory: IssueCategory = Field(alias="IssueCategory", description="The primary category of the Jira ticket.")
-    area: Area = Field(alias="Area", description="The technical area of the MDM HUB related to the issue.")
+    model_config = ConfigDict(alias_generator=lambda x: ''.join(word.capitalize() if i > 0 else word for i, word in enumerate(x.split('_'))), populate_by_name=True)
+
+    issueCategory: IssueCategory = Field(..., description="The primary category of the Jira ticket.")
+    area: Area = Field(..., description="The technical area of the MDM HUB related to the issue.")
+    customerSentiment: Optional[CustomerSentiment] = Field(..., description="Overall customer sentiment (e.g., 'neutral', 'frustrated', 'calm').")
+    isEscalated: bool = Field(..., description="Did the user escalate the issue or signal an escalation?")
+    oneSentenceSummary: str = Field(..., description="A single sentence in concise English that summarizes the discussion.")
 
 class JiraAnalysisResponse(BaseModel):
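Reviewer note: the new `alias_generator` converts snake_case to camelCase, but every declared field name is already camelCase, so the generated aliases are identical to the field names and the generator is currently a no-op; it only starts mattering if the fields are later renamed to snake_case. Also verify that `ConfigDict` is imported from pydantic above this hunk, since the import is not visible in the diff. A standalone sketch of what the generator actually produces, with the enum types simplified to `str`:

```python
# Standalone check: for camelCase field names the generated alias equals the
# field name, so this alias_generator changes nothing today. Simplified model.
from pydantic import BaseModel, ConfigDict, Field

def to_camel(name: str) -> str:
    words = name.split('_')
    return words[0] + ''.join(w.capitalize() for w in words[1:])

class AnalysisFlags(BaseModel):
    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

    issueCategory: str = Field(..., description="Primary category.")
    isEscalated: bool = Field(..., description="Escalation signal.")

print(AnalysisFlags.model_fields["issueCategory"].alias)  # -> issueCategory
flags = AnalysisFlags(issueCategory="technical_issue", isEscalated=False)
print(flags.model_dump(by_alias=True))
```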