A bit better
This commit is contained in:
parent aadf1fe94c
commit 159f78ccb5
@@ -36,17 +36,21 @@ LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper()
 logging.basicConfig(
     level=LOG_LEVEL,
-    format='[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s',
-    datefmt='%Y-%m-%dT%H:%M:%S%z'
+    format="[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s",
+    datefmt="%Y-%m-%dT%H:%M:%S%z",
 )
 
 
 def get_mongo_collection():
     """Initialize and return MongoDB collection."""
     mongo_client = pymongo.MongoClient(MONGODB_URI)
     db = mongo_client[MONGODB_DATABASE]
     return db[MONGO_COLLECTION_NAME]
 
 
 logger = logging.getLogger(__name__)
 
 
 def main():
     """Main function to process the resume."""
     parser = argparse.ArgumentParser(
@@ -60,10 +64,14 @@ Required Environment Variables:
         usage="resume_analysis.py [-h] [-f FILE] [-m]",
         epilog="""Examples:
   Analyze a resume: resume_analysis.py -f my_resume.pdf
-  Test with mockup data: resume_analysis.py -f test.pdf -m"""
+  Test with mockup data: resume_analysis.py -f test.pdf -m""",
+    )
+    parser.add_argument(
+        "-f", "--file", help="Path to the resume file to analyze (PDF or text)"
+    )
+    parser.add_argument(
+        "-m", "--mockup", action="store_true", help="Use mockup response instead of calling OpenAI API"
     )
-    parser.add_argument('-f', '--file', help='Path to the resume file to analyze (PDF or text)')
-    parser.add_argument('-m', '--mockup', action='store_true', help='Use mockup response instead of calling OpenAI API')
 
     # If no arguments provided, show help and exit
     if len(sys.argv) == 1:
@@ -84,8 +92,14 @@ Required Environment Variables:
         sys.exit(1)
 
     start_file_read_time = time.time()
-    with open(args.file, 'r') as f:
-        resume_text = f.read()
+    if args.file.lower().endswith(".pdf"):
+        logger.debug(f"Using pdfminer to extract text from PDF: {args.file}")
+        resume_text = extract_text(args.file)
+    else:
+        with open(
+            args.file, "r", encoding="utf-8"
+        ) as f:  # Explicitly specify utf-8 encoding for text files
+            resume_text = f.read()
     file_read_time = time.time() - start_file_read_time
     logger.debug(f"File read time: {file_read_time:.2f} seconds")
 
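Note: the branch introduced above routes .pdf files through pdfminer's extract_text and reads everything else as UTF-8 text. A minimal standalone sketch of that logic, assuming pdfminer.six is installed; read_resume_text is an illustrative helper name, not part of the commit:

# Illustrative sketch only; mirrors the branching introduced in the hunk above.
from pdfminer.high_level import extract_text


def read_resume_text(path: str) -> str:
    """Return resume text from a PDF or a plain-text file."""
    if path.lower().endswith(".pdf"):
        # PDFs are binary, so pdfminer extracts their text layer.
        return extract_text(path)
    # Anything else is treated as UTF-8 text.
    with open(path, "r", encoding="utf-8") as f:
        return f.read()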
@@ -94,15 +108,34 @@ Required Environment Variables:
     response = call_openai_api(resume_text, use_mockup)
     openai_api_time = time.time() - start_time
     logger.debug(f"OpenAI API call time: {openai_api_time:.2f} seconds")
 
     # Initialize MongoDB collection only when needed
     cv_collection = get_mongo_collection()
 
     # Measure MongoDB insertion time
     start_mongo_time = time.time()
-    cost = insert_processing_data(resume_text, {}, response, args, str(uuid.uuid4()), use_mockup, cv_collection)
+    if response and response.choices:
+        message_content = response.choices[0].message.content
+        try:
+            summary = json.loads(message_content)
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to parse OpenAI response: {e}")
+            summary = {"error": "Invalid JSON response from OpenAI"}
+    else:
+        summary = {"error": "No response from OpenAI"}
+    insert_processing_data(
+        resume_text,
+        summary,
+        response,
+        args,
+        str(uuid.uuid4()),
+        use_mockup,
+        cv_collection,
+    )
     mongo_insert_time = time.time() - start_mongo_time
     logger.debug(f"MongoDB insert time: {mongo_insert_time:.2f} seconds")
-    write_openai_response(response, use_mockup, args.file, cost)
+    write_openai_response(response, use_mockup, args.file)
 
 
 def load_mockup_response(mockup_file_path: str) -> dict:
     """Load mockup response from a JSON file."""
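Note: main() now builds a summary dict by parsing the model's message content before calling insert_processing_data, instead of passing an empty dict and threading a cost value through. A hedged sketch of just that parsing step; summarize_response is an illustrative name and assumes an OpenAI chat-completion-style response object:

import json
import logging

logger = logging.getLogger(__name__)


def summarize_response(response) -> dict:
    """Parse choices[0].message.content as JSON, with the same fallbacks as the hunk above."""
    if response and response.choices:
        try:
            return json.loads(response.choices[0].message.content)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse OpenAI response: {e}")
            return {"error": "Invalid JSON response from OpenAI"}
    return {"error": "No response from OpenAI"}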
@@ -111,9 +144,12 @@ def load_mockup_response(mockup_file_path: str) -> dict:
         raise FileNotFoundError(f"Mockup file not found at: {mockup_file_path}")
     with open(mockup_file_path, "r") as f:
         response = json.load(f)
-    response.setdefault("openai_stats", {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
+    response.setdefault(
+        "openai_stats", {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+    )
     return response
 
 
 def call_openai_api(text: str, use_mockup: bool) -> Optional[Any]:
     """Call OpenAI API to analyze resume text."""
     logger.debug("Calling OpenAI API.")
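Note: besides the reflow, the default stats keys change from prompt_tokens/completion_tokens to input_tokens/output_tokens. setdefault only fills the key when the mockup JSON does not already provide it; a quick illustration with hypothetical payloads:

# Hypothetical mockup payloads, not taken from the repository.
mockup = {"choices": []}
mockup.setdefault(
    "openai_stats", {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
)
assert mockup["openai_stats"]["total_tokens"] == 0

mockup2 = {"openai_stats": {"input_tokens": 5, "output_tokens": 7, "total_tokens": 12}}
mockup2.setdefault(
    "openai_stats", {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
)
assert mockup2["openai_stats"]["total_tokens"] == 12  # existing value is kept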
@@ -128,9 +164,9 @@ def call_openai_api(text: str, use_mockup: bool) -> Optional[Any]:
             model=MODEL_NAME,
             messages=[
                 {"role": "system", "content": system_content},
-                {"role": "user", "content": text}
+                {"role": "user", "content": text},
             ],
-            max_tokens=MAX_TOKENS
+            max_tokens=MAX_TOKENS,
         )
         logger.debug(f"OpenAI API response: {response}")
         return response
@@ -138,40 +174,49 @@ def call_openai_api(text: str, use_mockup: bool) -> Optional[Any]:
         logger.error(f"Error during OpenAI API call: {e}", exc_info=True)
         return None
 
-def write_openai_response(response: Any, use_mockup: bool, input_file_path: str = None, cost: float = 0) -> None:  # Add cost argument
+def write_openai_response(
+    response: Any, use_mockup: bool, input_file_path: str = None
+) -> None:
     """Write raw OpenAI response to a file."""
     if use_mockup:
         logger.debug("Using mockup response; no OpenAI message to write.")
         return
     if response and response.choices:  # Changed from hasattr to direct attribute access
         message_content = response.choices[0].message.content
         logger.debug(f"Raw OpenAI message content: {message_content}")
-        output_dir = os.path.dirname(input_file_path) if input_file_path else '.'
-        base_filename = os.path.splitext(os.path.basename(input_file_path))[0] if input_file_path else "default"
+        output_dir = os.path.dirname(input_file_path) if input_file_path else "."
+        base_filename = (
+            os.path.splitext(os.path.basename(input_file_path))[0]
+            if input_file_path
+            else "default"
+        )
         processing_id = str(uuid.uuid4())
-        file_path = os.path.join(output_dir, f"{base_filename}_openai_response_{processing_id}") + ".json"
+        file_path = os.path.join(
+            output_dir, f"{base_filename}_openai_response_{processing_id}"
+        ) + ".json"
         try:
             serializable_response = {  # Create a serializable dictionary
                 "choices": [
                     {
                         "message": {
                             "content": choice.message.content,
-                            "role": choice.message.role
+                            "role": choice.message.role,
                         },
                         "finish_reason": choice.finish_reason,
-                        "index": choice.index
-                    } for choice in response.choices
+                        "index": choice.index,
+                    }
+                    for choice in response.choices
                 ],
                 "openai_stats": {
-                    "prompt_tokens": response.usage.prompt_tokens,
-                    "completion_tokens": response.usage.completion_tokens,
-                    "total_tokens": response.usage.total_tokens
+                    "input_tokens": response.usage.prompt_tokens,
+                    "output_tokens": response.usage.completion_tokens,
+                    "total_tokens": response.usage.total_tokens,
                 },
-                "cost": cost,  # Include cost in the output JSON
-                "model": response.model
+                "model": response.model,
             }
             with open(file_path, "w") as f:
                 json.dump(serializable_response, f, indent=2)  # Dump the serializable dictionary
             logger.debug(f"OpenAI response written to {file_path}")
         except IOError as e:
             logger.error(f"Failed to write OpenAI response to file: {e}")
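Note: write_openai_response no longer takes or writes a cost value, and the usage keys in the dumped JSON are renamed. Roughly, the written file now has this shape; the example below is a placeholder illustration, not real output:

# Placeholder example of the JSON produced by write_openai_response after this
# change; every value below is a dummy.
example_output = {
    "choices": [
        {
            "message": {"content": "<model JSON here>", "role": "assistant"},
            "finish_reason": "stop",
            "index": 0,
        }
    ],
    "openai_stats": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0},
    "model": "<model name>",
}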
@@ -179,21 +224,22 @@ def write_openai_response(response: Any, use_mockup: bool, input_file_path: str
         logger.warning("No choices in OpenAI response to extract message from.")
         logger.debug(f"Response object: {response}")
 
-def insert_processing_data(text_content: str, summary: dict, response: Any, args: argparse.Namespace, processing_id: str, use_mockup: bool, cv_collection) -> None:
+def insert_processing_data(
+    text_content: str,
+    summary: dict,
+    response: Any,
+    args: argparse.Namespace,
+    processing_id: str,
+    use_mockup: bool,
+    cv_collection,
+) -> None:
     """Insert processing data into MongoDB."""
     logger.debug("Inserting processing data into MongoDB.")
     if not use_mockup:
         if response and response.choices:
             message_content = response.choices[0].message.content
-            try:
-                openai_stats_content = json.loads(message_content)
-                openai_stats = openai_stats_content.get("openai_stats", {})
-                cost = openai_stats.get("cost", 0)
-            except json.JSONDecodeError:
-                logger.error("Failed to decode JSON from message content for openai_stats.")
-                openai_stats = {}
-                cost = 0
+            openai_stats = summary.get("openai_stats", {})
 
             usage = response.usage
             input_tokens = usage.prompt_tokens
             output_tokens = usage.completion_tokens
@@ -201,34 +247,29 @@ def insert_processing_data(text_content: str, summary: dict, response: Any, args
         else:
             logger.error("Invalid response format or missing usage data.")
             input_tokens = output_tokens = total_tokens = 0
-            cost = 0
             openai_stats = {}
             usage = {}
 
 
         processing_data = {
             "processing_id": processing_id,
             "timestamp": datetime.now(timezone.utc).isoformat(),
             "text_content": text_content,
             "summary": summary,
-            "usage_prompt_tokens": input_tokens,  # Renamed to avoid collision
-            "usage_completion_tokens": output_tokens,  # Renamed to avoid collision
-            "usage_total_tokens": total_tokens,  # Renamed to avoid collision
-            "openai_stats_input_tokens": openai_stats.get("input_tokens"),
-            "openai_stats_output_tokens": openai_stats.get("output_tokens"),
-            "openai_stats_total_tokens": openai_stats.get("total_tokens"),
-            "cost": cost
+            "input_tokens": input_tokens,
+            "output_tokens": output_tokens,
+            "total_tokens": total_tokens,
         }
 
         try:
             cv_collection.insert_one(processing_data)
             logger.debug(f"Inserted processing data for ID: {processing_id}")
-            return cost  # Return the cost
         except Exception as e:
-            logger.error(f"Failed to insert processing data into MongoDB: {e}", exc_info=True)
+            logger.error(
+                f"Failed to insert processing data into MongoDB: {e}", exc_info=True
+            )
     else:
         logger.debug("Using mockup; skipping MongoDB insertion.")
-        return 0  # Return 0 for mockup mode
 
 
 if __name__ == "__main__":
     main()