影刀AI Power
影刀AI Power
使用AI提取简历信息的简单应用
提取图片简历信息工作流
扣子(coze)
主页 - 扣子
API 介绍 - 文档 - 扣子
创建智能体
代码实现
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
| import os from PyPDF2 import PdfReader import requests import json import time
def get_info_by_ai(text):
    """Send text to a Coze bot and return the bot's final answer.

    The function starts a non-streaming chat, polls the chat status once per
    second (at most 120 polls), and then fetches the message list to pick the
    last assistant message of type ``answer``.

    Args:
        text: The user message content (here: text extracted from a resume).

    Returns:
        The content of the last assistant answer on success. On any failure
        (HTTP error, unexpected response shape, chat not completed, no answer
        found) a human-readable error string is returned instead; no exception
        propagates to the caller.
    """
    # NOTE(security): a personal access token is embedded in this file. Prefer
    # supplying it through the COZE_API_TOKEN environment variable; the literal
    # is kept only as a backward-compatible fallback and should be rotated.
    api_token = os.environ.get("COZE_API_TOKEN") or (
        "pat_CEHGFgHprgT4dBVj8qiSHcfrisi5GooR7pIIjiIdiSz1n90oxUrlyFn86pgZ4Amv"
    )

    url = "https://api.coze.cn/v3/chat"
    retrieve_url = "https://api.coze.cn/v3/chat/retrieve"
    messages_url = "https://api.coze.cn/v3/chat/message/list"
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    payload = {
        "bot_id": "7535731177307947048",
        "user_id": "123456789",
        "stream": False,
        "auto_save_history": True,
        "additional_messages": [
            {
                "role": "user",
                "content": text,
                "content_type": "text",
            }
        ],
    }

    # Statuses from which polling can never reach "completed"; waiting on them
    # would only burn the whole timeout budget.
    terminal_failures = {"failed", "canceled", "requires_action"}
    max_wait_seconds = 120

    try:
        response = requests.post(
            url=url,
            headers=headers,
            data=json.dumps(payload),
            timeout=30,
        )
        response.raise_for_status()
        init_result = response.json()
        print("初始响应(AI开始处理):")
        print(json.dumps(init_result, ensure_ascii=False, indent=2))

        # Surface the service's own error code/message instead of a bare
        # "missing field" error when the reply carries no data section.
        if not init_result.get("data"):
            error_msg = (
                f"API返回错误:code={init_result.get('code')},"
                f"msg={init_result.get('msg')}"
            )
            print(error_msg)
            return error_msg

        conversation_id = init_result["data"]["conversation_id"]
        chat_id = init_result["data"]["id"]
        status = init_result["data"]["status"]

        # The same identifiers address both the status and the message calls.
        chat_params = {
            "conversation_id": conversation_id,
            "chat_id": chat_id,
        }

        wait_count = 0
        while (
            status != "completed"
            and status not in terminal_failures
            and wait_count < max_wait_seconds
        ):
            print(f"AI处理中({wait_count}秒),状态:{status}...")
            time.sleep(1)
            wait_count += 1

            retrieve_response = requests.get(
                retrieve_url,
                headers=headers,
                params=chat_params,
                timeout=30,
            )
            retrieve_response.raise_for_status()
            status = retrieve_response.json()["data"]["status"]

        if status != "completed":
            error_msg = f"对话未完成,最终状态:{status}"
            print(error_msg)
            return error_msg

        messages_response = requests.get(
            messages_url,
            headers=headers,
            params=chat_params,
            timeout=30,
        )
        messages_response.raise_for_status()
        messages_result = messages_response.json()

        ai_answers = [
            msg["content"]
            for msg in messages_result["data"]
            if msg.get("role") == "assistant" and msg.get("type") == "answer"
        ]

        if ai_answers:
            return ai_answers[-1]
        return "未找到AI的有效回复"

    except requests.exceptions.HTTPError as e:
        # e.response can be None if the error was raised without a response.
        body = e.response.text if e.response is not None else ""
        error_msg = f"HTTP错误:{e}\n错误响应内容:{body}"
        print(error_msg)
        return error_msg
    except KeyError as e:
        error_msg = f"响应格式错误,缺少字段:{e}"
        print(error_msg)
        return error_msg
    except Exception as e:
        # Boundary handler: this function reports failures as strings.
        error_msg = f"处理错误:{str(e)}"
        print(error_msg)
        return error_msg
def handle_resumes(resume_dir=r"D:\学习\智云大数据\简历"):
    """Extract text from every PDF in a directory and send it to the AI.

    Only regular files directly inside ``resume_dir`` whose name ends with
    ``.pdf`` (case-insensitive) are processed. For each one the text of all
    pages is concatenated, passed to ``get_info_by_ai`` and the reply printed.
    A failure on one file is reported and does not stop the remaining files.

    Args:
        resume_dir: Directory to scan. Defaults to the original hard-coded
            resume folder so existing zero-argument calls keep working.

    Returns:
        None. All results and errors are printed.
    """
    # isdir (not exists) so that a path pointing at a regular file is reported
    # here instead of making os.scandir raise NotADirectoryError.
    if not os.path.isdir(resume_dir):
        print(f"错误: 目录 {resume_dir} 不存在")
        return

    with os.scandir(resume_dir) as entries:
        for entry in entries:
            if not (entry.is_file() and entry.name.lower().endswith(".pdf")):
                continue

            print(f"\n处理PDF文件: {entry.path}")
            try:
                reader = PdfReader(entry.path)
                # extract_text() may yield None/empty for image-only pages.
                text = "".join(
                    page.extract_text() or "" for page in reader.pages
                )
                if not text.strip():
                    print("警告: PDF文件未提取到文本内容")
                    continue
                print(f"提取的文本长度: {len(text)} 字符")

                ai_response = get_info_by_ai(text)
                print(f"AI处理结果:\n{ai_response}")
            except Exception as e:
                # Per-file boundary: keep going with the next resume.
                print(f"处理PDF文件 {entry.name} 时出错: {e}")
# Script entry point: process the resume folder only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    handle_resumes()
|
运行结果
