影刀AI Power

影刀AI Power
使用AI提取简历信息的简单应用

提取图片简历信息工作流

扣子(coze)

主页 - 扣子
API 介绍 - 文档 - 扣子

创建智能体

代码实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/bin/python3
import os
from PyPDF2 import PdfReader
import requests
import json
import time
# Call the AI to process resume data
def get_info_by_ai(text):
    """Send resume text to a Coze bot and return the bot's final answer.

    The function starts a non-streaming chat through the Coze v3 chat API,
    polls the chat until it reaches a terminal status (or about two minutes
    have elapsed), then fetches the message list and returns the last
    assistant message of type ``answer``.

    The API token is read from the ``COZE_API_TOKEN`` environment variable
    so that the secret is never stored in the source file.

    Args:
        text: Plain text to send to the bot as the user message.

    Returns:
        The content of the bot's last answer on success. On any failure
        (missing token, HTTP error, unexpected response shape, chat that did
        not complete, or any other exception) a human-readable error message
        is printed and returned instead; no exception is propagated.
    """
    # The personal access token is a credential; it must come from the
    # environment rather than being committed alongside the code.
    token = os.environ.get("COZE_API_TOKEN", "").strip()
    if not token:
        error_msg = "配置错误:未设置环境变量 COZE_API_TOKEN(扣子个人令牌)"
        print(error_msg)
        return error_msg

    # Coze bot (agent) chat endpoint
    url = "https://api.coze.cn/v3/chat"
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "Accept": "application/json"
    }
    payload = {
        "bot_id": "7535731177307947048",
        "user_id": "123456789",
        "stream": False,
        "auto_save_history": True,
        "additional_messages": [
            {
                "role": "user",
                "content": text,
                "content_type": "text"
            }
        ]
    }

    # Statuses after which further polling cannot lead to "completed".
    # NOTE(review): list taken from the Coze chat API status values; confirm
    # against the current API documentation.
    terminal_statuses = {"completed", "failed", "requires_action", "canceled"}
    max_wait_seconds = 120  # overall polling budget: 2 minutes

    try:
        # Send the initial request that starts the chat
        response = requests.post(
            url=url,
            headers=headers,
            data=json.dumps(payload),
            timeout=30
        )
        response.raise_for_status()
        init_result = response.json()
        print("初始响应(AI开始处理):")
        print(json.dumps(init_result, ensure_ascii=False, indent=2))

        # Extract the IDs needed by the follow-up requests
        conversation_id = init_result["data"]["conversation_id"]
        chat_id = init_result["data"]["id"]
        status = init_result["data"]["status"]

        # Both follow-up endpoints take the same query parameters
        chat_params = {
            "conversation_id": conversation_id,
            "chat_id": chat_id
        }

        # Poll until the chat reaches a terminal status. The budget is
        # measured with a monotonic clock because each poll request may
        # itself block for up to its own timeout.
        retrieve_url = "https://api.coze.cn/v3/chat/retrieve"
        started = time.monotonic()
        while status not in terminal_statuses:
            elapsed = int(time.monotonic() - started)
            if elapsed >= max_wait_seconds:
                break
            print(f"AI处理中({elapsed}秒),状态:{status}...")
            time.sleep(1)

            # Ask the chat-details endpoint for the current status
            retrieve_response = requests.get(
                retrieve_url,
                headers=headers,
                params=chat_params,
                timeout=30
            )
            retrieve_response.raise_for_status()
            status = retrieve_response.json()["data"]["status"]

        # Anything other than "completed" (timeout or failure) is an error
        if status != "completed":
            error_msg = f"对话未完成,最终状态:{status}"
            print(error_msg)
            return error_msg

        # Fetch the messages produced by the chat
        messages_url = "https://api.coze.cn/v3/chat/message/list"
        messages_response = requests.get(
            messages_url,
            headers=headers,
            params=chat_params,
            timeout=30
        )
        messages_response.raise_for_status()
        messages_result = messages_response.json()

        # Keep only the assistant's actual answers
        ai_answers = [
            msg["content"] for msg in messages_result["data"]
            if msg.get("role") == "assistant" and msg.get("type") == "answer"
        ]

        if ai_answers:
            return ai_answers[-1]
        return "未找到AI的有效回复"

    except requests.exceptions.HTTPError as e:
        # e.response is normally set by raise_for_status(); guard anyway so
        # the error handler itself cannot raise.
        body = e.response.text if e.response is not None else ""
        error_msg = f"HTTP错误:{e}\n错误响应内容:{body}"
        print(error_msg)
        return error_msg
    except KeyError as e:
        error_msg = f"响应格式错误,缺少字段:{e}"
        print(error_msg)
        return error_msg
    except Exception as e:
        # Deliberate catch-all: callers expect an error string, not a raise.
        error_msg = f"处理错误:{str(e)}"
        print(error_msg)
        return error_msg


# Process the list of resumes
def handle_resumes(resume_dir=r"D:\学习\智云大数据\简历"):
    """Extract text from every PDF in a directory and send it to the AI.

    For each regular file directly inside ``resume_dir`` whose name ends with
    ``.pdf`` (case-insensitive), the text of all pages is extracted with
    ``PdfReader``, passed to ``get_info_by_ai``, and the result is printed.
    Subdirectories are not traversed.

    Args:
        resume_dir: Directory containing the resume PDFs. Defaults to the
            directory the script was originally written for.

    Returns:
        None. Progress, warnings and results are reported with ``print``.
    """
    # isdir (not exists) so that a path pointing at a regular file is
    # reported here instead of crashing inside os.scandir.
    if not os.path.isdir(resume_dir):
        print(f"错误: 目录 {resume_dir} 不存在")
        return

    # Walk the directory entries
    with os.scandir(resume_dir) as entries:
        for entry in entries:
            if not (entry.is_file() and entry.name.lower().endswith('.pdf')):
                continue

            print(f"\n处理PDF文件: {entry.path}")
            try:
                # Create the PDF reader and concatenate the text of all
                # pages; extract_text() may yield nothing for a page.
                reader = PdfReader(entry.path)
                text = "".join(
                    page.extract_text() or "" for page in reader.pages
                )
                if not text.strip():
                    print("警告: PDF文件未提取到文本内容")
                    continue
                print(f"提取的文本长度: {len(text)} 字符")

                # Hand the resume text to the AI
                ai_response = get_info_by_ai(text)
                print(f"AI处理结果:\n{ai_response}")

            except Exception as e:
                # Best effort per file: one unreadable PDF must not stop
                # the remaining resumes from being processed.
                print(f"处理PDF文件 {entry.name} 时出错: {e}")
# Script entry point: process the resumes only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    handle_resumes()

运行结果