AI在线开发
实验04-多模态图文比较分析
实验准备:
- 确保已接入火山引擎豆包AI
- 准备一张JPG格式的图片,作为实验素材
实验步骤:
cd AI_online  # 进入主目录
python examples/03_multimodal_chat.py  # 运行示例程序
参考运行指令:
- 你好(直接输入文字对话即可)
- /analyze assets/sample.jpg 颜色与风格 (分析图片)
- /image assets/sample.jpg 这张图片里描述的场景是什么? (图文对话)
- /compare assets/sample.jpg assets/sample.jpg 色彩与风格对比 (两图比较,可自行额外添加图片)
终端打印如下:


# -*- coding: utf-8 -*-
"""
图像对话功能示例
支持上传图像并进行多轮对话
"""
import os
import sys
import requests
import base64
from typing import List, Dict, Optional
# 添加父目录到路径
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import API_KEY, MODEL_ENDPOINT, API_BASE_URL, REQUEST_TIMEOUT
from utils.image_processor import ImageProcessor
class ImageChatBot:
    """Multi-turn chat client that can attach one JPEG image to user turns.

    Connection settings (API key, model endpoint, base URL, timeout) are read
    from the ``config`` module. Each successful exchange is appended to
    ``chat_history`` and replayed on the next request.
    """

    # File extensions accepted by load_image (compared case-insensitively).
    _SUPPORTED_EXTENSIONS = (".jpg", ".jpeg")

    def __init__(self):
        """Read settings from ``config`` and start with an empty session.

        Raises:
            ValueError: If the API key or the model endpoint is empty or still
                set to its placeholder value.
        """
        self.api_key = API_KEY
        self.model_endpoint = MODEL_ENDPOINT
        self.base_url = API_BASE_URL
        self.timeout = REQUEST_TIMEOUT
        self.processor = ImageProcessor()
        # Messages of every successful exchange, in request format.
        self.chat_history: List[Dict] = []
        # Base64 payload and path of the image attached to outgoing messages.
        self.current_image_base64: Optional[str] = None
        self.current_image_path: Optional[str] = None
        self._check_config()

    def _check_config(self):
        """Validate the API key and model endpoint.

        Raises:
            ValueError: If either value is empty or equals its placeholder.
        """
        if not self.api_key or self.api_key == "你的API_KEY":
            raise ValueError("请在config.py中配置正确的API_KEY")
        if not self.model_endpoint or self.model_endpoint == "你的接入点ID":
            raise ValueError("请在config.py中配置正确的MODEL_ENDPOINT")

    def load_image(self, image_path: str) -> bool:
        """Load a JPG/JPEG file and make it the current image.

        The format check looks at the file extension only. On failure the
        previously loaded image (if any) is left untouched.

        Args:
            image_path: Path of the image file.

        Returns:
            bool: True if the image was encoded and stored, False otherwise.
        """
        try:
            ext = os.path.splitext(image_path)[1].lower()
            if ext not in self._SUPPORTED_EXTENSIONS:
                print("仅支持JPG/JPEG格式,请选择 .jpg 或 .jpeg 文件")
                return False
            if not os.path.exists(image_path):
                print(f"图像文件不存在: {image_path}")
                return False
            base64_data = self.processor.image_to_base64(image_path)
            if not base64_data:
                print("图像编码失败")
                return False
        except Exception as e:
            print(f"图像加载失败: {e}")
            return False

        self.current_image_base64 = base64_data
        self.current_image_path = image_path

        # The image info is only used for the summary printed below, so a
        # failure here must not turn an already-committed load into a
        # reported failure.
        width = height = file_size = 0
        try:
            image_info = self.processor.get_image_info(image_path) or {}
            width = image_info.get('width', 0) or 0
            height = image_info.get('height', 0) or 0
            file_size = image_info.get('file_size', 0) or 0
        except Exception as e:
            print(f"图像信息读取失败: {e}")

        print(f"? 图像加载成功: {os.path.basename(image_path)}")
        print(f" 尺寸: {width}x{height}")
        print(f" 大小: {file_size / 1024:.1f}KB")
        return True

    def send_message(self, message: str, include_image: bool = True) -> Optional[str]:
        """Send one user message together with the history and return the reply.

        The history is only updated when the request succeeds, so a failed
        call can simply be retried.

        Args:
            message: User text.
            include_image: Attach the current image (when one is loaded) to
                this message.

        Returns:
            str: The assistant reply, or None on any failure (the reason is
            printed).
        """
        try:
            content = [{"type": "text", "text": message}]
            if include_image and self.current_image_base64:
                content.append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{self.current_image_base64}"
                    }
                })
            user_message = {"role": "user", "content": content}
            messages = self.chat_history + [user_message]

            # API_BASE_URL may already be the full endpoint
            # (.../chat/completions) or just the base path (.../api/v3).
            base = self.base_url.rstrip('/')
            url = base if base.endswith('chat/completions') else f"{base}/chat/completions"
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }
            data = {
                "model": self.model_endpoint,
                "messages": messages,
                "temperature": 0.7,
                "max_tokens": 1000
            }
            print("?? AI正在思考...")
            response = requests.post(url, json=data, headers=headers, timeout=self.timeout)

            if response.status_code != 200:
                print(f"API请求失败: {response.status_code}")
                if response.status_code == 401:
                    print("认证失败,请检查API_KEY")
                elif response.status_code == 404:
                    print("模型端点不存在,请检查MODEL_ENDPOINT")
                else:
                    print(f"错误详情: {response.text}")
                return None

            result = response.json()
            # A non-dict body or a missing/empty/null "choices" is reported as
            # a malformed response rather than surfacing as a generic error.
            choices = result.get('choices') if isinstance(result, dict) else None
            if not choices:
                print("API响应格式异常")
                return None

            ai_reply = choices[0]['message']['content']
            self.chat_history.append(user_message)
            self.chat_history.append({
                "role": "assistant",
                "content": ai_reply
            })
            return ai_reply
        except requests.exceptions.Timeout:
            print("请求超时,请检查网络连接")
            return None
        except requests.exceptions.RequestException as e:
            print(f"网络请求错误: {e}")
            return None
        except Exception as e:
            print(f"发送消息失败: {e}")
            return None

    def clear_history(self):
        """Drop all stored messages; the current image stays loaded."""
        self.chat_history = []
        print("? 对话历史已清除")

    def show_history(self):
        """Print the stored conversation, one numbered line per message."""
        if not self.chat_history:
            print("暂无对话历史")
            return
        print("\n=== 对话历史 ===")
        for i, msg in enumerate(self.chat_history, 1):
            role = "用户" if msg["role"] == "user" else "AI"
            content = msg["content"]
            if isinstance(content, list):
                # Multi-part content: show the text part and flag an image.
                text_content = ""
                has_image = False
                for item in content:
                    if item["type"] == "text":
                        text_content = item["text"]
                    elif item["type"] == "image_url":
                        has_image = True
                print(f"{i}. {role}: {text_content}")
                if has_image:
                    print(" [包含图像]")
            else:
                print(f"{i}. {role}: {content}")
        print("=" * 30)
# Extensions accepted for image loading (compared case-insensitively).
_IMAGE_EXTENSIONS = (".jpg", ".jpeg")
# Slash-commands understood by the interactive loop.
_COMMANDS = frozenset({"/load", "/clear", "/history", "/help", "/quit"})
# Leading markers that identify free text as a filesystem path.
_PATH_PREFIXES = ("/", "./", "../", ".\\", "..\\")


def _example_paths():
    """Return (absolute, relative, current-dir) example paths for this OS."""
    if os.name == 'nt':
        return (
            "C:\\Users\\Administrator\\Pictures\\image.jpg",
            "assets\\sample.jpg",
            ".\\assets\\sample.jpg",
        )
    return (
        "/home/sunrise/Pictures/image.jpg",
        "assets/sample.jpg",
        "./assets/sample.jpg",
    )


def _print_help():
    """Print the command list followed by the path hints."""
    absolute, relative, current = _example_paths()
    print("\n可用命令:")
    print("- /load <图像路径> : 加载图像")
    print("- /clear : 清除对话历史")
    print("- /history : 显示对话历史")
    print("- /help : 显示帮助")
    print("- /quit : 退出程序")
    print("- 直接输入文字进行对话")
    print("\n[路径提示] 可使用以下示例路径:")
    print(f"1. 绝对路径: {absolute}")
    print(f"2. 相对路径: {relative}")
    print(f"3. 当前目录: {current}")
    print("注意: 仅支持JPG/JPEG格式")


def _print_path_not_found(path):
    """Report a missing image file together with example paths."""
    absolute, relative, current = _example_paths()
    print(f"图像文件不存在: {path}")
    print("路径示例:")
    print(f" - 绝对路径: {absolute}")
    print(f" - 相对路径: {relative}")
    print(f" - 当前目录: {current}")
    print(" - 仅支持JPG/JPEG格式 (.jpg/.jpeg)")


def _normalize_path(raw):
    """Strip whitespace and quotes; off Windows also fix backslashes and ~."""
    path = raw.strip().strip('\"').strip("'")
    if os.name != 'nt':
        path = path.replace('\\', '/')
        path = os.path.expanduser(path)
    return path


def _resolve_image_path(path):
    """Prefer the path relative to the project root, else return it as given."""
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    full_path = os.path.join(project_root, path)
    return full_path if os.path.exists(full_path) else path


def _looks_like_path(path, resolved):
    """Tell whether normalized free text should be treated as an image path."""
    if not path:
        return False
    if path.lower().endswith(_IMAGE_EXTENSIONS) or path.startswith(_PATH_PREFIXES):
        return True
    # Windows drive prefix such as C:\ or C:/
    if len(path) >= 3 and path[0].isalpha() and path[1] == ":" and path[2] in "\\/":
        return True
    # A bare separator is not enough ("白天/夜晚" is chat text); it only counts
    # when the text actually points at something on disk.
    has_separator = "/" in path or "\\" in path
    return has_separator and os.path.exists(resolved)


def _is_unknown_command(path, resolved):
    """Tell whether the text is a slash-command rather than an absolute path."""
    if not path.startswith("/") or os.path.exists(resolved):
        return False
    if path.lower().endswith(_IMAGE_EXTENSIONS):
        return False
    token = path.split(None, 1)[0]
    # "/foo" is command-like; "/home/user/x" contains further separators.
    return "/" not in token[1:] and "\\" not in token


def _try_load_image(chatbot, raw_path):
    """Normalize, resolve and load an image, printing the outcome."""
    image_path = _normalize_path(raw_path)
    target_path = _resolve_image_path(image_path)
    if not os.path.exists(target_path):
        _print_path_not_found(image_path)
        return
    ext = os.path.splitext(target_path)[1].lower()
    if ext not in _IMAGE_EXTENSIONS:
        print("仅支持JPG/JPEG格式,请选择 .jpg 或 .jpeg 文件")
    elif chatbot.load_image(target_path):
        print("现在可以开始关于这张图片的对话了!")
    else:
        print("图像加载失败")


def main():
    """Run the interactive image-chat loop on the terminal.

    Recognized slash-commands are executed, input that looks like an image
    path is loaded, unknown slash-commands are reported, and anything else is
    sent to the model as a chat message. The loop ends on /quit, Ctrl-C, or
    end of input.
    """
    print("=== 火山引擎图像对话系统 ===")
    print("支持上传图像并进行多轮对话")
    try:
        chatbot = ImageChatBot()
    except ValueError as e:
        print(f"配置错误: {e}")
        return
    _print_help()

    while True:
        try:
            user_input = input("\n?? 您: ").strip()
            if not user_input:
                continue

            # Only known commands are handled here, so a Linux absolute path
            # is not mistaken for a command.
            command, _, argument = user_input.partition(" ")
            command = command.lower()
            if command in _COMMANDS:
                if command == "/quit":
                    print("感谢使用图像对话系统!")
                    break
                if command == "/load":
                    if not argument:
                        print("请提供图像路径: /load <图像路径>")
                    else:
                        _try_load_image(chatbot, argument)
                elif command == "/clear":
                    chatbot.clear_history()
                elif command == "/history":
                    chatbot.show_history()
                else:  # "/help"
                    _print_help()
                continue

            candidate = _normalize_path(user_input)
            resolved = _resolve_image_path(candidate)
            if _is_unknown_command(candidate, resolved):
                print("未知命令,输入 /help 查看帮助")
                continue
            if _looks_like_path(candidate, resolved):
                # A path typed without /load is loaded directly.
                _try_load_image(chatbot, user_input)
                continue

            # Plain chat message.
            if not chatbot.current_image_base64:
                print("提示: 还未加载图像,使用 /load <图像路径> 加载图像后可进行图像相关对话")
            reply = chatbot.send_message(user_input)
            if reply:
                print(f"?? AI: {reply}")
            else:
                print("? 获取回复失败,请重试")
        except KeyboardInterrupt:
            print("\n\n程序被用户中断")
            break
        except EOFError:
            # stdin is closed (Ctrl-D or piped input exhausted). Every further
            # input() call would raise again, so leave instead of looping.
            print()
            break
        except Exception as e:
            print(f"发生错误: {e}")
# Start the interactive loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()