AI在线开发

实验02-图片分析

实验准备：
确保已接入火山引擎豆包ai
寻找一张格式为jpg图片，作为实验素材
实验步骤：
cd AI_online #进入主目录
python examples/01_image_analysis.py #运行示例程序
终端打印如下：
可使用功能包内置的相对路径图像，如若要使用绝对路径，需在用户主目录下新建文件夹名为Pictures，在其子目录下导入命名为image.jpg图像
实验结果：
# -*- coding: utf-8 -*-
"""
基础图像分析示例
演示如何使用火山引擎豆包API进行图像分析

使用方法:
1. 确保config.py中配置了正确的API_KEY和MODEL_ENDPOINT
2. 运行: python examples/01_image_analysis.py
3. 输入图像路径进行分析

支持的图像格式: JPG, PNG, GIF, BMP, WEBP
"""

import os
import sys
import requests
import base64
from typing import Optional
from PIL import Image
import io

# 添加父目录到路径，以便导入配置
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

try:
    from config import API_KEY, MODEL_ENDPOINT, API_BASE_URL, REQUEST_TIMEOUT
except ImportError:
    print("错误: 无法导入config.py，请确保config.py文件存在且配置正确")
    sys.exit(1)

class ImageProcessor:
    """图像处理器 - 简化版本，仅支持JPG格式"""
  
    @staticmethod
    def encode_image_to_base64(image_path: str) -> str:
        """将图像编码为base64格式"""
        try:
            # 检查文件扩展名
            file_ext = os.path.splitext(image_path)[1].lower()
            if file_ext not in ['.jpg', '.jpeg']:
                raise ValueError(f"不支持的文件格式: {file_ext}，仅支持JPG/JPEG格式")
        
            # 直接读取JPG文件并编码
            with open(image_path, 'rb') as f:
                img_data = f.read()
        
            return base64.b64encode(img_data).decode('utf-8')
        except Exception as e:
            raise ValueError(f"图像处理失败: {e}")
  
    @staticmethod
    def get_image_info(image_path: str) -> dict:
        """获取图像信息"""
        try:
            file_ext = os.path.splitext(image_path)[1].lower()
            if file_ext not in ['.jpg', '.jpeg']:
                return {'error': f'不支持的文件格式: {file_ext}，仅支持JPG/JPEG格式'}
        
            # 使用PIL获取JPG信息
            with Image.open(image_path) as img:
                return {
                    'format': 'JPEG',
                    'mode': img.mode,
                    'size': img.size,
                    'file_size': os.path.getsize(image_path)
                }
        except Exception as e:
            return {'error': str(e)}

class ImageAnalyzer:
    """图像分析器"""
  
    def __init__(self):
        self.api_key = API_KEY
        self.model_endpoint = MODEL_ENDPOINT
        self.base_url = API_BASE_URL
        self.timeout = REQUEST_TIMEOUT
        self.processor = ImageProcessor()
    
        # 检查配置
        self._check_config()
  
    def _check_config(self):
        """检查API配置"""
        if not self.api_key or self.api_key == "你的API_KEY":
            raise ValueError("请在config.py中配置正确的API_KEY")
    
        if not self.model_endpoint or self.model_endpoint == "你的接入点ID":
            raise ValueError("请在config.py中配置正确的MODEL_ENDPOINT")
  
    def analyze_image(self, image_path: str, prompt: str = "请详细描述这张图片的内容") -> Optional[str]:
        """
        分析图像内容
    
        Args:
            image_path: 图像文件路径
            prompt: 分析提示词
        
        Returns:
            str: 分析结果，失败返回None
        """
        try:
            # 编码图像
            base64_image = self.processor.encode_image_to_base64(image_path)
        
            # 构建请求
            headers = {
                'Authorization': f'Bearer {self.api_key}',
                'Content-Type': 'application/json'
            }
        
            data = {
                "model": self.model_endpoint,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": prompt
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{base64_image}"
                                }
                            }
                        ]
                    }
                ]
            }
        
            # 发送请求
            response = requests.post(
                self.base_url,
                headers=headers,
                json=data,
                timeout=self.timeout
            )
        
            if response.status_code == 200:
                result = response.json()
                if 'choices' in result and len(result['choices']) > 0:
                    return result['choices'][0]['message']['content']
                else:
                    print(f"API返回格式异常: {result}")
                    return None
            else:
                print(f"API请求失败: {response.status_code}")
                print(f"错误信息: {response.text}")
                return None
            
        except requests.exceptions.Timeout:
            print("请求超时，请检查网络连接")
            return None
        except requests.exceptions.RequestException as e:
            print(f"网络请求错误: {e}")
            return None
        except Exception as e:
            print(f"分析过程中发生错误: {e}")
            return None

def main():
    """主函数"""
    print("=== 火山引擎图像分析示例 ===")
  
    # 创建分析器
    try:
        analyzer = ImageAnalyzer()
    except ValueError as e:
        print(f"配置错误: {e}")
        print("\n请检查config.py文件中的API_KEY和MODEL_ENDPOINT配置")
        return
  
    # 提供示例图像路径提示
    print("\n[提示] 你可以使用以下方式获取图像:")
    print("1. 使用绝对路径: /home/sunrise/Pictures/image.jpg")
    print("2. 使用相对路径: assets/sample.jpg")
    print("3. 从网络下载JPG图像到本地后使用")
    print("4. 当前目录示例: ./assets/sample.jpg")
    print("注意: 仅支持JPG/JPEG格式")
  
    # 交互式图像分析
    while True:
        print("\n请选择操作:")
        print("1. 分析图像")
        print("2. 退出")
    
        choice = input("请输入选择 (1-2): ").strip()
    
        if choice == "1":
            # 输入图像路径
            image_path = input("请输入图像文件路径: ").strip()
        
            # 去除可能的引号
            image_path = image_path.strip('"').strip("'")
        
            # 处理相对路径
            if not os.path.isabs(image_path):
                # 如果是相对路径，尝试从项目根目录查找
                project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
                full_path = os.path.join(project_root, image_path)
                if os.path.exists(full_path):
                    image_path = full_path
                elif os.path.exists(image_path):
                    # 使用当前工作目录的相对路径
                    pass
                else:
                    print(f"[错误] 文件不存在: {image_path}")
                    print("请检查路径是否正确。")
                    print("提示:")
                    print("  - 绝对路径示例: /home/sunrise/Pictures/image.jpg")
                    print("  - 相对路径示例: assets/sample.jpg")
                    print("  - 当前目录示例: ./assets/sample.jpg")
                    print("支持的格式: JPG/JPEG")
                    continue
            elif not os.path.exists(image_path):
                print(f"[错误] 文件不存在: {image_path}")
                print("请检查绝对路径是否正确，支持的格式: JPG/JPEG")
                continue
        
            # 显示图像信息
            processor = ImageProcessor()
            img_info = processor.get_image_info(image_path)
            if 'error' not in img_info:
                print(f"[图像信息] {img_info['format']} | {img_info['size'][0]}x{img_info['size'][1]} | {img_info['file_size']/1024:.1f}KB")
            else:
                print(f"[错误] {img_info['error']}")
                continue
        
            # 输入分析提示（可选）
            custom_prompt = input("请输入分析提示（回车使用默认）: ").strip()
            prompt = custom_prompt if custom_prompt else "请详细描述这张图片的内容"
        
            print("[处理中] 正在分析图像...")
        
            # 执行分析
            result = analyzer.analyze_image(image_path, prompt)
        
            if result:
                print("\n=== 分析结果 ===")
                print(result)
                print("=" * 50)
            else:
                print("[错误] 分析失败，请检查:")
                print("- 图像文件是否完整")
                print("- 网络连接是否正常")
                print("- API配置是否正确")
    
        elif choice == "2":
            print("感谢使用！")
            break
    
        else:
            print("无效选择，请重新输入")

if __name__ == "__main__":
    main()