机器视觉技术开发
实验1-打开 USB 摄像头
cd OPENCV  # 打开OPENCV功能包
sudo python3 ./camera_display.py  # 运行py文件
终端显示:

此时Linux系统上会显示摄像头实时画面,我们需要在窗口焦点下测试按键,效果如下:


#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
摄像头读取程序
功能:打开摄像头并实时显示画面,支持多种功能
"""
import cv2
import numpy as np
import sys
import os
import time
import argparse
from datetime import datetime
def main():
    """Open the default camera and show a live preview with toggleable effects.

    Command-line options ``--width`` / ``--height`` set both the requested
    capture resolution and the initial window size.

    Keys (the preview window must have focus):
        q       quit
        g       toggle gray / color
        b       toggle Gaussian blur
        e       toggle Canny edge detection
        n       switch every effect off
        s       save the raw (unprocessed) frame to ``captured_images/``
        + / =   enlarge the window by 10 %
        -       shrink the window by 10 %

    Exits the process with status 1 when the camera cannot be opened.
    """
    # Parse command-line options.
    parser = argparse.ArgumentParser(description='摄像头实时显示程序')
    parser.add_argument('--width', type=int, default=1280, help='显示窗口宽度')
    parser.add_argument('--height', type=int, default=720, help='显示窗口高度')
    args = parser.parse_args()

    window_name = '摄像头'
    min_window_side = 100  # lower bound so repeated '-' cannot collapse the window

    # Device 0 is the default camera; try 1, 2, ... when several are attached.
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("错误:无法打开摄像头")
        sys.exit(1)

    try:
        # Request the capture resolution (the driver may pick the closest mode).
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)

        # WINDOW_NORMAL makes the window user- and program-resizable.
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window_name, args.width, args.height)

        print("摄像头已成功打开")
        print(f"窗口大小设置为: {args.width}x{args.height}")
        print("按键说明:")
        print("- 'q':退出程序")
        print("- 'g':切换灰度/彩色模式")
        print("- 'b':应用模糊效果")
        print("- 'e':应用边缘检测")
        print("- 'n':恢复正常模式")
        print("- 's':保存当前帧为图片")
        print("- '+':增大窗口")
        print("- '-':缩小窗口")

        # Effect toggles and current window size.
        gray_mode = False
        blur_mode = False
        edge_mode = False
        window_width = args.width
        window_height = args.height

        # Directory that receives frames saved with 's'.
        save_dir = "captured_images"
        os.makedirs(save_dir, exist_ok=True)

        while True:
            ret, frame = cap.read()
            if not ret:
                print("错误:无法读取摄像头画面")
                break

            # Base image: single-channel gray or a copy of the BGR frame.
            if gray_mode:
                processed_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                mode_text = "Gray Mode"
            else:
                processed_frame = frame.copy()
                mode_text = "Color Mode"

            if blur_mode:
                processed_frame = cv2.GaussianBlur(processed_frame, (15, 15), 0)
                mode_text += " + Blur"

            if edge_mode:
                # Canny needs a single-channel input.
                if gray_mode:
                    edges = cv2.Canny(processed_frame, 100, 200)
                    display_frame = cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
                else:
                    edges = cv2.Canny(
                        cv2.cvtColor(processed_frame, cv2.COLOR_BGR2GRAY), 100, 200)
                    # Overlay the edges in yellow on the (possibly blurred) image.
                    display_frame = processed_frame.copy()
                    display_frame[edges > 0] = [0, 255, 255]
                mode_text += " + Edge"
            elif gray_mode:
                # Always hand a 3-channel image to putText so the red label
                # keeps its color (also after blurring the gray image).
                display_frame = cv2.cvtColor(processed_frame, cv2.COLOR_GRAY2BGR)
            else:
                display_frame = processed_frame

            cv2.putText(display_frame, mode_text, (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            cv2.imshow(window_name, display_frame)

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                print("用户退出程序")
                break
            elif key == ord('g'):
                gray_mode = not gray_mode
                print("切换到", "灰度模式" if gray_mode else "彩色模式")
            elif key == ord('b'):
                blur_mode = not blur_mode
                print("模糊效果:", "开启" if blur_mode else "关闭")
            elif key == ord('e'):
                edge_mode = not edge_mode
                print("边缘检测:", "开启" if edge_mode else "关闭")
            elif key == ord('n'):
                gray_mode = False
                blur_mode = False
                edge_mode = False
                print("已恢复正常模式")
            elif key == ord('s'):
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                filename = os.path.join(save_dir, f"capture_{timestamp}.jpg")
                # imwrite reports failure through its return value, not an exception.
                if cv2.imwrite(filename, frame):
                    print(f"图像已保存: {filename}")
                else:
                    print(f"错误:图像保存失败: {filename}")
            elif key in (ord('+'), ord('='), ord('-')):
                # '=' shares a key with '+' on most keyboards.
                shrinking = key == ord('-')
                factor = 0.9 if shrinking else 1.1
                window_width = max(min_window_side, int(window_width * factor))
                window_height = max(min_window_side, int(window_height * factor))
                cv2.resizeWindow(window_name, window_width, window_height)
                if shrinking:
                    print(f"窗口大小减小到: {window_width}x{window_height}")
                else:
                    print(f"窗口大小增加到: {window_width}x{window_height}")
    finally:
        # Release the device and close the windows even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()
    print("程序已退出")
# Script entry point: run main() and turn any unexpected exception into a
# printed message plus exit status 1.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"程序发生错误: {e}")
        sys.exit(1)

## 实验2-颜色识别检测
pip install opencv-python  # 安装OpenCV库(另外需自行安装python3,如已安装可忽略)
cd OPENCV  # 打开OPENCV功能包
sudo python3 ./color_detection.py  # 运行py文件
终端显示:

此时Linux系统上会显示摄像头实时画面,我们需要在窗口焦点下测试按键,效果如下:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
多颜色同时识别程序
功能:实时识别摄像头中的多种颜色物体
"""
import cv2
import numpy as np
import sys
import os
import argparse
def _mask_for_ranges(hsv, bounds):
    """Return the OR of ``cv2.inRange`` masks for the (lower, upper) pairs in *bounds*.

    *bounds* is a flat sequence ``[lower1, upper1, lower2, upper2, ...]`` of
    HSV triples; red, for example, needs two pairs.
    """
    mask = None
    for i in range(0, len(bounds) - 1, 2):
        part = cv2.inRange(hsv, np.array(bounds[i]), np.array(bounds[i + 1]))
        mask = part if mask is None else cv2.bitwise_or(mask, part)
    return mask


def _annotate_regions(canvas, mask, label, color, min_area=500):
    """Outline, box and label every region of *mask* with area >= *min_area* on *canvas*."""
    # [-2] selects the contour list under both the 2-tuple and the 3-tuple
    # return conventions of findContours.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    for contour in contours:
        if cv2.contourArea(contour) < min_area:
            continue  # ignore specks
        cv2.drawContours(canvas, [contour], -1, color, 2)
        x, y, w, h = cv2.boundingRect(contour)
        # Keep the label inside the image when the box touches the top edge.
        cv2.putText(canvas, label, (x, max(y - 10, 20)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
        cv2.rectangle(canvas, (x, y), (x + w, y + h), color, 2)


def _scale_image_windows(factor, min_side=100):
    """Resize the two image windows by *factor*; return the new (w, h) or None.

    The current size is read from the 'Color Detection' window. None is
    returned when the reported size is not positive.
    """
    _, _, cur_w, cur_h = cv2.getWindowImageRect('Color Detection')
    if cur_w <= 0 or cur_h <= 0:
        return None
    new_w = max(min_side, int(cur_w * factor))
    new_h = max(min_side, int(cur_h * factor))
    cv2.resizeWindow('Original', new_w, new_h)
    cv2.resizeWindow('Color Detection', new_w, new_h)
    return new_w, new_h


def main():
    """Detect several predefined colors plus one slider-defined color in the camera feed.

    Three windows are created: 'Original' (raw frame), 'Color Detection'
    (annotated frame) and 'Controls' (HSV sliders for the custom color).
    ``--width`` / ``--height`` set the requested capture resolution; the image
    windows start at half that size.

    Keys: ``q`` quit, ``s`` save raw and annotated frame to
    ``color_detection_images/``, ``+``/``=`` enlarge and ``-`` shrink the
    image windows.

    Exits the process with status 1 when the camera cannot be opened.
    """
    import time  # only needed for the timestamp in saved file names

    # Parse command-line options.
    parser = argparse.ArgumentParser(description='多颜色同时识别程序')
    parser.add_argument('--width', type=int, default=2560, help='显示窗口宽度')
    parser.add_argument('--height', type=int, default=1440, help='显示窗口高度')
    args = parser.parse_args()

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("错误:无法打开摄像头")
        sys.exit(1)

    # Slider name -> (initial position, maximum). OpenCV hue spans 0..179.
    sliders = (
        ('H_min', 0, 179), ('H_max', 179, 179),
        ('S_min', 0, 255), ('S_max', 255, 255),
        ('V_min', 0, 255), ('V_max', 255, 255),
    )
    wide_open = ((0, 0, 0), (179, 255, 255))

    # Predefined colors: flat list of HSV (lower, upper) pairs + BGR draw color.
    color_ranges = {
        'red': {
            # Red wraps around the hue axis, hence two pairs.
            'ranges': [(0, 50, 50), (10, 255, 255), (160, 50, 50), (179, 255, 255)],
            'color': (0, 0, 255)
        },
        'green': {
            'ranges': [(35, 50, 50), (85, 255, 255)],
            'color': (0, 255, 0)
        },
        'blue': {
            'ranges': [(100, 50, 50), (130, 255, 255)],
            'color': (255, 0, 0)
        },
        'yellow': {
            'ranges': [(20, 100, 100), (30, 255, 255)],
            'color': (0, 255, 255)
        },
        'white': {
            'ranges': [(0, 0, 200), (180, 30, 255)],
            'color': (255, 255, 255)
        },
        'black': {
            'ranges': [(0, 0, 0), (180, 255, 30)],
            'color': (0, 0, 0)
        }
    }

    try:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)

        for name in ('Original', 'Color Detection', 'Controls'):
            cv2.namedWindow(name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow('Original', args.width // 2, args.height // 2)
        cv2.resizeWindow('Color Detection', args.width // 2, args.height // 2)
        cv2.resizeWindow('Controls', 600, 300)

        for name, initial, maximum in sliders:
            cv2.createTrackbar(name, 'Controls', initial, maximum, lambda x: None)
            cv2.setTrackbarPos(name, 'Controls', initial)

        print("多颜色同时识别程序已启动")
        print("按键说明:")
        print("- 'q':退出程序")
        print("- 's':保存当前帧和检测结果")
        print("- '+'/'-':调整窗口大小")

        while True:
            ret, frame = cap.read()
            if not ret:
                print("错误:无法读取摄像头画面")
                break

            hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
            detection_frame = frame.copy()

            # Custom color from the sliders.
            lower = tuple(cv2.getTrackbarPos(n, 'Controls')
                          for n in ('H_min', 'S_min', 'V_min'))
            upper = tuple(cv2.getTrackbarPos(n, 'Controls')
                          for n in ('H_max', 'S_max', 'V_max'))
            # With the sliders wide open every pixel matches and the whole
            # frame would be boxed as "Custom"; treat that as "not configured".
            if (lower, upper) != wide_open:
                custom_mask = cv2.inRange(hsv, np.array(lower), np.array(upper))
                _annotate_regions(detection_frame, custom_mask, "Custom", (255, 255, 0))

            # Predefined colors.
            for color_name, color_info in color_ranges.items():
                color_mask = _mask_for_ranges(hsv, color_info['ranges'])
                _annotate_regions(detection_frame, color_mask, color_name,
                                  color_info['color'])

            cv2.imshow('Original', frame)
            cv2.imshow('Color Detection', detection_frame)

            key = cv2.waitKey(30) & 0xFF
            if key == ord('q'):
                print("用户退出程序")
                break
            elif key == ord('s'):
                save_dir = "color_detection_images"
                os.makedirs(save_dir, exist_ok=True)
                timestamp = time.strftime("%Y%m%d_%H%M%S")
                original_filename = os.path.join(save_dir, f"original_{timestamp}.jpg")
                detection_filename = os.path.join(save_dir, f"detection_{timestamp}.jpg")
                cv2.imwrite(original_filename, frame)
                cv2.imwrite(detection_filename, detection_frame)
                print(f"已保存图像: {original_filename}, {detection_filename}")
            elif key == ord('+') or key == ord('='):
                # '=' shares a key with '+' on most keyboards.
                size = _scale_image_windows(1.1)
                if size is not None:
                    print(f"窗口大小增加到: {size[0]}x{size[1]}")
            elif key == ord('-'):
                size = _scale_image_windows(0.9)
                if size is not None:
                    print(f"窗口大小减小到: {size[0]}x{size[1]}")
    finally:
        # Release the device and close the windows even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()
    print("程序已退出")
# Script entry point: run main() and turn any unexpected exception into a
# printed message plus exit status 1.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"程序发生错误: {e}")
        sys.exit(1)

## 实验3-手势识别体验
第一步:系统准备
sudo apt update && sudo apt upgrade -y
sudo apt install -y build-essential cmake pkg-config python3-dev python3-pip(如若已装python3可忽略)
第二步:创建虚拟环境
cd OPENCV
python3 -m venv rdkx5_vision_env
source rdkx5_vision_env/bin/activate
第三步:安装依赖
pip install --upgrade pip
pip install -r requirements.txt
第四步:测试环境
python3 mediapipe_gesture_demo.py


示例程序包含以下功能:
- ✅ 实时手势检测 - 支持双手同时识别
- ✅ 数字手势识别 - 识别1-5的手指数量
- ✅ 特殊手势识别 - OK手势、点赞手势
- ✅ 性能监控 - 实时FPS显示
- ✅ 可视化反馈 - 手部关键点绘制
import cv2
import numpy as np
import math
import time
# Open the default camera.
cap = cv2.VideoCapture(0)

# Window size, also used as the requested capture resolution.
window_width = 1280
window_height = 720
cap.set(cv2.CAP_PROP_FRAME_WIDTH, window_width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, window_height)

# Result window.
cv2.namedWindow('Hand Gesture Recognition', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Hand Gesture Recognition', window_width, window_height)

# Slider window for the skin-color thresholds. It must be WINDOW_NORMAL:
# resizeWindow has no effect on the default WINDOW_AUTOSIZE windows.
cv2.namedWindow('Skin Detection Controls', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Skin Detection Controls', 400, 250)

# HSV threshold sliders: (name, initial position, maximum).
for _name, _initial, _maximum in (
    ('H_min', 0, 179),
    ('H_max', 20, 179),
    ('S_min', 30, 255),
    ('S_max', 150, 255),
    ('V_min', 60, 255),
    ('V_max', 255, 255),
):
    cv2.createTrackbar(_name, 'Skin Detection Controls', _initial, _maximum,
                       lambda x: None)
    cv2.setTrackbarPos(_name, 'Skin Detection Controls', _initial)
def count_fingers(contour, drawing):
    """Estimate the number of raised fingers from a hand contour.

    Every convexity defect whose opening angle at the deepest point is at most
    90 degrees is treated as the gap between two fingers and is marked on
    *drawing* with a red dot.

    Args:
        contour: contour as returned by ``cv2.findContours``.
        drawing: BGR image that receives the defect markers (modified in place).

    Returns:
        0 when the hull has fewer than 3 points or no defects can be computed,
        otherwise the number of finger gaps plus one.
    """
    hull = cv2.convexHull(contour, returnPoints=False)
    # Too few hull points to form any defect.
    if hull is None or len(hull) < 3:
        return 0

    try:
        defects = cv2.convexityDefects(contour, hull)
    except cv2.error:
        # OpenCV rejects hulls whose indices are not monotonic (self-
        # intersecting contours); treat that frame as "no defects".
        return 0
    if defects is None:
        return 0

    gap_count = 0
    for start_idx, end_idx, far_idx, _depth in defects[:, 0]:
        start = contour[start_idx][0]
        end = contour[end_idx][0]
        far = contour[far_idx][0]

        # Side lengths of the triangle start-end-far.
        a = math.hypot(float(end[0]) - float(start[0]), float(end[1]) - float(start[1]))
        b = math.hypot(float(far[0]) - float(start[0]), float(far[1]) - float(start[1]))
        c = math.hypot(float(end[0]) - float(far[0]), float(end[1]) - float(far[1]))
        if b == 0 or c == 0:
            continue  # degenerate triangle: the angle at `far` is undefined

        # Law of cosines for the angle at `far`; clamp against rounding so
        # acos never sees a value outside [-1, 1].
        cos_angle = (b * b + c * c - a * a) / (2 * b * c)
        angle = math.degrees(math.acos(max(-1.0, min(1.0, cos_angle))))

        if angle <= 90:
            # Plain ints: cv2.circle does not accept every NumPy scalar type.
            cv2.circle(drawing, (int(far[0]), int(far[1])), 5, [0, 0, 255], -1)
            gap_count += 1

    # N gaps separate N + 1 fingers.
    return gap_count + 1
# Main loop: segment skin by HSV threshold, take the largest contour as the
# hand, count fingers and show the annotated frame until 'q' is pressed.
if not cap.isOpened():
    print("错误:无法打开摄像头")

# Structuring element for the morphology steps; constant, so built once.
kernel = np.ones((5, 5), np.uint8)

try:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("无法获取摄像头画面")
            break

        # Mirror the image so it behaves like a mirror for the user.
        image = cv2.flip(image, 1)
        # Copy that receives all annotations.
        drawing = image.copy()

        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        # Current skin thresholds from the sliders.
        h_min = cv2.getTrackbarPos('H_min', 'Skin Detection Controls')
        h_max = cv2.getTrackbarPos('H_max', 'Skin Detection Controls')
        s_min = cv2.getTrackbarPos('S_min', 'Skin Detection Controls')
        s_max = cv2.getTrackbarPos('S_max', 'Skin Detection Controls')
        v_min = cv2.getTrackbarPos('V_min', 'Skin Detection Controls')
        v_max = cv2.getTrackbarPos('V_max', 'Skin Detection Controls')

        lower_skin = np.array([h_min, s_min, v_min])
        upper_skin = np.array([h_max, s_max, v_max])
        mask = cv2.inRange(hsv, lower_skin, upper_skin)

        # Clean the mask: close small holes, then smooth the outline.
        mask = cv2.dilate(mask, kernel, iterations=2)
        mask = cv2.erode(mask, kernel, iterations=1)
        mask = cv2.GaussianBlur(mask, (5, 5), 100)

        contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # The largest contour is assumed to be the hand.
        if contours:
            max_contour = max(contours, key=cv2.contourArea)
            # Skip small blobs.
            if cv2.contourArea(max_contour) > 5000:
                cv2.drawContours(drawing, [max_contour], 0, (0, 255, 0), 2)

                finger_count = count_fingers(max_contour, drawing)
                # Clamp to the displayable range 1..5.
                finger_count = max(1, min(5, finger_count))

                cv2.putText(
                    drawing,
                    f"Fingers: {finger_count}",
                    (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 255, 0),
                    2,
                    cv2.LINE_AA
                )

        # Usage hints. They must be drawn BEFORE imshow, otherwise they land
        # on a frame that has already been displayed and are never visible.
        cv2.putText(
            drawing,
            "Adjust sliders to detect skin color properly",
            (10, drawing.shape[0] - 40),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 0, 255),
            1,
            cv2.LINE_AA
        )
        cv2.putText(
            drawing,
            "Press 'q' to quit",
            (10, drawing.shape[0] - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 0, 255),
            1,
            cv2.LINE_AA
        )

        # Skin mask and annotated result.
        cv2.imshow('Skin Detection', mask)
        cv2.imshow('Hand Gesture Recognition', drawing)

        # 'q' quits.
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break
finally:
    # Release the device and close the windows even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

## 实验4-Yolov5物体检测
实验步骤:
- 需安装python3、opencv、conda环境。(如已安装可略过、可参考实验1-3的环境安装流程)
- 克隆YOLOv5模型,终端输入指令:git clone https://github.com/ultralytics/yolov5

3.cp -r /home/sunrise/yolov5 /home/sunrise/OPENCV/ #将yolov5文件包拷贝到功能包中保存
cd OPENCV
pip install -r /home/sunrise/OPENCV/requirements_yolov5_torch.txt  #(安装 YOLOv5 运行的最小依赖)

4.source rdkx5_vision_env/bin/activate #激活虚拟环境
5.安装关联包:
在OPENCV目录下
先升级 pip:python -m pip install --upgrade pip
运行以下命令:
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu
pip install ultralytics
pip install pandas psutil thop scipy
python -m pip install tqdm
后续可能用到,建议一起安装:python -m pip install pandas psutil thop pillow pyyaml requests matplotlib seaborn
(以下步骤可先忽略,先尝试运行示例文件,如若版本依赖过低导致无法运行模型再进行更新:
升级基础安装工具:python -m pip install -U pip wheel setuptools==70.0.0
安装/升级缺失依赖:python -m pip install -U gitpython pillow==10.3.0
)
cd yolov5  # 进入文件包
python detect.py --weights yolov5s.pt --source 0  # 运行摄像头版示例文件,需确保摄像头正常连接
终端打印如下:


如若无摄像头,可选择本地图片或视频导入:
- 使用本地图片快速验证
- python detect.py --weights yolov5s.pt --source path/to/image.jpg #source后修改为图片路径
- 使用视频文件验证
- python detect.py --weights yolov5s.pt --source path/to/video.mp4 #source后修改为视频路径
#!/usr/bin/env python3
"""
RDK X5 MediaPipe手势识别示例程序
适用于地瓜派RDK X5开发板的视觉开发
"""
import cv2
import mediapipe as mp
import numpy as np
import time
import math
class MediaPipeGestureRecognizer:
    """Webcam hand-gesture recognizer built on MediaPipe Hands.

    Detects up to two hands per frame, names the gesture of each hand
    (finger count, "OK Gesture", "Thumbs Up") and overlays FPS, hand count
    and gesture text on the mirrored camera image.
    """

    # Landmark indices of index/middle/ring/pinky fingertips and their PIP joints.
    _FINGER_TIPS = (8, 12, 16, 20)
    _FINGER_PIPS = (6, 10, 14, 18)
    # Gesture name for each raised-finger count 0..5.
    _COUNT_NAMES = ("Fist", "One", "Two", "Three", "Four", "Five")
    _WINDOW_NAME = 'RDK X5 Gesture Recognition'

    def __init__(self, camera_id=0, min_detection_confidence=0.7, min_tracking_confidence=0.5):
        """Create the MediaPipe hand tracker and open the camera.

        Args:
            camera_id: index passed to ``cv2.VideoCapture``.
            min_detection_confidence: minimum detection confidence.
            min_tracking_confidence: minimum tracking confidence.
        """
        # MediaPipe handles.
        self.mp_hands = mp.solutions.hands
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_drawing_styles = mp.solutions.drawing_styles

        # Video-mode tracker for up to two hands.
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=2,
            min_detection_confidence=min_detection_confidence,
            min_tracking_confidence=min_tracking_confidence
        )

        # Camera: request 1280x720 at 30 FPS.
        self.cap = cv2.VideoCapture(camera_id)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        self.cap.set(cv2.CAP_PROP_FPS, 30)

        # FPS bookkeeping, see update_fps().
        self.fps_counter = 0
        self.fps_start_time = time.time()
        self.current_fps = 0

    def calculate_distance(self, point1, point2):
        """Return the Euclidean distance between two points in the x/y plane."""
        return math.sqrt((point1.x - point2.x)**2 + (point1.y - point2.y)**2)

    def count_fingers(self, landmarks):
        """Count the extended fingers of one hand.

        Args:
            landmarks: the 21 hand landmarks (objects with ``x`` and ``y``).

        Returns:
            int: number of extended fingers, 0..5.
        """
        thumb_tip = landmarks[4]
        thumb_ip = landmarks[3]
        pinky_mcp = landmarks[17]

        fingers_up = 0
        # Thumb: it moves sideways, so compare x instead of y. Measuring
        # against the pinky base makes the test independent of which hand is
        # shown and of mirroring: an extended thumb tip is farther from the
        # pinky side than the thumb's IP joint.
        if abs(thumb_tip.x - pinky_mcp.x) > abs(thumb_ip.x - pinky_mcp.x):
            fingers_up += 1

        # Other fingers: extended when the tip is above its PIP joint
        # (image y grows downward).
        for tip, pip in zip(self._FINGER_TIPS, self._FINGER_PIPS):
            if landmarks[tip].y < landmarks[pip].y:
                fingers_up += 1
        return fingers_up

    def detect_gesture(self, landmarks):
        """Name the gesture that corresponds to the raised-finger count.

        Args:
            landmarks: the 21 hand landmarks.

        Returns:
            str: "Fist", "One" ... "Five", or "Unknown (N fingers)" for any
            other count.
        """
        fingers_count = self.count_fingers(landmarks)
        if 0 <= fingers_count < len(self._COUNT_NAMES):
            return self._COUNT_NAMES[fingers_count]
        return f"Unknown ({fingers_count} fingers)"

    def detect_ok_gesture(self, landmarks):
        """Return True when thumb tip and index tip are closer than 0.05 landmark units."""
        distance = self.calculate_distance(landmarks[4], landmarks[8])
        return distance < 0.05

    def detect_thumbs_up(self, landmarks):
        """Return True when the thumb points up and at least three other fingers are curled."""
        thumb_tip = landmarks[4]
        thumb_mcp = landmarks[2]
        # Thumb must point upward (tip above landmark 2).
        if thumb_tip.y >= thumb_mcp.y:
            return False
        fingers_down = sum(
            1
            for tip, pip in zip(self._FINGER_TIPS, self._FINGER_PIPS)
            if landmarks[tip].y > landmarks[pip].y
        )
        return fingers_down >= 3

    def update_fps(self):
        """Count one frame; refresh ``current_fps`` every 30 frames."""
        self.fps_counter += 1
        if self.fps_counter >= 30:
            end_time = time.time()
            elapsed = end_time - self.fps_start_time
            # Guard against a zero interval on coarse clocks.
            if elapsed > 0:
                self.current_fps = 30 / elapsed
            self.fps_counter = 0
            self.fps_start_time = end_time

    def draw_info(self, image, gesture_text, hand_count):
        """Draw the FPS / hand count / gesture panel into the top-left corner of *image*."""
        _, width = image.shape[:2]
        font = cv2.FONT_HERSHEY_SIMPLEX
        # Text grows with the image width.
        font_scale = max(0.8, width / 800)
        thickness = max(2, int(width / 400))

        # (text, baseline y, BGR color)
        lines = (
            (f"FPS: {self.current_fps:.1f}", 45, (0, 255, 0)),
            (f"Hands Detected: {hand_count}", 85, (0, 255, 0)),
            (f"Gesture: {gesture_text}", 125, (0, 255, 255)),
        )

        # Size the background to the widest line (at least 500 px) so scaled
        # text does not spill out of the box; never exceed the image.
        widest = max(
            cv2.getTextSize(text, font, font_scale, thickness)[0][0]
            for text, _, _ in lines
        )
        info_width = min(max(500, widest + 20), width - 20)
        info_height = 140
        cv2.rectangle(image, (10, 10), (10 + info_width, 10 + info_height), (0, 0, 0), -1)

        for text, baseline_y, color in lines:
            cv2.putText(image, text, (20, baseline_y), font, font_scale, color, thickness)

    def run(self):
        """Run the capture / recognize / display loop until 'q' is pressed or a read fails."""
        print("RDK X5 MediaPipe手势识别启动...")
        print("按 'q' 键退出程序")

        try:
            # Create the window once; imshow only updates its content.
            cv2.namedWindow(self._WINDOW_NAME, cv2.WINDOW_NORMAL)
            cv2.resizeWindow(self._WINDOW_NAME, 1280, 720)

            while True:
                ret, frame = self.cap.read()
                if not ret:
                    print("无法读取摄像头数据")
                    break

                # Mirror the image, then convert BGR -> RGB for MediaPipe.
                frame = cv2.flip(frame, 1)
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = self.hands.process(rgb_frame)

                gesture_text = "No Gesture"
                hand_count = 0

                if results.multi_hand_landmarks:
                    hand_count = len(results.multi_hand_landmarks)
                    gestures = []
                    for hand_landmarks in results.multi_hand_landmarks:
                        self.mp_drawing.draw_landmarks(
                            frame,
                            hand_landmarks,
                            self.mp_hands.HAND_CONNECTIONS,
                            self.mp_drawing_styles.get_default_hand_landmarks_style(),
                            self.mp_drawing_styles.get_default_hand_connections_style()
                        )

                        # Special gestures take precedence over the finger count.
                        landmarks = hand_landmarks.landmark
                        if self.detect_ok_gesture(landmarks):
                            gestures.append("OK Gesture")
                        elif self.detect_thumbs_up(landmarks):
                            gestures.append("Thumbs Up")
                        else:
                            gestures.append(self.detect_gesture(landmarks))
                    # Report every detected hand, not only the last one.
                    gesture_text = " | ".join(gestures)

                self.update_fps()
                self.draw_info(frame, gesture_text, hand_count)
                cv2.imshow(self._WINDOW_NAME, frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the device and close the windows even if the loop raised.
            self.cap.release()
            cv2.destroyAllWindows()
        print("程序已退出")
def main():
    """Build a recognizer on camera 0 and run it; print troubleshooting hints on any error."""
    troubleshooting = (
        "请检查:",
        "1. 摄像头是否正确连接",
        "2. MediaPipe是否正确安装",
        "3. OpenCV是否正确安装",
    )
    try:
        # Same settings as the recognizer's own defaults, spelled out explicitly.
        settings = {
            "camera_id": 0,
            "min_detection_confidence": 0.7,
            "min_tracking_confidence": 0.5,
        }
        MediaPipeGestureRecognizer(**settings).run()
    except Exception as e:
        print(f"程序运行出错: {e}")
        for hint in troubleshooting:
            print(hint)
# Script entry point: only run main() when executed directly, not on import.
if __name__ == "__main__":
    main()