马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。
您需要 登录 才可以下载或查看,没有账号?立即注册
x
这是一个基于 OpenCV 的人体姿态估计系统,能够从摄像头视频流中实时检测人体关键点,并通过简化算法重建 3D 姿态,最后在 3D 空间中进行仿真展示。系统主要包含 2D 姿态检测、3D 姿态重建和 3D 仿真三个核心模块。
模块导入与环境准备
python
运行
import cv2
import numpy as np
import os
import time
import matplotlib.pyplot as plt
# Kept even though the name is unused: older Matplotlib releases need this
# import to register the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D

# Create every output directory up front; exist_ok=True makes re-runs harmless.
for _output_dir in ("results/2d_poses", "results/3d_poses", "results/simulations"):
    os.makedirs(_output_dir, exist_ok=True)
复制代码
- 导入必要的库:计算机视觉 (cv2)、数值计算 (numpy)、文件操作 (os)、时间测量 (time) 和绘图工具 (matplotlib)
- 创建结果保存目录,exist_ok=True 确保目录已存在时不会报错
常量定义
python
运行
# Bones of the 33-slot skeleton. Each (a, b) pair names two keypoint slots that
# the 3D plots join with one line segment.
JOINT_CONNECTIONS = [
    # head
    (0, 1), (0, 4), (1, 2), (2, 3), (4, 5), (5, 6), (6, 7),
    # torso
    (0, 11), (0, 12), (11, 12),
    # left arm
    (11, 13), (13, 15), (15, 17), (17, 19), (19, 21),
    # right arm
    (12, 14), (14, 16), (16, 18), (18, 20), (20, 22),
    # pelvis
    (11, 23), (12, 24), (23, 24),
    # left leg
    (23, 25), (25, 27), (27, 29), (29, 31),
    # right leg
    (24, 26), (26, 28), (28, 30), (30, 32),
]
复制代码
- 定义 33 个人体关键点的连接关系,用于后续绘制骨架
2D 姿态估计类
python
运行
class HumanPoseEstimator:
    """2D body-keypoint detector that runs an OpenPose Caffe model through OpenCV DNN."""

    def __init__(self, proto_file="pose_deploy_linevec.prototxt",
                 weights_file="pose_iter_440000.caffemodel"):
        """Load the 18-keypoint OpenPose COCO model if its files are present.

        Args:
            proto_file: Path to the network definition (``.prototxt``).
            weights_file: Path to the trained weights (``.caffemodel``).

        When either file is missing, a warning is printed and ``self.net`` is
        set to ``None`` instead of raising.
        """
        # The defaults name the COCO body model, whose 18 keypoint channels are
        # what ``n_points`` and ``coco_to_mp`` below assume. The previously
        # hard-coded pose_deploy_linevec_faster_4_stages / pose_iter_160000
        # pair is the 15-keypoint MPI model, which has a different layout.
        self.proto_file = proto_file
        self.weights_file = weights_file
        self.n_points = 18

        if os.path.exists(self.proto_file) and os.path.exists(self.weights_file):
            self.net = cv2.dnn.readNetFromCaffe(self.proto_file, self.weights_file)
        else:
            print("警告: 找不到OpenCV姿态估计模型文件")
            print("请从https://github.com/CMU-Perceptual-Computing-Lab/openpose下载模型文件")
            self.net = None

        # COCO keypoint index -> slot in the 33-row keypoint array.
        # NOTE(review): slots 1, 4 and 7 do not follow MediaPipe's landmark
        # numbering (there 1 and 4 are inner-eye landmarks and 7 is the left
        # ear). Left unchanged because the skeleton bone list appears to be
        # written against these slots - confirm before renumbering.
        self.coco_to_mp = {
            0: 0,    # nose
            1: 1,    # neck
            2: 12,   # right shoulder
            3: 14,   # right elbow
            4: 16,   # right wrist
            5: 11,   # left shoulder
            6: 13,   # left elbow
            7: 15,   # left wrist
            8: 24,   # right hip
            9: 26,   # right knee
            10: 28,  # right ankle
            11: 23,  # left hip
            12: 25,  # left knee
            13: 27,  # left ankle
            14: 5,   # right eye
            15: 2,   # left eye
            16: 7,   # right ear
            17: 4,   # left ear
        }
复制代码
- 类初始化:加载 OpenCV 预训练的 Caffe 模型
- 关键点映射表:将 COCO 数据集的 18 个关键点映射到 MediaPipe 的 33 点格式
python
运行
def detect_keypoints(self, image):
    """Detect body keypoints in one image and draw them on a copy.

    Args:
        image: Input image array; its first two dimensions are read as
            (height, width).

    Returns:
        A ``(keypoints_2d, annotated_image)`` pair. ``keypoints_2d`` is a
        ``[33, 3]`` array of ``(x, y, confidence)`` rows, with x and y divided
        by the image width and height; rows without an accepted detection stay
        zero. ``annotated_image`` is a copy of ``image`` with keypoints and
        skeleton drawn on it. When no network is loaded the result is
        ``(None, image)``.
    """
    if self.net is None:
        print("错误: 姿态估计模型未正确加载")
        return None, image

    # Feed the frame to the network as a 368x368 blob scaled by 1/255.
    self.net.setInput(
        cv2.dnn.blobFromImage(
            image, 1.0 / 255, (368, 368), (0, 0, 0), swapRB=False, crop=False
        )
    )
    heatmaps = self.net.forward()

    img_h, img_w = image.shape[:2]
    map_h, map_w = heatmaps.shape[2], heatmaps.shape[3]

    keypoints_2d = np.zeros((33, 3))
    pixel_points = []  # one entry per model keypoint: (x, y) pixels or None

    for idx in range(self.n_points):
        # The peak of this keypoint's confidence map is its detection.
        _, confidence, _, peak = cv2.minMaxLoc(heatmaps[0, idx, :, :])

        if not confidence > 0.1:  # below the confidence threshold
            pixel_points.append(None)
            continue

        # Rescale from heat-map cells to image pixels.
        px = (img_w * peak[0]) / map_w
        py = (img_h * peak[1]) / map_h
        pixel_points.append((int(px), int(py)))

        # Store the detection in its slot of the 33-row array.
        slot = self.coco_to_mp.get(idx)
        if slot is not None:
            keypoints_2d[slot] = [px / img_w, py / img_h, confidence]

    # Draw each accepted keypoint with its model index next to it.
    annotated_image = image.copy()
    for idx, pt in enumerate(pixel_points):
        if pt is None:
            continue
        cv2.circle(annotated_image, pt, 8, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
        cv2.putText(annotated_image, f"{idx}", pt, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, lineType=cv2.LINE_AA)

    # Bones, expressed in the model's own keypoint indices.
    skeleton_pairs = (
        (1, 2), (1, 5), (2, 3), (3, 4), (5, 6), (6, 7),
        (1, 8), (8, 9), (9, 10), (1, 11), (11, 12), (12, 13),
        (1, 0), (0, 14), (14, 16), (0, 15), (15, 17),
    )
    for part_a, part_b in skeleton_pairs:
        start, end = pixel_points[part_a], pixel_points[part_b]
        if start is not None and end is not None:
            cv2.line(annotated_image, start, end, (0, 255, 0), 2)

    return keypoints_2d, annotated_image
复制代码
- 图像预处理:将输入图像转换为网络可接受的格式 (368x368)
- 模型推理:通过前向传播获取关键点的置信度图
- 后处理:从置信度图中提取关键点坐标,应用阈值过滤低置信度点
- 可视化:在原图上绘制关键点和骨架连接,返回归一化的关键点坐标和可视化后的图像
3D 姿态估计类
python
运行
class Simple3DPoseEstimator:
    """Lifts 2D keypoints to a rough 3D pose using fixed scale heuristics."""

    def __init__(self):
        """Create the body-proportion table and the Matplotlib 3D axes used for plotting."""
        # Nominal body-segment lengths, in metres.
        self.body_proportions = dict(
            head=0.25,
            torso=0.5,
            upper_arm=0.3,
            forearm=0.25,
            hand=0.1,
            upper_leg=0.5,
            lower_leg=0.5,
            foot=0.2,
        )

        # Figure and 3D axes that visualize_3d_pose draws into.
        figure = plt.figure(figsize=(10, 8))
        self.fig = figure
        self.ax = figure.add_subplot(111, projection='3d')
复制代码
- 初始化:定义人体各部分的标准比例(单位:米)
- 创建 3D 绘图环境用于可视化 3D 姿态
python
运行
def estimate_3d_pose(self, keypoints_2d, image_shape, visualize=False):
    """Estimate a rough 3D pose from normalised 2D keypoints.

    Args:
        keypoints_2d: ``[33, 3]`` array of ``(x, y, confidence)`` rows, with x
            and y as fractions of the image width and height, or ``None``.
        image_shape: Image shape; the first two entries are read as
            (height, width).
        visualize: When true, also plot the result through
            ``self.visualize_3d_pose``.

    Returns:
        A ``[33, 3]`` array of 3D keypoints, with rows for keypoints at or
        below the 0.3 confidence threshold left at zero. Returns ``None`` when
        ``keypoints_2d`` is ``None`` or no keypoint passes the threshold.
    """
    if keypoints_2d is None:
        return None

    keypoints_2d = np.asarray(keypoints_2d, dtype=float)
    h, w = image_shape[:2]

    # Only keypoints above the confidence threshold take part.
    valid_mask = keypoints_2d[:, 2] > 0.3
    if not valid_mask.any():
        return None

    # Back from normalised coordinates to pixels.
    pixels = keypoints_2d[:, :2] * np.array([w, h], dtype=float)

    # Body centre is the mean of the valid keypoints.
    center = pixels[valid_mask].mean(axis=0)

    # Pixel -> metre scale, calibrated on an assumed 0.4 m shoulder width.
    scale = 0.001  # fallback when the shoulders cannot be used
    if valid_mask[11] and valid_mask[12]:
        shoulder_width = np.linalg.norm(pixels[11] - pixels[12])
        # Coincident shoulders would make the scale infinite and fill the
        # result with inf/NaN, so the fallback scale is kept in that case.
        if shoulder_width > 1e-6:
            scale = 0.4 / shoulder_width

    keypoints_3d = np.zeros((33, 3))
    rel = (pixels[valid_mask] - center) * scale
    keypoints_3d[valid_mask, :2] = rel
    # Depth heuristic: the further a joint is from the body centre, the
    # larger its z.
    # NOTE(review): rel is already in metres while max(w, h) is in pixels, so
    # z stays very close to 0; kept exactly as in the original heuristic.
    keypoints_3d[valid_mask, 2] = np.hypot(rel[:, 0], rel[:, 1]) / max(w, h) * 0.5

    if visualize:
        self.visualize_3d_pose(keypoints_3d)

    return keypoints_3d
复制代码
- 3D 姿态估计:基于 2D 关键点和人体比例关系计算 3D 坐标
- 坐标缩放:使用肩宽作为参考来估计人体尺寸比例
- 深度估计:使用关键点到人体中心的距离来粗略估计深度信息(z 轴)
python
运行
def visualize_3d_pose(self, keypoints_3d, frame_id=None):
    """Draw the 3D skeleton on this estimator's axes, then save or show it.

    Args:
        keypoints_3d: ``[N, 3]`` array of 3D keypoints. Rows that are exactly
            ``(0, 0, 0)`` are treated as undetected and not drawn.
        frame_id: When given, the figure is saved as
            ``results/3d_poses/3d_pose_frame_{frame_id}.png``; otherwise the
            plot is refreshed interactively.
    """
    keypoints_3d = np.asarray(keypoints_3d, dtype=float)
    self.ax.clear()

    # Symmetric limits around the origin. Fall back to 1.0 so an all-zero (or
    # non-finite) pose does not produce degenerate axis limits.
    max_range = float(np.max(np.abs(keypoints_3d))) if keypoints_3d.size else 0.0
    if not np.isfinite(max_range) or max_range <= 0:
        max_range = 1.0
    self.ax.set_xlim(-max_range, max_range)
    self.ax.set_ylim(-max_range, max_range)
    self.ax.set_zlim(-max_range, max_range)

    self.ax.set_xlabel('X')
    self.ax.set_ylabel('Y')
    self.ax.set_zlabel('Z')

    # Undetected joints are left at the origin by the estimator. Drawing them
    # would put a dot at (0, 0, 0) and a bone from every neighbour to it.
    # A real joint lying exactly on the origin is hidden as well.
    detected = np.any(keypoints_3d != 0, axis=1)
    shown = keypoints_3d[detected]
    self.ax.scatter(shown[:, 0], shown[:, 1], shown[:, 2], c='r', s=50)

    n_points = len(keypoints_3d)
    for start_idx, end_idx in JOINT_CONNECTIONS:
        if start_idx >= n_points or end_idx >= n_points:
            continue
        if not (detected[start_idx] and detected[end_idx]):
            continue
        segment = keypoints_3d[[start_idx, end_idx]]
        self.ax.plot(segment[:, 0], segment[:, 1], segment[:, 2], c='b', linewidth=2)

    # View straight down the z axis.
    self.ax.view_init(elev=-90, azim=90)

    if frame_id is not None:
        # Save this estimator's own figure. plt.savefig would write whichever
        # figure is current, which may be a different one created later.
        self.fig.savefig(f"results/3d_poses/3d_pose_frame_{frame_id}.png", dpi=300, bbox_inches='tight')
    else:
        plt.pause(0.01)
复制代码
- 3D 姿态可视化:在 3D 空间中绘制关键点和骨架连接
- 视角设置:默认使用俯视视角 (-90 度仰角,90 度方位角)
- 图像保存:根据需要保存 3D 姿态图像
3D 仿真器类
python
运行
class SimpleSimulator:
    """Minimal 3D "simulator" that shows the skeleton in a fixed Matplotlib scene."""

    def __init__(self, use_gui=True):
        """Create the figure, fix the scene bounds and draw the ground plane.

        Args:
            use_gui: Stored on the instance as ``self.use_gui``.
        """
        self.use_gui = use_gui

        # Figure and 3D axes that hold the scene.
        figure = plt.figure(figsize=(10, 8))
        axes = figure.add_subplot(111, projection='3d')
        self.fig = figure
        self.ax = axes

        # Fixed scene bounds, so the view does not rescale.
        axes.set_xlim(-1.5, 1.5)
        axes.set_ylim(-1.5, 1.5)
        axes.set_zlim(0, 2)

        axes.set_xlabel('X')
        axes.set_ylabel('Y')
        axes.set_zlabel('Z')

        # Ground: a translucent green plane at z = 0.
        ticks_x = np.linspace(-1.5, 1.5, 100)
        ticks_y = np.linspace(-1.5, 1.5, 100)
        ground_x, ground_y = np.meshgrid(ticks_x, ticks_y)
        axes.plot_surface(ground_x, ground_y, np.zeros_like(ground_x), alpha=0.3, color='g')

        print("使用简单的3D可视化模拟器")
复制代码
- 初始化:创建 3D 绘图环境和固定大小的场景
- 绘制地面平面:使用绿色半透明平面表示地面
python
运行
def update_pose(self, keypoints_3d):
    """Redraw the scene with a new 3D pose.

    Args:
        keypoints_3d: ``[N, 3]`` array of 3D keypoints, or ``None`` to leave
            the scene untouched. Rows that are exactly ``(0, 0, 0)`` are
            treated as undetected and not drawn.
    """
    if keypoints_3d is None:
        return

    keypoints_3d = np.asarray(keypoints_3d, dtype=float)
    self.ax.clear()

    # clear() drops limits, labels and the ground, so rebuild the fixed scene.
    self.ax.set_xlim(-1.5, 1.5)
    self.ax.set_ylim(-1.5, 1.5)
    self.ax.set_zlim(0, 2)

    self.ax.set_xlabel('X')
    self.ax.set_ylabel('Y')
    self.ax.set_zlabel('Z')

    # Ground: a translucent green plane at z = 0.
    x = np.linspace(-1.5, 1.5, 100)
    y = np.linspace(-1.5, 1.5, 100)
    X, Y = np.meshgrid(x, y)
    self.ax.plot_surface(X, Y, np.zeros_like(X), alpha=0.3, color='g')

    # Undetected joints are left at the origin by the estimator. Drawing them
    # would put a dot at (0, 0, 0) and a bone from every neighbour to it.
    # A real joint lying exactly on the origin is hidden as well.
    detected = np.any(keypoints_3d != 0, axis=1)
    shown = keypoints_3d[detected]
    self.ax.scatter(shown[:, 0], shown[:, 1], shown[:, 2], c='r', s=50)

    n_points = len(keypoints_3d)
    for start_idx, end_idx in JOINT_CONNECTIONS:
        if start_idx >= n_points or end_idx >= n_points:
            continue
        if not (detected[start_idx] and detected[end_idx]):
            continue
        segment = keypoints_3d[[start_idx, end_idx]]
        self.ax.plot(segment[:, 0], segment[:, 1], segment[:, 2], c='b', linewidth=2)

    # Fixed oblique view.
    self.ax.view_init(elev=30, azim=45)

    if self.use_gui:
        plt.pause(0.01)
-
def render_scene(self, frame_id):
    """Save the simulator's figure as a PNG.

    Args:
        frame_id: Frame identifier used in the output file name
            ``results/simulations/simulation_frame_{frame_id}.png``.
    """
    # Save this object's own figure. plt.savefig would write whichever figure
    # is current, which is only this one as long as it was created last.
    self.fig.savefig(f"results/simulations/simulation_frame_{frame_id}.png", dpi=300, bbox_inches='tight')
复制代码
- 更新姿态:根据新的 3D 关键点数据更新场景
- 固定视角:使用侧视视角 (30 度仰角,45 度方位角)
- 场景渲染:将当前场景保存为图像
主函数
python
运行
def main(camera_id=0, use_gui=True):
    """Run the full pipeline: camera capture -> 2D pose -> 3D pose -> simulation.

    Args:
        camera_id: Index of the capture device; 0 is the default camera.
        use_gui: When true, show the OpenCV preview window and poll for ESC to
            quit. When false, no OpenCV window functions are called and the
            loop runs until a frame cannot be read or the process is
            interrupted with Ctrl+C.

    Each processed frame writes PNG files under ``results/``.
    """
    window_name = "2D Pose Estimation"

    # 1. Build the three pipeline stages.
    pose_estimator = HumanPoseEstimator()
    pose_3d_estimator = Simple3DPoseEstimator()
    simulator = SimpleSimulator(use_gui=use_gui)

    # 2. Open the camera.
    cap = cv2.VideoCapture(camera_id)
    frame_id = 0

    try:
        if not cap.isOpened():
            print(f"无法打开摄像头 {camera_id}")
            return

        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print(f"摄像头参数: {width}x{height}, 帧率: {fps}")

        if use_gui:
            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
            cv2.resizeWindow(window_name, 800, 600)

        # 3. Process frames until the camera stops or the user quits.
        while True:
            ret, frame = cap.read()
            if not ret:
                print("无法获取帧,退出...")
                break

            # Mirror the frame horizontally.
            frame = cv2.flip(frame, 1)

            print(f"处理第{frame_id}帧...")

            # 3.1 2D pose detection.
            start_time = time.time()
            keypoints_2d, vis_frame = pose_estimator.detect_keypoints(frame)

            if use_gui:
                cv2.imshow(window_name, vis_frame)
            cv2.imwrite(f"results/2d_poses/2d_pose_frame_{frame_id}.png", vis_frame)

            # 3.2 3D pose reconstruction.
            keypoints_3d = pose_3d_estimator.estimate_3d_pose(
                keypoints_2d, frame.shape, visualize=False
            )
            if keypoints_3d is not None:
                pose_3d_estimator.visualize_3d_pose(keypoints_3d, frame_id)

            # 3.3 / 3.4 Update the simulated scene and save it.
            simulator.update_pose(keypoints_3d)
            simulator.render_scene(frame_id)

            process_time = time.time() - start_time
            print(f"处理时间: {process_time:.3f}秒")

            frame_id += 1

            # ESC quits. The mask drops any high bits in the returned key code.
            if use_gui and (cv2.waitKey(1) & 0xFF) == 27:
                break
    except KeyboardInterrupt:
        # Ctrl+C is the only way to stop a headless run; treat it as a normal
        # exit so cleanup and the summary below still happen.
        pass
    finally:
        # 4. Release resources, including on errors.
        cap.release()
        if use_gui:
            cv2.destroyAllWindows()
        plt.close(pose_3d_estimator.fig)
        plt.close(simulator.fig)

    print(f"处理完成,共{frame_id}帧,结果保存在results目录")
复制代码
- 初始化所有模块:2D 姿态估计器、3D 姿态估计器和 3D 仿真器
- 打开摄像头并获取视频流参数
- 主循环处理每一帧:
- 读取摄像头帧并翻转
- 进行 2D 姿态检测
- 基于 2D 结果进行 3D 姿态重建
- 更新 3D 仿真场景
- 保存所有处理结果
- 计算处理时间
- 资源释放:关闭摄像头和窗口
程序入口
python
运行
if __name__ == "__main__":
    # Script entry point: default camera (ID 0), GUI windows enabled.
    main(camera_id=0, use_gui=True)
复制代码
- 程序入口点,调用 main 函数启动整个系统
- 可以通过修改参数来调整系统行为
总结
这段代码实现了一个完整的人体姿态估计和 3D 仿真系统,主要特点包括:
- 使用 OpenCV 预训练模型进行 2D 姿态检测
- 基于人体比例关系的简化 3D 姿态重建方法
- 使用 matplotlib 进行 3D 姿态可视化和仿真
- 实时处理摄像头视频流
- 保存所有处理结果到指定目录
该系统可以用于姿势分析、运动跟踪、人机交互等多种应用场景,并且提供了良好的扩展性,可以根据需求进一步优化 3D 姿态估计算法或添加更多功能。
完整代码
import cv2
import numpy as np
import os
import time
import matplotlib.pyplot as plt
# Kept even though the name is unused: older Matplotlib releases need this
# import to register the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D

# Create every output directory up front; exist_ok=True makes re-runs harmless.
for _output_dir in ("results/2d_poses", "results/3d_poses", "results/simulations"):
    os.makedirs(_output_dir, exist_ok=True)
# Bones of the 33-slot skeleton. Each (a, b) pair names two keypoint slots that
# the 3D plots join with one line segment. (In this listing the assignment had
# been swallowed by the preceding comment, leaving the name undefined.)
JOINT_CONNECTIONS = [
    # head
    (0, 1), (0, 4), (1, 2), (2, 3), (4, 5), (5, 6), (6, 7),
    # torso
    (0, 11), (0, 12), (11, 12),
    # left arm
    (11, 13), (13, 15), (15, 17), (17, 19), (19, 21),
    # right arm
    (12, 14), (14, 16), (16, 18), (18, 20), (20, 22),
    # pelvis
    (11, 23), (12, 24), (23, 24),
    # left leg
    (23, 25), (25, 27), (27, 29), (29, 31),
    # right leg
    (24, 26), (26, 28), (28, 30), (30, 32),
]
class HumanPoseEstimator:
    """2D body-keypoint detector that runs an OpenPose Caffe model through OpenCV DNN."""

    def __init__(self, proto_file="pose_deploy_linevec.prototxt",
                 weights_file="pose_iter_440000.caffemodel"):
        """Load the 18-keypoint OpenPose COCO model if its files are present.

        Args:
            proto_file: Path to the network definition (``.prototxt``).
            weights_file: Path to the trained weights (``.caffemodel``).

        When either file is missing, a warning is printed and ``self.net`` is
        set to ``None`` instead of raising.
        """
        # The defaults name the COCO body model, whose 18 keypoint channels are
        # what ``n_points`` and ``coco_to_mp`` below assume. The previously
        # hard-coded pose_deploy_linevec_faster_4_stages / pose_iter_160000
        # pair is the 15-keypoint MPI model, which has a different layout.
        self.proto_file = proto_file
        self.weights_file = weights_file
        self.n_points = 18

        if os.path.exists(self.proto_file) and os.path.exists(self.weights_file):
            self.net = cv2.dnn.readNetFromCaffe(self.proto_file, self.weights_file)
        else:
            print("警告: 找不到OpenCV姿态估计模型文件")
            print("请从https://github.com/CMU-Perceptual-Computing-Lab/openpose下载模型文件")
            self.net = None

        # COCO keypoint index -> slot in the 33-row keypoint array.
        # NOTE(review): slots 1, 4 and 7 do not follow MediaPipe's landmark
        # numbering (there 1 and 4 are inner-eye landmarks and 7 is the left
        # ear). Left unchanged because JOINT_CONNECTIONS appears to be written
        # against these slots - confirm before renumbering.
        self.coco_to_mp = {
            0: 0,    # nose
            1: 1,    # neck
            2: 12,   # right shoulder
            3: 14,   # right elbow
            4: 16,   # right wrist
            5: 11,   # left shoulder
            6: 13,   # left elbow
            7: 15,   # left wrist
            8: 24,   # right hip
            9: 26,   # right knee
            10: 28,  # right ankle
            11: 23,  # left hip
            12: 25,  # left knee
            13: 27,  # left ankle
            14: 5,   # right eye
            15: 2,   # left eye
            16: 7,   # right ear
            17: 4,   # left ear
        }

    def detect_keypoints(self, image):
        """Detect body keypoints in one image and draw them on a copy.

        Args:
            image: Input image array; its first two dimensions are read as
                (height, width).

        Returns:
            A ``(keypoints_2d, annotated_image)`` pair. ``keypoints_2d`` is a
            ``[33, 3]`` array of ``(x, y, confidence)`` rows, with x and y
            divided by the image width and height; rows without an accepted
            detection stay zero. ``annotated_image`` is a copy of ``image``
            with keypoints and skeleton drawn on it. When no network is loaded
            the result is ``(None, image)``.
        """
        if self.net is None:
            print("错误: 姿态估计模型未正确加载")
            return None, image

        # Feed the frame to the network as a 368x368 blob scaled by 1/255.
        self.net.setInput(
            cv2.dnn.blobFromImage(
                image, 1.0 / 255, (368, 368), (0, 0, 0), swapRB=False, crop=False
            )
        )
        heatmaps = self.net.forward()

        img_h, img_w = image.shape[:2]
        map_h, map_w = heatmaps.shape[2], heatmaps.shape[3]

        keypoints_2d = np.zeros((33, 3))
        pixel_points = []  # one entry per model keypoint: (x, y) pixels or None

        for idx in range(self.n_points):
            # The peak of this keypoint's confidence map is its detection.
            _, confidence, _, peak = cv2.minMaxLoc(heatmaps[0, idx, :, :])

            if not confidence > 0.1:  # below the confidence threshold
                pixel_points.append(None)
                continue

            # Rescale from heat-map cells to image pixels.
            px = (img_w * peak[0]) / map_w
            py = (img_h * peak[1]) / map_h
            pixel_points.append((int(px), int(py)))

            # Store the detection in its slot of the 33-row array.
            slot = self.coco_to_mp.get(idx)
            if slot is not None:
                keypoints_2d[slot] = [px / img_w, py / img_h, confidence]

        # Draw each accepted keypoint with its model index next to it.
        annotated_image = image.copy()
        for idx, pt in enumerate(pixel_points):
            if pt is None:
                continue
            cv2.circle(annotated_image, pt, 8, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
            cv2.putText(annotated_image, f"{idx}", pt, cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, lineType=cv2.LINE_AA)

        # Bones, expressed in the model's own keypoint indices.
        skeleton_pairs = (
            (1, 2), (1, 5), (2, 3), (3, 4), (5, 6), (6, 7),
            (1, 8), (8, 9), (9, 10), (1, 11), (11, 12), (12, 13),
            (1, 0), (0, 14), (14, 16), (0, 15), (15, 17),
        )
        for part_a, part_b in skeleton_pairs:
            start, end = pixel_points[part_a], pixel_points[part_b]
            if start is not None and end is not None:
                cv2.line(annotated_image, start, end, (0, 255, 0), 2)

        return keypoints_2d, annotated_image
class Simple3DPoseEstimator:
    """Lifts 2D keypoints to a rough 3D pose using fixed scale heuristics."""

    def __init__(self):
        """Create the body-proportion table and the Matplotlib 3D axes used for plotting."""
        # Nominal body-segment lengths, in metres.
        self.body_proportions = {
            "head": 0.25,
            "torso": 0.5,
            "upper_arm": 0.3,
            "forearm": 0.25,
            "hand": 0.1,
            "upper_leg": 0.5,
            "lower_leg": 0.5,
            "foot": 0.2,
        }

        # Figure and 3D axes that visualize_3d_pose draws into.
        self.fig = plt.figure(figsize=(10, 8))
        self.ax = self.fig.add_subplot(111, projection='3d')

    def estimate_3d_pose(self, keypoints_2d, image_shape, visualize=False):
        """Estimate a rough 3D pose from normalised 2D keypoints.

        Args:
            keypoints_2d: ``[33, 3]`` array of ``(x, y, confidence)`` rows,
                with x and y as fractions of the image width and height, or
                ``None``.
            image_shape: Image shape; the first two entries are read as
                (height, width).
            visualize: When true, also plot the result through
                ``visualize_3d_pose``.

        Returns:
            A ``[33, 3]`` array of 3D keypoints, with rows for keypoints at or
            below the 0.3 confidence threshold left at zero. Returns ``None``
            when ``keypoints_2d`` is ``None`` or no keypoint passes the
            threshold.
        """
        if keypoints_2d is None:
            return None

        keypoints_2d = np.asarray(keypoints_2d, dtype=float)
        h, w = image_shape[:2]

        # Only keypoints above the confidence threshold take part.
        valid_mask = keypoints_2d[:, 2] > 0.3
        if not valid_mask.any():
            return None

        # Back from normalised coordinates to pixels.
        pixels = keypoints_2d[:, :2] * np.array([w, h], dtype=float)

        # Body centre is the mean of the valid keypoints.
        center = pixels[valid_mask].mean(axis=0)

        # Pixel -> metre scale, calibrated on an assumed 0.4 m shoulder width.
        scale = 0.001  # fallback when the shoulders cannot be used
        if valid_mask[11] and valid_mask[12]:
            shoulder_width = np.linalg.norm(pixels[11] - pixels[12])
            # Coincident shoulders would make the scale infinite and fill the
            # result with inf/NaN, so the fallback scale is kept in that case.
            if shoulder_width > 1e-6:
                scale = 0.4 / shoulder_width

        keypoints_3d = np.zeros((33, 3))
        rel = (pixels[valid_mask] - center) * scale
        keypoints_3d[valid_mask, :2] = rel
        # Depth heuristic: the further a joint is from the body centre, the
        # larger its z.
        # NOTE(review): rel is already in metres while max(w, h) is in pixels,
        # so z stays very close to 0; kept exactly as in the original heuristic.
        keypoints_3d[valid_mask, 2] = np.hypot(rel[:, 0], rel[:, 1]) / max(w, h) * 0.5

        if visualize:
            self.visualize_3d_pose(keypoints_3d)

        return keypoints_3d

    def visualize_3d_pose(self, keypoints_3d, frame_id=None):
        """Draw the 3D skeleton on this estimator's axes, then save or show it.

        Args:
            keypoints_3d: ``[N, 3]`` array of 3D keypoints. Rows that are
                exactly ``(0, 0, 0)`` are treated as undetected and not drawn.
            frame_id: When given, the figure is saved as
                ``results/3d_poses/3d_pose_frame_{frame_id}.png``; otherwise
                the plot is refreshed interactively.
        """
        keypoints_3d = np.asarray(keypoints_3d, dtype=float)
        self.ax.clear()

        # Symmetric limits around the origin. Fall back to 1.0 so an all-zero
        # (or non-finite) pose does not produce degenerate axis limits.
        max_range = float(np.max(np.abs(keypoints_3d))) if keypoints_3d.size else 0.0
        if not np.isfinite(max_range) or max_range <= 0:
            max_range = 1.0
        self.ax.set_xlim(-max_range, max_range)
        self.ax.set_ylim(-max_range, max_range)
        self.ax.set_zlim(-max_range, max_range)

        self.ax.set_xlabel('X')
        self.ax.set_ylabel('Y')
        self.ax.set_zlabel('Z')

        # estimate_3d_pose leaves undetected joints at the origin. Drawing them
        # would put a dot at (0, 0, 0) and a bone from every neighbour to it.
        # A real joint lying exactly on the origin is hidden as well.
        detected = np.any(keypoints_3d != 0, axis=1)
        shown = keypoints_3d[detected]
        self.ax.scatter(shown[:, 0], shown[:, 1], shown[:, 2], c='r', s=50)

        n_points = len(keypoints_3d)
        for start_idx, end_idx in JOINT_CONNECTIONS:
            if start_idx >= n_points or end_idx >= n_points:
                continue
            if not (detected[start_idx] and detected[end_idx]):
                continue
            segment = keypoints_3d[[start_idx, end_idx]]
            self.ax.plot(segment[:, 0], segment[:, 1], segment[:, 2], c='b', linewidth=2)

        # View straight down the z axis.
        self.ax.view_init(elev=-90, azim=90)

        if frame_id is not None:
            # Save this estimator's own figure. plt.savefig would write
            # whichever figure is current, which may be a different one
            # created later.
            self.fig.savefig(f"results/3d_poses/3d_pose_frame_{frame_id}.png", dpi=300, bbox_inches='tight')
        else:
            plt.pause(0.01)
class SimpleSimulator:
    """Minimal 3D "simulator" that shows the skeleton in a fixed Matplotlib scene."""

    def __init__(self, use_gui=True):
        """Create the figure and draw the empty scene.

        Args:
            use_gui: When true, ``update_pose`` refreshes the on-screen plot
                after each redraw.
        """
        self.use_gui = use_gui

        # Figure and 3D axes that hold the scene.
        self.fig = plt.figure(figsize=(10, 8))
        self.ax = self.fig.add_subplot(111, projection='3d')

        self._draw_scene()

        print("使用简单的3D可视化模拟器")

    def _draw_scene(self):
        """Apply the fixed axis limits and labels and draw the ground plane."""
        self.ax.set_xlim(-1.5, 1.5)
        self.ax.set_ylim(-1.5, 1.5)
        self.ax.set_zlim(0, 2)

        self.ax.set_xlabel('X')
        self.ax.set_ylabel('Y')
        self.ax.set_zlabel('Z')

        # Ground: a translucent green plane at z = 0.
        x = np.linspace(-1.5, 1.5, 100)
        y = np.linspace(-1.5, 1.5, 100)
        X, Y = np.meshgrid(x, y)
        self.ax.plot_surface(X, Y, np.zeros_like(X), alpha=0.3, color='g')

    def update_pose(self, keypoints_3d):
        """Redraw the scene with a new 3D pose.

        Args:
            keypoints_3d: ``[N, 3]`` array of 3D keypoints, or ``None`` to
                leave the scene untouched. Rows that are exactly ``(0, 0, 0)``
                are treated as undetected and not drawn.
        """
        if keypoints_3d is None:
            return

        keypoints_3d = np.asarray(keypoints_3d, dtype=float)

        # clear() drops limits, labels and the ground, so rebuild them.
        self.ax.clear()
        self._draw_scene()

        # Undetected joints are left at the origin by the estimator. Drawing
        # them would put a dot at (0, 0, 0) and a bone from every neighbour to
        # it. A real joint lying exactly on the origin is hidden as well.
        detected = np.any(keypoints_3d != 0, axis=1)
        shown = keypoints_3d[detected]
        self.ax.scatter(shown[:, 0], shown[:, 1], shown[:, 2], c='r', s=50)

        n_points = len(keypoints_3d)
        for start_idx, end_idx in JOINT_CONNECTIONS:
            if start_idx >= n_points or end_idx >= n_points:
                continue
            if not (detected[start_idx] and detected[end_idx]):
                continue
            segment = keypoints_3d[[start_idx, end_idx]]
            self.ax.plot(segment[:, 0], segment[:, 1], segment[:, 2], c='b', linewidth=2)

        # Fixed oblique view.
        self.ax.view_init(elev=30, azim=45)

        if self.use_gui:
            plt.pause(0.01)

    def render_scene(self, frame_id):
        """Save the simulator's figure as a PNG.

        Args:
            frame_id: Frame identifier used in the output file name
                ``results/simulations/simulation_frame_{frame_id}.png``.
        """
        # Save this object's own figure. plt.savefig would write whichever
        # figure is current, which is only this one as long as it was created
        # last.
        self.fig.savefig(f"results/simulations/simulation_frame_{frame_id}.png", dpi=300, bbox_inches='tight')
def main(camera_id=0, use_gui=True):
    """Run the full pipeline: camera capture -> 2D pose -> 3D pose -> simulation.

    Args:
        camera_id: Index of the capture device; 0 is the default camera.
        use_gui: When true, show the OpenCV preview window and poll for ESC to
            quit. When false, no OpenCV window functions are called and the
            loop runs until a frame cannot be read or the process is
            interrupted with Ctrl+C.

    Each processed frame writes PNG files under ``results/``.
    """
    window_name = "2D Pose Estimation"

    # 1. Build the three pipeline stages.
    pose_estimator = HumanPoseEstimator()
    pose_3d_estimator = Simple3DPoseEstimator()
    simulator = SimpleSimulator(use_gui=use_gui)

    # 2. Open the camera.
    cap = cv2.VideoCapture(camera_id)
    frame_id = 0

    try:
        if not cap.isOpened():
            print(f"无法打开摄像头 {camera_id}")
            return

        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print(f"摄像头参数: {width}x{height}, 帧率: {fps}")

        if use_gui:
            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
            cv2.resizeWindow(window_name, 800, 600)

        # 3. Process frames until the camera stops or the user quits.
        while True:
            ret, frame = cap.read()
            if not ret:
                print("无法获取帧,退出...")
                break

            # Mirror the frame horizontally.
            frame = cv2.flip(frame, 1)

            print(f"处理第{frame_id}帧...")

            # 3.1 2D pose detection.
            start_time = time.time()
            keypoints_2d, vis_frame = pose_estimator.detect_keypoints(frame)

            if use_gui:
                cv2.imshow(window_name, vis_frame)
            cv2.imwrite(f"results/2d_poses/2d_pose_frame_{frame_id}.png", vis_frame)

            # 3.2 3D pose reconstruction.
            keypoints_3d = pose_3d_estimator.estimate_3d_pose(
                keypoints_2d, frame.shape, visualize=False
            )
            if keypoints_3d is not None:
                pose_3d_estimator.visualize_3d_pose(keypoints_3d, frame_id)

            # 3.3 / 3.4 Update the simulated scene and save it.
            simulator.update_pose(keypoints_3d)
            simulator.render_scene(frame_id)

            process_time = time.time() - start_time
            print(f"处理时间: {process_time:.3f}秒")

            frame_id += 1

            # ESC quits. The mask drops any high bits in the returned key code.
            if use_gui and (cv2.waitKey(1) & 0xFF) == 27:
                break
    except KeyboardInterrupt:
        # Ctrl+C is the only way to stop a headless run; treat it as a normal
        # exit so cleanup and the summary below still happen.
        pass
    finally:
        # 4. Release resources, including on errors.
        cap.release()
        if use_gui:
            cv2.destroyAllWindows()
        plt.close(pose_3d_estimator.fig)
        plt.close(simulator.fig)

    print(f"处理完成,共{frame_id}帧,结果保存在results目录")
if __name__ == "__main__":
    # Script entry point: default camera (ID 0), GUI windows enabled.
    main(camera_id=0, use_gui=True)
复制代码
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息从访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。 |