Files
prehadler/根据位置画框.py

199 lines
8.6 KiB
Python
Raw Normal View History

2025-10-10 11:39:23 +08:00
import cv2
import numpy as np
import os
from tqdm import tqdm
# -------------------------- 1. 核心配置(请根据需求修改) --------------------------
# 输入目录(图片和标签需一一对应,文件名相同,仅后缀不同)
INPUT_IMAGE_DIR = r"D:\DataPreHandler\data\train\images" # 原始图片目录
INPUT_LABEL_DIR = r"D:\DataPreHandler\data\train\labels" # 原始YOLO标签目录
# 输出目录(标注后的图片会保存在这里)
OUTPUT_IMAGE_DIR = r"D:\DataPreHandler\data\test\da\output2"
# 关键配置类别ID与类别名称的映射必须与你的YOLO训练类别顺序一致
CLASS_CONFIG = [
(0, "Abdomen", (0, 255, 0)),
(1, "Hips", (0, 255, 0)),
(2, "Chest", (0, 255, 0)),
(3, "vulva", (0, 255, 0)),
(4, "back", (0, 255, 0)),
(5, "penis", (0, 255, 0)),
(6, "Horror", (0, 255, 0))
]
# 绘制参数(可按需调整)
BOX_THICKNESS = 2 # 边界框线条厚度(像素)
FONT_FACE = cv2.FONT_HERSHEY_SIMPLEX # 字体类型
FONT_SCALE = 0.6 # 字体大小(根据图片尺寸调整)
FONT_THICKNESS = 1 # 字体线条厚度
TEXT_PADDING = 5 # 文字与边界框的间距(像素)
TEXT_BG_OPACITY = 0.7 # 文字背景的透明度0-10为完全透明
# 支持的图片格式(无需修改)
SUPPORTED_IMAGE_FORMATS = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
# -------------------------- 2. 工具函数 --------------------------
def yolo2pixel(yolo_coords, img_w, img_h):
"""
将YOLO相对坐标转换为图片像素坐标边界框x1, y1, x2, y2
:param yolo_coords: YOLO坐标列表 [xc, yc, w, h]相对值0-1
:param img_w: 图片宽度像素
:param img_h: 图片高度像素
:return: 像素坐标元组 (x1, y1, x2, y2)
"""
xc, yc, w, h = yolo_coords
# 计算边界框左上角和右下角坐标
x1 = int((xc - w / 2) * img_w)
y1 = int((yc - h / 2) * img_h)
x2 = int((xc + w / 2) * img_w)
y2 = int((yc + h / 2) * img_h)
# 确保坐标不超出图片范围
x1 = max(0, x1)
y1 = max(0, y1)
x2 = min(img_w, x2)
y2 = min(img_h, y2)
return x1, y1, x2, y2
def draw_annotation(img, bbox, class_name, color):
"""
在图片上绘制边界框和类别名称
:param img: 原始图片OpenCV格式BGR通道
:param bbox: 像素坐标边界框 (x1, y1, x2, y2)
:param class_name: 类别名称字符串
:param color: 边界框和文字颜色BGR元组 (0,255,0) 代表绿色
:return: 标注后的图片
"""
img_h, img_w = img.shape[:2]
x1, y1, x2, y2 = bbox
# 1. 绘制边界框
cv2.rectangle(img, (x1, y1), (x2, y2), color, BOX_THICKNESS)
# 2. 计算文字尺寸(用于创建文字背景)
text_size, _ = cv2.getTextSize(class_name, FONT_FACE, FONT_SCALE, FONT_THICKNESS)
text_w, text_h = text_size
# 3. 确定文字位置(避免超出图片范围)
# 文字默认放在边界框左上角,若左上角空间不足则放在右上角
text_x = x1 + TEXT_PADDING
text_y = y1 - TEXT_PADDING - text_h # 文字基线在y轴上方
if text_y < 0: # 左上角超出图片顶部,调整到右上角
text_x = x2 - TEXT_PADDING - text_w
text_y = y1 + TEXT_PADDING + text_h
# 4. 绘制文字背景(半透明矩形,避免遮挡图片内容)
bg_x1 = text_x - TEXT_PADDING
bg_y1 = text_y - text_h - TEXT_PADDING
bg_x2 = text_x + text_w + TEXT_PADDING
bg_y2 = text_y + TEXT_PADDING
# 确保背景不超出图片范围
bg_x1 = max(0, bg_x1)
bg_y1 = max(0, bg_y1)
bg_x2 = min(img_w, bg_x2)
bg_y2 = min(img_h, bg_y2)
# 半透明背景:先创建背景层,再与原图混合
bg = img[bg_y1:bg_y2, bg_x1:bg_x2].copy()
bg = cv2.rectangle(bg, (0, 0), (bg_x2 - bg_x1, bg_y2 - bg_y1), color, -1) # 实心矩形
img[bg_y1:bg_y2, bg_x1:bg_x2] = cv2.addWeighted(bg, TEXT_BG_OPACITY, img[bg_y1:bg_y2, bg_x1:bg_x2], 1 - TEXT_BG_OPACITY, 0)
# 5. 绘制类别名称
cv2.putText(img, class_name, (text_x, text_y), FONT_FACE, FONT_SCALE, (0, 0, 0), FONT_THICKNESS) # 白色文字
return img
# -------------------------- 3. 主函数 --------------------------
def main():
# 1. 创建输出目录(若不存在)
os.makedirs(OUTPUT_IMAGE_DIR, exist_ok=True)
print(f"标注后的图片将保存到:{OUTPUT_IMAGE_DIR}\n")
# 2. 构建类别ID到名称+颜色)的映射字典
class_map = {cls_id: (cls_name, cls_color) for cls_id, cls_name, cls_color in CLASS_CONFIG}
print("类别配置:")
for cls_id, cls_name, cls_color in CLASS_CONFIG:
print(f" ID {cls_id} → 名称:{cls_name},颜色:{cls_color}")
print()
# 3. 获取所有图片文件(仅处理支持的格式)
image_files = [f for f in os.listdir(INPUT_IMAGE_DIR) if f.lower().endswith(SUPPORTED_IMAGE_FORMATS)]
if not image_files:
raise FileNotFoundError(f"{INPUT_IMAGE_DIR} 中未找到任何支持的图片文件({SUPPORTED_IMAGE_FORMATS}")
print(f"找到 {len(image_files)} 张图片,开始标注...\n")
# 4. 遍历图片并标注
for img_filename in tqdm(image_files, desc="处理进度"):
# 4.1 构建图片和标签的路径
img_name, img_ext = os.path.splitext(img_filename)
img_path = os.path.join(INPUT_IMAGE_DIR, img_filename)
label_path = os.path.join(INPUT_LABEL_DIR, f"{img_name}.txt") # 标签文件与图片同名后缀为txt
# 4.2 读取图片OpenCV默认读取为BGR通道
img = cv2.imread(img_path)
if img is None:
tqdm.write(f"⚠️ 跳过:无法读取图片 {img_filename}(可能损坏或格式不支持)")
continue
img_h, img_w = img.shape[:2]
# 4.3 读取标签文件(若不存在则跳过标注,直接保存原图)
if not os.path.exists(label_path):
tqdm.write(f"⚠️ 警告:图片 {img_filename} 无对应标签文件 {os.path.basename(label_path)},直接保存原图")
annotated_img = img.copy()
else:
# 复制原图用于标注(避免修改原始图片)
annotated_img = img.copy()
# 读取标签内容
with open(label_path, 'r', encoding='utf-8') as f:
label_lines = [line.strip() for line in f.readlines() if line.strip()] # 过滤空行
# 4.4 解析每个标签并绘制
for line_idx, line in enumerate(label_lines):
try:
# YOLO标签格式class_id xc yc w h空格分隔
parts = line.split()
if len(parts) != 5:
raise ValueError(f"格式错误需5个字段实际{len(parts)}个)")
# 解析类别ID和坐标
cls_id = int(float(parts[0]))
yolo_coords = [float(p) for p in parts[1:]]
# 检查YOLO坐标有效性必须在0-1范围内
if not all(0 <= coord <= 1 for coord in yolo_coords):
raise ValueError(f"YOLO坐标超出0-1范围{yolo_coords}")
# 4.5 转换坐标并绘制
# 检查类别ID是否在配置中
if cls_id not in class_map:
tqdm.write(f"⚠️ 跳过:图片 {img_filename} 标签第{line_idx+1}未知类别ID {cls_id}未在CLASS_CONFIG中配置")
continue
# 获取类别名称和颜色
cls_name, cls_color = class_map[cls_id]
# 转换YOLO坐标为像素坐标
bbox = yolo2pixel(yolo_coords, img_w, img_h)
# 绘制标注
annotated_img = draw_annotation(annotated_img, bbox, cls_name, cls_color)
except Exception as e:
tqdm.write(f"⚠️ 跳过:图片 {img_filename} 标签第{line_idx+1}行解析失败 → {str(e)}")
continue
# 4.6 保存标注后的图片
output_img_path = os.path.join(OUTPUT_IMAGE_DIR, f"{img_name}_annotated{img_ext}")
# 保存为JPG格式若原始是PNG也可改为img_ext保持原格式
# 注JPG不支持透明通道若原始是PNG且有透明建议保留img_ext
cv2.imwrite(output_img_path, annotated_img)
# 5. 完成提示
print(f"\n✅ 标注完成!共处理 {len(image_files)} 张图片,标注后的图片已保存到:")
print(f" {OUTPUT_IMAGE_DIR}")
if __name__ == "__main__":
try:
main()
except Exception as e:
print(f"\n❌ 程序异常终止:{str(e)}")