本文记录使用 YOLOv8 训练自定义目标检测数据集的完整流程，包括数据标注、格式转换、配置编写、训练验证，帮助你快速上手 YOLOv8 自定义训练。

数据集标注

标注工具选择

Labelme（推荐）

# 安装
pip install labelme

# 启动
labelme

Labelme 特点：

支持多边形、矩形、圆形标注
支持快捷键，提高标注效率
输出 JSON 格式

Roboflow

网址：https://roboflow.com

优点：

云端标注，无需安装
自动标注辅助
直接支持 YOLO 格式导出
支持团队协作

标注规范

# 标注注意事项：

# 1. 边界框要紧贴目标
#    ┌─────┐       ┌─────┐
#    │目标 │  ✓    │目标 │  ✗  (留白太多)
#    └─────┘       └─────┘

# 2. 小目标要特别仔细
#    - 包含足够的上下文
#    - 避免截断

# 3. 遮挡目标处理
#    - 可见部分 > 50% 正常标注
#    - 可见部分 < 50% 可选择忽略或标注

# 4. 类别命名规范
#    - 统一命名，避免拼写错误
#    - 推荐：person, car, dog, cat
#    - 不推荐：Person, Car, 人, 车

数据集格式转换

YOLO TXT 格式

# YOLO 数据集结构
dataset/
├── images/
│   ├── train/
│   │   ├── image1.jpg
│   │   └── image2.jpg
│   └── val/
│       └── image3.jpg
├── labels/
│   ├── train/
│   │   ├── image1.txt
│   │   └── image2.txt
│   └── val/
│       └── image3.txt
└── data.yaml

Labelme JSON → YOLO TXT

# labelme2yolo.py

import json
import os
from pathlib import Path

def convert_labelme_to_yolo(json_file, output_dir, class_names):
    """Convert one Labelme JSON annotation file into a YOLO TXT label file.

    Each supported shape is reduced to its axis-aligned bounding box and
    written as ``class_id x_center y_center width height``, with the four
    geometry values normalised by the image size and clamped to ``[0, 1]``.

    Args:
        json_file: Path to the Labelme ``.json`` file.
        output_dir: Existing directory that receives ``<json stem>.txt``.
        class_names: Sequence of label names; the position of a name in the
            sequence is its YOLO class id (first occurrence wins).

    Shapes with an unknown label, an unsupported ``shape_type`` (anything
    other than ``rectangle`` / ``polygon``) or an empty box are reported on
    stdout and skipped. The output file is always (re)written, even when no
    shape survives.
    """
    with open(json_file, 'r') as f:
        data = json.load(f)

    img_width = data['imageWidth']
    img_height = data['imageHeight']

    txt_path = os.path.join(output_dir, Path(json_file).stem + '.txt')

    # One dict lookup per shape instead of a list scan; setdefault keeps the
    # first index for duplicated names, exactly like list.index().
    class_ids = {}
    for idx, name in enumerate(class_names):
        class_ids.setdefault(name, idx)

    lines = []
    for shape in data['shapes']:
        label = shape['label']
        if label not in class_ids:
            print(f"警告：未知类别 {label}")
            continue
        class_id = class_ids[label]

        shape_type = shape['shape_type']
        points = shape['points']  # [[x1, y1], [x2, y2], ...]
        if shape_type == 'rectangle':
            # A rectangle is described by two opposite corners, in any order.
            corners = points[:2]
        elif shape_type == 'polygon':
            corners = points
        else:
            print(f"警告：不支持的形状类型 {shape_type}，已跳过")
            continue

        if len(corners) < 2:
            print(f"警告：{label} 的标注点不足，已跳过")
            continue

        xs = [p[0] for p in corners]
        ys = [p[1] for p in corners]

        # Clamp to the image so every normalised value stays inside [0, 1].
        x_min = max(0, min(min(xs), img_width))
        x_max = max(0, min(max(xs), img_width))
        y_min = max(0, min(min(ys), img_height))
        y_max = max(0, min(max(ys), img_height))

        if x_max <= x_min or y_max <= y_min:
            print(f"警告：{label} 的标注框面积为零，已跳过")
            continue

        x_center = ((x_min + x_max) / 2) / img_width
        y_center = ((y_min + y_max) / 2) / img_height
        width = (x_max - x_min) / img_width
        height = (y_max - y_min) / img_height

        lines.append(
            f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
        )

    with open(txt_path, 'w') as f:
        f.writelines(lines)

# Usage example
class_names = ['cat', 'dog']  # your classes; list position == YOLO class id

# Convert the training split. Labels are written to dataset/labels/train so
# that they mirror dataset/images/train in the YOLO dataset layout.
json_dir = 'dataset/train/json'
output_dir = 'dataset/labels/train'
os.makedirs(output_dir, exist_ok=True)

# sorted() makes the processing order reproducible across runs/platforms.
for json_file in sorted(Path(json_dir).glob('*.json')):
    convert_labelme_to_yolo(json_file, output_dir, class_names)

COCO JSON → YOLO TXT

# coco2yolo.py

import json
import os
from pathlib import Path

def convert_coco_to_yolo(coco_json, output_dir, class_names):
    """Convert a COCO detection JSON file into per-image YOLO TXT label files.

    For every image that has at least one usable annotation, a file named
    ``<image stem>.txt`` is written to ``output_dir`` with one line per box:
    ``class_id x_center y_center width height`` (normalised by image size).

    Args:
        coco_json: Path to the COCO annotation file; it must contain the
            ``images``, ``annotations`` and ``categories`` sections.
        output_dir: Directory for the label files; created if missing.
        class_names: Sequence of category names; the position of a name in
            the sequence is the YOLO class id written to the label files.

    COCO ``category_id`` values are dataset-defined and need not be 0-based
    or contiguous, so they are translated through the category *name* into
    the index of that name in ``class_names``. Annotations whose category is
    not listed in ``class_names`` are reported once per category and skipped.
    Label files are rewritten from scratch, so re-running the conversion does
    not duplicate lines.
    """
    with open(coco_json, 'r') as f:
        coco = json.load(f)

    os.makedirs(output_dir, exist_ok=True)

    # image_id -> image record (file name and size)
    images = {img['id']: img for img in coco['images']}

    # category name -> YOLO class id (first occurrence wins)
    name_to_class = {}
    for idx, name in enumerate(class_names):
        name_to_class.setdefault(name, idx)

    # COCO category_id -> YOLO class id, only for the requested classes
    category_to_class = {
        cat['id']: name_to_class[cat['name']]
        for cat in coco['categories']
        if cat['name'] in name_to_class
    }

    # Group lines per output file so each file is opened exactly once.
    labels_by_file = {}
    reported = set()
    for ann in coco['annotations']:
        category_id = ann['category_id']
        class_id = category_to_class.get(category_id)
        if class_id is None:
            if category_id not in reported:
                reported.add(category_id)
                print(f"警告：未知类别 {category_id}")
            continue

        image_info = images[ann['image_id']]
        img_width = image_info['width']
        img_height = image_info['height']

        # bbox: [x, y, width, height] with (x, y) the top-left corner
        x, y, w, h = ann['bbox']
        x_center = (x + w / 2) / img_width
        y_center = (y + h / 2) / img_height
        width = w / img_width
        height = h / img_height

        file_name = Path(image_info['file_name']).stem + '.txt'
        labels_by_file.setdefault(file_name, []).append(
            f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
        )

    for file_name, lines in labels_by_file.items():
        with open(os.path.join(output_dir, file_name), 'w') as f:
            f.writelines(lines)

# Example invocation
class_names = ['person', 'car', 'dog']  # listed in COCO category order
convert_coco_to_yolo(
    coco_json='instances_train.json',
    output_dir='labels/train',
    class_names=class_names,
)

YAML 配置文件编写

data.yaml

# 数据集配置
path: /path/to/dataset  # 数据集根目录
train: images/train     # 训练集图像路径
val: images/val        # 验证集图像路径

# 类别数量
nc: 3

# 类别名称（顺序要与标注一致）
names:
  0: cat
  1: dog
  2: person

模型配置文件

# yolov8.yaml (使用默认或自定义)

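# YOLOv8n/s/m/l/x 配置示例
# 注意：以下仅为帮助理解网络结构的示意，不能直接被 Ultralytics 加载；
# 官方 yolov8.yaml 中每一层的格式为 [from, repeats, module, args]，
# 例如 [-1, 1, Conv, [64, 3, 2]]。自定义结构时请以官方配置文件为模板修改。
# 骨干网络通道数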
backbone:
  # [from, repeats, channel, kernel, stride]
  - [-1, 1, 16, 3, 2]          # 0: Conv
  - [-1, 1, 32, 3, 2]          # 1: Conv
  - [-1, 2, 32, 3, 2]          # 2: C2f
  - [-1, 1, 64, 3, 2]          # 3: Conv
  - [-1, 2, 64, 3, 2]          # 4: C2f
  - [-1, 1, 128, 3, 2]         # 5: Conv
  - [-1, 2, 128, 3, 2]         # 6: C2f
  - [-1, 1, 256, 3, 2]         # 7: Conv
  - [-1, 2, 256, 3, 2]          # 8: C2f
  - [-1, 1, 512, 1, 1]         # 9: Conv
  - [-1, 1, 512, 1, 1]         # 10: SPPF

# 检测头
head:
  - [-1, 1, 256, 1, 1]         # 11: upsample
  - [[-1, 6], 1, 256, 1, 1]   # 12: Concat
  - [-1, 2, 256, 1, 1]         # 13: C2f
  - [-1, 1, 256, 1, 1]         # 14: upsample
  - [[-1, 4], 1, 256, 1, 1]   # 15: Concat
  - [-1, 2, 256, 1, 1]         # 16: C2f
  - [[17, 14, 10], 1, 256, 1, 1]  # 17: Detect

训练命令与参数

基础训练

# 使用 Python API
from ultralytics import YOLO

# Start from the pretrained YOLOv8-s checkpoint
model = YOLO('yolov8s.pt')

# Launch training; the run is stored under runs/detect/train
results = model.train(
    # where the results go
    project='runs/detect',
    name='train',
    exist_ok=True,
    # what to train on and for how long
    data='dataset/data.yaml',
    epochs=100,
    # per-step workload
    imgsz=640,
    batch=16,
    device=0,  # GPU index; pass 'cpu' to train on the CPU
)

训练参数详解

# Full parameter example.
# Only arguments known to Ultralytics may be passed: unknown keys are rejected
# by its argument validation, so the former `lr_decay` entry (not an
# Ultralytics training argument) was replaced by the `cos_lr` scheduler switch.
results = model.train(
    # Data
    data='dataset/data.yaml',     # dataset configuration file
    task='detect',                # task type
    mode='train',                 # run mode

    # Training hyper-parameters
    epochs=300,                   # number of training epochs
    patience=50,                 # early-stopping patience (epochs without improvement)
    batch=16,                    # batch size
    imgsz=640,                   # input image size
    save=True,                    # save checkpoints
    save_period=-1,              # save a checkpoint every N epochs (-1 disables it)

    # Optimizer
    optimizer='SGD',             # optimizer: SGD, Adam, AdamW
    lr0=0.01,                    # initial learning rate
    lrf=0.01,                    # final learning rate as a fraction of lr0 (final = lr0 * lrf)
    momentum=0.937,              # momentum
    weight_decay=0.0005,         # weight decay
    warmup_epochs=3.0,           # warm-up epochs
    warmup_momentum=0.8,         # momentum during warm-up
    warmup_bias_lr=0.1,          # bias learning rate during warm-up

    # Data augmentation
    hsv_h=0.015,                # HSV hue jitter
    hsv_s=0.7,                  # HSV saturation jitter
    hsv_v=0.4,                  # HSV value jitter
    degrees=0.0,                # rotation
    translate=0.1,              # translation
    scale=0.5,                  # scaling
    shear=0.0,                  # shear
    perspective=0.0,            # perspective
    flipud=0.0,                 # vertical flip probability
    fliplr=0.5,                 # horizontal flip probability
    mosaic=1.0,                 # Mosaic augmentation
    mixup=0.0,                  # MixUp augmentation
    copy_paste=0.0,             # Copy-Paste augmentation

    # Miscellaneous
    amp=True,                    # automatic mixed-precision training
    fraction=1.0,               # fraction of the dataset to train on
    profile=False,               # profile ONNX/TensorRT speeds during training
    freeze=None,                # layers to freeze
    cos_lr=False,               # cosine learning-rate schedule (False keeps the default schedule)
)

多GPU训练

# Single-node multi-GPU training (8 processes, one per GPU)
# NOTE(review): torch.distributed.launch is reportedly deprecated in recent
# PyTorch releases in favor of `torchrun` — confirm before relying on it.
python -m torch.distributed.launch --nproc_per_node 8 train.py

# 或使用 Ultralytics
from ultralytics import YOLO
model = YOLO('yolov8s.pt')

# Multi-GPU training
results = model.train(
    data='dataset/data.yaml',
    epochs=300,
    device=[0, 1, 2, 3, 4, 5, 6, 7],  # eight GPUs
    batch=16,  # NOTE(review): Ultralytics documents `batch` as the total batch size split across the listed GPUs, not a per-GPU value — confirm, and raise it if 16 images per GPU is intended
)

模型验证

验证命令

# Python API 验证
from ultralytics import YOLO

model = YOLO('runs/detect/train/weights/best.pt')

# Evaluate the trained weights
metrics = model.val(
    data='dataset/data.yaml',
    split='val',         # which split to evaluate: val or test
    device=0,
    batch=16,
    imgsz=640,
    conf=0.001,          # confidence threshold
    iou=0.6,             # NMS IoU threshold
)

# Report the headline box metrics
box = metrics.box
print(f"mAP50: {box.map50:.3f}")
print(f"mAP50-95: {box.map:.3f}")
print(f"Precision: {box.mp:.3f}")
print(f"Recall: {box.mr:.3f}")

指标解读

# YOLOv8 输出的主要指标

# mAP@0.5 (mAP50)
# - IoU 阈值固定为 0.5
# - 综合考虑 precision 和 recall
# - 值越高越好，接近 1.0 最佳

# mAP@0.5:0.95 (mAP50-95)
# - 在 IoU 0.5 到 0.95 上平均
# - 更严格，更接近实际应用
# - 目标检测的标准指标

# Precision (精确率)
# - TP / (TP + FP)
# - 预测为正的目标中，实际为正的比例

# Recall (召回率)
# - TP / (TP + FN)
# - 实际为正的目标中，被正确预测的比例

每类指标

# Per-class AP
print(metrics.box.ap50)      # AP@0.5 for each evaluated class (array)
print(metrics.box.ap)        # AP@0.5:0.95 for each evaluated class (array)
print(metrics.names)         # class index -> class name (stored on `metrics`, not on `metrics.box`)

# The AP arrays follow the order of metrics.box.ap_class_index, so pair each
# value with its class name explicitly.
for pos, class_idx in enumerate(metrics.box.ap_class_index):
    print(f"{metrics.names[int(class_idx)]}: AP50={metrics.box.ap50[pos]:.3f}")

# Example output of the loop:
# cat: AP50=0.890
# dog: AP50=0.850
# person: AP50=0.920

预测与推理

基础预测

from ultralytics import YOLO

model = YOLO('runs/detect/train/weights/best.pt')

# Run inference on a single image
results = model.predict(
    source='test/image.jpg',
    conf=0.25,      # confidence threshold
    iou=0.45,       # NMS IoU threshold
    save=True,      # save the rendered result
    save_txt=True,  # save labels as text
    save_conf=True, # include confidences in the saved labels
)

# Walk through every detection of every result
for r in results:
    for box in r.boxes:
        x1, y1, x2, y2 = box.xyxy[0]  # box corners
        cls = box.cls[0]              # class index
        conf = box.conf[0]            # confidence score
        name = r.names[int(cls)]      # class name looked up by index

批量预测

# Folder source, processed with a batch size of 8
results = model.predict(source='test/images', batch=8, save=True)

# Video source, saving both the rendered output and the text labels
results = model.predict(source='video.mp4', save=True, save_txt=True)

结果可视化

# 绘制结果
from ultralytics import YOLO
import cv2

model = YOLO('best.pt')
img = cv2.imread('test.jpg')
# cv2.imread reports an unreadable/missing file by returning None rather than
# raising, so fail here with a clear message instead of deep inside the model.
if img is None:
    raise FileNotFoundError("Could not read image: test.jpg")

# Run inference
results = model(img)

# Draw the predicted boxes on a copy of the image
annotated = results[0].plot()

# Show the result until a key is pressed, then release the window
cv2.imshow('result', annotated)
cv2.waitKey(0)
cv2.destroyAllWindows()

常见问题排查

数据问题

# 问题1：找不到数据
# 错误：Dataset not found
# 解决：检查 data.yaml 中的 path 是否正确

# 问题2：类别数不匹配
# 错误：nc=3 but found 5 classes in labels
# 解决：确认标签文件中的类别索引都在 0 到 nc-1 之间，
#       且 data.yaml 中 nc 与 names 的数量、顺序和标注时使用的类别列表一致

# 问题3：标注文件不存在
# 警告：No labels found
# 解决：检查 images 和 labels 文件夹对应关系

训练问题

# 问题1：GPU 显存不足
# 解决：减小 batch size 或 imgsz

# 问题2：loss 不下降
# 解决：
# - 检查数据标注是否正确
# - 降低学习率
# - 增加训练轮数

# 问题3：mAP 为 0
# 解决：
# - 检查标注格式是否正确
# - 检查类别名称是否一致
# - 检查图像是否能正常读取

推理问题

# 问题1：检测框位置偏移
# 原因：训练和推理的输入尺寸不一致
# 解决：推理时使用与训练相同的 imgsz

# 问题2：小目标漏检
# 解决：
# - 降低 conf 阈值
# - 提高 imgsz (1280)
# - 使用更高分辨率的模型

总结

YOLOv8 自定义训练流程：

数据标注：使用 Labelme 或 Roboflow
格式转换：Labelme JSON → YOLO TXT
目录结构：images/ 和 labels/ 对应
编写配置：data.yaml 和模型配置
开始训练：model.train() 或 CLI
验证评估：model.val() 查看 mAP
推理预测：model.predict() 实际应用

关键参数建议：

参数	建议值
epochs	100-300
batch	16-32（根据显存）
imgsz	640（小目标用 1280）
conf	0.25（推理时）
optimizer	SGD（默认效果好）
lr0	0.01（SGD）

张会挽's Blog

YOLOv8 自定义数据集实战：从标注到训练

数据集标注

标注工具选择

Labelme（推荐）

ROBOFLOW

标注规范

数据集格式转换

YOLO TXT 格式

Labelme JSON → YOLO TXT

COCO JSON → YOLO TXT

YAML 配置文件编写

data.yaml

模型配置文件

训练命令与参数

基础训练

训练参数详解

多GPU训练

模型验证

验证命令

指标解读

每类指标

预测与推理

基础预测

批量预测

结果可视化

常见问题排查

数据问题

训练问题

推理问题

总结