本文深入解读 YOLOv8 的核心源码，包括模型配置文件解析、网络结构定义、前向传播流程和推理后处理，帮助理解其内部实现细节。

模型配置文件解析

YAML 配置结构

# yolov8.yaml core structure

# Parameters
nc: 80  # number of classes
scales:
  # compound scaling constants per size: [depth, width, max_channels]
  n: [0.33, 0.25, 1024]
  s: [0.33, 0.50, 1024]
  m: [0.67, 0.75, 768]
  l: [1.00, 1.00, 512]
  x: [1.00, 1.25, 512]

# Backbone
# Every entry is [from, repeats, module, args]; parse_model unpacks exactly
# these four fields, and the trailing comment is the layer index.
backbone:
  - [-1, 1, Conv, [64, 3, 2]]     # 0: Conv  (P1/2)
  - [-1, 1, Conv, [128, 3, 2]]    # 1: Conv  (P2/4)
  - [-1, 3, C2f, [128, True]]     # 2: C2f
  - [-1, 1, Conv, [256, 3, 2]]    # 3: Conv  (P3/8)
  - [-1, 6, C2f, [256, True]]     # 4: C2f
  - [-1, 1, Conv, [512, 3, 2]]    # 5: Conv  (P4/16)
  - [-1, 6, C2f, [512, True]]     # 6: C2f
  - [-1, 1, Conv, [1024, 3, 2]]   # 7: Conv  (P5/32)
  - [-1, 3, C2f, [1024, True]]    # 8: C2f
  - [-1, 1, SPPF, [1024, 5]]      # 9: SPPF

# Head
head:
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  # 10: upsample
  - [[-1, 6], 1, Concat, [1]]                   # 11: Concat with backbone P4
  - [-1, 3, C2f, [512]]                         # 12: C2f
  - [-1, 1, nn.Upsample, [None, 2, 'nearest']]  # 13: upsample
  - [[-1, 4], 1, Concat, [1]]                   # 14: Concat with backbone P3
  - [-1, 3, C2f, [256]]                         # 15: C2f (P3/8, small objects)
  - [-1, 1, Conv, [256, 3, 2]]                  # 16: Conv
  - [[-1, 12], 1, Concat, [1]]                  # 17: Concat with head P4
  - [-1, 3, C2f, [512]]                         # 18: C2f (P4/16, medium objects)
  - [-1, 1, Conv, [512, 3, 2]]                  # 19: Conv
  - [[-1, 9], 1, Concat, [1]]                   # 20: Concat with head P5
  - [-1, 3, C2f, [1024]]                        # 21: C2f (P5/32, large objects)
  - [[15, 18, 21], 1, Detect, [nc]]             # 22: Detect(P3, P4, P5)

配置解析代码

# ultralytics/nn/tasks.py

def parse_model(d, ch):
    """Build an ``nn.Sequential`` from a parsed YOLO model dictionary.

    Args:
        d: Model dictionary. ``d['backbone']`` and ``d['head']`` are lists of
            ``[from, repeats, module, args]`` entries; the optional keys
            ``depth_multiple`` and ``width_multiple`` default to 1.0.
        ch: Input channel count, either as an int or as a sequence whose last
            element is the input channel count. The argument is not mutated.

    Returns:
        ``nn.Sequential`` holding one module per entry. Each module carries
        ``i`` (layer index), ``f`` (from index), ``type`` (class name) and
        ``np`` (number of parameters).

    Note:
        Channel bookkeeping only covers the convolution-like modules listed
        below and ``nn.BatchNorm2d``. Any other module is assumed to keep the
        previous layer's channel count, so multi-input modules that change the
        channel count still need their own branch.
    """
    import math

    # Logger
    LOGGER.info(f"\n{'':>3}{'from':>20}{'n':>3}{'params':>10}  {'module':<45}{'arguments':<30}")

    depth = d.get('depth_multiple', 1.0)
    width = d.get('width_multiple', 1.0)

    # Work on a private list so the caller's argument is left untouched.
    ch = [ch] if isinstance(ch, int) else list(ch)
    layers, c2 = [], ch[-1]

    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):
        # NOTE(security): eval() executes arbitrary expressions; only feed this
        # function configuration files from a trusted source.
        m = eval(m) if isinstance(m, str) else m

        # Depth scaling: only layers declared with more than one repeat scale.
        n = max(round(n * depth), 1) if n > 1 else n

        # Width scaling for modules whose first two arguments are (c1, c2).
        if m in (Conv, C2f, Bottleneck, SPPF, SPP, DWConv, DWConvTranspose2d, ConvTranspose):
            c1, c2 = ch[f], args[0]
            c2 = max(math.ceil(c2 * width), 64)
            args = [c1, c2, *args[1:]]

        elif m is nn.BatchNorm2d:
            args = [ch[f]]

        # Instantiate; keep the class in `m` so its name stays available.
        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)

        m_.i, m_.f, m_.type = i, f, getattr(m, '__name__', str(m))
        m_.np = sum(p.numel() for p in m_.parameters())
        layers.append(m_)

        # After the first layer, drop the input entry so that ch[k] is the
        # output channel count of layer k and absolute `from` indices line up.
        if i == 0:
            ch = []
        ch.append(c2)

    return nn.Sequential(*layers)

网络结构定义

基础模块

Conv 卷积模块

# ultralytics/nn/modules.py

class Conv(nn.Module):
    """Convolution block: Conv2d (no bias) -> BatchNorm2d -> activation.

    Args:
        c1: Input channels.
        c2: Output channels.
        k: Kernel size.
        s: Stride.
        p: Padding; forwarded to ``autopad`` together with ``k`` and ``d``.
        g: Convolution groups.
        d: Dilation.
        act: ``True`` selects SiLU, an ``nn.Module`` instance is used as given,
            anything else disables the activation (identity).
    """
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        super().__init__()
        padding = autopad(k, p, d)
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        # Resolve the activation: default SiLU, custom module, or none.
        if act is True:
            activation = nn.SiLU()
        elif isinstance(act, nn.Module):
            activation = act
        else:
            activation = nn.Identity()
        self.act = activation

    def forward(self, x):
        """Apply convolution, batch normalization, then the activation."""
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

Bottleneck 残差块

class Bottleneck(nn.Module):
    """Two stacked ``Conv`` blocks with an optional residual connection.

    Args:
        c1: Input channels.
        c2: Output channels.
        shortcut: Request the residual add; it is only used when ``c1 == c2``.
        g: Groups for the second convolution.
        k: Kernel sizes of the first and second convolution.
        e: Ratio of hidden channels to ``c2``.
    """
    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
        super().__init__()
        hidden = int(c2 * e)
        first_kernel, second_kernel = k[0], k[1]
        self.cv1 = Conv(c1, hidden, first_kernel, 1)
        self.cv2 = Conv(hidden, c2, second_kernel, 1, g=g)
        # The input can only be added back when the channel counts match.
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Return ``cv2(cv1(x))``, plus ``x`` when the shortcut is active."""
        out = self.cv2(self.cv1(x))
        if self.add:
            return x + out
        return out

C2f 模块（核心创新）

class C2f(nn.Module):
    """YOLOv8 feature block that concatenates every Bottleneck's output.

    Args:
        c1: Input channels.
        c2: Output channels.
        n: Number of chained Bottleneck blocks.
        shortcut: Passed to each Bottleneck as its residual flag.
        g: Groups passed to each Bottleneck.
        e: Ratio of hidden channels to ``c2``.
    """
    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
        super().__init__()
        # Hidden width follows the expansion ratio `e` (previously hard-coded
        # as c2 * 2, which ignored `e` and quadrupled the default width).
        self.c = int(c2 * e)
        self.cv1 = Conv(c1, 2 * self.c, 1, 1)  # produces the two halves split in forward()
        self.cv2 = Conv((2 + n) * self.c, c2, 1)  # fuses the 2 halves + n Bottleneck outputs
        self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=(3, 3), e=1.0) for _ in range(n))

    def forward(self, x):
        """Split ``cv1(x)`` in two, chain the Bottlenecks, concatenate and fuse."""
        y = list(self.cv1(x).split((self.c, self.c), 1))
        # The generator is consumed lazily, so y[-1] is always the output of
        # the previous Bottleneck: the blocks run as a chain, not in parallel.
        y.extend(m(y[-1]) for m in self.m)
        return self.cv2(torch.cat(y, 1))

C2f vs C3 对比

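# C3 (YOLOv5):
# - 只把最后一个 Bottleneck 的输出与旁路分支拼接
# - 中间各个 Bottleneck 的特征不会直接进入输出

# C2f (YOLOv8):
# - 先把 cv1 的输出拆成两份，再把每个 Bottleneck 的输出依次追加
# - 最终拼接 (2 + n) 份特征，中间层特征全部保留
# - 每个 Bottleneck 都有直达输出的路径，梯度流更顺畅

# 效果：
# C2f 拼接的通道更多、计算量略有增加，但梯度传递明显更充分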

SPPF 模块

class SPPF(nn.Module):
    """Spatial pyramid pooling (fast): one max-pool applied three times in series.

    Args:
        c1: Input channels.
        c2: Output channels.
        k: Max-pool kernel size; stride is 1 and padding is ``k // 2``.
    """
    def __init__(self, c1, c2, k=5):
        super().__init__()
        hidden = c1 // 2
        self.cv1 = Conv(c1, hidden, 1, 1)
        # Four feature maps (input + three pooled) are concatenated before cv2.
        self.cv2 = Conv(hidden * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Concatenate ``cv1(x)`` with three successive poolings and fuse with cv2."""
        pyramid = [self.cv1(x)]
        for _ in range(3):
            pyramid.append(self.m(pyramid[-1]))
        return self.cv2(torch.cat(pyramid, 1))

前向传播流程

整体流程

# ultralytics/nn/tasks.py

class DetectionModel(BaseModel):
    """Detection model whose forward pass walks ``self.model`` layer by layer."""
    def __init__(self, cfg='yolov8.yaml', ch=3, nc=None, verbose=True):
        super().__init__(cfg, ch, nc, verbose)

    def forward(self, x, *args, **kwargs):
        """Run a single forward pass.

        Training and inference take the same path here: the output is whatever
        the last layer returns. No NMS is applied inside this method; extra
        positional and keyword arguments are accepted and ignored.
        """
        return self._forward_once(x)

    def _forward_once(self, x):
        """Feed ``x`` through every layer, routing inputs by each layer's ``f``.

        Args:
            x: Input to the first layer.

        Returns:
            Output of the last layer in ``self.model``.
        """
        y = []  # per-layer outputs; None for layers not listed in self.save
        for m in self.model:
            # f == -1 means "use the previous layer's output" (the current x).
            # An int picks one saved output; a list gathers several, with -1
            # standing for the current x.
            if m.f != -1:
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]

            x = m(x)
            # Keep only the outputs that later layers read.
            y.append(x if m.i in self.save else None)

        return x

训练模式 Forward

# 训练模式下返回原始输出
# 每个 Detect 头的输出

def forward_train(x):
    """Illustrative training-mode pass: backbone -> neck -> detect head.

    Shapes below are the article's example for a 640x640 input and are not
    checked by this function.

    Args:
        x: Input batch, e.g. [batch, 3, 640, 640].

    Returns:
        The detect head's raw output, one map per scale:
            [batch, nc+4*reg_max, 80, 80]  (small objects)
            [batch, nc+4*reg_max, 40, 40]  (medium objects)
            [batch, nc+4*reg_max, 20, 20]  (large objects)
    """
    # Backbone features, e.g. [batch, 256, 80, 80], [batch, 512, 40, 40],
    # [batch, 512, 20, 20].
    features = backbone(x)

    # Neck: multi-scale feature fusion.
    fused = neck(features)

    return detect_head(fused)

推理模式 Forward

# 推理模式下，Detect 包含后处理

class Detect(nn.Module):
    """Detection head with one box branch and one class branch per scale.

    Args:
        nc: Number of classes.
        ch: Input channel count of each detection scale.
    """
    def __init__(self, nc=80, ch=()):
        super().__init__()
        self.nc = nc  # number of classes
        self.nl = len(ch)  # number of detection layers
        self.reg_max = 16  # DFL bins per box side
        self.no = nc + self.reg_max * 4  # output channels per location
        self.stride = torch.zeros(self.nl)  # filled in later by the model builder

        # forward() needs one box branch (cv2) and one class branch (cv3) per
        # scale; without them it raised AttributeError. Hidden widths follow
        # the Ultralytics reference head.
        c2 = max(16, ch[0] // 4, self.reg_max * 4) if ch else 0
        c3 = max(ch[0], min(self.nc, 100)) if ch else 0
        self.cv2 = nn.ModuleList(
            nn.Sequential(Conv(c, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1))
            for c in ch
        )
        self.cv3 = nn.ModuleList(
            nn.Sequential(Conv(c, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1))
            for c in ch
        )

    def forward(self, x):
        """Compute raw (pre-NMS) predictions for every scale.

        Args:
            x: List with one feature map per detection layer; the list is
                updated in place.

        Returns:
            In training mode, the list of per-scale maps with ``self.no``
            channels each. Otherwise a single tensor in which every map is
            flattened to ``[batch, self.no, -1]`` and concatenated on the last
            axis.
        """
        for i in range(self.nl):
            # Channel order: 4 * reg_max box bins first, then nc class scores.
            x[i] = torch.cat([self.cv2[i](x[i]), self.cv3[i](x[i])], 1)

        if self.training:
            return x

        return torch.cat([xi.view(xi.shape[0], self.no, -1) for xi in x], 2)

NMS 后处理

NMS 实现

# ultralytics/utils/ops.py

def non_max_suppression(
    prediction,
    conf_thres=0.25,
    iou_thres=0.45,
    classes=None,
    agnostic=False,
    multi_label=False,
    max_det=300,
):
    """Run non-maximum suppression on each image of a batch.

    Args:
        prediction: Tensor indexed as ``[batch, num_boxes, 5 + nc]``; the last
            axis is read as ``(cx, cy, w, h, conf, class scores...)``.
        conf_thres: Boxes whose ``conf`` is not above this value are dropped.
        iou_thres: IoU threshold passed to ``torchvision.ops.nms``.
        classes: Optional iterable of class indices to keep.
        agnostic: If True, boxes of different classes suppress each other.
        multi_label: Accepted for interface compatibility; this simplified
            version always keeps only the best class per box.
        max_det: Maximum number of boxes kept per image.

    Returns:
        List with one ``(n, 6)`` tensor per image whose columns are
        ``(x1, y1, x2, y2, conf, cls)``; images without detections get a
        ``(0, 6)`` tensor.
    """
    bs = prediction.shape[0]  # batch size
    candidates = prediction[..., 4] > conf_thres

    # Offset added per class so that one NMS call handles all classes without
    # boxes of different classes overlapping.
    max_wh = 4096

    # One independent tensor per image ([t] * bs would alias a single tensor).
    output = [torch.zeros((0, 6), device=prediction.device) for _ in range(bs)]

    class_filter = None
    if classes is not None:
        class_filter = torch.as_tensor(classes, device=prediction.device).view(1, -1)

    for img_idx in range(bs):
        x = prediction[img_idx][candidates[img_idx]]
        if not x.shape[0]:
            continue

        box = xywh2xyxy(x[:, :4])  # center format -> corner format
        conf = x[:, 4:5]
        cls = x[:, 5:].argmax(1, keepdim=True)  # best class index per box

        if class_filter is not None:
            wanted = (cls == class_filter).any(1)
            box, conf, cls = box[wanted], conf[wanted], cls[wanted]
            if not box.shape[0]:
                continue

        # Shift boxes by class index for NMS only; the returned boxes are the
        # unshifted ones.
        offsets = cls.to(box.dtype) * (0 if agnostic else max_wh)
        keep = torchvision.ops.nms(box + offsets, conf.squeeze(1), iou_thres)
        keep = keep[:max_det]

        output[img_idx] = torch.cat((box[keep], conf[keep], cls[keep].to(box.dtype)), 1)

    return output

坐标转换

# ultralytics/utils/ops.py

def xywh2xyxy(x):
    """Convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2).

    Args:
        x: ``torch.Tensor`` or array-like whose last axis holds
            ``(cx, cy, w, h)``.

    Returns:
        A copy of ``x`` (cloned tensor or ``np.copy``) with the first four
        entries of the last axis replaced by the corner coordinates; the input
        is not modified.
    """
    if isinstance(x, torch.Tensor):
        corners = x.clone()
    else:
        corners = np.copy(x)

    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    corners[..., 0] = x[..., 0] - half_w  # x1
    corners[..., 1] = x[..., 1] - half_h  # y1
    corners[..., 2] = x[..., 0] + half_w  # x2
    corners[..., 3] = x[..., 1] + half_h  # y2
    return corners

后处理完整流程

def postprocess(preds, img_size, orig_imgs):
    """Rescale predicted boxes, then run NMS with fixed thresholds.

    Args:
        preds: Prediction tensor; the article describes it as
            [batch, num_boxes, 4+1+nc]. Its first four columns are overwritten
            in place.
        img_size: Passed to ``scale_coords`` as the first argument.
        orig_imgs: Object whose ``.shape`` is passed to ``scale_coords``.

    Returns:
        The result of ``non_max_suppression`` on the rescaled predictions.
    """
    # Step 1: map box coordinates back via scale_coords.
    rescaled = scale_coords(img_size, preds[..., :4], orig_imgs.shape)
    preds[..., :4] = rescaled

    # Step 2: suppress overlapping boxes.
    return non_max_suppression(
        preds,
        conf_thres=0.25,
        iou_thres=0.45,
        max_det=300,
    )

推理结果解析

结果结构

# ultralytics/engine/results.py

class Results:
    """Container for the outputs of one inference call on one image.

    Args:
        orig_img: Original image; its ``.shape`` is read only when ``boxes``
            or ``masks`` is given.
        path: Path of the source image.
        names: Class names lookup.
        boxes: Raw boxes, wrapped in ``Boxes`` when not None.
        masks: Raw masks, wrapped in ``Masks`` when not None.
        probs: Stored as given.
    """
    def __init__(self, orig_img, path, names, boxes=None, masks=None, probs=None):
        self.orig_img = orig_img
        self.path = path
        self.names = names

        # Optional outputs stay None when the model did not produce them.
        self.boxes = None if boxes is None else Boxes(boxes, orig_img.shape)
        self.masks = None if masks is None else Masks(masks, orig_img.shape)
        self.probs = probs

Boxes 对象

class Boxes:
    """Read-only views over a detection array.

    Args:
        boxes: 2-D tensor or array whose columns are
            ``[x1, y1, x2, y2, conf, cls]``.
        orig_shape: Shape of the original image; stored as given.
    """
    def __init__(self, boxes, orig_shape):
        self.boxes = boxes  # [x1, y1, x2, y2, conf, cls]
        self.orig_shape = orig_shape

    @property
    def xyxy(self):
        """Return the boxes as (x1, y1, x2, y2)."""
        return self.boxes[:, :4]

    @property
    def xywh(self):
        """Return the boxes as (cx, cy, w, h) without touching ``self.boxes``."""
        corners = self.boxes[:, :4]
        # Slicing yields a view, so writing into it would overwrite the stored
        # corner coordinates; work on a copy (clone for tensors, copy for arrays).
        out = corners.clone() if hasattr(corners, "clone") else corners.copy()
        out[:, 2] = corners[:, 2] - corners[:, 0]  # w
        out[:, 3] = corners[:, 3] - corners[:, 1]  # h
        out[:, 0] = corners[:, 0] + out[:, 2] / 2  # cx
        out[:, 1] = corners[:, 1] + out[:, 3] / 2  # cy
        return out

    @property
    def conf(self):
        """Return the confidence column."""
        return self.boxes[:, 4]

    @property
    def cls(self):
        """Return the class column."""
        return self.boxes[:, 5]

使用示例

from ultralytics import YOLO

model = YOLO('best.pt')
results = model('test.jpg')

# Iterate over the per-image results
for r in results:
    boxes = r.boxes  # detections of this image

    # Option 1: whole tensors (append .cpu().numpy() for numpy arrays)
    print(boxes.xyxy)   # [x1, y1, x2, y2]
    print(boxes.conf)   # confidences
    print(boxes.cls)    # class indices

    # Option 2: a single box; guard against images without detections
    if len(boxes):
        print(boxes.xyxy[0])

    # Option 3: plain Python scalars
    for box in boxes:
        # box.xyxy holds one row (shape (1, 4)); take row 0 before unpacking
        x1, y1, x2, y2 = box.xyxy[0].tolist()
        conf = float(box.conf)
        cls = int(box.cls)
        name = r.names[cls]
        print(f"{name}: {conf:.2f} at [{x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f}]")

关键代码片段分析

DFL (Distribution Focal Loss)

# YOLOv8 使用 DFL 将回归转为分类

class DistributionFocalLoss(nn.Module):
    """Distribution Focal Loss: box regression posed as classification over bins.

    Args:
        reg_max: Number of discrete bins per box side.
    """
    def __init__(self, reg_max):
        super().__init__()
        self.reg_max = reg_max

    def forward(self, pred_dist, target):
        """Compute the mean DFL over all box sides and locations.

        Args:
            pred_dist: Logits of shape ``[N, 4*reg_max, H*W]``, laid out as
                4 box sides times ``reg_max`` bins.
            target: Continuous bin positions of shape ``[N, 4, H*W]``.

        Returns:
            Scalar tensor: the mean over every side and location of
            ``CE(left) * w_left + CE(right) * w_right``.
        """
        # Keep the target strictly below the last bin so that the right
        # neighbour (tl + 1) is always a valid bin index.
        target = target.clamp(0, self.reg_max - 1 - 0.01)

        tl = target.long()  # left neighbouring bin
        tr = tl + 1         # right neighbouring bin
        wl = tr - target    # the closer the target is to a bin, the larger its weight
        wr = 1 - wl

        n, _, hw = pred_dist.shape
        # cross_entropy expects the class axis second: [N, reg_max, 4, H*W].
        logits = pred_dist.reshape(n, 4, self.reg_max, hw).permute(0, 2, 1, 3).contiguous()

        # loss = -(w_left * log p_left + w_right * log p_right)
        ce = nn.functional.cross_entropy
        loss = ce(logits, tl, reduction="none") * wl + ce(logits, tr, reduction="none") * wr
        return loss.mean()

训练时的损失计算

# ultralytics/models/yolo/detect/train.py

class DetectionTrainer(Trainer):
    """Sketch of the detection trainer's target building and loss computation."""
    def build_targets(self, targets, batch_size):
        """Build training targets (placeholder: the body does nothing yet)."""
        # Intended behaviour per the article: generate positive samples for
        # every ground-truth box using TAL (Task Alignment Learning).
        pass

    def compute_loss(self, preds, batch):
        """Compute the total loss as the gain-weighted sum of three terms.

        Args:
            preds: Model predictions; when a tuple is given only its first
                element is used.
            batch: Not read in the visible code.

        Returns:
            ``loss.sum() * self.loss_gain`` where ``loss`` holds the box,
            classification and DFL terms in that order.
        """
        loss = torch.zeros(3, device=self.device)

        # Unwrap tuple-style predictions.
        pred = preds[0] if isinstance(preds, tuple) else preds

        # NOTE(review): `target` is never assigned in this method, so the three
        # lines below raise NameError as written; presumably it should come
        # from build_targets(...) or `batch` (confirm).
        # NOTE(review): the box term uses self.bce; verify whether an IoU-based
        # loss was intended here.
        # Box Loss
        loss[0] = self.bce(pred.box, target.bbox)

        # Classification Loss
        loss[1] = self.bce(pred.cls, target.cls)

        # DFL Loss
        loss[2] = self.dfl(pred.dist, target.dist)

        return loss.sum() * self.loss_gain

总结

YOLOv8 源码核心要点：

模块	位置	说明
Conv	modules.py	Conv+BN+SiLU
C2f	modules.py	核心特征融合模块
SPPF	modules.py	空间金字塔池化
Detect	modules.py	检测头
parse_model	tasks.py	YAML 配置解析
forward	tasks.py	整体前向
NMS	ops.py	非极大值抑制
Results	results.py	结果封装

推理流程：

输入预处理：Letterbox 缩放 + 归一化
Backbone：特征提取 + 下采样
Neck：多尺度特征融合
Head：分类 + 回归
后处理：坐标转换 + NMS
输出：检测框 + 置信度 + 类别

张会挽's Blog

YOLOv8 源码解读：模型定义与推理流程

模型配置文件解析

YAML 配置结构

配置解析代码

网络结构定义

基础模块

Conv 卷积模块

Bottleneck 残差块

C2f 模块（核心创新）

C2f vs C3 对比

SPPF 模块

前向传播流程

整体流程

训练模式 Forward

推理模式 Forward

NMS 后处理

NMS 实现

坐标转换

后处理完整流程

推理结果解析

结果结构

Boxes 对象

使用示例

关键代码片段分析

DFL (Distribution Focal Loss)

训练时的损失计算

总结