Gloria 发布的文章

你是否真实调研了?
是否掺杂了一些理想的/乌托邦的想法?
是否掺杂了一夜暴富的想法?

import torch
import torch.nn as nn

class LeakyBlock(nn.Module):
    """A 2-D convolution immediately followed by a Leaky ReLU (slope 0.1).

    Author's note carried over from the original docstring: the paper
    specifies Leaky ReLU with a 0.1 slope for every layer except the last.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Number of filters, i.e. channel count of the output.
        kernel_size: Convolution kernel size, forwarded to ``nn.Conv2d``.
        stride: Convolution stride, forwarded to ``nn.Conv2d``.
        padding: Convolution padding, forwarded to ``nn.Conv2d``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        # Attribute names are kept as ``conv`` / ``leaky`` so that
        # state_dict keys (``conv.weight``, ``conv.bias``) do not change.
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=True,
        )
        self.leaky = nn.LeakyReLU(negative_slope=0.1)

    def forward(self, x):
        """Apply the convolution, then the Leaky ReLU, and return the result."""
        conv_out = self.conv(x)
        return self.leaky(conv_out)


class YOLOv1(nn.Module):
    """YOLOv1 detector: a 24-layer convolutional backbone plus a 2-layer FC head.

    The layer list follows the original author's stated reference (Figure 3
    of the YOLO paper). Shape comments inside ``__init__`` assume a
    448x448 RGB input.

    Args:
        num_boxes: Boxes predicted per grid cell (B). Each box contributes
            5 values to the per-cell prediction vector.
        num_classes: Class scores per grid cell (C).
        grid_size: Side length S of the output grid. Defaults to 7, which
            reproduces the previously hard-coded 7x7 output.

    Shape:
        Input: ``[N, 3, H, W]``. The first fully connected layer expects a
        flattened 7x7x1024 feature map (what the backbone yields for
        448x448); inputs producing a different feature-map size raise
        inside that ``nn.Linear``.
        Output: ``[N, S, S, B * 5 + C]`` (``[N, 7, 7, 30]`` by default).
    """

    def __init__(self, num_boxes=2, num_classes=20, grid_size=7):
        super(YOLOv1, self).__init__()
        self.B = num_boxes
        self.C = num_classes
        self.S = grid_size

        # 1. Convolutional backbone: 24 conv layers interleaved with max pooling.
        self.features = nn.Sequential(
            # Conv layer 1: 448x448x3 -> 224x224x64, pooled to 112x112x64
            LeakyBlock(3, 64, kernel_size=7, stride=2, padding=3),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Conv layer 2: 112x112x64 -> 112x112x192, pooled to 56x56x192
            LeakyBlock(64, 192, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Conv layers 3-6: 56x56x192 -> 56x56x512, pooled to 28x28x512
            LeakyBlock(192, 128, kernel_size=1, stride=1, padding=0),
            LeakyBlock(128, 256, kernel_size=3, stride=1, padding=1),
            LeakyBlock(256, 256, kernel_size=1, stride=1, padding=0),
            LeakyBlock(256, 512, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Conv layers 7-16: four (1x1 reduce, 3x3 expand) pairs, then a
            # 1x1x512 / 3x3x1024 pair; 28x28x512 -> 28x28x1024, pooled to 14x14x1024
            LeakyBlock(512, 256, kernel_size=1, stride=1, padding=0),
            LeakyBlock(256, 512, kernel_size=3, stride=1, padding=1),
            LeakyBlock(512, 256, kernel_size=1, stride=1, padding=0),
            LeakyBlock(256, 512, kernel_size=3, stride=1, padding=1),
            LeakyBlock(512, 256, kernel_size=1, stride=1, padding=0),
            LeakyBlock(256, 512, kernel_size=3, stride=1, padding=1),
            LeakyBlock(512, 256, kernel_size=1, stride=1, padding=0),
            LeakyBlock(256, 512, kernel_size=3, stride=1, padding=1),
            LeakyBlock(512, 512, kernel_size=1, stride=1, padding=0),
            LeakyBlock(512, 1024, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Conv layers 17-22: two (1x1 reduce, 3x3 expand) pairs, a 3x3
            # conv, then a stride-2 3x3 conv; 14x14x1024 -> 7x7x1024
            LeakyBlock(1024, 512, kernel_size=1, stride=1, padding=0),
            LeakyBlock(512, 1024, kernel_size=3, stride=1, padding=1),
            LeakyBlock(1024, 512, kernel_size=1, stride=1, padding=0),
            LeakyBlock(512, 1024, kernel_size=3, stride=1, padding=1),
            LeakyBlock(1024, 1024, kernel_size=3, stride=1, padding=1),
            LeakyBlock(1024, 1024, kernel_size=3, stride=2, padding=1),  # stride 2 halves the spatial size

            # Conv layers 23-24: 7x7x1024 -> 7x7x1024
            LeakyBlock(1024, 1024, kernel_size=3, stride=1, padding=1),
            LeakyBlock(1024, 1024, kernel_size=3, stride=1, padding=1),
        )

        # Values predicted per grid cell: 5 per box plus one score per class.
        cell_depth = self.B * 5 + self.C

        # 2. Fully connected regression head. The first Linear layer holds
        #    most of the model's parameters (50176 x 4096 weights).
        self.fc_head = nn.Sequential(
            nn.Flatten(),  # 7x7x1024 feature map -> 50176-dim vector

            # First FC layer: 50176 -> 4096. The 7*7 here is the backbone's
            # feature-map size, independent of the output grid size S.
            nn.Linear(7 * 7 * 1024, 4096),
            nn.LeakyReLU(0.1),

            # Dropout (p=0.5) after the first FC layer, for regularization.
            nn.Dropout(p=0.5),

            # Second FC layer: 4096 -> S*S*(B*5+C), 1470 with the defaults.
            # No activation follows, so the output is a plain linear map.
            nn.Linear(4096, self.S * self.S * cell_depth),
        )

    def forward(self, x):
        """Run the network and return predictions shaped ``[N, S, S, B*5+C]``."""
        x = self.features(x)   # [N, 1024, 7, 7] for a 448x448 input
        x = self.fc_head(x)    # [N, S*S*(B*5+C)]

        # Reshape the flat prediction vector into the per-cell grid layout.
        return x.view(-1, self.S, self.S, self.B * 5 + self.C)

# --------- Sanity-check the network output shape ---------
if __name__ == "__main__":
    model = YOLOv1()
    # This is only a shape check: put Dropout into inference behavior and
    # skip autograd bookkeeping so no activations are retained for backward.
    model.eval()

    # Simulate one standard 448x448 RGB input image.
    dummy_input = torch.randn(1, 3, 448, 448)
    with torch.no_grad():
        output = model(dummy_input)

    print("模型输入形状:", dummy_input.shape)
    print("模型直出张量形状:", output.shape)  # expected: [1, 7, 7, 30]

「高盛」20260430:2026中国经济展望,政治局会议后的再校准,就业和收入恶化拖累消费复苏,服务消费疲软又拖累服务业就业增长,完成全年增长目标将依赖出口和投资,维持经济增速预期4.7%|外资研报