三问
你是否真实调研了?
是否掺杂了一些理想的/乌托邦的想法?
是否掺杂了一夜暴富的想法?
你是否真实调研了?
是否掺杂了一些理想的/乌托邦的想法?
是否掺杂了一夜暴富的想法?
import torch
import torch.nn as nn
class LeakyBlock(nn.Module):
    """A 2-D convolution immediately followed by a Leaky ReLU (slope 0.1).

    Note carried over from the original author: the paper specifies that
    every layer except the last one uses Leaky ReLU with leaky=0.1.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Convolution kernel size, forwarded to ``nn.Conv2d``.
        stride: Convolution stride, forwarded to ``nn.Conv2d``.
        padding: Zero padding on each side, forwarded to ``nn.Conv2d``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        # Attribute names are part of the state_dict keys ("conv.weight",
        # "conv.bias"), so they must stay stable for checkpoint loading.
        self.conv = nn.Conv2d(
            in_channels, out_channels, kernel_size, stride, padding, bias=True
        )
        self.leaky = nn.LeakyReLU(0.1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the convolution and then the Leaky ReLU to ``x``."""
        return self.leaky(self.conv(x))
class YOLOv1(nn.Module):
    """YOLOv1-style detector: 24 convolutional layers plus a 2-layer FC head.

    The backbone halves the spatial resolution six times (one stride-2
    convolution, four 2x2 max-pools, one more stride-2 convolution), i.e. an
    overall reduction of 64x. The FC head is sized for a backbone output of
    ``grid_size x grid_size x 1024``, which is what a square input of side
    ``64 * grid_size`` produces (448 for the default ``grid_size=7``).

    Args:
        num_boxes: Boxes predicted per grid cell (stored as ``self.B``).
        num_classes: Number of class scores per grid cell (``self.C``).
        grid_size: Side length of the output grid (``self.S``). Defaults to
            7, which reproduces the previously hard-coded behavior.

    Raises:
        ValueError: If ``grid_size`` is smaller than 1.
    """

    def __init__(self, num_boxes=2, num_classes=20, grid_size=7):
        super().__init__()
        if grid_size < 1:
            raise ValueError(f"grid_size must be >= 1, got {grid_size}")
        self.B = num_boxes
        self.C = num_classes
        self.S = grid_size

        # 1. Convolutional part: the 24 conv layers and max-pools, intended
        #    to follow Figure 3 of the paper. Shape comments below assume a
        #    448x448 input (the grid_size=7 default) and are written H x W x C.
        #    The order of entries fixes the state_dict indices; do not reorder.
        self.features = nn.Sequential(
            # Conv layer 1: 448x448x3 -> 224x224x64, pooled to 112x112x64
            LeakyBlock(3, 64, kernel_size=7, stride=2, padding=3),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Conv layer 2: 112x112x64 -> 112x112x192, pooled to 56x56x192
            LeakyBlock(64, 192, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Conv layers 3-6: 56x56x192 -> 56x56x512, pooled to 28x28x512
            LeakyBlock(192, 128, kernel_size=1, stride=1, padding=0),
            LeakyBlock(128, 256, kernel_size=3, stride=1, padding=1),
            LeakyBlock(256, 256, kernel_size=1, stride=1, padding=0),
            LeakyBlock(256, 512, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Conv layers 7-16: the 1x1 + 3x3 pair repeated 4 times, then one
            # wider pair; 28x28x512 -> 28x28x1024, pooled to 14x14x1024
            LeakyBlock(512, 256, kernel_size=1, stride=1, padding=0),
            LeakyBlock(256, 512, kernel_size=3, stride=1, padding=1),
            LeakyBlock(512, 256, kernel_size=1, stride=1, padding=0),
            LeakyBlock(256, 512, kernel_size=3, stride=1, padding=1),
            LeakyBlock(512, 256, kernel_size=1, stride=1, padding=0),
            LeakyBlock(256, 512, kernel_size=3, stride=1, padding=1),
            LeakyBlock(512, 256, kernel_size=1, stride=1, padding=0),
            LeakyBlock(256, 512, kernel_size=3, stride=1, padding=1),
            LeakyBlock(512, 512, kernel_size=1, stride=1, padding=0),
            LeakyBlock(512, 1024, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Conv layers 17-22: the 1x1 + 3x3 pair repeated twice, then two
            # 3x3 convs; 14x14x1024 -> 7x7x1024
            LeakyBlock(1024, 512, kernel_size=1, stride=1, padding=0),
            LeakyBlock(512, 1024, kernel_size=3, stride=1, padding=1),
            LeakyBlock(1024, 512, kernel_size=1, stride=1, padding=0),
            LeakyBlock(512, 1024, kernel_size=3, stride=1, padding=1),
            LeakyBlock(1024, 1024, kernel_size=3, stride=1, padding=1),
            # Stride 2: halves the spatial size once more (14x14 -> 7x7)
            LeakyBlock(1024, 1024, kernel_size=3, stride=2, padding=1),
            # Conv layers 23-24: 7x7x1024 -> 7x7x1024
            LeakyBlock(1024, 1024, kernel_size=3, stride=1, padding=1),
            LeakyBlock(1024, 1024, kernel_size=3, stride=1, padding=1),
        )

        cell_depth = self.B * 5 + self.C
        # 2. Fully connected regression head. The first Linear layer holds by
        #    far the most weights in the model (50176 x 4096 at the default
        #    grid size), and this is where Dropout is applied.
        self.fc_head = nn.Sequential(
            # Flatten S x S x 1024 feature maps (50176 values when S = 7)
            nn.Flatten(),
            # First FC layer: S*S*1024 -> 4096
            nn.Linear(self.S * self.S * 1024, 4096),
            nn.LeakyReLU(0.1),
            # Dropout with rate 0.5 to reduce overfitting
            nn.Dropout(p=0.5),
            # Second FC layer: 4096 -> S*S*(B*5+C) (1470 = 7*7*30 by default).
            # Linear activation: no Sigmoid/Softmax is applied afterwards.
            nn.Linear(4096, self.S * self.S * cell_depth),
        )

    def forward(self, x):
        """Run the detector.

        Args:
            x: Image batch of shape ``[batch, 3, H, W]``; with the default
                ``grid_size=7`` the head expects ``H = W = 448``.

        Returns:
            Tensor of shape ``[batch, S, S, B * 5 + C]``
            (``[batch, 7, 7, 30]`` with the default arguments).
        """
        x = self.features(x)  # -> [batch, 1024, S, S]
        x = self.fc_head(x)   # -> [batch, S * S * (B * 5 + C)]
        # Reshape the flat prediction vector into the per-cell grid. The batch
        # dimension is passed explicitly (rather than -1) so that an empty
        # batch does not make the inferred dimension ambiguous.
        return x.view(x.size(0), self.S, self.S, self.B * 5 + self.C)
# --------- Sanity-check the network output ---------
if __name__ == "__main__":
    model = YOLOv1()
    # Inference-only smoke test: disable Dropout and skip autograd bookkeeping.
    model.eval()
    # Simulate a single standard 448x448 YOLO input image.
    dummy_input = torch.randn(1, 3, 448, 448)
    with torch.no_grad():
        output = model(dummy_input)
    print("模型输入形状:", dummy_input.shape)
    print("模型直出张量形状:", output.shape)  # expected: [1, 7, 7, 30]
    # NOTE (separate remark that was fused onto the line above; it appears to
    # be about mosaic-style augmentation): when images are stitched into one
    # new image, the bounding-box positions of every sub-image must be
    # recomputed to fit the coordinate system of the stitched image.
「高盛」20260430:2026 中国经济展望——政治局会议后的再校准:就业和收入恶化拖累消费复苏,服务消费疲软又拖累服务业就业增长;完成全年增长目标将依赖出口和投资,维持经济增速预期 4.7%|外资研报