What does ResNet-152 Transfer mean?

Open ziweizhao1993 opened this issue 7 years ago • 1 comments

Hi Gunnar,

I was just wondering what "ResNet-152 Transfer" stands for. The paper says: "It uses the Charades model to predict the activities in the third person video, and then uses those labels as supervision for the first-person video."

Does it mean you run the Charades model on CharadesEgo_only3rd videos, then warp the labels to their corresponding 1st person video, and use the labels as supervision to finetune the original Charades model?

Thank you.

Best, Ziwei

Mar 01 '19 00:03 ziweizhao1993

Yes, exactly. I'm attaching part of the code that was used below. Hope that helps.

Also, don't be alarmed if you start noticing unexpected numbers. I'm in the process of clarifying some problems with the numbers presented in the CharadesEgo paper: it looks like there was a mix-up between using Charades_v0 and Charades_v1 for testing. See https://github.com/gsig/actor-observer/issues/7 for discussion.

import torch
from torch.autograd import Variable
import torch.nn as nn
from models.utils import load_sub_architecture


class CopyBaselineModel(nn.Module):
    """Pair a teacher network with a student network for label copying.

    The student is run on ``x`` and the teacher on a detached ``y``; both
    raw outputs are returned so that a separate criterion can compare them.
    ``basenet`` is a second attribute name for the student module.
    """

    def __init__(self, teacher, student):
        super(CopyBaselineModel, self).__init__()
        # Registration order (teacher, student, basenet) is kept as-is.
        self.teacher = teacher
        self.student = student
        self.basenet = self.student

    def forward(self, x, y, z):
        """Run the student on ``x`` and the teacher on ``y``.

        Assumed inputs (per the original author's note):
            x: first person positive
            y: third person
            z: first person negative (not used by this model)

        Returns a 4-tuple ``(ones, ones, teacher_out, student_out)`` where
        each ``ones`` is a separate all-ones vector with one entry per row of
        the student output; they fill the first two output slots.
        """
        student_out = self.student(x)
        # Cut the teacher's input off from whatever graph produced ``y``.
        teacher_out = self.teacher(y.detach())
        batch_size = student_out.shape[0]
        placeholder_a = Variable(torch.ones(batch_size))
        placeholder_b = Variable(torch.ones(batch_size))
        return placeholder_a, placeholder_b, teacher_out, student_out


class CopyBaseline(CopyBaselineModel):
    """CopyBaselineModel whose two networks are both built from ``args``."""

    def __init__(self, args):
        # Two separate load_sub_architecture(args) calls: the first result is
        # used as the teacher, the second as the student.
        teacher_net = load_sub_architecture(args)
        student_net = load_sub_architecture(args)
        super(CopyBaseline, self).__init__(teacher_net, student_net)

import torch
import torch.nn as nn
from models.layers.BlockGradient import BlockGradient


def nll_loss(soft_target, logdist, reduce=True):
    """Negative log-likelihood of ``logdist`` under a soft target.

    Assumptions carried over from the original author (@Hongyi_Zhang):
        * ``soft_target`` is normalized to 1 with entries in [0, 1];
        * ``logdist`` is a (normalized) log distribution.

    ``soft_target`` is first passed through ``BlockGradient`` (presumably so
    no gradient reaches the target -- confirm against
    models.layers.BlockGradient). The elementwise product
    ``-soft_target * logdist`` is then summed over dims 2 and 1 for a 3-D
    target, or over dim 1 otherwise.

    Returns the mean over the remaining entries when ``reduce`` is truthy,
    otherwise the unreduced per-entry sums.
    """
    (soft_target,) = BlockGradient.apply(soft_target)
    weighted = -soft_target * logdist
    if soft_target.dim() == 3:
        per_sample = weighted.sum(2).sum(1)
    else:
        per_sample = weighted.sum(1)
    return per_sample.mean() if reduce else per_sample


class CopyBaselineLoss(nn.Module):
    """Criterion that trains the student to match the teacher's soft labels.

    The teacher logits are turned into a probability distribution with a
    softmax, the student logits into log-probabilities with a log-softmax,
    and the two are compared with ``nll_loss``.
    """

    def __init__(self, args):
        super(CopyBaselineLoss, self).__init__()
        # Stored from args; not read anywhere inside this class.
        self.clsweight = args.clsweight
        # Normalize over dim 1 explicitly. Constructing these modules without
        # ``dim`` is deprecated: it warns on every call and picks the axis
        # from the input rank (dim 0 for 3-D input). For 2-D logits the
        # implicit choice was already dim 1, so results there are unchanged.
        # NOTE(review): assumes logits are (batch, classes) -- confirm if
        # inputs of another rank are ever passed.
        self.softmax = nn.Softmax(dim=1)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def cross_entropy(self, log_target, log_pred, reduce=True):
        """Soft-label cross entropy between two sets of unnormalized scores.

        Args:
            log_target: scores that are softmax-ed into the soft target.
            log_pred: scores that are log-softmax-ed into the prediction.
            reduce: forwarded to ``nll_loss``; when truthy the result is
                averaged, otherwise the unreduced values are returned.
        """
        return nll_loss(self.softmax(log_target), self.logsoftmax(log_pred), reduce)

    def forward(self, dummy1, dummy2, teacher, student, target, ids):
        """Return ``(loss, ones)`` for one batch.

        ``dummy1``, ``dummy2`` and ``ids`` are ignored. ``target`` is used
        only for its first dimension, which sizes the all-ones vector
        returned as the second element.
        """
        loss = self.cross_entropy(teacher, student)
        return loss, torch.ones(target.shape[0])

Mar 08 '19 16:03 gsig