deep-vector-quantization
I have another implementation. Is it correct?
```python
def forward(self, z):
    B, C, H, W = z.size()

    z_e = self.proj(z)
    z_e = z_e.permute(0, 2, 3, 1)  # make (B, H, W, C)
    flatten = z_e.reshape(-1, self.embedding_dim)

    # DeepMind def does not do this but I find I have to... ;\
    if self.training and self.data_initialized.item() == 0:
        print('running kmeans!!')  # data driven initialization for the embeddings
        rp = torch.randperm(flatten.size(0))
        kd = kmeans2(flatten[rp[:20000]].data.cpu().numpy(), self.n_embed, minit='points')
        self.embed.weight.data.copy_(torch.from_numpy(kd[0]))
        self.data_initialized.fill_(1)
        # TODO: this won't work in multi-GPU setups

    dist = (
        flatten.pow(2).sum(1, keepdim=True)
        - 2 * flatten @ self.embed.weight.t()
        + self.embed.weight.pow(2).sum(1, keepdim=True).t()
    )  # expanded squared-distance formula ||x - e||^2; nonnegative since it is a squared norm
    _, ind = (-dist).max(1)
    ind = ind.view(B, H, W)  # VQ is applied at every spatial position, so 64*128 vectors in total

    # vector quantization cost that trains the embedding vectors
    z_q = self.embed_code(ind)  # (B, H, W, C)
    commitment_cost = 0.25
    diff = commitment_cost * (z_q.detach() - z_e).pow(2).mean() + (z_q - z_e.detach()).pow(2).mean()
    diff *= self.kld_scale

    diff2 = (z_q - z_e).pow(2).mean()
    diff2 *= self.kld_scale

    z_q = z_e + (z_q - z_e).detach()  # noop in forward pass, straight-through gradient estimator in backward pass
    z_q = z_q.permute(0, 3, 1, 2)  # stack encodings into channels again: (B, C, H, W)
    return z_q, diff2, ind  # z_q is the quantized output, diff2 is the loss, ind are the code indices
```
I changed the returned loss from diff to diff2. I have no clue whether that is correct, but when I trained with it, it converged faster.
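To make the diff-vs-diff2 question concrete, here is a toy sketch (the shapes, seed, and variable names are made up for illustration; kld_scale is omitted since it scales both losses identically) comparing the gradients the two losses send to the encoder output and to the codebook:

```python
import torch

torch.manual_seed(0)
z_e = torch.randn(8, 4, requires_grad=True)  # stand-in for flattened encoder outputs
embed = torch.nn.Embedding(3, 4)             # stand-in codebook with 3 codes
ind = torch.randint(0, 3, (8,))              # stand-in nearest-code indices
z_q = embed(ind)

# diff (original VQ-VAE loss): the detach() calls split the signal.
# The codebook term trains embed; the commitment term, scaled by 0.25,
# is the ONLY part that reaches the encoder.
diff = 0.25 * (z_q.detach() - z_e).pow(2).mean() + (z_q - z_e.detach()).pow(2).mean()
diff.backward(retain_graph=True)
g_enc_diff = z_e.grad.clone()
g_cb_diff = embed.weight.grad.clone()
z_e.grad = None
embed.weight.grad = None

# diff2: plain MSE with no detach — the full, unweighted gradient flows
# into both the encoder and the codebook.
diff2 = (z_q - z_e).pow(2).mean()
diff2.backward()
g_enc_diff2 = z_e.grad.clone()
g_cb_diff2 = embed.weight.grad.clone()

# Codebook gradient is identical under both losses...
print(torch.allclose(g_cb_diff, g_cb_diff2))
# ...but diff2's encoder gradient is 1/0.25 = 4x larger.
print(torch.allclose(g_enc_diff2, 4 * g_enc_diff))
```

So diff2 trains the codebook exactly as diff does, but pushes a 4x stronger gradient into the encoder (and removes the commitment-cost knob entirely), which could plausibly account for the faster convergence you observed.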