
I am trying to implement PWCCA from the tutorial, but I cannot get my code to match their original code:

Google's: pwcca_mean=0.21446671574218149
Google's (fixed): pwcca_mean2=0.21446671574218149
Our code: pwcca_ultimateanatome=tensor(0.2131, dtype=torch.float64)
Our code: pwcca_ultimateanatome_L1=tensor(0.2131, dtype=torch.float64)
Our code: pwcca_ultimateanatome_L2=tensor(0.2120, dtype=torch.float64)
Our code: pwcca_extended_anatome=tensor(0.2112, dtype=torch.float64)
Our code: pwcca_extended_anatome_L1=tensor(0.2112, dtype=torch.float64)
Our code: pwcca_extended_anatome_L2=tensor(0.2116, dtype=torch.float64)
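
For context, the numbers above come from feeding the same pair of activation matrices to Google's compute_pwcca and to my torch variants. A minimal sketch of the comparison harness (random activations here purely for illustration; it assumes the svcca repo's pwcca.py is importable):

import numpy as np
import torch
from pwcca import compute_pwcca  # from google/svcca

np.random.seed(0)
acts1 = np.random.randn(500, 30)  # (N datapoints, D1 neurons)
acts2 = np.random.randn(500, 50)  # (N datapoints, D2 neurons)

# Google's code expects (neurons, datapoints), hence the transposes
pwcca_mean, _, _ = compute_pwcca(acts1=acts1.T, acts2=acts2.T)
print(f'{pwcca_mean=}')

# my torch versions take (datapoints, neurons) directly
x, y = torch.from_numpy(acts1), torch.from_numpy(acts2)
d = pwcca_distance_choose_best_layer_matrix(x, y, backend='svd')  # defined below
print(f'pwcca similarity: {1.0 - d}')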

There is a lengthy discussion in this GitHub issue trying to make them match, but it seems we cannot, even after implementing the puzzling if statement that the original svcca code has:

    # - choose layer that had less small values removed (since the sum of true indices would be less)
    # to me this is puzzling: since we removed them when computing the
    # sigma_xxs, sigma_yys, etc., why does it matter which one we choose? perhaps because at the end it uses
    # the actual acts1 value below, which makes a difference
    if np.sum(sresults["x_idxs"]) <= np.sum(sresults["y_idxs"]):
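
For reference, the logic surrounding that if statement in Google's pwcca.py (paraphrased here, not verbatim) projects the raw activations of the chosen view onto the reorthonormalized cca directions to get the weights, which is presumably why the choice matters:

# paraphrased from google/svcca's compute_pwcca, not verbatim
if np.sum(sresults["x_idxs"]) <= np.sum(sresults["y_idxs"]):
    dirns = np.dot(sresults["coef_x"],
                   acts1[sresults["x_idxs"]] - sresults["neuron_means1"][sresults["x_idxs"]])
    coefs, acts, idxs = sresults["cca_coef1"], acts1, sresults["x_idxs"]
else:
    dirns = np.dot(sresults["coef_y"],
                   acts2[sresults["y_idxs"]] - sresults["neuron_means2"][sresults["y_idxs"]])
    coefs, acts, idxs = sresults["cca_coef2"], acts2, sresults["y_idxs"]
P, _ = np.linalg.qr(dirns.T)  # reorthonormalize the cca directions
weights = np.sum(np.abs(np.dot(P.T, acts[idxs].T)), axis=1)  # uses the raw activations
weights = weights / np.sum(weights)
pwcca_mean = np.sum(weights * coefs)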

and after correcting the bug from the original anatome, which used the cca weights instead of the cca vectors/canonical neurons (the original buggy code, shown for reference):

def pwcca_distance(x: Tensor,
                   y: Tensor,
                   backend: str
                   ) -> Tensor:
    """ Projection Weighted CCA proposed in Morcos et al. 2018.
    Args:
        x: input tensor of Shape DxH, where D>H
        y: input tensor of Shape DxW, where D>W
        backend: svd or qr
    Returns:
        1 - pwcca similarity as a scalar tensor
    """
    # NOTE: this reorthonormalizes the weight matrix `a` instead of the
    # canonical variables x @ a, which is not what the original paper does
    a, b, diag = cca(x, y, backend)
    a, _ = torch.linalg.qr(a)  # reorthonormalize
    alpha = (x @ a).abs_().sum(dim=0)
    alpha /= alpha.sum()
    return 1 - alpha @ diag
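
The bug, for concreteness: the QR is applied to the weight matrix a (shape D1xC) rather than to the canonical variables x @ a (shape DxC). The corrected core, as in my best attempt further below:

# buggy (original anatome): reorthonormalize the cca *weights*
a_orth, _ = torch.linalg.qr(a)            # a has shape (D1, C)
alpha = (x @ a_orth).abs().sum(dim=0)

# corrected: reorthonormalize the canonical variables / canonical neurons
x_tilde, _ = torch.linalg.qr(x @ a)       # x @ a has shape (D, C)
alpha = (x_tilde.T @ x).abs().sum(dim=1)  # project the raw activations, as svcca does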

I tried plugging in the original svcca code by just converting torch.Tensors to np.ndarrays, but that gives errors:

def _pwcca_distance_from_original_svcca(L1: Tensor,
                                        L2: Tensor
                                        ) -> Tensor:
    """ Projection Weighted CCA proposed in Morcos et al. 2018.

    Args:
        L1: input tensor of Shape NxD1, where it's recommended that N>Di
        L2: input tensor of Shape NxD2, where it's recommended that N>Di

    Returns:
        1 - pwcca similarity as a scalar tensor
    """
    import numpy as np
    import uutils.torch_uu
    from uutils.torch_uu.metrics.cca.pwcca import compute_pwcca
    # the original svcca code expects activations of shape (num_neurons, num_datapoints),
    # hence the transposes
    acts1: np.ndarray = L1.T.detach().cpu().numpy()
    acts2: np.ndarray = L2.T.detach().cpu().numpy()
    pwcca, _, _ = compute_pwcca(acts1=acts1, acts2=acts2)
    pwcca: Tensor = uutils.torch_uu.tensorify(pwcca)
    return 1.0 - pwcca
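
One thing worth checking (an assumption on my part, not verified): the original compute_pwcca takes an epsilon argument, and with the default epsilon=0 the internal SVD can fail to converge on ill-conditioned covariances, so passing a small value may fix the errors. A quick smoke test of the wrapper on random float64 data:

import torch

torch.manual_seed(0)
L1 = torch.randn(2000, 30, dtype=torch.float64)  # (N, D1) with N >> D1
L2 = torch.randn(2000, 50, dtype=torch.float64)
d = _pwcca_distance_from_original_svcca(L1, L2)
print(d)  # should be in the same ballpark as the torch implementations above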

I also tried copy-pasting their code and changing the numpy calls to torch calls, but that never finishes running (it does not scale, probably due to the covariance computation):

def _compute_cca_traditional_equation(acts1, acts2,
                                      epsilon=0., threshold=0.98):
    """
    Compute cca values according to the standard equation (no tricks):
        cca_k = sig_k = sqrt{lambda_k} = sqrt{EigVal(M^T M)} = Left or Right SingVal(M)
        M = Sig_X**-1/2 Sig_XY Sig_Y**-1/2

    Notes:
        - \tilde a = Sig_X**1/2 a, \tilde b = Sig_Y**1/2 b
        - M = Sig_X**-1/2 Sig_XY Sig_Y**-1/2
        - lambda_k = EigVal(M^T M)
        - sig_k = LeftSingVal(M) = RightSingVal(M) = lambda_k**0.5
        - cca corr:
            - rho_k = corr(a_k, b_k) = lambda_k**0.5 = sig_k
                - for the kth cca value
    :return: the cca singular values (correlations) as a 1d tensor
    """
    # - compute covariance matrices
    numx = acts1.shape[0]
    numy = acts2.shape[0]

    # covariance = np.cov(acts1, acts2)  # original numpy version
    # np.cov(acts1, acts2) stacks the two views row-wise; torch.cov takes a single
    # matrix whose rows are variables, so concatenate first
    covariance = torch.cov(torch.cat([acts1, acts2], dim=0))
    sigma_xx = covariance[:numx, :numx]
    sigma_xy = covariance[:numx, numx:]
    sigma_yx = covariance[numx:, :numx]
    sigma_yy = covariance[numx:, numx:]

    # rescale covariance to make the cca computation more stable
    xmax = torch.max(torch.abs(sigma_xx))
    ymax = torch.max(torch.abs(sigma_yy))
    sigma_xx = sigma_xx / xmax
    sigma_yy = sigma_yy / ymax
    sigma_xy = sigma_xy / torch.sqrt(xmax * ymax)
    sigma_yx = sigma_yx / torch.sqrt(xmax * ymax)

    # - compute ccas
    # note: the remove_small step from the original svcca is skipped here:
    # (sigma_xx, sigma_xy, sigma_yx, sigma_yy,
    #  x_idxs, y_idxs) = remove_small(sigma_xx, sigma_xy, sigma_yx, sigma_yy, epsilon)

    numx = sigma_xx.shape[0]
    numy = sigma_yy.shape[0]

    # regularize before (pseudo-)inverting
    sigma_xx = sigma_xx + epsilon * torch.eye(numx, dtype=sigma_xx.dtype, device=sigma_xx.device)
    sigma_yy = sigma_yy + epsilon * torch.eye(numy, dtype=sigma_yy.dtype, device=sigma_yy.device)
    inv_xx = torch.linalg.pinv(sigma_xx)
    inv_yy = torch.linalg.pinv(sigma_yy)

    invsqrt_xx = _positive_def_matrix_sqrt(inv_xx)
    invsqrt_yy = _positive_def_matrix_sqrt(inv_yy)

    # M = Sig_X**-1/2 Sig_XY Sig_Y**-1/2 (np.dot replaced by @ for torch)
    arr = invsqrt_xx @ (sigma_xy @ invsqrt_yy)

    u, s, v = torch.linalg.svd(arr)

    # the original returns ([u, np.abs(s), v], invsqrt_xx, invsqrt_yy, x_idxs, y_idxs);
    # here only the cca singular values are returned
    return s


def _positive_def_matrix_sqrt(array):
    """Stable method for computing matrix square roots, supports complex matrices.

    Args:
        array: a 2d torch tensor, possibly complex valued, that is a positive
               definite symmetric (or hermitian) matrix

    Returns:
        sqrtarray: the matrix square root of array
    """
    w, v = torch.linalg.eigh(array)
    wsqrt = torch.sqrt(w)
    # reconstruct: sqrt(A) = V diag(sqrt(w)) V^H
    sqrtarray = v @ (torch.diag(wsqrt) @ torch.conj(v).T)
    return sqrtarray
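
To see where the time goes, a small timing sketch (my guess, not verified, is that the eigh/pinv on the covariance matrices, or the covariance computation itself when the activation matrices are large, dominates; random float64 activations in svcca's (neurons, datapoints) layout):

import time
import torch

torch.manual_seed(0)
acts1 = torch.randn(30, 2000, dtype=torch.float64)  # (neurons, datapoints)
acts2 = torch.randn(50, 2000, dtype=torch.float64)

start = time.time()
s = _compute_cca_traditional_equation(acts1, acts2, epsilon=1e-10)
print(f'top cca correlations: {s[:5]}, took {time.time() - start:.3f}s')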

So I have run out of ideas. Perhaps it's impossible to make them match? And the implementation I have that looks correct gives weird results in real experiments.


Currently my best attempt:

def pwcca_distance_choose_best_layer_matrix(x: Tensor,
                                            y: Tensor,
                                            backend: str,
                                            use_layer_matrix: Optional[str] = None,
                                            epsilon: float = 1e-10
                                            ) -> Tensor:
    """ Projection Weighted CCA proposed in Marcos et al. 2018.

    ref:
        - https://github.com/moskomule/anatome/issues/30

    Args:
        x: input tensor of Shape NxD1, where it's recommended that N>Di
        y: input tensor of Shape NxD2, where it's recommended that N>Di
        backend: svd or qr
    Returns:
    """
    x = _zero_mean(x, dim=0)
    y = _zero_mean(y, dim=0)
    # x = _divide_by_max(_zero_mean(x, dim=0))
    # y = _divide_by_max(_zero_mean(y, dim=0))
    B, D1 = x.size()
    B2, D2 = y.size()
    assert B == B2
    C_ = min(D1, D2)
    a, b, diag = cca(x, y, backend)
    C = diag.size(0)
    assert (C == C_)
    assert a.size() == torch.Size([D1, C])
    assert diag.size() == torch.Size([C])
    assert b.size() == torch.Size([D2, C])
    if use_layer_matrix is None:
        # sigma_xx_approx = x
        # sigma_yy_approx = y
        sigma_xx_approx = x.T @ x
        sigma_yy_approx = y.T @ y
        x_diag = torch.diag(sigma_xx_approx.abs())
        y_diag = torch.diag(sigma_yy_approx.abs())
        x_idxs = (x_diag >= epsilon)
        y_idxs = (y_diag >= epsilon)
        use_layer_matrix: str = 'x' if x_idxs.sum() <= y_idxs.sum() else 'y'
    if use_layer_matrix == 'x':
        x_tilde = x @ a
        assert x_tilde.size() == torch.Size([B, C])
        x_tilde, _ = torch.linalg.qr(input=x_tilde)
        assert x_tilde.size() == torch.Size([B, C])
        alpha_tilde_dot_x_abs = (x_tilde.T @ x).abs_()
        assert alpha_tilde_dot_x_abs.size() == torch.Size([C, D1])
        alpha_tilde = alpha_tilde_dot_x_abs.sum(dim=1)
        assert alpha_tilde.size() == torch.Size([C])
    elif use_layer_matrix == 'y':
        y_tilde = y @ b
        assert y_tilde.size() == torch.Size([B, C])
        y_tilde, _ = torch.linalg.qr(input=y_tilde)
        assert y_tilde.size() == torch.Size([B, C])
        alpha_tilde_dot_y_abs = (y_tilde.T @ y).abs_()
        assert alpha_tilde_dot_y_abs.size() == torch.Size([C, D2])
        alpha_tilde = alpha_tilde_dot_y_abs.sum(dim=1)
        assert alpha_tilde.size() == torch.Size([C])
    else:
        raise ValueError(f"Invalid input: {use_layer_matrix=}")
    assert alpha_tilde.size() == torch.Size([C])
    alpha = alpha_tilde / alpha_tilde.sum()
    assert alpha.size() == torch.Size([C])
    return 1.0 - (alpha @ diag)
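
As a sanity check for this version (illustrative only): forcing use_layer_matrix to 'x' or 'y' exercises both branches, and identical inputs should give a distance near zero:

import torch

torch.manual_seed(0)
x = torch.randn(1000, 30, dtype=torch.float64)
y = torch.randn(1000, 50, dtype=torch.float64)

for choice in (None, 'x', 'y'):
    d = pwcca_distance_choose_best_layer_matrix(x, y, backend='svd', use_layer_matrix=choice)
    print(f'{choice=}: {d=}')

# identical inputs should give distance ~0
d_same = pwcca_distance_choose_best_layer_matrix(x, x.clone(), backend='svd')
print(f'{d_same=}')  # expect ~0.0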
