# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
# flake8: noqa
from scipy.sparse import lil_matrix, diags, eye

import math
import numpy as np
import torch

EPSILON = 1e-6
NORMALIZATION = 1e6


def _get_msid(x, y, ts=np.logspace(-1, 1, 256), k=5, m=10, niters=100,
              rademacher=False, graph_builder='full',
              msid_mode='max', normalized_laplacian=True, normalize='empty'):
    """
    Compute the msid score between two samples, x and y.

    Arguments:
        x: x samples
        y: y samples
        ts: temperature values
        k: number of neighbours for graph construction
        m: Lanczos steps in SLQ
        niters: number of starting random vectors for SLQ
        rademacher: if True, sample random vectors from the Rademacher
            distribution, otherwise sample from the standard normal
            distribution
        graph_builder: one of 'full', 'sparse', 'kgraph'; 'kgraph' uses a
            faster graph construction
        msid_mode: 'l2' to compute the l2 norm of the distance between `msid1`
            and `msid2`; 'max' to find the maximum absolute difference between
            the two descriptors over temperature
        normalized_laplacian: if True, use the normalized Laplacian
        normalize: 'empty' for average heat kernel (corresponds to the empty
            graph normalization of NetLSD), 'complete' for the complete graph
            normalization, 'er' for Erdos-Renyi normalization, 'none' for no
            normalization
    Returns:
        a dictionary {'IMD': msid_score}, where msid_score is the scalar
        distance between the descriptors
    """
    normed_msidx = msid_descriptor(x, ts, k, m, niters, rademacher,
                                   graph_builder, normalized_laplacian,
                                   normalize)
    normed_msidy = msid_descriptor(y, ts, k, m, niters, rademacher,
                                   graph_builder, normalized_laplacian,
                                   normalize)
    c = np.exp(-2 * (ts + 1 / ts))
    if msid_mode == 'l2':
        score = np.linalg.norm(normed_msidx - normed_msidy)
    elif msid_mode == 'max':
        score = np.amax(c * np.abs(normed_msidx - normed_msidy))
    else:
        raise Exception('Use either l2 or max mode.')
    return {'IMD': score}
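
# Illustrative usage sketch (added; not part of the original module). The
# sizes below are arbitrary: any float tensors of shape
# (num_samples, num_features) should work with the default 'full' builder.
#
#   x = torch.randn(1024, 64)
#   y = torch.randn(1024, 64)
#   print(_get_msid(x, y))  # -> {'IMD': <float>}; smaller means more similar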


def msid_descriptor(x, ts=np.logspace(-1, 1, 256), k=5, m=10, niters=100,
                    rademacher=False, graph_builder='full',
                    normalized_laplacian=True, normalize='empty'):
    """
    Compute the msid descriptor for a single sample x.

    Arguments:
        x: x samples
        ts: temperature values
        k: number of neighbours for graph construction
        m: Lanczos steps in SLQ
        niters: number of starting random vectors for SLQ
        rademacher: if True, sample random vectors from the Rademacher
            distribution, otherwise sample from the standard normal
            distribution
        graph_builder: one of 'full', 'sparse', 'kgraph'; 'kgraph' uses a
            faster graph construction
        normalized_laplacian: if True, use the normalized Laplacian
        normalize: 'empty' for average heat kernel (corresponds to the empty
            graph normalization of NetLSD), 'complete' for the complete graph
            normalization, 'er' for Erdos-Renyi normalization, 'none' for no
            normalization
    Returns:
        normed_msidx: normalized msid descriptor
    """
    Lx = _build_graph(x, k, graph_builder, normalized_laplacian)
    nx = Lx.shape[0]
    msidx = slq_red_var(Lx, m, niters, ts, rademacher)
    normed_msidx = _normalize_msid(msidx, normalize, nx, k, ts) * NORMALIZATION
    return normed_msidx
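
# Illustrative note (added): the descriptor is a heat-trace curve evaluated on
# the `ts` grid, so its length equals len(ts) (256 by default), e.g.:
#
#   d = msid_descriptor(torch.randn(300, 16))
#   assert d.shape == (256,)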


def _build_graph(data, k=5, graph_builder='full', normalized=True):
    """
    Return the graph Laplacian constructed from data.

    Arguments:
        data: samples
        k: number of neighbours for graph construction
        graph_builder: one of 'full', 'sparse', 'kgraph'; 'kgraph' uses a
            faster graph construction
        normalized: if True, use the normalized Laplacian
    Returns:
        L: Laplacian of the graph constructed from data
    """
    if graph_builder == 'sparse':
        A = construct_graph_sparse(data.cpu().numpy(), k)
    elif graph_builder == 'kgraph':
        A = construct_graph_kgraph(data.cpu().numpy(), k)
    elif graph_builder == 'full':
        A = lil_matrix(construct_graph(data, k).cpu().numpy()).tocsr()
    else:
        raise Exception("Please specify graph builder: 'full', 'sparse' or "
                        "'kgraph'.")
    # Symmetrize the kNN adjacency and binarize the edge weights.
    A = (A + A.T) / 2
    A.data = np.ones(A.data.shape)
    L = _laplacian_sparse(A, normalized)
    return L
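
# Illustrative sanity check (added): rows of the unnormalized Laplacian sum to
# zero, which gives a quick way to validate the construction, e.g.:
#
#   L = _build_graph(torch.randn(100, 8), normalized=False)
#   assert np.allclose(L.sum(1).A1, 0)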


def _normalize_msid(msid, normalization, n, k, ts):
    normed_msid = msid.copy()
    if normalization == 'empty':
        normed_msid /= n
    elif normalization == 'complete':
        normed_msid /= (1 + (n - 1) * np.exp(-(1 + 1 / (n - 1)) * ts))
    elif normalization == 'er':
        xs = np.linspace(0, 1, n)
        er_spectrum = 4 / np.sqrt(k) * xs + 1 - 2 / np.sqrt(k)
        er_msid = np.exp(-np.outer(ts, er_spectrum)).sum(-1)
        normed_msid = normed_msid / (er_msid + EPSILON)
    elif normalization == 'none' or normalization is None:
        pass
    else:
        raise ValueError('Unknown normalization parameter!')
    return normed_msid
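
# Note (added): the closed forms above follow from the heat trace
# h(t) = sum_i exp(-t * lambda_i) of reference graphs. The empty graph has an
# all-zero spectrum, so h(t) = n (hence division by n), and the complete graph
# K_n has normalized-Laplacian spectrum {0, and n / (n - 1) with multiplicity
# n - 1}, so h(t) = 1 + (n - 1) * exp(-t * n / (n - 1)), which matches the
# 'complete' branch since 1 + 1 / (n - 1) = n / (n - 1).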


def _lanczos_m(A, m, nv, rademacher, SV=None):
    """
    Lanczos algorithm computes a symmetric m x m tridiagonal matrix T and
    a matrix V with orthogonal rows constituting the basis of the Krylov
    subspace K_m(A, x), where x is an arbitrary starting unit vector. This
    implementation parallelizes `nv` starting vectors.

    Arguments:
        A: matrix on which the Krylov subspace is built
        m: number of Lanczos steps
        nv: number of random vectors
        rademacher: True to use the Rademacher distribution, False to use the
            standard normal distribution for random vectors
        SV: specified starting vectors
    Returns:
        T: an nv x m x m tensor, where T[i, :, :] is the ith symmetric
            tridiagonal matrix
        V: an n x m x nv tensor, where V[:, :, i] is the ith matrix with
            orthogonal rows
    """
    orthtol = 1e-5
    if not isinstance(SV, np.ndarray):
        if rademacher:
            SV = np.sign(np.random.randn(A.shape[0], nv))
        else:
            # Init random vectors in columns: n x nv.
            SV = np.random.randn(A.shape[0], nv)
    V = np.zeros((SV.shape[0], m, nv))
    T = np.zeros((nv, m, m))

    np.divide(SV, np.linalg.norm(SV, axis=0), out=SV)  # normalize each column
    V[:, 0, :] = SV

    w = A.dot(SV)
    alpha = np.einsum('ij,ij->j', w, SV)
    w -= alpha[None, :] * SV
    beta = np.einsum('ij,ij->j', w, w)
    np.sqrt(beta, beta)

    T[:, 0, 0] = alpha
    T[:, 0, 1] = beta
    T[:, 1, 0] = beta

    np.divide(w, beta[None, :], out=w)
    V[:, 1, :] = w

    t = np.zeros((m, nv))
    for i in range(1, m):
        SVold = V[:, i - 1, :]
        SV = V[:, i, :]

        w = A.dot(SV)  # sparse @ dense
        w -= beta[None, :] * SVold  # n x nv
        np.einsum('ij,ij->j', w, SV, out=alpha)

        T[:, i, i] = alpha

        if i < m - 1:
            w -= alpha[None, :] * SV  # n x nv
            # Reorthogonalize against the current Krylov basis.
            np.einsum('ijk,ik->jk', V, w, out=t)
            w -= np.einsum('ijk,jk->ik', V, t)
            np.einsum('ij,ij->j', w, w, out=beta)
            np.sqrt(beta, beta)
            np.divide(w, beta[None, :], out=w)

            T[:, i, i + 1] = beta
            T[:, i + 1, i] = beta

            # Repeat the reorthogonalization until convergence.
            innerprod = np.einsum('ijk,ik->jk', V, w)
            reortho = False
            for _ in range(100):
                if not (innerprod > orthtol).sum():
                    reortho = True
                    break
                np.einsum('ijk,ik->jk', V, w, out=t)
                w -= np.einsum('ijk,jk->ik', V, t)
                np.divide(w, np.linalg.norm(w, axis=0)[None, :], out=w)
                innerprod = np.einsum('ijk,ik->jk', V, w)

            V[:, i + 1, :] = w

            if (np.abs(beta) > 1e-6).sum() == 0 or not reortho:
                break
    return T, V
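

def _lanczos_sanity_check(n=64, m=10, nv=4, seed=0):
    # Added illustrative check (not part of the original module); the name and
    # sizes are arbitrary. For every starting vector j, the Lanczos basis
    # V[:, :, j] should be orthonormal and should tridiagonalize A, i.e.
    # V[:, :, j].T @ A @ V[:, :, j] ~= T[j].
    rng = np.random.RandomState(seed)
    B = rng.randn(n, n)
    A = (B + B.T) / 2  # dense symmetric test matrix; _lanczos_m only uses .dot
    T, V = _lanczos_m(A, m, nv, rademacher=False)
    for j in range(nv):
        Vj = V[:, :, j]
        assert np.allclose(Vj.T.dot(Vj), np.eye(m), atol=1e-4)
        assert np.allclose(Vj.T.dot(A).dot(Vj), T[j], atol=1e-4)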


def _slq(A, m, niters, rademacher):
    """
    Compute the trace of the matrix exponential.

    Arguments:
        A: square matrix in trace(exp(A))
        m: number of Lanczos steps
        niters: number of quadratures (also, the number of random vectors in
            the Hutchinson trace estimator)
        rademacher: True to use the Rademacher distribution, False to use the
            standard normal distribution for random vectors in Hutchinson
    Returns:
        trace: estimate of the trace of the matrix exponential
    """
    T, _ = _lanczos_m(A, m, niters, rademacher)
    eigvals, eigvecs = np.linalg.eigh(T)
    expeig = np.exp(eigvals)
    sqeigv1 = np.power(eigvecs[:, 0, :], 2)
    trace = A.shape[-1] * (expeig * sqeigv1).sum() / niters
    return trace


def _slq_ts(A, m, niters, ts, rademacher):
    """
    Compute the trace of the matrix exponential across temperatures.

    Arguments:
        A: square matrix in trace(exp(-t*A)), where t is temperature
        m: number of Lanczos steps
        niters: number of quadratures (also, the number of random vectors in
            the Hutchinson trace estimator)
        ts: an array with temperatures
        rademacher: True to use the Rademacher distribution, False to use the
            standard normal distribution for random vectors in Hutchinson
    Returns:
        traces: estimate of the trace of the matrix exponential across
            temperatures `ts`
    """
    T, _ = _lanczos_m(A, m, niters, rademacher)
    eigvals, eigvecs = np.linalg.eigh(T)
    expeig = np.exp(-np.outer(ts, eigvals)).reshape(ts.shape[0], niters, m)
    sqeigv1 = np.power(eigvecs[:, 0, :], 2)
    traces = A.shape[-1] * (expeig * sqeigv1).sum(-1).mean(-1)
    return traces
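

def _slq_ts_sanity_check(n=128, m=10, niters=200, seed=0):
    # Added illustrative check (not part of the original module); the name and
    # sizes are arbitrary. On a small symmetric PSD matrix, the SLQ estimate
    # should track the exact heat trace sum_i exp(-t * lambda_i) computed with
    # a dense eigendecomposition; expect a small relative error, not equality.
    rng = np.random.RandomState(seed)
    B = rng.randn(n, n)
    A = B.dot(B.T) / n  # symmetric positive semi-definite test matrix
    ts = np.logspace(-1, 1, 8)
    est = _slq_ts(A, m, niters, ts, rademacher=False)
    exact = np.exp(-np.outer(ts, np.linalg.eigvalsh(A))).sum(-1)
    return np.abs(est - exact) / exact  # relative error per temperature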


def _slq_ts_fs(A, m, niters, ts, rademacher, fs):
    """
    Compute the traces of matrix functions.

    Arguments:
        A: square matrix in trace(f(-t*A)), where t is temperature and f is
            a function in `fs`
        m: number of Lanczos steps
        niters: number of quadratures (also, the number of random vectors in
            the Hutchinson trace estimator)
        ts: an array with temperatures
        rademacher: True to use the Rademacher distribution, False to use the
            standard normal distribution for random vectors in Hutchinson
        fs: a list of functions
    Returns:
        traces: estimate of the traces for each of the functions in `fs`
    """
    T, _ = _lanczos_m(A, m, niters, rademacher)
    eigvals, eigvecs = np.linalg.eigh(T)
    traces = np.zeros((len(fs), len(ts)))
    for i, f in enumerate(fs):
        expeig = f(-np.outer(ts, eigvals)).reshape(ts.shape[0], niters, m)
        sqeigv1 = np.power(eigvecs[:, 0, :], 2)
        traces[i, :] = A.shape[-1] * (expeig * sqeigv1).sum(-1).mean(-1)
    return traces


def slq_red_var(A, m, niters, ts, rademacher):
    """
    Compute the trace of the matrix exponential with reduced variance.

    Arguments:
        A: square matrix in trace(exp(-t*A)), where t is temperature
        m: number of Lanczos steps
        niters: number of quadratures (also, the number of random vectors in
            the Hutchinson trace estimator)
        ts: an array with temperatures
        rademacher: True to use the Rademacher distribution, False to use the
            standard normal distribution for random vectors in Hutchinson
    Returns:
        traces: estimate of the trace for each temperature value in `ts`
    """
    fs = [np.exp, lambda x: x]
    traces = _slq_ts_fs(A, m, niters, ts, rademacher, fs)
    subee = traces[0, :] - traces[1, :] / np.exp(ts)
    sub = - ts * A.shape[0] / np.exp(ts)
    return subee + sub
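
# Note (added): the linear term acts as a control variate. With f(x) = x the
# estimate traces[1, :] targets tr(-t * A), which is known exactly for the
# normalized Laplacian: tr(-t * L) = -t * n, since every diagonal entry of L
# is 1. Rearranging the return value gives
#     traces[0, :] - np.exp(-ts) * (traces[1, :] - (-ts * A.shape[0])),
# i.e. the exp-trace estimate minus exp(-t) times the (zero-mean) error of
# the linear-trace estimate.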


def np_euc_cdist(data):
    dd = np.sum(data * data, axis=1)
    dist = -2 * np.dot(data, data.T)
    dist += dd + dd[:, np.newaxis]
    np.fill_diagonal(dist, 0)
    np.sqrt(dist, dist)
    return dist


def construct_graph_sparse(data, k):
    n = len(data)
    spmat = lil_matrix((n, n))
    dd = np.sum(data * data, axis=1)

    for i in range(n):
        dists = dd - 2 * data[i, :].dot(data.T)
        inds = np.argpartition(dists, k + 1)[:k + 1]
        inds = inds[inds != i]
        spmat[i, inds] = 1

    return spmat.tocsr()


def construct_graph_kgraph(data, k):
    raise NotImplementedError
    #
    # n = len(data)
    # spmat = lil_matrix((n, n))
    # index = pykgraph.KGraph(data, 'euclidean')
    # index.build(reverse=0, K=2 * k + 1, L=2 * k + 50)
    # result = index.search(data, K=k + 1)[:, 1:]
    # spmat[np.repeat(np.arange(n), k, 0), result.ravel()] = 1
    # return spmat.tocsr()


def construct_graph(input_features, k, num_splits=10):
    # Compute the k nearest neighbours in batches to bound the peak memory
    # use of the pairwise distance matrix.
    num_samples = input_features.shape[0]
    indices = []
    for i in range(num_splits):
        batch_size = math.ceil(num_samples / num_splits)
        start_idx = i * batch_size
        end_idx = min((i + 1) * batch_size, num_samples)
        dist = torch.cdist(input_features[start_idx:end_idx],
                           input_features)
        indices.append(torch.topk(dist, k + 1, dim=-1, largest=False)[1].cpu())
    indices = torch.cat(indices, dim=0)
    # Each point's k + 1 nearest neighbours include the point itself, so mark
    # them all and then remove the self-loops on the diagonal.
    graph = torch.zeros(num_samples, num_samples, device=indices.device)
    graph.scatter_(1, indices, 1)
    graph -= torch.eye(num_samples, device=graph.device)
    return graph


def _laplacian_sparse(A, normalized=True):
    D = A.sum(1).A1  # node degrees
    if normalized:
        # Symmetric normalized Laplacian: I - D^{-1/2} A D^{-1/2}.
        Dsqrt = diags(1 / np.sqrt(D))
        L = eye(A.shape[0]) - Dsqrt.dot(A).dot(Dsqrt)
    else:
        # Combinatorial Laplacian: D - A.
        L = diags(D) - A
    return L
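

if __name__ == '__main__':
    # Added smoke test (not part of the original module); the batch size and
    # feature dimension below are arbitrary. Computes IMD between two
    # independent random feature batches with the default settings.
    torch.manual_seed(0)
    x = torch.randn(512, 32)
    y = torch.randn(512, 32)
    print(_get_msid(x, y))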