<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Learning Transferable Features for Implicit Neural Representations</title>
<!-- Google tag (gtag.js) -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-EL03DLW9KF"></script>
<script>
// Standard Google Analytics (gtag.js) bootstrap snippet: queues calls on
// window.dataLayer until the async gtag.js library (loaded above) processes them.
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-EL03DLW9KF');
</script>
<link rel="stylesheet" type="text/css" href="theme/css/bootstrap.min.css">
<script src="theme/js/bootstrap.min.js"></script>
<link rel="stylesheet" type="text/css" href="theme/fontawesome/css/fontawesome.min.css">
<link rel="stylesheet" type="text/css" href="theme/css/styles.css">
<script src=
"https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js">
</script>
<script src=
"https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.16.0/umd/popper.min.js">
</script>
<link href="https://fonts.googleapis.com/css?family=Roboto" rel="stylesheet" type="text/css">
<style>
  /* Push the nav list to the right edge of the flex navbar. */
  .navbar-nav {
    margin-left: auto;
  }
  /* Fix: background-image requires url(); the original bare string was
     invalid CSS and silently ignored. Stray ";" after rule blocks removed.
     NOTE(review): path assumed relative to this page — confirm the asset exists. */
  .jumbotron_class {
    background-image: url("background.jpeg");
  }
  body {
    font-family: 'Roboto', sans-serif;
  }
</style>
<script>
$("document").ready(function(){
$("#backdrop_image").click(function(e){
e.preventDefault();
});
// $("#page_banner").click(function(e){
// window.location.href = "./"
// });
});
</script>
<script src="theme/js/photoswipe.umd.min.js"></script>
<script src="theme/js/photoswipe-lightbox.umd.min.js"></script>
<link rel="stylesheet" href="theme/css/photoswipe.css">
<script src="theme/js/scramble.js"></script>
<style>
  /* Remove default spacing around list items (unitless zero). */
  li {
    margin: 0;
    padding: 0;
  }
</style>
<script src="theme/js/spotlight.bundle.js"></script>
</head>
<body>
<div class="container">
<div id="page_header">
<nav class="navbar navbar-expand-md bg-light">
<a class="nav-link" href="https://kushalvyas.github.io/">&nbsp; HOME &#124; &nbsp; </a>
<a class="navbar-brand abs" href="https://www.linkedin.com/in/kushalvyaskv/"><svg xmlns="http://www.w3.org/2000/svg" width="1.5em" height="24" viewBox="0 0 24 24"><path d="M19 0h-14c-2.761 0-5 2.239-5 5v14c0 2.761 2.239 5 5 5h14c2.762 0 5-2.239 5-5v-14c0-2.761-2.238-5-5-5zm-11 19h-3v-11h3v11zm-1.5-12.268c-.966 0-1.75-.79-1.75-1.764s.784-1.764 1.75-1.764 1.75.79 1.75 1.764-.783 1.764-1.75 1.764zm13.5 12.268h-3v-5.604c0-3.368-4-3.113-4 0v5.604h-3v-11h3v1.765c1.396-2.586 7-2.777 7 2.476v6.759z"/></svg></a>
<a class="navbar-brand abs" href="https://github.com/kushalvyas"><svg xmlns="http://www.w3.org/2000/svg" height="1.5em" viewBox="0 0 496 512"><!--! Font Awesome Free 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2023 Fonticons, Inc. --><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg></a>
<a class="navbar-brand abs" href="https://scholar.google.com/citations?user=0SxLnLcAAAAJ&hl=en"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" height="1.5em"><!--!Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free Copyright 2024 Fonticons, Inc.--><path d="M390.9 298.5c0 0 0 .1 .1 .1c9.2 19.4 14.4 41.1 14.4 64C405.3 445.1 338.5 512 256 512s-149.3-66.9-149.3-149.3c0-22.9 5.2-44.6 14.4-64h0c1.7-3.6 3.6-7.2 5.6-10.7c4.4-7.6 9.4-14.7 15-21.3c27.4-32.6 68.5-53.3 114.4-53.3c33.6 0 64.6 11.1 89.6 29.9c9.1 6.9 17.4 14.7 24.8 23.5c5.6 6.6 10.6 13.8 15 21.3c2 3.4 3.8 7 5.5 10.5zm26.4-18.8c-30.1-58.4-91-98.4-161.3-98.4s-131.2 40-161.3 98.4L0 202.7 256 0 512 202.7l-94.7 77.1z"/></svg></a>
<!-- <a class="navbar-brand abs"></a> -->
<ul class="navbar-nav ms-auto navbar-right">
<li class="nav-item">
<a class="nav-link" href="https://kushalvyas.github.io/research.html">
Research
</a>
</li>
<li class="nav-item">
<a class="nav-link" href="https://kushalvyas.github.io/photography.html">
Photography & Music
</a>
</li>
<li class="nav-item">
<a class="nav-link" href="https://kushalvyas.github.io/blog.html">
Blog
</a>
</li>
</ul>
</nav>
</div> <div id="page_banner">
<div>
<!-- Jumbotron -->
<div class="p-5 text-center bg-image rounded-3">
<div class="mask" style="background-color: rgba(0, 0, 0, 0.6);">
<div class="d-flex justify-content-center align-items-center h-100">
<div class="text-white">
<h1 class="mb-3">Learning Transferable Features for Implicit Neural Representations</h1>
<h6 class="mb-3">
<a href="https://kushalvyas.github.io/" style="color: white;"><b>Kushal Vyas</b></a>
<a href="https://imtiazhumayun.github.io/" style="color: white;">Ahmed Imtiaz Humayun</a>
<a href="https://aniketdashpute.github.io/" style="color: white;">Aniket Dashpute</a>
<a href="https://richb.rice.edu/" style="color: white;">Richard G. Baraniuk</a>
<a href="https://profiles.rice.edu/faculty/ashok-veeraraghavan" style="color: white;">Ashok Veeraraghavan</a>
<a href="https://www.guhabalakrishnan.com/home" style="color: white;">Guha Balakrishnan</a>
<p><i>NeurIPS, 2024</i></p>
</h6>
</div>
</div>
</div>
</div>
<!-- Jumbotron -->
</div> </div>
<div class="container">
<div class="justify-content-center">
<article>
<br>
<div class="row justify-content-md-center">
<div class="col-md-auto box text-center">
<div>
<img src="projects/images/strainer/arxiv_thumbnail.png" alt="arXiv paper thumbnail" style="width: 100px; height: 100px; object-fit: cover;">
<p><a href="https://arxiv.org/abs/2409.09566">arXiv</a></p>
</div>
</div>
<div class="col-md-auto box text-center">
<div>
<img src="projects/images/strainer/colab_thumbnail.png" alt="Google Colab notebook thumbnail" style="width: 100px; height: 100px; object-fit: cover;">
<p><a href="https://colab.research.google.com/drive/1fBZAwqE8C_lrRPAe-hQZJTWrMJuAKtG2?usp=sharing">Google Colab</a></p>
</div>
</div>
<div class="col-md-auto box text-center">
<div>
<img src="projects/images/strainer/github_thumbnail.png" alt="GitHub repository thumbnail" style="width: 100px; height: 100px; object-fit: cover;">
<p><a href="https://github.com/kushalvyas">Code</a></p>
</div>
</div>
</div>
</div>
<br>
<div class="justify-content-center" style="text-align: justify;">
<!-- | ![img](projects/images/strainer/strainer.png){width="80%"} |
|:--:|
| *STRAINER - Learning Transferable Features for Implicit Neural Representations. During training time (a), STRAINER divides an INR into encoder and decoder layers. STRAINER fits similar signals while sharing the encoder layers, capturing a rich set of transferrable features. At test- time, STRAINER serves as powerful initialization for fitting a new signal (b). An INR initialized with STRAINER’s learned encoder features achieves (c) faster convergence and better quality reconstruction compared to baseline SIREN models.* | -->
<figure class="figure text-center">
<img src="projects/images/strainer/strainer.png" class="figure-img img-fluid rounded" alt="strainer cover fig">
<figcaption class="figure-caption text-center text-justify">STRAINER - Learning Transferable Features for Implicit Neural Representations. During training time (a), STRAINER divides an INR into encoder and decoder layers. STRAINER fits similar signals while sharing the encoder layers, capturing a rich set of transferrable features. At test- time, STRAINER serves as powerful initialization for fitting a new signal (b). An INR initialized with STRAINER’s learned encoder features achieves (c) faster convergence and better quality reconstruction compared to baseline SIREN models.</figcaption>
</figure>
<p><br>
<br>
<br></p>
<p><strong>Abstract</strong>: Implicit neural representations (INRs) have demonstrated success in a variety of applications, including inverse problems and neural rendering. An INR is typically trained to capture one signal of interest, resulting in learned neural features that are highly attuned to that signal. Assumed to be less generalizable, we explore the aspect of transferability of such learned neural features for fitting similar signals. We introduce a new INR training framework, STRAINER that learns transferrable features for fitting INRs to new signals from a given distribution, faster and with better reconstruction quality. Owing to the sequential layer-wise affine operations in an INR, we propose to learn transferable representations by sharing initial encoder layers across multiple INRs with independent decoder layers. At test time, the learned encoder representations are transferred as initialization for an otherwise randomly initialized INR. We find STRAINER to yield extremely powerful initialization for fitting images from the same domain and allow for a ≈ +10dB gain in signal quality early on compared to an untrained INR itself. STRAINER also provides a simple way to encode data-driven priors in INRs. We evaluate STRAINER on multiple in-domain and out-of-domain signal fitting tasks and inverse problems and further provide detailed analysis and discussion on the transferability of STRAINER’s features.</p>
<p><br>
<br>
<br></p>
<h4>Image Fitting (In domain and Out of Domain)</h4>
<figure class="figure text-center">
<img src="projects/images/strainer/psnr_quality_strainer.png" class="figure-img img-fluid rounded" alt="strainer pspnr fig" width="60%">
<figcaption class="figure-caption text-center text-justify">STRAINER captures a highly transferable representation from just 10 images and 24 seconds of training time! Refer to Table 3,5 in the paper for baseline evaluation for in-domain image fitting and training complexity. STRAINER features are also powerful initialization for out-of-domain image fitting indicating that STRAINER captures features highly generalizable to other natural images (Table 2,3).</figcaption>
</figure>
<p><br>
<br>
<br></p>
<h4>STRAINER Learns High Frequency Faster</h4>
<!-- | ![img](projects/images/strainer/pca_cat_plot_v4.png) |
|:--:|
| *We visualize (a) the first principal component of the learned encoder features for STRAINER and corresponding layer for SIREN . At iteration 0, STRAINER’s feature already capture a low dimensional structure allowing it to quickly adapt to the cat image. High frequency detail emerges in STRAINER’s learned features by iteration 50, whereas SIREN is lacking at iteration 100. The inset showing the power spectrum of the reconstructed image further confirms that STRAINER learns high frequency faster. We also show the (b) reconstructed images and remark that STRAINER fits high frequencies faster.* | -->
<p><br></p>
<figure class="figure text-center">
<img src="projects/images/strainer/pca_cat_plot_v4.png" class="figure-img img-fluid rounded" alt="strainer cat fig" width="80%">
<figcaption class="figure-caption text-center text-justify">We visualize (a) the first principal component of the learned encoder features for STRAINER and corresponding layer for SIREN . At iteration 0, STRAINER’s feature already capture a low dimensional structure allowing it to quickly adapt to the cat image. High frequency detail emerges in STRAINER’s learned features by iteration 50, whereas SIREN is lacking at iteration 100. The inset showing the power spectrum of the reconstructed image further confirms that STRAINER learns high frequency faster. We also show the (b) reconstructed images and remark that STRAINER fits high frequencies faster.</figcaption>
</figure>
<p><br>
<br>
<br></p>
<h4>Visualizing Density of Partitions in Input Space of Learned Models</h4>
<!-- | ![img](projects/images/strainer/partitions_v7_arxiv.png) |
|:--:|
| *We use the method introduced in [20] to approximate the input space partition of the INR. We present the input space partitions for layers 2,3,4 across (a) Meta-learned 5K and STRAINER initialization and (b) at test time optimization. STRAINER learns an input space partitioning which is more attuned to the prior of the dataset, compared to meta learned which is comparatively more random. We also observe that SIREN (iii) learns an input space partitioning highly specific to the image leading to inefficient transferability for fitting a new image (iv) with significantly different underlying partitioned input space This explains the better in-domain performance of STRAINER compared to Meta-learned 5K , as the shallower layers after pre-training provide a better input space subdivision to the deeper layers to further subdivide.* | -->
<figure class="figure text-center">
<img src="projects/images/strainer/partitions_v7_arxiv.png" class="figure-img img-fluid rounded" alt="strainer partition fig" width="80%">
<figcaption class="figure-caption text-center text-justify">We use the method introduced in [20] to approximate the input space partition of the INR. We present the input space partitions for layers 2,3,4 across (a) Meta-learned 5K and STRAINER initialization and (b) at test time optimization. STRAINER learns an input space partitioning which is more attuned to the prior of the dataset, compared to meta learned which is comparatively more random. We also observe that SIREN (iii) learns an input space partitioning highly specific to the image leading to inefficient transferability for fitting a new image (iv) with significantly different underlying partitioned input space This explains the better in-domain performance of STRAINER compared to Meta-learned 5K , as the shallower layers after pre-training provide a better input space subdivision to the deeper layers to further subdivide.</figcaption>
</figure>
<p><br>
<br>
<br></p>
<p><strong>For more details, please refer <a href="https://arxiv.org/abs/2409.09566">full paper</a>!</strong></p>
<p><br></p>
<p><strong>Citation</strong></p>
<div class="highlight"><pre><span></span><code><span class="w"> </span><span class="err">@</span><span class="nx">misc</span><span class="p">{</span><span class="nx">vyas2024learningtransferablefeaturesimplicit</span><span class="p">,</span>
<span class="w"> </span><span class="nx">title</span><span class="p">={</span><span class="nx">Learning</span><span class="w"> </span><span class="nx">Transferable</span><span class="w"> </span><span class="nx">Features</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="nx">Implicit</span><span class="w"> </span><span class="nx">Neural</span><span class="w"> </span><span class="nx">Representations</span><span class="p">},</span>
<span class="w"> </span><span class="nx">author</span><span class="p">={</span><span class="nx">Kushal</span><span class="w"> </span><span class="nx">Vyas</span><span class="w"> </span><span class="k">and</span><span class="w"> </span><span class="nx">Ahmed</span><span class="w"> </span><span class="nx">Imtiaz</span><span class="w"> </span><span class="nx">Humayun</span><span class="w"> </span><span class="k">and</span><span class="w"> </span><span class="nx">Aniket</span><span class="w"> </span><span class="nx">Dashpute</span><span class="w"> </span><span class="k">and</span><span class="w"> </span><span class="nx">Richard</span><span class="w"> </span><span class="nx">G</span><span class="p">.</span><span class="w"> </span><span class="nx">Baraniuk</span><span class="w"> </span><span class="k">and</span><span class="w"> </span><span class="nx">Ashok</span><span class="w"> </span><span class="nx">Veeraraghavan</span><span class="w"> </span><span class="k">and</span><span class="w"> </span><span class="nx">Guha</span><span class="w"> </span><span class="nx">Balakrishnan</span><span class="p">},</span>
<span class="w"> </span><span class="nx">year</span><span class="p">={</span><span class="mi">2024</span><span class="p">},</span>
<span class="w"> </span><span class="nx">eprint</span><span class="p">={</span><span class="m m-Double">2409.09566</span><span class="p">},</span>
<span class="w"> </span><span class="nx">archivePrefix</span><span class="p">={</span><span class="nx">arXiv</span><span class="p">},</span>
<span class="w"> </span><span class="nx">primaryClass</span><span class="p">={</span><span class="nx">cs</span><span class="p">.</span><span class="nx">CV</span><span class="p">},</span>
<span class="w"> </span><span class="nx">url</span><span class="p">={</span><span class="nx">https</span><span class="p">:</span><span class="c1">//arxiv.org/abs/2409.09566},</span>
<span class="w"> </span><span class="p">}</span>
</code></pre></div>
</div>
</article>
</div>
</div>
</div>
<footer class="bg-light text-center text-lg-start">
<div class="text-center p-3" style="background-color: rgba(248,249,250);">
Created this website using Python, Pelican, Markdown, Jinja, Bootstrap, JQuery, FontAwesome, MDBootstrap
</div>
</footer>
<!-- <footer style="background-color: #f8f9fa; text-align: center; margin-top: auto; position: sticky; top: 100vh;">
<div style="padding: 1rem; background-color: rgba(248,249,250);">
Created this website using Python, Pelican, Markdown, Jinja, Bootstrap, JQuery, FontAwesome, MDBootstrap
</div>
</footer> -->
</body>
</html>