@inproceedings{mukhopadhyay2026scale,
  title     = {{Scale Space Diffusion}},
  author    = {Mukhopadhyay, Soumik and Udhayanan, Prateksha and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2026},
}
UPLiFT: Efficient Pixel-Dense Feature Upsampling with Local Attenders
@inproceedings{walmer2026uplift,
  title     = {{UPLiFT: Efficient Pixel-Dense Feature Upsampling with Local Attenders}},
  author    = {Walmer, Matthew and Suri, Saksham and Aggarwal, Anirud and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2026},
  url       = {https://arxiv.org/abs/2601.17950},
}
Efficient and High-Fidelity Omni Modality Retrieval
@inproceedings{huynh2026efficient,
  title     = {{Efficient and High-Fidelity Omni Modality Retrieval}},
  author    = {Huynh, Chuong and Luong, Manh and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2026},
}
Evolutionary Caching to Accelerate Your Off-the-Shelf Diffusion Model
@inproceedings{ren2026nervdiffusion,
  title     = {{NeRV-Diffusion: Diffuse Implicit Neural Representation for Video Synthesis}},
  author    = {Ren, Yixuan and Wang, Hanyu and Chen, Hao and He, Bo and Shrivastava, Abhinav},
  booktitle = {ICLR},
  year      = {2026},
  url       = {https://arxiv.org/abs/2509.24353},
}
Towards Understanding Best Practices for Quantization of Vision-Language Models
Gautom Das, Vincent La, Ethan Lau, Abhinav Shrivastava, Matthew Gwilliam
% arXiv preprint: the eprint fields carry the identifier shown in the URL.
@article{das2026towards,
  title         = {{Towards Understanding Best Practices for Quantization of Vision-Language Models}},
  author        = {Das, Gautom and La, Vincent and Lau, Ethan and Shrivastava, Abhinav and Gwilliam, Matthew},
  journal       = {arXiv},
  year          = {2026},
  eprint        = {2601.15287},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2601.15287},
}
VeriGraph: Scene Graphs for Execution Verifiable Robot Planning
@inproceedings{ekpo2026verigraph,
  title     = {{VeriGraph: Scene Graphs for Execution Verifiable Robot Planning}},
  author    = {Ekpo, Daniel and Swaminathan, Archana and Levy, Mara and Suri, Saksham and Huynh, Chuong and Shrivastava, Abhinav},
  booktitle = {ICRA},
  year      = {2026},
  url       = {https://arxiv.org/abs/2411.10446},
}
Growing Visual Generative Capacity for Pre-Trained MLLMs
@inproceedings{gwilliam2026how,
  title     = {{How to Design and Train Your Implicit Neural Representation for Video Compression}},
  author    = {Gwilliam, Matthew and Zhang, Roy and Padmanabhan, Namitha and Du, Hongyang and Shrivastava, Abhinav},
  booktitle = {WACV},
  year      = {2026},
  url       = {https://arxiv.org/abs/2506.24127},
}
2025
Characterizing Motion Encoding in Video Diffusion Timesteps
% arXiv preprint: the eprint fields carry the identifier shown in the URL.
@article{baherwani2025characterizing,
  title         = {{Characterizing Motion Encoding in Video Diffusion Timesteps}},
  author        = {Baherwani, Vatsal and Ren, Yixuan and Shrivastava, Abhinav},
  journal       = {arXiv},
  year          = {2025},
  eprint        = {2512.22175},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2512.22175},
}
Imagine, Verify, Execute: Memory-guided Agentic Exploration with Vision-Language Models
@inproceedings{lee2025imagine,
  title     = {{Imagine, Verify, Execute: Memory-guided Agentic Exploration with Vision-Language Models}},
  author    = {Lee, Seungjae and Ekpo, Daniel and Liu, Haowen and Huang, Furong and Shrivastava, Abhinav and Huang, Jia-Bin},
  booktitle = {CoRL},
  year      = {2025},
  url       = {https://arxiv.org/abs/2505.07815},
}
Towards Multimodal Understanding via Stable Diffusion as a Task-Aware Feature Extractor
% arXiv preprint: the eprint fields carry the identifier shown in the URL.
@article{agarwal2025towards,
  title         = {{Towards Multimodal Understanding via Stable Diffusion as a Task-Aware Feature Extractor}},
  author        = {Agarwal, Vatsal and Gwilliam, Matthew and Kohavi, Gefen and Verma, Eshan and Ulbricht, Daniel and Shrivastava, Abhinav},
  journal       = {arXiv},
  year          = {2025},
  eprint        = {2507.07106},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2507.07106},
}
Trokens: Semantic-Aware Relational Trajectory Tokens for Few-Shot Action Recognition
@inproceedings{kumar2025trokens,
  title     = {{Trokens: Semantic-Aware Relational Trajectory Tokens for Few-Shot Action Recognition}},
  author    = {Kumar, Pulkit and Huang, Shuaiyi and Walmer, Matthew and Rambhatla, Sai Saketh and Shrivastava, Abhinav},
  booktitle = {ICCV},
  year      = {2025},
  url       = {https://arxiv.org/abs/2508.03695},
}
@inproceedings{agarwal2025maps,
  title     = {{MAPS: Memory Augmented Panoptic Segmentation}},
  author    = {Agarwal, Vatsal and Suri, Saksham and Ehrlich, Max and Shrivastava, Abhinav},
  booktitle = {MemVis Workshop, ICCV},
  year      = {2025},
}
Multi-entity Video Transformers for Fine-Grained Video Representation Learning
Matthew Walmer, Rose Kanjirathinkal, Kai-Sheng Tai, Keyur Muzumdar, Taipeng Tian, Abhinav Shrivastava
@inproceedings{walmer2025multientity,
  title     = {{Multi-entity Video Transformers for Fine-Grained Video Representation Learning}},
  author    = {Walmer, Matthew and Kanjirathinkal, Rose and Tai, Kai-Sheng and Muzumdar, Keyur and Tian, Taipeng and Shrivastava, Abhinav},
  booktitle = {FGVC Workshop, CVPR},
  year      = {2025},
  url       = {https://arxiv.org/abs/2311.10873},
}
CoLLM: A Large Language Model for Composed Image Retrieval
@inproceedings{huynh2025collm,
  title     = {{CoLLM: A Large Language Model for Composed Image Retrieval}},
  author    = {Huynh, Chuong and Yang, Jinyu and Tawari, Ashish and Shah, Mubarak and Tran, Son and Hamid, Raffay and Chilimbi, Trishul and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2025},
  url       = {https://arxiv.org/abs/2503.19910},
}
LARP: Tokenizing Videos with a Learned Autoregressive Generative Prior
@inproceedings{wang2025larp,
  title     = {{LARP: Tokenizing Videos with a Learned Autoregressive Generative Prior}},
  author    = {Wang, Hanyu and Suri, Saksham and Ren, Yixuan and Chen, Hao and Shrivastava, Abhinav},
  booktitle = {ICLR},
  year      = {2025},
  url       = {https://arxiv.org/abs/2410.21264},
}
P3-PO: Prescriptive Point Priors for Visuo-Spatial Generalization of Robot Policies
@inproceedings{levy2025ppo,
  title     = {{P3-PO: Prescriptive Point Priors for Visuo-Spatial Generalization of Robot Policies}},
  author    = {Levy, Mara and Haldar, Siddhant and Pinto, Lerrel and Shrivastava, Abhinav},
  booktitle = {ICRA},
  year      = {2025},
  url       = {https://arxiv.org/abs/2412.06784},
}
TREND: Tri-teaching for Robust Preference-based Reinforcement Learning with Demonstrations
@inproceedings{huang2025trend,
  title     = {{TREND: Tri-teaching for Robust Preference-based Reinforcement Learning with Demonstrations}},
  author    = {Huang, Shuaiyi and Levy, Mara and Anubhav and Ekpo, Daniel and Zheng, Ruijie and Shrivastava, Abhinav},
  booktitle = {ICRA},
  year      = {2025},
  url       = {https://arxiv.org/abs/2505.06079},
}
A Video is Worth 10,000 Words: Training and Benchmarking with Diverse Captions for Better Long Video Retrieval
Matthew Gwilliam, Michael Cogswell, Meng Ye, Karan Sikka, Abhinav Shrivastava, Ajay Divakaran
@inproceedings{gwilliam2025a,
  title     = {{A Video is Worth 10,000 Words: Training and Benchmarking with Diverse Captions for Better Long Video Retrieval}},
  author    = {Gwilliam, Matthew and Cogswell, Michael and Ye, Meng and Sikka, Karan and Shrivastava, Abhinav and Divakaran, Ajay},
  booktitle = {WACV},
  year      = {2025},
  url       = {https://arxiv.org/abs/2312.00115},
}
Unified Framework for Open-World Compositional Zero-shot Learning
% arXiv preprint: the eprint fields carry the identifier shown in the URL.
@article{shrivastava2024efficient,
  title         = {{Efficient Continuous Video Flow Model for Video Prediction}},
  author        = {Shrivastava, Gaurav and Shrivastava, Abhinav},
  journal       = {arXiv},
  year          = {2024},
  eprint        = {2412.05633},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2412.05633},
}
AutoHallusion: Automatic Generation of Hallucination Benchmarks for Vision-Language Models
@inproceedings{wu2024autohallusion,
  title     = {{AutoHallusion: Automatic Generation of Hallucination Benchmarks for Vision-Language Models}},
  author    = {Wu, Xiyang and Guan, Tianrui and Li, Dianqi and Huang, Shuaiyi and Liu, Xiaoyu and Wang, Xijun and Xian, Ruiqi and Shrivastava, Abhinav and Huang, Furong and Boyd-Graber, Jordan and Zhou, Tianyi and Manocha, Dinesh},
  booktitle = {EMNLP Findings},
  year      = {2024},
  url       = {https://arxiv.org/abs/2406.10900},
}
QUEEN: QUantized Efficient ENcoding of Dynamic Gaussians for Streaming Free-viewpoint Videos
Sharath Girish, Tianye Li, Amrita Mazumdar, Abhinav Shrivastava, David Luebke, Shalini De Mello
% The last author's family name is the two-word "De Mello"; it is written in
% "Last, First" form so that "De" is not parsed as a given name.
@inproceedings{girish2024queen,
  title     = {{QUEEN: QUantized Efficient ENcoding of Dynamic Gaussians for Streaming Free-viewpoint Videos}},
  author    = {Girish, Sharath and Li, Tianye and Mazumdar, Amrita and Shrivastava, Abhinav and Luebke, David and De Mello, Shalini},
  booktitle = {NeurIPS},
  year      = {2024},
  url       = {https://arxiv.org/abs/2412.04469},
}
Coarse to Fine Human Mesh Recovery with Transformers
@inproceedings{agarwal2024coarse,
  title     = {{Coarse to Fine Human Mesh Recovery with Transformers}},
  author    = {Agarwal, Vatsal and Levy, Mara and Ehrlich, Max and Tang, Yucheng and Zhang, Nanxuan and Shrivastava, Abhinav},
  booktitle = {T-CAP Workshop, ECCV},
  year      = {2024},
  url       = {https://link.springer.com/chapter/10.1007/978-3-031-91575-8_18},
}
Customize-A-Video: One-Shot Motion Customization of Text-to-Video Diffusion Models
@inproceedings{ren2024customizeavideo,
  title     = {{Customize-A-Video: One-Shot Motion Customization of Text-to-Video Diffusion Models}},
  author    = {Ren, Yixuan and Zhou, Yang and Yang, Jimei and Shi, Jing and Liu, Difan and Liu, Feng and Kwon, Mingi and Shrivastava, Abhinav},
  booktitle = {ECCV},
  year      = {2024},
  url       = {https://arxiv.org/abs/2402.14780},
}
Do text-free diffusion models learn discriminative visual representations?
@inproceedings{mukhopadhyay2024do,
  title     = {{Do text-free diffusion models learn discriminative visual representations?}},
  author    = {Mukhopadhyay, Soumik and Gwilliam, Matthew and Yamaguchi, Yosuke and Agarwal, Vatsal and Padmanabhan, Namitha and Swaminathan, Archana and Zhou, Tianyi and Ohya, Jun and Shrivastava, Abhinav},
  booktitle = {ECCV},
  year      = {2024},
  url       = {https://arxiv.org/abs/2311.17921},
}
EAGLES: Efficient Accelerated 3D Gaussians with Lightweight EncodingS
@inproceedings{girish2024eagles,
  title     = {{EAGLES: Efficient Accelerated 3D Gaussians with Lightweight EncodingS}},
  author    = {Girish, Sharath and Gupta, Kamal and Shrivastava, Abhinav},
  booktitle = {ECCV},
  year      = {2024},
  url       = {https://arxiv.org/abs/2312.04564},
}
Fast Encoding and Decoding for Implicit Video Representation
@inproceedings{chen2024fast,
  title     = {{Fast Encoding and Decoding for Implicit Video Representation}},
  author    = {Chen, Hao and Xie, Saining and Lim, Ser-Nam and Shrivastava, Abhinav},
  booktitle = {ECCV},
  year      = {2024},
  url       = {https://arxiv.org/abs/2409.19429},
}
Investigating Style Similarity in Diffusion Models
Gowthami Somepalli, Anubhav, Kamal Gupta, Shramay Palta, Micah Goldblum, Jonas Geiping, Abhinav Shrivastava, Tom Goldstein
@inproceedings{somepalli2024investigating,
  title     = {{Investigating Style Similarity in Diffusion Models}},
  author    = {Somepalli, Gowthami and Anubhav and Gupta, Kamal and Palta, Shramay and Goldblum, Micah and Geiping, Jonas and Shrivastava, Abhinav and Goldstein, Tom},
  booktitle = {ECCV},
  year      = {2024},
  url       = {https://arxiv.org/abs/2404.01292},
}
Latent-INR: A Flexible Framework for Implicit Representations of Videos with Discriminative Semantics
@inproceedings{maiya2024latentinr,
  title     = {{Latent-INR: A Flexible Framework for Implicit Representations of Videos with Discriminative Semantics}},
  author    = {Maiya, Shishira R and Anubhav and Gwilliam, Matthew and Ehrlich, Max and Shrivastava, Abhinav},
  booktitle = {ECCV},
  year      = {2024},
  url       = {https://arxiv.org/abs/2408.02672},
}
LEIA: Latent View-invariant Embeddings for Implicit 3D Articulation
@inproceedings{swaminathan2024leia,
  title     = {{LEIA: Latent View-invariant Embeddings for Implicit 3D Articulation}},
  author    = {Swaminathan, Archana and Anubhav and Gupta, Kamal and Maiya, Shishira R and Agarwal, Vatsal and Shrivastava, Abhinav},
  booktitle = {ECCV},
  year      = {2024},
  url       = {https://arxiv.org/abs/2409.06703},
}
LiFT: A Surprisingly Simple Lightweight Feature Transform for Dense ViT Descriptors
@inproceedings{suri2024lift,
  title     = {{LiFT: A Surprisingly Simple Lightweight Feature Transform for Dense ViT Descriptors}},
  author    = {Suri, Saksham and Walmer, Matthew and Gupta, Kamal and Shrivastava, Abhinav},
  booktitle = {ECCV},
  year      = {2024},
  url       = {https://arxiv.org/abs/2403.14625},
}
Quantifying NBA Shot Quality: A Deep Network Approach
@inproceedings{kambhamettu2024quantifying,
  title     = {{Quantifying NBA Shot Quality: A Deep Network Approach}},
  author    = {Kambhamettu, Archit and Shrivastava, Abhinav and Gwilliam, Matthew},
  booktitle = {ACM MMSports},
  year      = {2024},
}
Trajectory-aligned Space-time Tokens for Few-shot Action Recognition
@inproceedings{kumar2024trajectoryaligned,
  title     = {{Trajectory-aligned Space-time Tokens for Few-shot Action Recognition}},
  author    = {Kumar, Pulkit and Padmanabhan, Namitha and Luo, Luke and Rambhatla, Sai Saketh and Shrivastava, Abhinav},
  booktitle = {ECCV},
  year      = {2024},
  url       = {https://arxiv.org/abs/2407.18249},
}
ARDuP: Active Region Video Diffusion for Universal Policies
@inproceedings{huang2024ardup,
  title     = {{ARDuP: Active Region Video Diffusion for Universal Policies}},
  author    = {Huang, Shuaiyi and Levy, Mara and Jiang, Zhenyu and Anandkumar, Anima and Zhu, Yuke and Fan, Linxi and Huang, De-An and Shrivastava, Abhinav},
  booktitle = {IROS},
  year      = {2024},
  url       = {https://arxiv.org/abs/2406.13301},
}
Challenges, Evaluation and Opportunities for Open-World Learning
@article{kejriwal2024challenges,
  title   = {{Challenges, Evaluation and Opportunities for Open-World Learning}},
  author  = {Kejriwal, Mayank and Kildebeck, Eric and Steininger, Robert and Shrivastava, Abhinav},
  journal = {Nature Machine Intelligence},
  year    = {2024},
  url     = {https://www.nature.com/articles/s42256-024-00852-4},
}
Agglomerative Clustering of Atomic Actions for Unsupervised Action Segmentation
Pulkit Kumar, Austin Myers, Anurag Arnab, David A. Ross, Abhinav Shrivastava, Sudheendra Vijayanarasimhan
LPVL Workshop, CVPR 2024
BibTeX
@inproceedings{kumar2024agglomerative,
  title     = {{Agglomerative Clustering of Atomic Actions for Unsupervised Action Segmentation}},
  author    = {Kumar, Pulkit and Myers, Austin and Arnab, Anurag and Ross, David A. and Shrivastava, Abhinav and Vijayanarasimhan, Sudheendra},
  booktitle = {LPVL Workshop, CVPR},
  year      = {2024},
}
@inproceedings{huang2024uvis,
  title     = {{UVIS: Unsupervised Video Instance Segmentation}},
  author    = {Huang, Shuaiyi and Suri, Saksham and Gupta, Kamal and Rambhatla, Sai Saketh and Lim, Ser-Nam and Shrivastava, Abhinav},
  booktitle = {L3D Workshop, CVPR},
  year      = {2024},
  url       = {https://arxiv.org/abs/2406.06908},
}
@inproceedings{huang2024what,
  title     = {{What is Point Supervision Worth in Video Instance Segmentation?}},
  author    = {Huang, Shuaiyi and Huang, De-An and Yu, Zhiding and Lan, Shiyi and Radhakrishnan, Subhashree and Alvarez, Jose M. and Shrivastava, Abhinav and Anandkumar, Anima},
  booktitle = {L3D Workshop, CVPR},
  year      = {2024},
  url       = {https://arxiv.org/abs/2404.01990},
}
Beyond Seen Primitive Concepts and Attribute-Object Compositional Learning
@inproceedings{pham2024composing,
  title     = {{Composing Object Relations and Attributes for Image-Text Matching}},
  author    = {Pham, Khoi and Huynh, Chuong and Lim, Ser-Nam and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2024},
  url       = {https://arxiv.org/abs/2406.11820},
}
Explaining the Implicit Neural Canvas (XINC): Connecting Pixels to Neurons by Tracing their Contributions
@inproceedings{padmanabhan2024explaining,
  title     = {{Explaining the Implicit Neural Canvas (XINC): Connecting Pixels to Neurons by Tracing their Contributions}},
  author    = {Padmanabhan, Namitha and Gwilliam, Matthew and Kumar, Pulkit and Maiya, Shishira R and Ehrlich, Max and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2024},
  url       = {https://arxiv.org/abs/2401.10217},
}
MA-LMM: Memory-Augmented Large Multimodal Model for Long-Term Video Understanding
Bo He, Hengduo Li, Young Kyun Jang, Menglin Jia, Xuefei Cao, Anshul Shah, Ser-Nam Lim, Abhinav Shrivastava
@inproceedings{he2024malmm,
  title     = {{MA-LMM: Memory-Augmented Large Multimodal Model for Long-Term Video Understanding}},
  author    = {He, Bo and Li, Hengduo and Jang, Young Kyun and Jia, Menglin and Cao, Xuefei and Shah, Anshul and Lim, Ser-Nam and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2024},
  url       = {https://arxiv.org/abs/2404.05726},
}
MaGGIe: Masked Guided Gradual Human Instance Matting
Chuong Huynh, Seoung Wug Oh, Abhinav Shrivastava, Joon-Young Lee
@inproceedings{levy2024wayex,
  title     = {{WAYEX: Waypoint Exploration using a Single Demonstration}},
  author    = {Levy, Mara and Saini, Nirat and Shrivastava, Abhinav},
  booktitle = {ICRA},
  year      = {2024},
  url       = {https://arxiv.org/abs/2407.15849},
}
Content-Aware Image Color Editing with Auxiliary Color Restoration Tasks
@inproceedings{ren2024contentaware,
  title     = {{Content-Aware Image Color Editing with Auxiliary Color Restoration Tasks}},
  author    = {Ren, Yixuan and Shi, Jing and Zhang, Zhifei and Fan, Yifei and Lin, Zhe and He, Bo and Shrivastava, Abhinav},
  booktitle = {WACV},
  year      = {2024},
  url       = {https://openaccess.thecvf.com/content/WACV2024/papers/Ren_Content-Aware_Image_Color_Editing_With_Auxiliary_Color_Restoration_Tasks_WACV_2024_paper.pdf},
}
Diff2Lip: Audio Conditioned Diffusion Models for Lip-Synchronization
@inproceedings{mukhopadhyay2024difflip,
  title     = {{Diff2Lip: Audio Conditioned Diffusion Models for Lip-Synchronization}},
  author    = {Mukhopadhyay, Soumik and Suri, Saksham and Gadde, Ravi Teja and Shrivastava, Abhinav},
  booktitle = {WACV},
  year      = {2024},
  url       = {https://arxiv.org/abs/2308.09716},
}
GRIT: GAN Residuals for Paired Image-to-Image Translation
@inproceedings{suri2024grit,
  title     = {{GRIT: GAN Residuals for Paired Image-to-Image Translation}},
  author    = {Suri, Saksham and Meshry, Moustafa and Davis, Larry and Shrivastava, Abhinav},
  booktitle = {WACV},
  year      = {2024},
  url       = {https://www.cs.umd.edu/~sakshams/grit/resources/GRIT_main_paper.pdf},
}
Leveraging Bitstream Metadata for Fast, Accurate, Generalized Compressed Video Quality Enhancement
@inproceedings{ehrlich2024leveraging,
  title     = {{Leveraging Bitstream Metadata for Fast, Accurate, Generalized Compressed Video Quality Enhancement}},
  author    = {Ehrlich, Max and Barker, Jon and Padmanabhan, Namitha and Davis, Larry and Tao, Andrew and Catanzaro, Bryan and Shrivastava, Abhinav},
  booktitle = {WACV},
  year      = {2024},
  url       = {https://arxiv.org/abs/2202.00011},
}
Multimodality-guided Image Style Transfer using Cross-modal GAN Inversion
Hanyu Wang, Pengxiang Wu, Kevin Dela Rosa, Chen Wang, Abhinav Shrivastava
% The third author's family name is the two-word "Dela Rosa"; it is written in
% "Last, First" form so that "Dela" is not parsed as a given name.
@inproceedings{wang2024multimodalityguided,
  title     = {{Multimodality-guided Image Style Transfer using Cross-modal GAN Inversion}},
  author    = {Wang, Hanyu and Wu, Pengxiang and Dela Rosa, Kevin and Wang, Chen and Shrivastava, Abhinav},
  booktitle = {WACV},
  year      = {2024},
  url       = {https://arxiv.org/abs/2312.01671},
}
2023
Video Dynamics Prior: An Internal Learning Approach for Robust Video Enhancements
@inproceedings{shrivastava2023video,
  title     = {{Video Dynamics Prior: An Internal Learning Approach for Robust Video Enhancements}},
  author    = {Shrivastava, Gaurav and Lim, Ser-Nam and Shrivastava, Abhinav},
  booktitle = {NeurIPS},
  year      = {2023},
  url       = {https://arxiv.org/abs/2312.07835},
}
@inproceedings{maiya2023a,
  title     = {{A Frequency Perspective of Adversarial Robustness}},
  author    = {Maiya, Shishira R and Ehrlich, Max and Agarwal, Vatsal and Lim, Ser-Nam and Goldstein, Tom and Shrivastava, Abhinav},
  booktitle = {BMVC},
  year      = {2023},
  url       = {https://arxiv.org/abs/2111.00861},
}
@inproceedings{gupta2023asic,
  title     = {{ASIC: Aligning Sparse in-the-wild Image Collections}},
  author    = {Gupta, Kamal and Jampani, Varun and Esteves, Carlos and Shrivastava, Abhinav and Makadia, Ameesh and Snavely, Noah and Kar, Abhishek},
  booktitle = {ICCV},
  year      = {2023},
  url       = {https://arxiv.org/abs/2303.16201},
}
BT2: Backward-compatible Training with Basis Transformation
@inproceedings{zhou2023bt,
  title     = {{BT2: Backward-compatible Training with Basis Transformation}},
  author    = {Zhou, Yifei and Li, Zilu and Shrivastava, Abhinav and Zhao, Hengshuang and Torralba, Antonio and Tian, Taipeng and Lim, Ser-Nam},
  booktitle = {ICCV},
  year      = {2023},
  url       = {https://arxiv.org/abs/2211.03989},
}
Chop & Learn: Recognizing and Generating Object-State Compositions
% The ampersand in the title is escaped because a bare one is a LaTeX
% alignment character and raises an error when the bibliography is typeset.
@inproceedings{saini2023chop,
  title     = {{Chop \& Learn: Recognizing and Generating Object-State Compositions}},
  author    = {Saini, Nirat and Wang, Hanyu and Swaminathan, Archana and Jayasundara, Vinoj and He, Bo and Gupta, Kamal and Shrivastava, Abhinav},
  booktitle = {ICCV},
  year      = {2023},
  url       = {https://arxiv.org/abs/2309.14339},
}
MOST: Multiple Object Localization with Self-Supervised Transformers for Object Discovery
@inproceedings{rambhatla2023most,
  title     = {{MOST: Multiple Object Localization with Self-Supervised Transformers for Object Discovery}},
  author    = {Rambhatla, Sai Saketh and Misra, Ishan and Chellappa, Rama and Shrivastava, Abhinav},
  booktitle = {ICCV},
  year      = {2023},
  url       = {https://arxiv.org/abs/2304.05387},
}
SHACIRA: Scalable HAsh-grid Compression for Implicit Neural Representations
@inproceedings{suri2023sparsedet,
  title     = {{SparseDet: Improving Sparsely Annotated Object Detection with Pseudo-positive Mining}},
  author    = {Suri, Saksham and Rambhatla, Sai Saketh and Chellappa, Rama and Shrivastava, Abhinav},
  booktitle = {ICCV},
  year      = {2023},
  url       = {https://arxiv.org/abs/2201.04620},
}
Novelty in Image Classification
Mohsen Jafarzadeh, Akshay Raj Dhamija, Steve Cruz, Chunchun Li, Abhinav Shrivastava, Terrance E. Boult
Springer Book Chapter 2023
BibTeX
% NOTE(review): the journal field says this is a book chapter; an incollection
% entry with booktitle and publisher would likely fit better -- confirm the
% book details before changing the entry type.
@article{jafarzadeh2023novelty,
  title   = {{Novelty in Image Classification}},
  author  = {Jafarzadeh, Mohsen and Dhamija, Akshay Raj and Cruz, Steve and Li, Chunchun and Shrivastava, Abhinav and Boult, Terrance E.},
  journal = {Springer Book Chapter},
  year    = {2023},
}
Align and Attend: Multimodal Summarization with Dual Contrastive Losses
Bo He, Jun Wang, Jielin Qiu, Trung Bui, Abhinav Shrivastava, Zhaowen Wang
@inproceedings{he2023align,
  title     = {{Align and Attend: Multimodal Summarization with Dual Contrastive Losses}},
  author    = {He, Bo and Wang, Jun and Qiu, Jielin and Bui, Trung and Shrivastava, Abhinav and Wang, Zhaowen},
  booktitle = {CVPR},
  year      = {2023},
  url       = {https://arxiv.org/abs/2303.07284},
}
FlexNeRF: Photorealistic Free-viewpoint Rendering of Moving Humans from Sparse Views
Vinoj Jayasundara, Amit Agrawal, Nicolas Heron, Abhinav Shrivastava, Larry Davis
@inproceedings{jayasundara2023flexnerf,
  title     = {{FlexNeRF: Photorealistic Free-viewpoint Rendering of Moving Humans from Sparse Views}},
  author    = {Jayasundara, Vinoj and Agrawal, Amit and Heron, Nicolas and Shrivastava, Abhinav and Davis, Larry},
  booktitle = {CVPR},
  year      = {2023},
  url       = {https://arxiv.org/abs/2303.14368},
}
@inproceedings{chen2023hnerv,
  title     = {{HNeRV: A Hybrid Neural Representation for Videos}},
  author    = {Chen, Hao and Gwilliam, Matthew and Lim, Ser-Nam and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2023},
  url       = {https://arxiv.org/abs/2304.02633},
}
NIRVANA: Neural Implicit Representations of Videos with Adaptive Networks and Autoregressive Patch-wise Modeling
@inproceedings{maiya2023nirvana,
  title     = {{NIRVANA: Neural Implicit Representations of Videos with Adaptive Networks and Autoregressive Patch-wise Modeling}},
  author    = {Maiya, Shishira R and Girish, Sharath and Ehrlich, Max and Wang, Hanyu and Lee, Kwot Sin and Poirson, Patrick and Wu, Pengxiang and Wang, Chen and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2023},
  url       = {https://arxiv.org/abs/2212.14593},
}
SimpSON: Simplifying Photo Cleanup With Single-Click Distracting Object Segmentation Network
@inproceedings{huynh2023simpson,
  title     = {{SimpSON: Simplifying Photo Cleanup With Single-Click Distracting Object Segmentation Network}},
  author    = {Huynh, Chuong and Zhou, Yuqian and Lin, Zhe and Barnes, Connelly and Shechtman, Eli and Amirghodsi, Sohrab and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2023},
  url       = {https://openaccess.thecvf.com/content/CVPR2023/html/Huynh_SimpSON_Simplifying_Photo_Cleanup_With_Single-Click_Distracting_Object_Segmentation_Network_CVPR_2023_paper.html},
}
Teaching Matters: Investigating the Role of Supervision in Vision Transformers
@inproceedings{walmer2023teaching,
  title     = {{Teaching Matters: Investigating the Role of Supervision in Vision Transformers}},
  author    = {Walmer, Matthew and Suri, Saksham and Gupta, Kamal and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2023},
  url       = {https://arxiv.org/abs/2212.03862},
}
Towards Scalable Neural Representation for Diverse Videos
@inproceedings{he2023towards,
  title     = {{Towards Scalable Neural Representation for Diverse Videos}},
  author    = {He, Bo and Yang, Xitong and Wang, Hanyu and Wu, Zuxuan and Chen, Hao and Huang, Shuaiyi and Ren, Yixuan and Lim, Ser-Nam and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2023},
  url       = {https://arxiv.org/abs/2303.14124},
}
COVID-VTS: Fact Extraction and Verification on Short Video Platforms
@inproceedings{liu2023covidvts,
  title     = {{COVID-VTS: Fact Extraction and Verification on Short Video Platforms}},
  author    = {Liu, Fuxiao and Yacoob, Yaser and Shrivastava, Abhinav},
  booktitle = {EACL},
  year      = {2023},
}
LilNetX: Lightweight Networks with EXtreme Model Compression and Structured Sparsification
@inproceedings{girish2023lilnetx,
  title     = {{LilNetX: Lightweight Networks with EXtreme Model Compression and Structured Sparsification}},
  author    = {Girish, Sharath and Gupta, Kamal and Singh, Saurabh and Shrivastava, Abhinav},
  booktitle = {ICLR},
  year      = {2023},
  url       = {https://openreview.net/pdf?id=NVZvalzCLg},
}
2022
Burn After Reading: Online Adaptation for Cross-domain Streaming Data
@inproceedings{yang2022burn,
  title     = {{Burn After Reading: Online Adaptation for Cross-domain Streaming Data}},
  author    = {Yang, Luyu and Gao, Mingfei and Chen, Zeyuan and Xu, Ran and Shrivastava, Abhinav and Ramaiah, Chetan},
  booktitle = {ECCV},
  year      = {2022},
  url       = {https://arxiv.org/abs/2112.04345},
}
Improving Closed and Open Set Attribute Prediction using Transformers
Khoi Pham, Kushal Kafle, Zhe Lin, Zhihong Ding, Scott Cohen, Quan Hung Tran, Abhinav Shrivastava
@inproceedings{pham2022improving,
  title     = {{Improving Closed and Open Set Attribute Prediction using Transformers}},
  author    = {Pham, Khoi and Kafle, Kushal and Lin, Zhe and Ding, Zhihong and Cohen, Scott and Tran, Quan Hung and Shrivastava, Abhinav},
  booktitle = {ECCV},
  year      = {2022},
  url       = {https://www.ecva.net/papers/eccv_2022/papers_ECCV/papers/136850199.pdf},
}
Learning Semantic Correspondence with Sparse Annotations
@inproceedings{huang2022learning,
  title     = {{Learning Semantic Correspondence with Sparse Annotations}},
  author    = {Huang, Shuaiyi and Yang, Luyu and He, Bo and Zhang, Songyang and He, Xuming and Shrivastava, Abhinav},
  booktitle = {ECCV},
  year      = {2022},
  url       = {https://arxiv.org/abs/2208.06974},
}
@inproceedings{wang2022neural,
  title     = {{Neural Space-Filling Curves}},
  author    = {Wang, Hanyu and Gupta, Kamal and Davis, Larry and Shrivastava, Abhinav},
  booktitle = {ECCV},
  year      = {2022},
  url       = {https://arxiv.org/abs/2204.08453},
}
ASM-Loc: Action-aware Segment Modeling for Weakly-Supervised Temporal Action Localization
Bo He, Xitong Yang, Le Kang, Zhiyu Cheng, Xin Zhou, Abhinav Shrivastava
@inproceedings{he2022asmloc,
  title     = {{ASM-Loc: Action-aware Segment Modeling for Weakly-Supervised Temporal Action Localization}},
  author    = {He, Bo and Yang, Xitong and Kang, Le and Cheng, Zhiyu and Zhou, Xin and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2022},
  url       = {https://arxiv.org/abs/2203.15187},
}
Beyond Supervised vs. Unsupervised: Representative Benchmarking and Analysis of Image Representation Learning
@inproceedings{gwilliam2022beyond,
  title     = {{Beyond Supervised vs. Unsupervised: Representative Benchmarking and Analysis of Image Representation Learning}},
  author    = {Gwilliam, Matthew and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2022},
  url       = {https://arxiv.org/abs/2206.08347},
}
Disentangling Visual Embeddings for Attributes and Objects
@inproceedings{saini2022disentangling,
  title     = {{Disentangling Visual Embeddings for Attributes and Objects}},
  author    = {Saini, Nirat and Pham, Khoi and Shrivastava, Abhinav},
  booktitle = {CVPR},
  year      = {2022},
  url       = {https://arxiv.org/abs/2205.08536},
}
Dual-Key Multimodal Backdoors for Visual Question Answering
Matthew Walmer, Karan Sikka, Indranil Sur, Abhinav Shrivastava, Susmit Jha
@inproceedings{walmer2022dualkey,
  title     = {{Dual-Key Multimodal Backdoors for Visual Question Answering}},
  author    = {Walmer, Matthew and Sikka, Karan and Sur, Indranil and Shrivastava, Abhinav and Jha, Susmit},
  booktitle = {CVPR},
  year      = {2022},
  url       = {https://arxiv.org/abs/2112.07668},
}
ObjectFormer for Image Manipulation Detection and Localization
@inproceedings{wang2022objectformer,
  title     = {{ObjectFormer for Image Manipulation Detection and Localization}},
  author    = {Wang, Junke and Wu, Zuxuan and Chen, Jingjing and Han, Xintong and Shrivastava, Abhinav and Lim, Ser-Nam and Jiang, Yu-Gang},
  booktitle = {CVPR},
  year      = {2022},
  url       = {https://arxiv.org/abs/2203.14681},
}
@inproceedings{shah2022pose,
  title     = {{Pose And Joint-Aware Action Recognition}},
  author    = {Shah, Anshul and Mishra, Shlok and Bansal, Ankan and Chen, Jun-Cheng and Chellappa, Rama and Shrivastava, Abhinav},
  booktitle = {WACV},
  year      = {2022},
  url       = {https://arxiv.org/abs/2010.08164},
}
Rethinking Pseudo Labels for Semi-Supervised Object Detection
@inproceedings{chen2021nerv,
  title     = {{NeRV: Neural Representations for Videos}},
  author    = {Chen, Hao and He, Bo and Wang, Hanyu and Ren, Yixuan and Lim, Ser-Nam and Shrivastava, Abhinav},
  booktitle = {NeurIPS},
  year      = {2021},
  url       = {https://arxiv.org/abs/2110.13903},
}
PatchGame: Learning to Signal Mid-level Patches in Referential Games
@inproceedings{gupta2021patchgame,
  title     = {{PatchGame: Learning to Signal Mid-level Patches in Referential Games}},
  author    = {Gupta, Kamal and Somepalli, Gowthami and Anubhav and Jayasundara, Vinoj and Zwicker, Matthias and Shrivastava, Abhinav},
  booktitle = {NeurIPS},
  year      = {2021},
  url       = {https://arxiv.org/abs/2111.01785},
}
Deep Co-Training with Task Decomposition for Semi-Supervised Domain Adaptation
@inproceedings{yang2021deep,
  title     = {{Deep Co-Training with Task Decomposition for Semi-Supervised Domain Adaptation}},
  author    = {Yang, Luyu and Wang, Yan and Gao, Mingfei and Shrivastava, Abhinav and Weinberger, Kilian and Chao, Wei-Lun and Lim, Ser-Nam},
  booktitle = {ICCV},
  year      = {2021},
  url       = {https://openaccess.thecvf.com/content/ICCV2021/papers/Yang_Deep_Co-Training_With_Task_Decomposition_for_Semi-Supervised_Domain_Adaptation_ICCV_2021_paper.pdf},
}
Deep Video Inpainting Detection
Peng Zhou, Ning Yu, Zuxuan Wu, Larry Davis, Abhinav Shrivastava, Ser-Nam Lim
@inproceedings{zhou2021deep,
  title     = {{Deep Video Inpainting Detection}},
  author    = {Zhou, Peng and Yu, Ning and Wu, Zuxuan and Davis, Larry and Shrivastava, Abhinav and Lim, Ser-Nam},
  booktitle = {BMVC},
  year      = {2021},
  url       = {https://arxiv.org/abs/2101.11080},
}
GTA: Global Temporal Attention for Video Action Understanding
@inproceedings{he2021gta,
  title     = {{GTA: Global Temporal Attention for Video Action Understanding}},
  author    = {He, Bo and Yang, Xitong and Wu, Zuxuan and Chen, Hao and Lim, Ser-Nam and Shrivastava, Abhinav},
  booktitle = {BMVC},
  year      = {2021},
  url       = {https://arxiv.org/abs/2012.08510},
}
HR-RCNN: Hierarchical Relational Reasoning for Object Detection
@inproceedings{gupta2021layout,
  title     = {{Layout Generation and Completion with Self-attention}},
  author    = {Gupta, Kamal and Achille, Alessandro and Lazarow, Justin and Davis, Larry and Mahadevan, Vijay and Shrivastava, Abhinav},
  booktitle = {ICCV},
  year      = {2021},
  url       = {https://arxiv.org/abs/2006.14615},
}
Learned Spatial Representations for Few-shot Talking-Head Synthesis
@inproceedings{meshry2021learned,
  title     = {{Learned Spatial Representations for Few-shot Talking-Head Synthesis}},
  author    = {Meshry, Moustafa and Suri, Saksham and Davis, Larry and Shrivastava, Abhinav},
  booktitle = {ICCV},
  year      = {2021},
  url       = {https://arxiv.org/abs/2104.14557},
}
The Pursuit of Knowledge: Discovering and Localizing Novel Categories using Dual Memory
@inproceedings{rambhatla2021the,
  title     = {{The Pursuit of Knowledge: Discovering and Localizing Novel Categories using Dual Memory}},
  author    = {Rambhatla, Sai Saketh and Chellappa, Rama and Shrivastava, Abhinav},
  booktitle = {ICCV},
  year      = {2021},
  url       = {https://arxiv.org/abs/2105.01652},
}
Towards Discovery and Attribution of Open-world GAN Generated Images
@inproceedings{girish2021towards,
  title     = {{Towards Discovery and Attribution of Open-world GAN Generated Images}},
  author    = {Girish, Sharath and Suri, Saksham and Rambhatla, Sai Saketh and Shrivastava, Abhinav},
  booktitle = {ICCV},
  year      = {2021},
  url       = {http://www.cs.umd.edu/~sakshams/project_page/resources/full_paper.pdf},
}
Leveraging Hand-Object Interactions in Assistive Egocentric Vision
@inproceedings{li2021d,
  title     = {{2D or not 2D? Adaptive 3D Convolution Selection for Efficient Video Recognition}},
  author    = {Li, Hengduo and Wu, Zuxuan and Shrivastava, Abhinav and Davis, Larry},
  booktitle = {CVPR},
  year      = {2021},
  url       = {https://arxiv.org/abs/2012.14950},
}
Hierarchical Video Prediction for Human Object Interaction
@inproceedings{bodla2021hierarchical,
title={{Hierarchical Video Prediction for Human Object Interaction}},
author={Bodla, Navaneeth and Shrivastava, Gaurav and Chellappa, Rama and Shrivastava, Abhinav},
booktitle={CVPR},
year={2021},
url={https://horn-video.github.io/camera_ready.pdf}
}
@inproceedings{taha2021knowledge,
title={{Knowledge Evolution in Neural Networks}},
author={Taha, Ahmed and Shrivastava, Abhinav and Davis, Larry},
booktitle={CVPR},
year={2021},
url={https://arxiv.org/abs/2103.05152}
}
Learning Graphs for Knowledge Transfer with Limited Labels
@inproceedings{ghosh2021learning,
title={{Learning Graphs for Knowledge Transfer with Limited Labels}},
author={Ghosh, Pallabi and Saini, Nirat and Davis, Larry and Shrivastava, Abhinav},
booktitle={CVPR},
year={2021},
url={https://openaccess.thecvf.com/content/CVPR2021/papers/Ghosh_Learning_Graphs_for_Knowledge_Transfer_With_Limited_Labels_CVPR_2021_paper.pdf}
}
Learning to Predict Visual Attributes in the Wild
Khoi Pham, Kushal Kafle, Zhe Lin, Zhihong Ding, Scott Cohen, Quan Hung Tran, Abhinav Shrivastava
@inproceedings{pham2021learning,
title={{Learning to Predict Visual Attributes in the Wild}},
author={Pham, Khoi and Kafle, Kushal and Lin, Zhe and Ding, Zhihong and Cohen, Scott and Tran, Quan Hung and Shrivastava, Abhinav},
booktitle={CVPR},
year={2021},
url={http://arxiv.org/abs/2106.09707}
}
Style-based Encoder Pre-training for Multi-modal Image Synthesis
@inproceedings{meshry2021stylebased,
title={{Style-based Encoder Pre-training for Multi-modal Image Synthesis}},
author={Meshry, Moustafa and Ren, Yixuan and Davis, Larry and Shrivastava, Abhinav},
booktitle={CVPR},
year={2021},
url={https://www.cs.umd.edu/~mmeshry/projects/step/resources/step_cvpr2021-full.pdf}
}
The Lottery Ticket Hypothesis for Object Recognition
@inproceedings{girish2021the,
title={{The Lottery Ticket Hypothesis for Object Recognition}},
author={Girish, Sharath and Maiya, Shishira R and Gupta, Kamal and Chen, Hao and Davis, Larry and Shrivastava, Abhinav},
booktitle={CVPR},
year={2021},
url={https://arxiv.org/abs/2012.04643}
}
Diverse Video Generation using a Gaussian Process Trigger
@inproceedings{shrivastava2021diverse,
title={{Diverse Video Generation using a Gaussian Process Trigger}},
author={Shrivastava, Gaurav and Shrivastava, Abhinav},
booktitle={ICLR},
year={2021},
url={https://openreview.net/forum?id=Qm7R_SdqTpT}
}
@inproceedings{levy2021nofrills,
title={{No-frills Dynamic Planning using Static Planners}},
author={Levy, Mara and Ayyagari, Vasista and Shrivastava, Abhinav},
booktitle={ICRA},
year={2021},
url={https://mlevy2525.github.io/files/DynamicPlanningAddOn.pdf}
}
A Unifying Framework for Formal Theories of Novelty
Terrance E. Boult, Przemyslaw A. Grabowicz, Derek S. Prijatelj, Roni Stern, Lawrence Holder, Joshua Alspector, Mohsen Jafarzadeh, Touqeer Ahmad, Akshay Raj Dhamija, Chunchun Li, Steve Cruz, Abhinav Shrivastava, Carl Vondrick, Walter J. Scheirer
@inproceedings{boult2021a,
title={{A Unifying Framework for Formal Theories of Novelty}},
author={Boult, Terrance E. and Grabowicz, Przemyslaw A. and Prijatelj, Derek S. and Stern, Roni and Holder, Lawrence and Alspector, Joshua and Jafarzadeh, Mohsen and Ahmad, Touqeer and Dhamija, Akshay Raj and Li, Chunchun and Cruz, Steve and Shrivastava, Abhinav and Vondrick, Carl and Scheirer, Walter J.},
booktitle={AAAI},
year={2021},
url={https://ojs.aaai.org/index.php/AAAI/article/view/17766/17573}
}
@article{ghosh2020all,
title={{All About Knowledge Graphs for Actions}},
author={Ghosh, Pallabi and Saini, Nirat and Davis, Larry and Shrivastava, Abhinav},
journal={arXiv},
year={2020},
url={https://arxiv.org/abs/2008.12432}
}
A Generic Visualization Approach for Convolutional Neural Networks
Ahmed Taha, Xitong Yang, Abhinav Shrivastava, Larry Davis
@inproceedings{taha2020a,
title={{A Generic Visualization Approach for Convolutional Neural Networks}},
author={Taha, Ahmed and Yang, Xitong and Shrivastava, Abhinav and Davis, Larry},
booktitle={ECCV},
year={2020},
url={https://arxiv.org/abs/2007.09748}
}
Curriculum Manager for Source Selection in Multi-Source Domain Adaptation
@inproceedings{yang2020curriculum,
title={{Curriculum Manager for Source Selection in Multi-Source Domain Adaptation}},
author={Yang, Luyu and Balaji, Yogesh and Lim, Ser-Nam and Shrivastava, Abhinav},
booktitle={ECCV},
year={2020},
url={https://arxiv.org/abs/2007.01261}
}
Depth Completion using a View-constrained Deep Prior
@inproceedings{ghosh2020depth,
title={{Depth Completion using a View-constrained Deep Prior}},
author={Ghosh, Pallabi and Vineet, Vibhav and Davis, Larry and Shrivastava, Abhinav and Sinha, Sudipta and Joshi, Neel},
booktitle={3DV},
year={2020},
url={https://arxiv.org/abs/2001.07791}
}
Group Ensemble: Learning an Ensemble of ConvNets in a single ConvNet
@article{chen2020group,
title={{Group Ensemble: Learning an Ensemble of ConvNets in a single ConvNet}},
author={Chen, Hao and Shrivastava, Abhinav},
journal={arXiv},
year={2020},
url={https://arxiv.org/pdf/2007.00649.pdf}
}
Improved Modeling of 3D Shapes with Multi-view Depth Maps
Kamal Gupta, Susmija Jabbireddy, Ketul Shah, Abhinav Shrivastava, Matthias Zwicker
@inproceedings{gupta2020improved,
title={{Improved Modeling of 3D Shapes with Multi-view Depth Maps}},
author={Gupta, Kamal and Jabbireddy, Susmija and Shah, Ketul and Shrivastava, Abhinav and Zwicker, Matthias},
booktitle={3DV},
year={2020},
url={https://arxiv.org/abs/2009.03298}
}
Quantization Guided JPEG Artifact Correction
Max Ehrlich, Ser-Nam Lim, Larry Davis, Abhinav Shrivastava
@inproceedings{ehrlich2020quantization,
title={{Quantization Guided JPEG Artifact Correction}},
author={Ehrlich, Max and Lim, Ser-Nam and Davis, Larry and Shrivastava, Abhinav},
booktitle={ECCV},
year={2020},
url={https://arxiv.org/abs/2004.09320}
}
End-to-end Learning of Compressible Features
Saurabh Singh, Sami Abu-El-Haija, Nick Johnston, Johannes Balle, Abhinav Shrivastava, George Toderici
@inproceedings{singh2020endtoend,
title={{End-to-end Learning of Compressible Features}},
author={Singh, Saurabh and Abu-El-Haija, Sami and Johnston, Nick and Balle, Johannes and Shrivastava, Abhinav and Toderici, George},
booktitle={ICIP},
year={2020},
url={https://arxiv.org/abs/2007.11797}
}
PatchVAE: Learning Local Latent Codes for Recognition
@inproceedings{gupta2020patchvae,
title={{PatchVAE: Learning Local Latent Codes for Recognition}},
author={Gupta, Kamal and Singh, Saurabh and Shrivastava, Abhinav},
booktitle={CVPR},
year={2020},
url={https://arxiv.org/abs/2004.03623}
}
Scalable Model Compression by Entropy Penalized Reparameterization
@inproceedings{oktay2020scalable,
title={{Scalable Model Compression by Entropy Penalized Reparameterization}},
author={Oktay, Deniz and Balle, Johannes and Singh, Saurabh and Shrivastava, Abhinav},
booktitle={ICLR},
year={2020},
url={https://openreview.net/pdf?id=HkgxW0EYDS}
}
Boosting Standard Classification Architectures Through a Ranking Regularizer
Ahmed Taha, Yi-Ting Chen, Teruhisa Misu, Abhinav Shrivastava, Larry Davis
@inproceedings{taha2020boosting,
title={{Boosting Standard Classification Architectures Through a Ranking Regularizer}},
author={Taha, Ahmed and Chen, Yi-Ting and Misu, Teruhisa and Shrivastava, Abhinav and Davis, Larry},
booktitle={WACV},
year={2020},
url={http://openaccess.thecvf.com/content_WACV_2020/papers/Taha_Boosting_Standard_Classification_Architectures_Through_a_Ranking_Regularizer_WACV_2020_paper.pdf}
}
Hand-Priming in Object Localization for Assistive Egocentric Vision
@inproceedings{bansal2020detecting,
title={{Detecting Human-Object Interactions via Functional Generalization}},
author={Bansal, Ankan and Rambhatla, Sai Saketh and Shrivastava, Abhinav and Chellappa, Rama},
booktitle={AAAI},
year={2020},
url={https://arxiv.org/pdf/1904.03181.pdf}
}
Generate, Segment and Refine: Towards Generic Manipulation Segmentation
Peng Zhou, Bor-Chun Chen, Xintong Han, Mahyar Najibi, Abhinav Shrivastava, Ser-Nam Lim, Larry Davis
@inproceedings{zhou2020generate,
title={{Generate, Segment and Refine: Towards Generic Manipulation Segmentation}},
author={Zhou, Peng and Chen, Bor-Chun and Han, Xintong and Najibi, Mahyar and Shrivastava, Abhinav and Lim, Ser-Nam and Davis, Larry},
booktitle={AAAI},
year={2020},
url={https://arxiv.org/pdf/1811.09729.pdf}
}
2019
Render4Completion: Synthesizing Multi-view Depth Maps for 3D Shape Completion
Tao Hu, Zhizhong Han, Abhinav Shrivastava, Matthias Zwicker
@inproceedings{hu2019rendercompletion,
title={{Render4Completion: Synthesizing Multi-view Depth Maps for 3D Shape Completion}},
author={Hu, Tao and Han, Zhizhong and Shrivastava, Abhinav and Zwicker, Matthias},
booktitle={GeoMDL Workshop, ICCV},
year={2019},
url={https://arxiv.org/pdf/1904.08366.pdf}
}
EvalNorm: Estimating Batch Normalization Statistics for Evaluation
@inproceedings{sun2019relational,
title={{Relational Action Forecasting}},
author={Sun, Chen and Shrivastava, Abhinav and Vondrick, Carl and Sukthankar, Rahul and Murphy, Kevin and Schmid, Cordelia},
booktitle={CVPR},
year={2019},
url={https://arxiv.org/pdf/1904.04231.pdf}
}
@inproceedings{sun2018actorcentric,
title={{Actor-centric Relation Network}},
author={Sun, Chen and Shrivastava, Abhinav and Vondrick, Carl and Murphy, Kevin and Sukthankar, Rahul and Schmid, Cordelia},
booktitle={ECCV},
year={2018},
url={https://arxiv.org/pdf/1807.10982.pdf}
}
@inproceedings{vondrick2018tracking,
title={{Tracking Emerges by Colorizing Videos}},
author={Vondrick, Carl and Shrivastava, Abhinav and Fathi, Alireza and Guadarrama, Sergio and Murphy, Kevin},
booktitle={ECCV},
year={2018},
url={https://arxiv.org/pdf/1806.09594.pdf}
}
2017
Revisiting Unreasonable Effectiveness of Data in Deep Learning Era
@inproceedings{sun2017revisiting,
title={{Revisiting Unreasonable Effectiveness of Data in Deep Learning Era}},
author={Sun, Chen and Shrivastava, Abhinav and Singh, Saurabh and Gupta, Abhinav},
booktitle={ICCV},
year={2017},
url={https://arxiv.org/pdf/1707.02968}
}
A-Fast-RCNN: Hard Positive Generation via Adversary for Object Detection
@inproceedings{wang2017afastrcnn,
title={{A-Fast-RCNN: Hard Positive Generation via Adversary for Object Detection}},
author={Wang, Xiaolong and Shrivastava, Abhinav and Gupta, Abhinav},
booktitle={CVPR},
year={2017},
url={https://arxiv.org/pdf/1704.03414.pdf}
}
2016
Beyond Skip Connections: Top-Down Modulation for Object Detection
@inproceedings{shrivastava2016training,
title={{Training Region-based Object Detectors with Online Hard Example Mining}},
author={Shrivastava, Abhinav and Gupta, Abhinav and Girshick, Ross},
booktitle={CVPR},
year={2016},
url={https://arxiv.org/abs/1604.03540}
}
2015
Applying artificial vision models to human scene understanding
@article{aminoff2015applying,
title={{Applying artificial vision models to human scene understanding}},
author={Aminoff, Elissa M. and Toneva, Mariya and Shrivastava, Abhinav and Chen, Xinlei and Misra, Ishan and Gupta, Abhinav and Tarr, Michael},
journal={Frontiers in Computational Neuroscience},
year={2015},
url={http://journal.frontiersin.org/Journal/10.3389/fncom.2015.00008/pdf}
}
@article{bansal2015midlevel,
title={{Mid-level Elements for Object Detection}},
author={Bansal, Aayush and Shrivastava, Abhinav and Doersch, Carl and Gupta, Abhinav},
journal={arXiv},
year={2015},
url={http://arxiv.org/pdf/1504.07284}
}
Watch and Learn: Semi-supervised Learning of Object Detectors from Videos
@inproceedings{misra2015watch,
title={{Watch and Learn: Semi-supervised Learning of Object Detectors from Videos}},
author={Misra, Ishan and Shrivastava, Abhinav and Hebert, Martial},
booktitle={CVPR},
year={2015},
url={https://arxiv.org/abs/1505.05769}
}
2014
Enriching Visual Knowledge Bases via Object Discovery and Segmentation
@inproceedings{chen2013neil,
title={{NEIL: Extracting Visual Knowledge from Web Data}},
author={Chen, Xinlei and Shrivastava, Abhinav and Gupta, Abhinav},
booktitle={ICCV},
year={2013},
url={http://xinleic.xyz/papers/iccv13.pdf}
}
@article{zhou2013measuring,
title={{Measuring and Increasing the capacity of Natural HOG Statistics}},
author={Zhou, Tinghui and Shrivastava, Abhinav and Obozinski, Guillaume and Gupta, Abhinav and Efros, Alexei A.},
journal={CMU Technical Report},
year={2013},
url={https://arxiv.org/abs/1505.05232}
}
2012
Constrained Semi-Supervised Learning using Attributes and Comparative Attributes
@inproceedings{malisiewicz2012exemplarsvms,
title={{Exemplar-SVMs for Visual Object Detection, Label Transfer and Image Retrieval}},
author={Malisiewicz, Tomasz and Shrivastava, Abhinav and Gupta, Abhinav and Efros, Alexei A.},
booktitle={ICML},
year={2012},
url={http://www.cs.cmu.edu/~tmalisie/projects/iccv11/malisiewicz_icml2012_abstract.pdf}
}
Real-time Household Object Detection from First-person's view using Exemplar-SVMs
@inproceedings{shrivastava2011datadriven,
title={{Data-driven Visual Similarity for Cross-domain Image Matching}},
author={Shrivastava, Abhinav and Malisiewicz, Tomasz and Gupta, Abhinav and Efros, Alexei A.},
booktitle={SIGGRAPH Asia},
year={2011},
url={http://graphics.cs.cmu.edu/projects/crossDomainMatching/abhinav-sa11.pdf}
}
Patents
Action localization in images and videos using relational features
C. Sun, A. Shrivastava, C. L. Schmid, R. Sukthankar, K. P. Murphy, C. M. Vondrick
US 11163989 · Google Inc.
Visual Tracking by Colorization
A. Shrivastava, A. Fathi, S. G. Cotado, K. P. Murphy, C. M. Vondrick
US20210089777A1 · Google Inc.
Learning Compressible Features
A. Shrivastava, S. Singh, J. Balle, S. A. Haija, N. Johnston, G. Toderici
US20200311548A1 · Google Inc.
Compression of Machine-Learned Models via Entropy Penalized Weight Reparameterization