Publications by Vivek Seshadri

×

Status message

The Publications site is currently under construction, as a result some publications might be missing.

2017

Proceedings of the 50th Annual IEEE/ACM International Symposium on Microarchitecture, Cambridge, MA, USA, October 2017
Many important applications trigger bulk bitwise operations, i.e., bitwise operations on large bit vectors. In fact, recent works design techniques that exploit fast bulk bitwise operations to accelerate databases (bitmap indices, BitWeaving) and web search (BitFunnel). Unfortunately, in existing architectures, the throughput of bulk bitwise operations is limited by the memory bandwidth available to the processing unit (e.g., CPU, GPU, FPGA, processing-in-memory). To overcome this bottleneck, we propose Ambit, an Accelerator-in-Memory for bulk bitwise operations. Unlike prior works, Ambit exploits the analog operation of DRAM technology to perform bitwise operations completely inside DRAM, thereby exploiting the full internal DRAM bandwidth. Ambit consists of two components. First, simultaneous activation of three DRAM rows that share the same set of sense amplifiers enables the system to perform bitwise AND and OR operations. Second, with modest changes to the sense amplifier, the system can use the inverters present inside the sense amplifier to perform bitwise NOT operations. With these two components, Ambit can perform any bulk bitwise operation efficiently inside DRAM. Ambit largely exploits existing DRAM structure, and hence incurs low cost on top of commodity DRAM designs (1% of DRAM chip area). Importantly, Ambit uses the modern DRAM interface without any changes, and therefore it can be directly plugged onto the memory bus. Our extensive circuit simulations show that Ambit works as expected even in the presence of significant process variation. Averaged across seven bulk bitwise operations, Ambit improves performance by 32X and reduces energy consumption by 35X compared to state-of-the-art systems. When integrated with Hybrid Memory Cube (HMC), a 3D-stacked DRAM with a logic layer, Ambit improves performance of bulk bitwise operations by 9.7X compared to processing in the logic layer of the HMC. Ambit improves the performance of three real-world data-intensive applications, 1) database bitmap indices, 2) BitWeaving, a technique to accelerate database scans, and 3) bit-vector-based implementation of sets, by 3X-7X compared to a state-of-the-art baseline using SIMD optimizations. We describe four other applications that can benefit from Ambit, including a recent technique proposed to speed up web search. We believe that large performance and energy improvements provided by Ambit can enable other applications to use bulk bitwise operations.
@inproceedings{abc,
	abstract = {Many important applications trigger bulk bitwise operations, i.e., bitwise operations on large bit vectors. In fact, recent works design techniques that exploit fast bulk bitwise operations to accelerate databases (bitmap indices, BitWeaving) and web search (BitFunnel). Unfortunately, in existing architectures, the throughput of bulk bitwise operations is limited by the memory bandwidth available to the processing unit (e.g., CPU, GPU, FPGA, processing-in-memory).

To overcome this bottleneck, we propose Ambit, an Accelerator-in-Memory for bulk bitwise operations. Unlike prior works, Ambit exploits the analog operation of DRAM technology to perform bitwise operations completely inside DRAM, thereby exploiting the full internal DRAM bandwidth. Ambit consists of two components. First, simultaneous activation of three DRAM rows that share the same set of sense amplifiers enables the system to perform bitwise AND and OR operations. Second, with modest changes to the sense amplifier, the system can use the inverters present inside the sense amplifier to perform bitwise NOT operations. With these two components, Ambit can perform any bulk bitwise operation efficiently inside DRAM. Ambit largely exploits existing DRAM structure, and hence incurs low cost on top of commodity DRAM designs (1\% of DRAM chip area). Importantly, Ambit uses the modern DRAM interface without any changes, and therefore it can be directly plugged onto the memory bus.

Our extensive circuit simulations show that Ambit works as expected even in the presence of significant process variation. Averaged across seven bulk bitwise operations, Ambit improves performance by 32X and reduces energy consumption by 35X compared to state-of-the-art systems. When integrated with Hybrid Memory Cube (HMC), a 3D-stacked DRAM with a logic layer, Ambit improves performance of bulk bitwise operations by 9.7X compared to processing in the logic layer of the HMC. Ambit improves the performance of three real-world data-intensive applications, 1) database bitmap indices, 2) BitWeaving, a technique to accelerate database scans, and 3) bit-vector-based implementation of sets, by 3X-7X compared to a state-of-the-art baseline using SIMD optimizations. We describe four other applications that can benefit from Ambit, including a recent technique proposed to speed up web search. We believe that large performance and energy improvements provided by Ambit can enable other applications to use bulk bitwise operations.},
	author = {Vivek Seshadri and Donghyuk Lee and Thomas Mullins and Hasan Hassan and Amirali Boroumand and Jeremie Kim and Michael A. Kozuch and Onur Mutlu and Phillip B. Gibbons and Todd C. Mowry},
	booktitle = {Proceedings of the 50th Annual IEEE/ACM International Symposium on Microarchitecture},
	title = {Ambit: in-memory accelerator for bulk bitwise operations using commodity DRAM technology},
	venue = {Cambridge, MA, USA},
	year = {2017}
}
Proceedings of the 2017 ACM SIGMETRICS / International Conference on Measurement and Modeling of Computer Systems, Urbana-Champaign, IL, USA, June 2017
@inproceedings{abc,
	author = {Donghyuk Lee and Samira Manabi Khan and Lavanya Subramanian and Saugata Ghose and Rachata Ausavarungnirun and Gennady Pekhimenko and Vivek Seshadri and Onur Mutlu},
	booktitle = {Proceedings of the 2017 ACM SIGMETRICS / International Conference on Measurement and Modeling of Computer Systems, Urbana-Champaign, IL, USA},
	title = {Design-Induced Latency Variation in Modern DRAM Chips: Characterization, Analysis, and Latency Reduction Mechanisms.},
	url = {http://doi.acm.org/10.1145/3078505.3078533},
	year = {2017}
}
POMACS, January 2017
@article{abc,
	author = {Donghyuk Lee and Samira Manabi Khan and Lavanya Subramanian and Saugata Ghose and Rachata Ausavarungnirun and Gennady Pekhimenko and Vivek Seshadri and Onur Mutlu},
	journal = {POMACS},
	title = {Design-Induced Latency Variation in Modern DRAM Chips: Characterization, Analysis, and Latency Reduction Mechanisms.},
	url = {http://doi.acm.org/10.1145/3084464},
	year = {2017}
}
Advances in Computers, January 2017
@article{abc,
	author = {Vivek Seshadri and Onur Mutlu},
	journal = {Advances in Computers},
	title = {Chapter Four - Simple Operations in Memory to Reduce Data Movement.},
	url = {https://doi.org/10.1016/bs.adcom.2017.04.004},
	year = {2017}
}

2016

2016 IEEE International Symposium on High Performance Computer Architecture, HPCA 2016, Barcelona, Spain, March 2016
@inproceedings{abc,
	author = {Hasan Hassan and Gennady Pekhimenko and Nandita Vijaykumar and Vivek Seshadri and Donghyuk Lee and Oguz Ergin and Onur Mutlu},
	booktitle = {2016 IEEE International Symposium on High Performance Computer Architecture, HPCA 2016, Barcelona, Spain},
	title = {ChargeCache: Reducing DRAM latency by exploiting row access locality.},
	url = {http://dx.doi.org/10.1109/HPCA.2016.7446096},
	year = {2016}
}
CoRR, January 2016
@article{abc,
	author = {Donghyuk Lee and Yoongu Kim and Vivek Seshadri and Jamie Liu and Lavanya Subramanian and Onur Mutlu},
	journal = {CoRR},
	title = {Tiered-Latency DRAM (TL-DRAM).},
	url = {http://arxiv.org/abs/1601.06903},
	year = {2016}
}
CoRR, January 2016
@article{abc,
	author = {Donghyuk Lee and Yoongu Kim and Gennady Pekhimenko and Samira Manabi Khan and Vivek Seshadri and Kevin Kai-Wei Chang and Onur Mutlu},
	journal = {CoRR},
	title = {Adaptive-Latency DRAM (AL-DRAM).},
	url = {http://arxiv.org/abs/1603.08454},
	year = {2016}
}
IEEE Trans. Parallel Distrib. Syst., January 2016
@inproceedings{abc,
	author = {Lavanya Subramanian and Donghyuk Lee and Vivek Seshadri and Harsha Rastogi and Onur Mutlu},
	booktitle = {IEEE Trans. Parallel Distrib. Syst.},
	title = {BLISS: Balancing Performance, Fairness and Complexity in Memory Access Scheduling.},
	url = {http://dx.doi.org/10.1109/TPDS.2016.2526003},
	year = {2016}
}
CoRR, January 2016
@article{abc,
	author = {Vivek Seshadri and Onur Mutlu},
	journal = {CoRR},
	title = {The Processing Using Memory Paradigm: In-DRAM Bulk Copy, Initialization, Bitwise AND and OR.},
	url = {http://arxiv.org/abs/1610.09603},
	year = {2016}
}
CoRR, January 2016
@article{abc,
	author = {Donghyuk Lee and Samira Manabi Khan and Lavanya Subramanian and Rachata Ausavarungnirun and Gennady Pekhimenko and Vivek Seshadri and Saugata Ghose and Onur Mutlu},
	journal = {CoRR},
	title = {Reducing DRAM Latency by Exploiting Design-Induced Latency Variation in Modern DRAM Chips.},
	url = {http://arxiv.org/abs/1610.09604},
	year = {2016}
}
CoRR, January 2016
@article{abc,
	author = {Vivek Seshadri and Donghyuk Lee and Thomas Mullins and Hasan Hassan and Amirali Boroumand and Jeremie Kim and Michael A. Kozuch and Onur Mutlu and Phillip B. Gibbons and Todd C. Mowry},
	journal = {CoRR},
	title = {Buddy-RAM: Improving the Performance and Efficiency of Bulk Bitwise Operations Using DRAM.},
	url = {http://arxiv.org/abs/1611.09988},
	year = {2016}
}

2015

Proceedings of the 48th International Symposium on Microarchitecture, MICRO 2015, Waikiki, HI, USA, December 2015
@inproceedings{abc,
	author = {Vivek Seshadri and Thomas Mullins and Amirali Boroumand and Onur Mutlu and Phillip B. Gibbons and Michael A. Kozuch and Todd C. Mowry},
	booktitle = {Proceedings of the 48th International Symposium on Microarchitecture, MICRO 2015, Waikiki, HI, USA},
	title = {Gather-scatter DRAM: in-DRAM address translation to improve the spatial locality of non-unit strided accesses.},
	url = {http://doi.acm.org/10.1145/2830772.2830820},
	year = {2015}
}
Proceedings of the 48th International Symposium on Microarchitecture, MICRO 2015, Waikiki, HI, USA, December 2015
@inproceedings{abc,
	author = {Lavanya Subramanian and Vivek Seshadri and Arnab Ghosh and Samira Manabi Khan and Onur Mutlu},
	booktitle = {Proceedings of the 48th International Symposium on Microarchitecture, MICRO 2015, Waikiki, HI, USA},
	title = {The application slowdown model: quantifying and controlling the impact of inter-application interference at shared caches and main memory.},
	url = {http://doi.acm.org/10.1145/2830772.2830803},
	year = {2015}
}
Proceedings of the 42nd Annual International Symposium on Computer Architecture, Portland, OR, USA, June 2015
@inproceedings{abc,
	author = {Vivek Seshadri and Gennady Pekhimenko and Olatunji Ruwase and Onur Mutlu and Phillip B. Gibbons and Michael A. Kozuch and Todd C. Mowry and Trishul M. Chilimbi},
	booktitle = {Proceedings of the 42nd Annual International Symposium on Computer Architecture, Portland, OR, USA},
	title = {Page overlays: an enhanced virtual memory framework to enable fine-grained memory management.},
	url = {http://doi.acm.org/10.1145/2749469.2750379},
	year = {2015}
}
21st IEEE International Symposium on High Performance Computer Architecture, HPCA 2015, Burlingame, CA, USA, February 2015
@inproceedings{abc,
	author = {Donghyuk Lee and Yoongu Kim and Gennady Pekhimenko and Samira Manabi Khan and Vivek Seshadri and Kevin Kai-Wei Chang and Onur Mutlu},
	booktitle = {21st IEEE International Symposium on High Performance Computer Architecture, HPCA 2015, Burlingame, CA, USA},
	title = {Adaptive-latency DRAM: Optimizing DRAM timing for the common-case.},
	url = {http://dx.doi.org/10.1109/HPCA.2015.7056057},
	year = {2015}
}
CoRR, January 2015
@article{abc,
	author = {Lavanya Subramanian and Donghyuk Lee and Vivek Seshadri and Harsha Rastogi and Onur Mutlu},
	journal = {CoRR},
	title = {The Blacklisting Memory Scheduler: Balancing Performance, Fairness and Complexity.},
	url = {http://arxiv.org/abs/1504.00390},
	year = {2015}
}
Computer Architecture Letters, January 2015
@inproceedings{abc,
	author = {Vivek Seshadri and Kevin Hsieh and Amirali Boroumand and Donghyuk Lee and Michael A. Kozuch and Onur Mutlu and Phillip B. Gibbons and Todd C. Mowry},
	booktitle = {Computer Architecture Letters},
	title = {Fast Bulk Bitwise AND and OR in DRAM.},
	url = {http://dx.doi.org/10.1109/LCA.2015.2434872},
	year = {2015}
}

2014

32nd IEEE International Conference on Computer Design, ICCD 2014, Seoul, South Korea, October 2014
@inproceedings{abc,
	author = {Lavanya Subramanian and Donghyuk Lee and Vivek Seshadri and Harsha Rastogi and Onur Mutlu},
	booktitle = {32nd IEEE International Conference on Computer Design, ICCD 2014, Seoul, South Korea},
	title = {The Blacklisting Memory Scheduler: Achieving high performance and fairness at low cost.},
	url = {http://dx.doi.org/10.1109/ICCD.2014.6974655},
	year = {2014}
}
ACM/IEEE 41st International Symposium on Computer Architecture, ISCA 2014, Minneapolis, MN, USA, June 2014
@inproceedings{abc,
	author = {Vivek Seshadri and Abhishek Bhowmick and Onur Mutlu and Phillip B. Gibbons and Michael A. Kozuch and Todd C. Mowry},
	booktitle = {ACM/IEEE 41st International Symposium on Computer Architecture, ISCA 2014, Minneapolis, MN, USA},
	title = {The Dirty-Block Index.},
	url = {http://dx.doi.org/10.1109/ISCA.2014.6853204},
	year = {2014}
}
TACO, January 2014
@inproceedings{abc,
	author = {Vivek Seshadri and Samihan Yedkar and Hongyi Xin and Onur Mutlu and Phillip B. Gibbons and Michael A. Kozuch and Todd C. Mowry},
	booktitle = {TACO},
	title = {Mitigating Prefetcher-Caused Pollution Using Informed Caching Policies for Prefetched Blocks.},
	url = {http://doi.acm.org/10.1145/2677956},
	year = {2014}
}

2013

The 46th Annual IEEE/ACM International Symposium on Microarchitecture, MICRO-46, Davis, CA, USA, December 2013
@inproceedings{abc,
	author = {Vivek Seshadri and Yoongu Kim and Chris Fallin and Donghyuk Lee and Rachata Ausavarungnirun and Gennady Pekhimenko and Yixin Luo and Onur Mutlu and Phillip B. Gibbons and Michael A. Kozuch and Todd C. Mowry},
	booktitle = {The 46th Annual IEEE/ACM International Symposium on Microarchitecture, MICRO-46, Davis, CA, USA},
	title = {RowClone: fast and energy-efficient in-DRAM bulk data copy and initialization.},
	url = {http://doi.acm.org/10.1145/2540708.2540725},
	year = {2013}
}
The 46th Annual IEEE/ACM International Symposium on Microarchitecture, MICRO-46, Davis, CA, USA, December 2013
@inproceedings{abc,
	author = {Gennady Pekhimenko and Vivek Seshadri and Yoongu Kim and Hongyi Xin and Onur Mutlu and Phillip B. Gibbons and Michael A. Kozuch and Todd C. Mowry},
	booktitle = {The 46th Annual IEEE/ACM International Symposium on Microarchitecture, MICRO-46, Davis, CA, USA},
	title = {Linearly compressed pages: a low-complexity, low-latency main memory compression framework.},
	url = {http://doi.acm.org/10.1145/2540708.2540724},
	year = {2013}
}
19th IEEE International Symposium on High Performance Computer Architecture, HPCA 2013, Shenzhen, China, February 2013
@inproceedings{abc,
	author = {Donghyuk Lee and Yoongu Kim and Vivek Seshadri and Jamie Liu and Lavanya Subramanian and Onur Mutlu},
	booktitle = {19th IEEE International Symposium on High Performance Computer Architecture, HPCA 2013, Shenzhen, China},
	title = {Tiered-latency DRAM: A low latency and low cost DRAM architecture.},
	url = {http://dx.doi.org/10.1109/HPCA.2013.6522354},
	year = {2013}
}
19th IEEE International Symposium on High Performance Computer Architecture, HPCA 2013, Shenzhen, China, February 2013
@inproceedings{abc,
	author = {Lavanya Subramanian and Vivek Seshadri and Yoongu Kim and Ben Jaiyen and Onur Mutlu},
	booktitle = {19th IEEE International Symposium on High Performance Computer Architecture, HPCA 2013, Shenzhen, China},
	title = {MISE: Providing performance predictability and improving fairness in shared main memory systems.},
	url = {http://dx.doi.org/10.1109/HPCA.2013.6522356},
	year = {2013}
}

2012

International Conference on Parallel Architectures and Compilation Techniques, PACT '12, Minneapolis, MN, September 2012
@inproceedings{abc,
	author = {Vivek Seshadri and Onur Mutlu and Michael A. Kozuch and Todd C. Mowry},
	booktitle = {International Conference on Parallel Architectures and Compilation Techniques, PACT {\textquoteright}12, Minneapolis, MN},
	title = {The evicted-address filter: a unified mechanism to address both cache pollution and thrashing.},
	url = {http://doi.acm.org/10.1145/2370816.2370868},
	year = {2012}
}
International Conference on Parallel Architectures and Compilation Techniques, PACT '12, Minneapolis, MN, September 2012
@inproceedings{abc,
	author = {Gennady Pekhimenko and Vivek Seshadri and Onur Mutlu and Phillip B. Gibbons and Michael A. Kozuch and Todd C. Mowry},
	booktitle = {International Conference on Parallel Architectures and Compilation Techniques, PACT {\textquoteright}12, Minneapolis, MN},
	title = {Base-delta-immediate compression: practical data compression for on-chip caches.},
	url = {http://doi.acm.org/10.1145/2370816.2370870},
	year = {2012}
}
39th International Symposium on Computer Architecture (ISCA 2012), Portland, OR, USA, June 2012
@inproceedings{abc,
	author = {Yoongu Kim and Vivek Seshadri and Donghyuk Lee and Jamie Liu and Onur Mutlu},
	booktitle = {39th International Symposium on Computer Architecture (ISCA 2012)},
	title = {A case for exploiting subarray-level parallelism (SALP) in DRAM.},
	url = {http://dx.doi.org/10.1109/ISCA.2012.6237032},
	venue = {Portland, OR, USA},
	year = {2012}
}