Publications by Zsolt István
2017
Proceedings of the VLDB Endowment, Munich, Germany, August 2017
The ever increasing amount of data being handled in data centers causes an intrinsic inefficiency: moving data around is expensive in terms of bandwidth, latency, and power consumption, especially given the low computational complexity of many database operations. In this paper we explore near-data processing in database engines, i.e., the option of offloading part of the computation directly to the storage nodes. We implement our ideas in Caribou, an intelligent distributed storage layer incorporating many of the lessons learned while building systems with specialized hardware. Caribou provides access to DRAM/NVRAM storage over the network through a simple key-value store interface, with each storage node providing high-bandwidth near-data processing at line rate and fault tolerance through replication. The result is a highly efficient, distributed, intelligent data storage that can be used to both boost performance and reduce power consumption and real estate usage in the data center thanks to the micro-server architecture adopted.
@inproceedings{abc,
  author    = {Istv{\'a}n, Zsolt and Sidler, David and Alonso, Gustavo},
  title     = {Caribou: Intelligent Distributed Storage},
  booktitle = {Proceedings of the VLDB Endowment},
  venue     = {Munich, Germany},
  year      = {2017},
  abstract  = {The ever increasing amount of data being handled in data centers causes an intrinsic inefficiency: moving data around is expensive in terms of bandwidth, latency, and power consumption, especially given the low computational complexity of many database operations. In this paper we explore near-data processing in database engines, i.e., the option of offloading part of the computation directly to the storage nodes. We implement our ideas in Caribou, an intelligent distributed storage layer incorporating many of the lessons learned while building systems with specialized hardware. Caribou provides access to DRAM/NVRAM storage over the network through a simple key-value store interface, with each storage node providing high-bandwidth near-data processing at line rate and fault tolerance through replication. The result is a highly efficient, distributed, intelligent data storage that can be used to both boost performance and reduce power consumption and real estate usage in the data center thanks to the micro-server architecture adopted.},
}
Proceedings of the 2017 ACM International Conference on Management of Data, SIGMOD Conference 2017, Chicago, IL, USA, May 2017
Taking advantage of recently released hybrid multicore architectures, such as the Intel Xeon+FPGA machine, where the FPGA has coherent access to the main memory through the QPI bus, we explore the benefits of specializing operators to hardware. We focus on two commonly used SQL operators for strings: LIKE, and REGEXP_LIKE, and provide a novel and efficient implementation of these operators in reconfigurable hardware. We integrate the hardware accelerator into MonetDB, a main-memory column store, and demonstrate a significant improvement in response time and throughput. Our Hardware User Defined Function (HUDF) can speed up complex pattern matching by an order of magnitude in comparison to the database running on a 10-core CPU. The insights gained from integrating hardware based string operators into MonetDB should also be useful for future designs combining hardware specialization and databases.
@inproceedings{abc,
  author    = {Sidler, David and Istv{\'a}n, Zsolt and Owaida, Muhsen and Alonso, Gustavo},
  title     = {Accelerating Pattern Matching Queries in Hybrid {CPU-FPGA} Architectures},
  booktitle = {Proceedings of the 2017 ACM International Conference on Management of Data, SIGMOD Conference 2017},
  venue     = {Chicago, IL, USA},
  year      = {2017},
  doi       = {10.1145/3035918.3035954},
  abstract  = {Taking advantage of recently released hybrid multicore architectures, such as the Intel Xeon+FPGA machine, where the FPGA has coherent access to the main memory through the QPI bus, we explore the benefits of specializing operators to hardware. We focus on two commonly used SQL operators for strings: LIKE, and REGEXP\_LIKE, and provide a novel and efficient implementation of these operators in reconfigurable hardware. We integrate the hardware accelerator into MonetDB, a main-memory column store, and demonstrate a significant improvement in response time and throughput. Our Hardware User Defined Function (HUDF) can speed up complex pattern matching by an order of magnitude in comparison to the database running on a 10-core CPU. The insights gained from integrating hardware based string operators into MonetDB should also be useful for future designs combining hardware specialization and databases.},
}
Proceedings of the 2017 ACM International Conference on Management of Data, SIGMOD Conference 2017, Chicago, IL, USA, May 2017
@inproceedings{abc,
  author    = {Sidler, David and Istv{\'a}n, Zsolt and Owaida, Muhsen and Kara, Kaan and Alonso, Gustavo},
  title     = {{doppioDB}: A Hardware Accelerated Database},
  booktitle = {Proceedings of the 2017 ACM International Conference on Management of Data, SIGMOD Conference 2017, Chicago, IL, USA},
  year      = {2017},
  doi       = {10.1145/3035918.3058746},
}
2016
26th International Conference on Field Programmable Logic and Applications, FPL 2016, Lausanne, Switzerland, August 2016
@inproceedings{abc,
  author    = {Sidler, David and Istv{\'a}n, Zsolt and Alonso, Gustavo},
  title     = {Low-latency {TCP/IP} stack for data center applications},
  booktitle = {26th International Conference on Field Programmable Logic and Applications, FPL 2016, Lausanne, Switzerland},
  year      = {2016},
  doi       = {10.1109/FPL.2016.7577319},
}
24th IEEE Annual International Symposium on Field-Programmable Custom Computing Machines, FCCM 2016, Washington, DC, USA, May 2016
@inproceedings{abc,
  author    = {Istv{\'a}n, Zsolt and Sidler, David and Alonso, Gustavo},
  title     = {Runtime Parameterizable Regular Expression Operators for Databases},
  booktitle = {24th IEEE Annual International Symposium on Field-Programmable Custom Computing Machines, FCCM 2016, Washington, DC, USA},
  year      = {2016},
  doi       = {10.1109/FCCM.2016.61},
}
13th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2016, Santa Clara, CA, USA, March 2016
Consensus mechanisms for ensuring consistency are some of the most expensive operations in managing large amounts of data. Often, there is a trade off that involves reducing the coordination overhead at the price of accepting possible data loss or inconsistencies. As the demand for more efficient data centers increases, it is important to provide better ways of ensuring consistency without affecting performance.
In this paper we show that consensus (atomic broadcast) can be removed from the critical path of performance by moving it to hardware. As a proof of concept, we implement Zookeeper’s atomic broadcast at the network level using an FPGA. Our design uses both TCP and an application specific network protocol. The design can be used to push more value into the network, e.g., by extending the functionality of middleboxes or adding inexpensive consensus to in-network processing nodes.
To illustrate how this hardware consensus can be used in practical systems, we have combined it with a main-memory key value store running on specialized microservers (built as well on FPGAs). This results in a distributed service similar to Zookeeper that exhibits high and stable performance. This work can be used as a blueprint for further specialized designs.
@inproceedings{abc,
  author    = {Istv{\'a}n, Zsolt and Sidler, David and Alonso, Gustavo and Vukolic, Marko},
  title     = {Consensus in a Box: Inexpensive Coordination in Hardware},
  booktitle = {13th USENIX Symposium on Networked Systems Design and Implementation, NSDI 2016},
  venue     = {Santa Clara, CA, USA},
  year      = {2016},
  url       = {https://www.usenix.org/conference/nsdi16/technical-sessions/presentation/istvan},
  abstract  = {Consensus mechanisms for ensuring consistency are some of the most expensive operations in managing large amounts of data. Often, there is a trade off that involves reducing the coordination overhead at the price of accepting possible data loss or inconsistencies. As the demand for more efficient data centers increases, it is important to provide better ways of ensuring consistency without affecting performance. In this paper we show that consensus (atomic broadcast) can be removed from the critical path of performance by moving it to hardware. As a proof of concept, we implement Zookeeper{\textquoteright}s atomic broadcast at the network level using an FPGA. Our design uses both TCP and an application specific network protocol. The design can be used to push more value into the network, e.g., by extending the functionality of middleboxes or adding inexpensive consensus to in-network processing nodes. To illustrate how this hardware consensus can be used in practical systems, we have combined it with a main-memory key value store running on specialized microservers (built as well on FPGAs). This results in a distributed service similar to Zookeeper that exhibits high and stable performance. This work can be used as a blueprint for further specialized designs.},
}
2015
25th International Conference on Field Programmable Logic and Applications, FPL 2015, London, United Kingdom, September 2015
@inproceedings{abc,
  author    = {Istv{\'a}n, Zsolt and Sidler, David and Alonso, Gustavo},
  title     = {Building a distributed key-value store with {FPGA-based} microservers},
  booktitle = {25th International Conference on Field Programmable Logic and Applications, FPL 2015, London, United Kingdom},
  year      = {2015},
  doi       = {10.1109/FPL.2015.7293967},
}
TRETS, April 2015
@article{abc,
  author  = {Istv{\'a}n, Zsolt and Alonso, Gustavo and Blott, Michaela and Vissers, Kees A.},
  title   = {A Hash Table for Line-Rate Data Processing},
  journal = {ACM Transactions on Reconfigurable Technology and Systems},
  year    = {2015},
  doi     = {10.1145/2629582},
}
2014
PVLDB, November 2014
@inproceedings{abc,
  author    = {Woods, Louis and Istv{\'a}n, Zsolt and Alonso, Gustavo},
  title     = {Ibex - An Intelligent Storage Engine with Support for Advanced {SQL} Off-loading},
  booktitle = {Proceedings of the VLDB Endowment},
  year      = {2014},
  url       = {http://www.vldb.org/pvldb/vol7/p963-woods.pdf},
}
International Conference on Management of Data, SIGMOD 2014, Snowbird, UT, USA, June 2014
@inproceedings{abc,
  author    = {Istv{\'a}n, Zsolt and Woods, Louis and Alonso, Gustavo},
  title     = {Histograms as a side effect of data movement for big data},
  booktitle = {International Conference on Management of Data, SIGMOD 2014, Snowbird, UT, USA},
  year      = {2014},
  doi       = {10.1145/2588555.2612174},
}
2013
23rd International Conference on Field programmable Logic and Applications, FPL 2013, Porto, Portugal, September 2013
@inproceedings{abc,
  author    = {Istv{\'a}n, Zsolt and Alonso, Gustavo and Blott, Michaela and Vissers, Kees A.},
  title     = {A flexible hash table design for {10Gbps} key-value stores on {FPGAs}},
  booktitle = {23rd International Conference on Field programmable Logic and Applications, FPL 2013, Porto, Portugal},
  year      = {2013},
  doi       = {10.1109/FPL.2013.6645520},
}
23rd International Conference on Field programmable Logic and Applications, FPL 2013, Porto, Portugal, September 2013
@inproceedings{abc,
  author    = {Woods, Louis and Istv{\'a}n, Zsolt and Alonso, Gustavo},
  title     = {Hybrid {FPGA-accelerated} {SQL} query processing},
  booktitle = {23rd International Conference on Field programmable Logic and Applications, FPL 2013, Porto, Portugal},
  year      = {2013},
  doi       = {10.1109/FPL.2013.6645619},
}
Systems Group Master's Thesis, no. 85; Department of Computer Science, March 2013
@mastersthesis{abc,
  author        = {Istv{\'a}n, Zsolt},
  title         = {Hash Table for Large Key-Value Stores on {FPGAs}},
  school        = {Department of Computer Science, ETH Zurich},
  year          = {2013},
  note          = {Systems Group Master's Thesis, no. 85},
  internal-note = {NOTE(review): the original entry had school = 85, which is the thesis number. The listing names only "Systems Group" and "Department of Computer Science"; "ETH Zurich" is assumed -- confirm.},
}
Proceedings of HotCloud '13 (5th USENIX Workshop on Hot Topics in Cloud Computing), San Jose, CA, USA, January 2013
@inproceedings{abc,
  author    = {Blott, Michaela and Karras, Kimon and Liu, Ling and Vissers, Kees and B{\"a}r, Jeremia and Istv{\'a}n, Zsolt},
  title     = {Achieving {10Gbps} line-rate key-value stores with {FPGAs}},
  booktitle = {Proceedings of HotCloud {\textquoteright}13 (5th USENIX Workshop on Hot Topics in Cloud Computing)},
  venue     = {San Jose, CA, USA},
  year      = {2013},
}