Publications by Martin Kaufmann
2016
Proceedings of the 2016 International Conference on Management of Data, SIGMOD Conference 2016, San Francisco, CA, USA, June 2016
@inproceedings{abc,
  author    = {Markus Pilman and Martin Kaufmann and Florian K{\"o}hl and Donald Kossmann and Damien Profeta},
  title     = {{ParTime}: Parallel Temporal Aggregation},
  booktitle = {Proceedings of the 2016 International Conference on Management of Data, SIGMOD Conference 2016, San Francisco, CA, USA},
  year      = {2016},
  doi       = {10.1145/2882903.2903732},
}
2015
Analytics in Motion: High Performance Event-Processing AND Real-Time Analytics in the Same Database.
Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data, Melbourne, Victoria, Australia, June 2015
@inproceedings{abc,
  author    = {Lucas Braun and Thomas Etter and Georgios Gasparis and Martin Kaufmann and Donald Kossmann and Daniel Widmer and Aharon Avitzur and Anthony Iliopoulos and Eliezer Levy and Ning Liang},
  title     = {Analytics in Motion: High Performance Event-Processing {AND} Real-Time Analytics in the Same Database},
  booktitle = {Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data, Melbourne, Victoria, Australia},
  year      = {2015},
  doi       = {10.1145/2723372.2742783},
}
Proceedings of the 27th International Conference on Scientific and Statistical Database Management, SSDBM '15, La Jolla, CA, USA, June 2015
Bi-temporal databases support system (transaction) and application time, enabling users to query the history as recorded today and as it was known in the past. In this paper, we study windows over both system and application time, i.e., bi-temporal windows. We propose a two-dimensional index that supports one-time and continuous queries over fixed and sliding bi-temporal windows, covering static and streaming data. We demonstrate the advantages of the proposed index compared to the state-of-the-art in terms of query performance, index update overhead and space footprint.
@inproceedings{abc,
  author    = {Chang Ge and Martin Kaufmann and Lukasz Golab and Peter M. Fischer and Anil K. Goel},
  title     = {Indexing Bi-temporal Windows},
  booktitle = {Proceedings of the 27th International Conference on Scientific and Statistical Database Management, SSDBM '15, La Jolla, CA, USA},
  year      = {2015},
  doi       = {10.1145/2791347.2791373},
  abstract  = {Bi-temporal databases support system (transaction) and application time, enabling users to query the history as recorded today and as it was known in the past. In this paper, we study windows over both system and application time, i.e., bi-temporal windows. We propose a two-dimensional index that supports one-time and continuous queries over fixed and sliding bi-temporal windows, covering static and streaming data. We demonstrate the advantages of the proposed index compared to the state-of-the-art in terms of query performance, index update overhead and space footprint.},
}
31st IEEE International Conference on Data Engineering, ICDE 2015, Seoul, South Korea, April 2015
@inproceedings{abc,
  author    = {Martin Kaufmann and Peter M. Fischer and Norman May and Chang Ge and Anil K. Goel and Donald Kossmann},
  title     = {Bi-temporal Timeline Index: A Data Structure for Processing Queries on Bi-temporal Data},
  booktitle = {31st IEEE International Conference on Data Engineering, ICDE 2015, Seoul, South Korea},
  year      = {2015},
  doi       = {10.1109/ICDE.2015.7113307},
}
2014
ETH Zürich, Diss. Nr. 21964, June 2014
Supervised by: Prof. Donald Kossmann
@phdthesis{abc,
  author = {Martin Kaufmann},
  title  = {Storing and Processing Temporal Data in Main Memory Column Stores},
  school = {ETH Z{\"u}rich},
  year   = {2014},
  note   = {Diss. Nr. 21964},
}
EDBT: 17th International Conference on Extending Database Technology, March 2014
After more than a decade of a virtual standstill, the adoption of temporal data management features has recently picked up speed, driven by customer demand and the inclusion of temporal expressions into SQL:2011. Most of the big commercial DBMS now include support for bitemporal data and operators.
In this paper, we perform a thorough analysis of these commercial temporal DBMS: We investigate their architecture, determine their performance and study the impact of performance tuning. This analysis utilizes our recent (TPCTC 2013) benchmark proposal, which includes a comprehensive temporal workload definition.
The results of our analysis show that the support for temporal data is still in its infancy: All systems store their data in regular, statically partitioned tables and rely on standard indexes as well as query rewrites for their operations. As shown by our measurements, this causes considerable performance variations on slight workload variations and a significant effort for performance tuning. In some cases, there is considerable overhead for temporal operations even after extensive tuning.
@inproceedings{abc,
  author    = {Martin Kaufmann and Peter M. Fischer and Norman May and Donald Kossmann},
  title     = {Benchmarking Bitemporal Database Systems: Ready for the Future or Stuck in the Past?},
  booktitle = {EDBT: 17th International Conference on Extending Database Technology},
  year      = {2014},
  doi       = {10.5441/002/edbt.2014.80},
  abstract  = {After more than a decade of a virtual standstill, the adoption of temporal data management features has recently picked up speed, driven by customer demand and the inclusion of temporal expressions into SQL:2011. Most of the big commercial DBMS now include support for bitemporal data and operators. In this paper, we perform a thorough analysis of these commercial temporal DBMS: We investigate their architecture, determine their performance and study the impact of performance tuning. This analysis utilizes our recent (TPCTC 2013) benchmark proposal, which includes a comprehensive temporal workload definition. The results of our analysis show that the support for temporal data is still in its infancy: All systems store their data in regular, statically partitioned tables and rely on standard indexes as well as query rewrites for their operations. As shown by our measurements, this causes considerable performance variations on slight workload variations and a significant effort for performance tuning. In some cases, there is considerable overhead for temporal operations even after extensive tuning.},
}
Proceedings of the 17th International Conference on Extending Database Technology, EDBT 2014, Athens, Greece, March 2014
@inproceedings{abc,
  author    = {Martin Kaufmann and Peter M. Fischer and Norman May and Donald Kossmann},
  title     = {Benchmarking Bitemporal Database Systems: Ready for the Future or Stuck in the Past?},
  booktitle = {Proceedings of the 17th International Conference on Extending Database Technology, EDBT 2014, Athens, Greece},
  year      = {2014},
  doi       = {10.5441/002/edbt.2014.80},
}
2013
Performance Characterization and Benchmarking - 5th TPC Technology Conference, TPCTC 2013, Trento, Italy, Revised Selected Papers, August 2013
@inproceedings{abc,
  author    = {Martin Kaufmann and Peter M. Fischer and Norman May and Andreas Tonder and Donald Kossmann},
  title     = {{TPC-BiH}: A Benchmark for Bitemporal Databases},
  booktitle = {Performance Characterization and Benchmarking - 5th TPC Technology Conference, TPCTC 2013, Trento, Italy, Revised Selected Papers},
  year      = {2013},
  doi       = {10.1007/978-3-319-04936-6_2},
}
Proceedings of the ACM SIGMOD International Conference on Management of Data, SIGMOD 2013, New York, NY, USA, June 2013
@inproceedings{abc,
  author    = {Martin Kaufmann and Amin Amiri Manjili and Panagiotis Vagenas and Peter M. Fischer and Donald Kossmann and Franz F{\"a}rber and Norman May},
  title     = {Timeline Index: A Unified Data Structure for Processing Queries on Temporal Data in {SAP} {HANA}},
  booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data, SIGMOD 2013, New York, NY, USA},
  year      = {2013},
  doi       = {10.1145/2463676.2465293},
}
29th IEEE International Conference on Data Engineering, ICDE 2013, Brisbane, Australia, April 2013
@inproceedings{abc,
  author    = {Martin Kaufmann and Amin Amiri Manjili and Stefan Hildenbrand and Donald Kossmann and Andreas Tonder},
  title     = {Time Travel in Column Stores},
  booktitle = {29th IEEE International Conference on Data Engineering, ICDE 2013, Brisbane, Australia},
  year      = {2013},
  doi       = {10.1109/ICDE.2013.6544818},
}
29th IEEE International Conference on Data Engineering, ICDE 2013, Brisbane, Australia, April 2013
@inproceedings{abc,
  author    = {Martin Kaufmann and Peter M. Fischer and Donald Kossmann and Norman May},
  title     = {A Generic Database Benchmarking Service},
  booktitle = {29th IEEE International Conference on Data Engineering, ICDE 2013, Brisbane, Australia},
  year      = {2013},
  doi       = {10.1109/ICDE.2013.6544923},
}
January 2013
An increasing number of applications such as risk evaluation in banking or inventory management require support for temporal data.
After more than a decade of standstill, the recent adoption of some bitemporal features in SQL:2011 has reinvigorated the support among commercial database vendors, who incorporate an increasing number of relevant bitemporal features. Naturally, assessing the performance and scalability of temporal data storage and operations is of great concern for potential users.
The cost of keeping and querying history with novel operations (such as time travel, temporal joins or temporal aggregations) is not adequately reflected in any existing benchmark.
In this paper, we present a benchmark proposal which provides comprehensive coverage of the bitemporal data management.
It builds on the solid foundations of TPC-H but extends it with a rich set of queries and update scenarios.
This workload stems both from real-life temporal applications from SAP's customer base and a systematic coverage of temporal operators proposed in the academic literature.
In the accompanying paper we present preliminary results of our benchmark on a number of temporal database systems, also highlighting the need for certain language extensions.
In the appendix of this technical report we provide all details required to implement the benchmark.
@techreport{abc,
  author      = {Martin Kaufmann and Peter M. Fischer and Norman May and Donald Kossmann},
  title       = {Benchmarking Databases with History Support},
  institution = {ETH Z{\"u}rich},
  year        = {2013},
  doi         = {10.3929/ethz-a-009994978},
  abstract    = {An increasing number of applications such as risk evaluation in banking or inventory management require support for temporal data. After more than a decade of standstill, the recent adoption of some bitemporal features in SQL:2011 has reinvigorated the support among commercial database vendors, who incorporate an increasing number of relevant bitemporal features. Naturally, assessing the performance and scalability of temporal data storage and operations is of great concern for potential users. The cost of keeping and querying history with novel operations (such as time travel, temporal joins or temporal aggregations) is not adequately reflected in any existing benchmark. In this paper, we present a benchmark proposal which provides comprehensive coverage of the bitemporal data management. It builds on the solid foundations of TPC-H but extends it with a rich set of queries and update scenarios. This workload stems both from real-life temporal applications from SAP{\textquoteright}s customer base and a systematic coverage of temporal operators proposed in the academic literature. In the accompanying paper we present preliminary results of our benchmark on a number of temporal database systems, also highlighting the need for certain language extensions. In the appendix of this technical report we provide all details required to implement the benchmark.},
}
PVLDB, January 2013
@article{abc,
  author  = {Martin Kaufmann},
  title   = {Storing and Processing Temporal Data in a Main Memory Column Store},
  journal = {Proceedings of the {VLDB} Endowment},
  volume  = {6},
  year    = {2013},
  url     = {http://www.vldb.org/pvldb/vol6/p1444-kaufmann.pdf},
}
PVLDB, January 2013
@article{abc,
  author  = {Martin Kaufmann and Panagiotis Vagenas and Peter M. Fischer and Donald Kossmann and Franz F{\"a}rber},
  title   = {Comprehensive and Interactive Temporal Query Processing with {SAP} {HANA}},
  journal = {Proceedings of the {VLDB} Endowment},
  volume  = {6},
  year    = {2013},
  url     = {http://www.vldb.org/pvldb/vol6/p1210-kaufmann.pdf},
}
2011
Proceedings of the XML Prague 2011 Conference, Prague, CZ, January 2011
In our community there are three main models for representing and processing data: Relations, XML and RDF. Each of these models has its "sweet spot" for applications and its own query language; very few implementations cater for more than one of these. We describe a uniform platform which provides interfaces for different query languages to retrieve and modify the same information or combine it with other data sources. This paper presents methods for completely and correctly translating SQL and SPARQL into XQuery since XQuery provides the most expressive foundation. Early results with our current prototype show that the translation from SPARQL to XQuery already achieves very competitive performance, whereas there is still a significant performance gap compared to SQL.
@inproceedings{abc,
  author    = {Martin Kaufmann and Daniela Florescu and Donald Kossmann and Peter M. Fischer},
  title     = {Translating {SPARQL} and {SQL} to {XQuery}},
  booktitle = {Proceedings of the XML Prague 2011 Conference, Prague, CZ},
  year      = {2011},
  abstract  = {In our community there are three main models for representing and processing data: Relations, XML and RDF. Each of these models has its ``sweet spot'' for applications and its own query language; very few implementations cater for more than one of these. We describe a uniform platform which provides interfaces for different query languages to retrieve and modify the same information or combine it with other data sources. This paper presents methods for completely and correctly translating SQL and SPARQL into XQuery since XQuery provides the most expressive foundation. Early results with our current prototype show that the translation from SPARQL to XQuery already achieves very competitive performance, whereas there is still a significant performance gap compared to SQL.},
}
2010
Systems Group Master's Thesis, ETH Zürich, Department of Computer Science, March 2010
Supervised by: Prof. Donald Kossmann
For developing an application which deals with a large amount of data, the state-of-the-art is
to use a specialized query language like SQL for relational data or SPARQL for RDF.
Whereas the business logic of an application is implemented in a host programming language
like Java or C++, the query language provides an interface to the storage of persistent data.
These languages often use different data models which cause the well-known impedance
mismatch problem.
XQuery is a declarative programming language that can be applied on all application tiers and
thus leverages a unified technology stack by means of the XML data model. XQuery is very
well suited for querying and manipulating data that is stored in XML collections. Yet, a large
number of legacy applications still exist in companies which produce SQL code as an
interface to a relational database on the one hand and SPARQL code to execute queries on
RDF documents on the other hand.
It is not feasible to replace all legacy databases and applications with XML databases and new
programs written in XQuery at the same time. Therefore this thesis explores how legacy SQL
and SPARQL code can be mapped to XQuery, which pre-conditions and limitations for an
automated mapping hold and how well this transformation performs. In this way, all
information can be moved to XML databases without having to change existing applications.
Both old systems and applications written in XQuery can co-exist by accessing the same data.
This eliminates the need for replication of different database systems and prevents
inconsistencies.
@mastersthesis{abc,
  author   = {Martin Kaufmann},
  title    = {Mapping {SPARQL} and {SQL} to {XQuery}},
  school   = {ETH Z{\"u}rich},
  year     = {2010},
  abstract = {For developing an application which deals with a large amount of data, the state-of-the-art is to use a specialized query language like SQL for relational data or SPARQL for RDF. Whereas the business logic of an application is implemented in a host programming language like Java or C++, the query language provides an interface to the storage of persistent data. These languages often use different data models which cause the well-known impedance mismatch problem. XQuery is a declarative programming language that can be applied on all application tiers and thus leverages a unified technology stack by means of the XML data model. XQuery is very well suited for querying and manipulating data that is stored in XML collections. Yet, a large number of legacy applications still exist in companies which produce SQL code as an interface to a relational database on the one hand and SPARQL code to execute queries on RDF documents on the other hand. It is not feasible to replace all legacy databases and applications with XML databases and new programs written in XQuery at the same time. Therefore this thesis explores how legacy SQL and SPARQL code can be mapped to XQuery, which pre-conditions and limitations for an automated mapping hold and how well this transformation performs. In this way, all information can be moved to XML databases without having to change existing applications. Both old systems and applications written in XQuery can co-exist by accessing the same data. This eliminates the need for replication of different database systems and prevents inconsistencies.},
}
2009
Web Engineering, 9th International Conference, ICWE 2009, San Sebastián, Spain, January 2009
XQuery is a declarative programming language which can be used to express queries and transformations of XML data. The goal of this paper is to explore the expressive power of XQuery as a general-purpose programming language. To this end, this paper describes how to build an entire enterprise web application in XQuery. It is shown that it is actually possible and quite effective to implement a web application entirely in XQuery and that there are several advantages in doing so. The resulting code has proven to be very concise and elegant. More importantly, the use of XQuery simplifies the overall application architecture and improves flexibility.
@inproceedings{abc,
  author    = {Martin Kaufmann and Donald Kossmann},
  title     = {Developing an Enterprise Web Application in {XQuery}},
  booktitle = {Web Engineering, 9th International Conference, ICWE 2009, San Sebasti{\'a}n, Spain},
  year      = {2009},
  doi       = {10.1007/978-3-642-02818-2_39},
  abstract  = {XQuery is a declarative programming language which can be used to express queries and transformations of XML data. The goal of this paper is to explore the expressive power of XQuery as a general-purpose programming language. To this end, this paper describes how to build an entire enterprise web application in XQuery. It is shown that it is actually possible and quite effective to implement a web application entirely in XQuery and that there are several advantages in doing so. The resulting code has proven to be very concise and elegant. More importantly, the use of XQuery simplifies the overall application architecture and improves flexibility.},
}
January 2009
ParFE is a fully-parallel micro finite element (uFE) solver for the modeling of human bone microstructures. We have developed an interface that will make available ParFE on the High Performance Computing (HPC) facility at the Swiss National Supercomputer Centre (CSCS) to a dedicated group of people. The interface has the form of a portal that can be accessed through a web browser. The portal provides a simple and secure means to submit and manage ParFE jobs without having to connect to the HPC facility in CSCS using a secure shell (ssh) connection.
@techreport{abc,
  author      = {Sumit Paranjape and Martin Kaufmann and Peter Arbenz},
  title       = {{WebParFE}: A Web Interface for the High-Performance Parallel Finite Element Solver {ParFE}},
  institution = {ETH Z{\"u}rich},
  year        = {2009},
  doi         = {10.3929/ethz-a-006833384},
  abstract    = {ParFE is a fully-parallel micro finite element (uFE) solver for the modeling of human bone microstructures. We have developed an interface that will make available ParFE on the High Performance Computing (HPC) facility at the Swiss National Supercomputer Centre (CSCS) to a dedicated group of people. The interface has the form of a portal that can be accessed through a web browser. The portal provides a simple and secure means to submit and manage ParFE jobs without having to connect to the HPC facility in CSCS using a secure shell (ssh) connection.},
}