Publications by Boris Glavic
2013
The 7th ACM International Conference on Distributed Event-Based Systems, DEBS '13, Arlington, TX, June 2013
@comment{The key abc is the placeholder produced by the export. It is kept on this entry because this is the first entry in the file, i.e. the one an existing citation of abc resolves to.}
@inproceedings{abc,
  author    = {Glavic, Boris and Sheykh Esmaili, Kyumars and Fischer, Peter M. and Tatbul, Nesime},
  title     = {{Ariadne}: Managing Fine-Grained Provenance on Data Streams},
  booktitle = {The 7th {ACM} International Conference on Distributed Event-Based Systems, {DEBS} '13, Arlington, {TX}},
  year      = {2013},
  month     = jun,
  doi       = {10.1145/2488222.2488256},
}
In Search of Elegance in the Theory and Practice of Computation - Essays Dedicated to Peter Buneman, January 2013
@inproceedings{glavic2013sql,
  author    = {Glavic, Boris and Miller, Ren{\'e}e J. and Alonso, Gustavo},
  title     = {Using {SQL} for Efficient Generation and Querying of Provenance Information},
  booktitle = {In Search of Elegance in the Theory and Practice of Computation -- Essays Dedicated to Peter Buneman},
  year      = {2013},
  doi       = {10.1007/978-3-642-41660-6_16},
}
2012
January 2012
@techreport{glavic2012ariadne,
  author        = {Glavic, Boris and Sheykh Esmaili, Kyumars and Fischer, Peter M. and Tatbul, Nesime},
  title         = {{Ariadne}: Managing Fine-Grained Provenance on Data Streams},
  year          = {2012},
  internal-note = {TODO: add the issuing institution, which is a required field for a techreport entry; it is not recorded in this listing},
}
2011
Proceedings of the Data Streams and Event Processing Workshop (co-located with BTW 2011), Kaiserslautern, Germany, January 2011
The current state of the art for provenance in data stream management
systems (DSMS) is to provide provenance at a high level of abstraction (such as, from
which sensors in a sensor network an aggregated value is derived from). This limitation
was imposed by high-throughput requirements and an anticipated lack of application
demand for more detailed provenance information. In this work, we first demonstrate
by means of well-chosen use cases that this is a misconception, i.e., coarse-grained
provenance is in fact insufficient for many application domains. We then analyze the
requirements and challenges involved in integrating support for fine-grained provenance
into a streaming system and outline a scalable solution for supporting tuple-level
provenance in DSMS.
@inproceedings{glavic2011case,
  author    = {Glavic, Boris and Fischer, Peter M. and Tatbul, Nesime and Sheykh Esmaili, Kyumars},
  title     = {The Case for Fine-Grained Stream Provenance},
  booktitle = {Proceedings of the Data Streams and Event Processing Workshop (co-located with BTW 2011), Kaiserslautern, Germany},
  year      = {2011},
  abstract  = {The current state of the art for provenance in data stream management systems (DSMS) is to provide provenance at a high level of abstraction (such as, from which sensors in a sensor network an aggregated value is derived from). This limitation was imposed by high-throughput requirements and an anticipated lack of application demand for more detailed provenance information. In this work, we first demonstrate by means of well-chosen use cases that this is a misconception, i.e., coarse-grained provenance is in fact insufficient for many application domains. We then analyze the requirements and challenges involved in integrating support for fine-grained provenance into a streaming system and outline a scalable solution for supporting tuple-level provenance in DSMS.},
}
PVLDB, January 2011
@article{glavic2011vagabond,
  author  = {Glavic, Boris and Du, Jiang and Miller, Ren{\'e}e J. and Alonso, Gustavo and Haas, Laura M.},
  title   = {Debugging Data Exchange with {Vagabond}},
  journal = {Proceedings of the {VLDB} Endowment},
  volume  = {4},
  year    = {2011},
  url     = {http://www.vldb.org/pvldb/vol4/p1383-glavic.pdf},
}
2010
PVLDB, January 2010
@article{glavic2010tramp,
  author  = {Glavic, Boris and Alonso, Gustavo and Miller, Ren{\'e}e J. and Haas, Laura M.},
  title   = {{TRAMP}: Understanding the Behavior of Schema Mappings through Provenance},
  journal = {Proceedings of the {VLDB} Endowment},
  year    = {2010},
}
2009
Demo, January 2009
In this demonstration we present the Perm provenance management system (PMS). Perm is capable of computing, storing and querying provenance information for the relational data model. Provenance is computed by using query rewriting techniques to annotate tuples with provenance information. Thus, provenance data and provenance computations are represented as relational data and queries and hence can be queried, stored and optimized using standard relational database techniques. This demo will show the complete Perm system and let attendants examine in detail the process of query rewriting and provenance retrieval on what is today the most complete data provenance system available.
@inproceedings{glavic2009permdemo,
  author    = {Glavic, Boris and Alonso, Gustavo},
  title     = {The {Perm} Provenance Management System in Action},
  booktitle = {Proceedings of the {ACM} {SIGMOD} International Conference on Management of Data, {SIGMOD} 2009},
  year      = {2009},
  note      = {Demonstration paper},
  doi       = {10.1145/1559845.1559980},
  abstract  = {In this demonstration we present the Perm provenance management system (PMS). Perm is capable of computing, storing and querying provenance information for the relational data model. Provenance is computed by using query rewriting techniques to annotate tuples with provenance information. Thus, provenance data and provenance computations are represented as relational data and queries and hence can be queried, stored and optimized using standard relational database techniques. This demo will show the complete Perm system and let attendants examine in detail the process of query rewriting and provenance retrieval on what is today the most complete data provenance system available.},
}
Research, January 2009
Data provenance is essential in applications such as scientific
computing, curated databases, and data warehouses. Several
systems have been developed that provide provenance
functionality for the relational data model. These systems
support only a small subset of SQL, a severe limitation in
practice since most of the application domains that benefit from
provenance information use complex queries. Such queries
typically involve nested subqueries, aggregation and/or user
defined functions. Without support for these constructs, a
provenance management system is of limited use.
In this paper we address this limitation by exploring the
problem of provenance derivation when complex queries are
involved. More precisely, we demonstrate that the widely used
definition of Why-provenance fails in the presence of nested
subqueries, and show how the definition can be modified to
produce meaningful results for nested subqueries. We further
present query rewrite rules to transform an SQL query into a
query propagating provenance. The solution introduced in this
paper allows us to track provenance information for a far wider
subset of SQL than any of the existing approaches. We have
incorporated these ideas into the Perm provenance management
system engine and used it to evaluate the feasibility and
performance of our approach.
@inproceedings{glavic2009nested,
  author    = {Glavic, Boris and Alonso, Gustavo},
  title     = {Provenance for Nested Subqueries},
  booktitle = {Proceedings of the 12th International Conference on Extending Database Technology, {EDBT} 2009},
  year      = {2009},
  keywords  = {research track},
  doi       = {10.1145/1516360.1516472},
  abstract  = {Data provenance is essential in applications such as scientific computing, curated databases, and data warehouses. Several systems have been developed that provide provenance functionality for the relational data model. These systems support only a small subset of SQL, a severe limitation in practice since most of the application domains that benefit from provenance information use complex queries. Such queries typically involve nested subqueries, aggregation and/or user defined functions. Without support for these constructs, a provenance management system is of limited use. In this paper we address this limitation by exploring the problem of provenance derivation when complex queries are involved. More precisely, we demonstrate that the widely used definition of Why-provenance fails in the presence of nested subqueries, and show how the definition can be modified to produce meaningful results for nested subqueries. We further present query rewrite rules to transform an SQL query into a query propagating provenance. The solution introduced in this paper allows us to track provenance information for a far wider subset of SQL than any of the existing approaches. We have incorporated these ideas into the Perm provenance management system engine and used it to evaluate the feasibility and performance of our approach.},
}
Proceedings of the 25th International Conference on Data Engineering, ICDE 2009, Shanghai, China, January 2009
@inproceedings{glavic2009perm,
  author    = {Glavic, Boris and Alonso, Gustavo},
  title     = {{Perm}: Processing Provenance and Data on the Same Data Model through Query Rewriting},
  booktitle = {Proceedings of the 25th International Conference on Data Engineering, {ICDE} 2009},
  venue     = {Shanghai, China},
  year      = {2009},
  doi       = {10.1109/ICDE.2009.15},
}