Publications by Peter M. Fischer
2015
Proceedings of the 27th International Conference on Scientific and Statistical Database Management, SSDBM '15, La Jolla, CA, USA, June 2015
Bi-temporal databases support system (transaction) and application time, enabling users to query the history as recorded today and as it was known in the past. In this paper, we study windows over both system and application time, i.e., bi-temporal windows. We propose a two-dimensional index that supports one-time and continuous queries over fixed and sliding bi-temporal windows, covering static and streaming data. We demonstrate the advantages of the proposed index compared to the state-of-the-art in terms of query performance, index update overhead and space footprint.
@inproceedings{abc, abstract = {Bi-temporal databases support system (transaction) and application time, enabling users to query the history as recorded today and as it was known in the past. In this paper, we study windows over both system and application time, i.e., bi-temporal windows. We propose a two-dimensional index that supports one-time and continuous queries over fixed and sliding bi-temporal windows, covering static and streaming data. We demonstrate the advantages of the proposed index compared to the state-of-the-art in terms of query performance, index update overhead and space footprint.}, author = {Chang Ge and Martin Kaufmann and Lukasz Golab and Peter M. Fischer and Anil K. Goel}, booktitle = {Proceedings of the 27th International Conference on Scientific and Statistical Database Management, SSDBM {\textquoteright}15}, title = {Indexing bi-temporal windows.}, url = {http://doi.acm.org/10.1145/2791347.2791373}, venue = {La Jolla, CA, USA}, year = {2015} }
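For orientation, a minimal sketch of the queried structure (not the paper's two-dimensional index): each tuple carries a system-time and an application-time interval, and a bi-temporal window keeps exactly the tuples whose intervals intersect a rectangle spanned by the two time dimensions. All names below are invented for illustration.

    from dataclasses import dataclass

    @dataclass
    class BitemporalTuple:
        value: str
        sys_start: int   # when the fact was recorded in the database
        sys_end: int     # when it was superseded (large value = still current)
        app_start: int   # when the fact holds in the application domain
        app_end: int

    def overlaps(start, end, lo, hi):
        # Half-open intervals [start, end) and [lo, hi) intersect.
        return start < hi and lo < end

    def bitemporal_window(tuples, sys_lo, sys_hi, app_lo, app_hi):
        # Naive scan standing in for the paper's index: keep tuples whose
        # system-time AND application-time intervals intersect the window.
        return [t for t in tuples
                if overlaps(t.sys_start, t.sys_end, sys_lo, sys_hi)
                and overlaps(t.app_start, t.app_end, app_lo, app_hi)]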
31st IEEE International Conference on Data Engineering, ICDE 2015, Seoul, South Korea, April 2015
@inproceedings{abc, author = {Martin Kaufmann and Peter M. Fischer and Norman May and Chang Ge and Anil K. Goel and Donald Kossmann}, booktitle = {31st IEEE International Conference on Data Engineering, ICDE 2015, Seoul, South Korea}, title = {Bi-temporal Timeline Index: A Data Structure for Processing Queries on Bi-temporal Data.}, url = {http://dx.doi.org/10.1109/ICDE.2015.7113307}, year = {2015} }
2014
Proceedings of the 17th International Conference on Extending Database Technology, EDBT 2014, Athens, Greece, March 2014
After more than a decade of a virtual standstill, the adoption of temporal data management features has recently picked up speed, driven by customer demand and the inclusion of temporal expressions into SQL:2011. Most of the big commercial DBMS now include support for bitemporal data and operators.
In this paper, we perform a thorough analysis of these commercial temporal DBMS: We investigate their architecture, determine their performance and study the impact of performance tuning. This analysis utilizes our recent (TPCTC 2013) benchmark proposal, which includes a comprehensive temporal workload definition.
The results of our analysis show that the support for temporal data is still in its infancy: All systems store their data in regular, statically partitioned tables and rely on standard indexes as well as query rewrites for their operations. As shown by our measurements, this causes considerable performance variations on slight workload variations and a significant effort for performance tuning. In some cases, there is considerable overhead for temporal operations even after extensive tuning.
@inproceedings{abc, abstract = {After more than a decade of a virtual standstill, the adoption of temporal data management features has recently picked up speed, driven by customer demand and the inclusion of temporal expressions into SQL:2011. Most of the big commercial DBMS now include support for bitemporal data and operators. In this paper, we perform a thorough analysis of these commercial temporal DBMS: We investigate their architecture, determine their performance and study the impact of performance tuning. This analysis utilizes our recent (TPCTC 2013) benchmark proposal, which includes a comprehensive temporal workload definition. The results of our analysis show that the support for temporal data is still in its infancy: All systems store their data in regular, statically partitioned tables and rely on standard indexes as well as query rewrites for their operations. As shown by our measurements, this causes considerable performance variations on slight workload variations and a significant effort for performance tuning. In some cases, there is considerable overhead for temporal operations even after extensive tuning.}, author = {Martin Kaufmann and Peter M. Fischer and Norman May and Donald Kossmann}, booktitle = {Proceedings of the 17th International Conference on Extending Database Technology, EDBT 2014, Athens, Greece}, title = {Benchmarking Bitemporal Database Systems: Ready for the Future or Stuck in the Past?}, url = {http://dx.doi.org/10.5441/002/edbt.2014.80}, year = {2014} }
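For readers unfamiliar with the SQL:2011 temporal expressions mentioned in the abstract, the most basic one is time travel: querying a table as it was recorded at a past system time (FOR SYSTEM_TIME AS OF). A naive Python sketch of that semantics over versioned rows, with invented field names:

    def as_of(rows, system_time):
        # Time travel: the row versions current at `system_time`, i.e. recorded
        # no later than it and not yet superseded -- the semantics behind
        # SQL:2011's "FOR SYSTEM_TIME AS OF <t>".
        return [r for r in rows if r["sys_start"] <= system_time < r["sys_end"]]

    history = [
        {"key": 1, "price": 10, "sys_start": 0, "sys_end": 5},            # superseded at t=5
        {"key": 1, "price": 12, "sys_start": 5, "sys_end": float("inf")}, # current version
    ]
    assert as_of(history, 3)[0]["price"] == 10   # the table as known at t=3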
2013
Performance Characterization and Benchmarking - 5th TPC Technology Conference, TPCTC 2013, Trento, Italy, Revised Selected Papers, August 2013
@inproceedings{abc, author = {Martin Kaufmann and Peter M. Fischer and Norman May and Andreas Tonder and Donald Kossmann}, booktitle = {Performance Characterization and Benchmarking - 5th TPC Technology Conference, TPCTC 2013, Trento, Italy}, title = {TPC-BiH: A Benchmark for Bitemporal Databases.}, url = {http://dx.doi.org/10.1007/978-3-319-04936-6_2}, venue = {Revised Selected Papers}, year = {2013} }
Proceedings of the ACM SIGMOD International Conference on Management of Data, SIGMOD 2013, New York, NY, USA, June 2013
@inproceedings{abc, author = {Martin Kaufmann and Amin Amiri Manjili and Panagiotis Vagenas and Peter M. Fischer and Donald Kossmann and Franz F{\"a}rber and Norman May}, booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data, SIGMOD 2013, New York, NY, USA}, title = {Timeline index: a unified data structure for processing queries on temporal data in SAP HANA.}, url = {http://doi.acm.org/10.1145/2463676.2465293}, year = {2013} }
The 7th ACM International Conference on Distributed Event-Based Systems, DEBS '13, Arlington, TX, June 2013
@inproceedings{abc, author = {Boris Glavic and Kyumars Sheykh Esmaili and Peter M. Fischer and Nesime Tatbul}, booktitle = {The 7th ACM International Conference on Distributed Event-Based Systems, DEBS {\textquoteright}13, Arlington, TX}, title = {Ariadne: managing fine-grained provenance on data streams.}, url = {http://doi.acm.org/10.1145/2488222.2488256}, year = {2013} }
29th IEEE International Conference on Data Engineering, ICDE 2013, Brisbane, Australia, April 2013
@inproceedings{abc, author = {Martin Kaufmann and Peter M. Fischer and Donald Kossmann and Norman May}, booktitle = {29th IEEE International Conference on Data Engineering, ICDE 2013, Brisbane, Australia}, title = {A generic database benchmarking service.}, url = {http://doi.ieeecomputersociety.org/10.1109/ICDE.2013.6544923}, year = {2013} }
January 2013
An increasing number of applications such as risk evaluation in banking or inventory management require support for temporal data. After more than a decade of standstill, the recent adoption of some bitemporal features in SQL:2011 has reinvigorated the support among commercial database vendors, who incorporate an increasing number of relevant bitemporal features. Naturally, assessing the performance and scalability of temporal data storage and operations is of great concern for potential users. The cost of keeping and querying history with novel operations (such as time travel, temporal joins or temporal aggregations) is not adequately reflected in any existing benchmark. In this paper, we present a benchmark proposal which provides comprehensive coverage of bitemporal data management. It builds on the solid foundations of TPC-H but extends it with a rich set of queries and update scenarios. This workload stems both from real-life temporal applications from SAP's customer base and a systematic coverage of temporal operators proposed in the academic literature. In the accompanying paper we present preliminary results of our benchmark on a number of temporal database systems, also highlighting the need for certain language extensions. In the appendix of this technical report we provide all details required to implement the benchmark.
@techreport{abc, abstract = {An increasing number of applications such as risk evaluation in banking or inventory management require support for temporal data. After more than a decade of standstill, the recent adoption of some bitemporal features in SQL:2011 has reinvigorated the support among commercial database vendors, who incorporate an increasing number of relevant bitemporal features. Naturally, assessing the performance and scalability of temporal data storage and operations is of great concern for potential users. The cost of keeping and querying history with novel operations (such as time travel, temporal joins or temporal aggregations) is not adequately reflected in any existing benchmark. In this paper, we present a benchmark proposal which provides comprehensive coverage of bitemporal data management. It builds on the solid foundations of TPC-H but extends it with a rich set of queries and update scenarios. This workload stems both from real-life temporal applications from SAP{\textquoteright}s customer base and a systematic coverage of temporal operators proposed in the academic literature. In the accompanying paper we present preliminary results of our benchmark on a number of temporal database systems, also highlighting the need for certain language extensions. In the appendix of this technical report we provide all details required to implement the benchmark.}, author = {Martin Kaufmann and Peter M. Fischer and Norman May and Donald Kossmann}, title = {Benchmarking Databases with History Support}, url = {http://dx.doi.org/10.3929/ethz-a-009994978}, year = {2013} }
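As a concrete example of one operator class from this workload, here is a hedged sketch of temporal aggregation (the idea only, not the benchmark's definition): an aggregate over interval-stamped rows yields a step function over time, with a change point at every interval endpoint.

    def temporal_sum(rows):
        # Temporal aggregation sketch: SUM(value) as a step function over
        # time. rows are (start, end, value) with half-open validity intervals.
        events = []
        for start, end, value in rows:
            events.append((start, value))   # value enters the aggregate
            events.append((end, -value))    # value leaves the aggregate
        events.sort()
        result, running, i = [], 0, 0
        while i < len(events):
            t = events[i][0]
            while i < len(events) and events[i][0] == t:
                running += events[i][1]
                i += 1
            result.append((t, running))
        return result

    # SUM over time for two overlapping intervals:
    print(temporal_sum([(1, 5, 10), (3, 7, 5)]))  # [(1, 10), (3, 15), (5, 5), (7, 0)]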
PVLDB, January 2013
@article{abc, author = {Martin Kaufmann and Panagiotis Vagenas and Peter M. Fischer and Donald Kossmann and Franz F{\"a}rber}, journal = {PVLDB}, title = {Comprehensive and Interactive Temporal Query Processing with SAP HANA.}, url = {http://www.vldb.org/pvldb/vol6/p1210-kaufmann.pdf}, year = {2013} }
2012
IEEE 28th International Conference on Data Engineering (ICDE 2012), Washington, DC, USA (Arlington, Virginia), April 2012
@inproceedings{abc, author = {Peter M. Fischer and Jens Teubner}, booktitle = {IEEE 28th International Conference on Data Engineering (ICDE 2012), Washington, DC, USA (Arlington, Virginia)}, title = {MXQuery with Hardware Acceleration.}, url = {http://doi.ieeecomputersociety.org/10.1109/ICDE.2012.130}, year = {2012} }
15th International Conference on Extending Database Technology, EDBT '12, Berlin, Germany, January 2012
@inproceedings{abc, author = {Irina Botan and Peter M. Fischer and Donald Kossmann and Nesime Tatbul}, booktitle = {15th International Conference on Extending Database Technology, EDBT {\textquoteright}12, Berlin, Germany}, title = {Transactional Stream Processing}, url = {http://doi.acm.org/10.1145/2247596.2247622}, year = {2012} }
January 2012
@techreport{abc, author = {Boris Glavic and Kyumars Sheykh Esmaili and Peter M. Fischer and Nesime Tatbul}, title = {Ariadne: Managing Fine-Grained Provenance on Data Streams}, year = {2012} }
2011
Proceedings of the Fifth ACM International Conference on Distributed Event-Based Systems, DEBS 2011, New York, NY, USA, July 2011
@inproceedings{abc, author = {Nihal Dindar and Peter M. Fischer and Merve Soner and Nesime Tatbul}, booktitle = {Proceedings of the Fifth ACM International Conference on Distributed Event-Based Systems, DEBS 2011, New York, NY, USA}, title = {Efficiently correlating complex events over live and archived data streams.}, url = {http://doi.acm.org/10.1145/2002259.2002293}, year = {2011} }
Proceedings of the ACM SIGMOD International Conference on Management of Data, SIGMOD 2011, Athens, Greece, June 2011
@inproceedings{abc, author = {Kyumars Sheykh Esmaili and Tahmineh Sanamrad and Peter M. Fischer and Nesime Tatbul}, booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data, SIGMOD 2011, Athens, Greece}, title = {Changing flights in mid-air: a model for safely modifying continuous queries.}, url = {http://doi.acm.org/10.1145/1989323.1989388}, year = {2011} }
Proceedings of the XML Prague 2011 Conference, Prague, CZ, January 2011
In our community there are three main models for representing and processing data: Relations, XML and RDF. Each of these models has its "sweet spot" for applications and its own query language; very few implementations cater for more than one of these. We describe a uniform platform which provides interfaces for different query languages to retrieve and modify the same information or combine it with other data sources. This paper presents methods for completely and correctly translating SQL and SPARQL into XQuery since XQuery provides the most expressive foundation. Early results with our current prototype show that the translation from SPARQL to XQuery already achieves very competitive performance, whereas there is still a significant performance gap compared to SQL.
@inproceedings{abc, abstract = {In our community there are three main models for representing and processing data: Relations, XML and RDF. Each of these models has its "sweet spot" for applications and its own query language; very few implementations cater for more than one of these. We describe a uniform platform which provides interfaces for different query languages to retrieve and modify the same information or combine it with other data sources. This paper presents methods for completely and correctly translating SQL and SPARQL into XQuery since XQuery provides the most expressive foundation. Early results with our current prototype show that the translation from SPARQL to XQuery already achieves very competitive performance, whereas there is still a significant performance gap compared to SQL.}, author = {Martin Kaufmann and Daniela Florescu and Donald Kossmann and Peter M. Fischer}, booktitle = {Proceedings of the XML Prague 2011 Conference, Prague, CZ}, title = {Translating SPARQL and SQL to XQuery}, year = {2011} }
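To illustrate the direction of such a translation (a toy example, not the paper's algorithm), a simple SQL selection can be rendered as an XQuery FLWOR expression once rows are encoded as XML elements; the row/column encoding below is our assumption, not necessarily the paper's:

    def sql_select_to_flwor(table, out_col, pred_col, pred_val):
        # Toy translation of:  SELECT out_col FROM table WHERE pred_col = 'pred_val'
        # assuming each row is stored as a <row> element with one child
        # element per column (a hypothetical encoding).
        return (f'for $r in doc("{table}.xml")//row\n'
                f'where $r/{pred_col} = "{pred_val}"\n'
                f'return $r/{out_col}')

    print(sql_select_to_flwor("employees", "name", "dept", "sales"))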
Proceedings of the Data Streams and Event Processing Workshop (co-located with BTW 2011), Kaiserslautern, Germany, January 2011
The current state of the art for provenance in data stream management systems (DSMS) is to provide provenance at a high level of abstraction (such as from which sensors in a sensor network an aggregated value is derived). This limitation was imposed by high-throughput requirements and an anticipated lack of application demand for more detailed provenance information. In this work, we first demonstrate by means of well-chosen use cases that this is a misconception, i.e., coarse-grained provenance is in fact insufficient for many application domains. We then analyze the requirements and challenges involved in integrating support for fine-grained provenance into a streaming system and outline a scalable solution for supporting tuple-level provenance in DSMS.
@inproceedings{abc, abstract = {The current state of the art for provenance in data stream management systems (DSMS) is to provide provenance at a high level of abstraction (such as from which sensors in a sensor network an aggregated value is derived). This limitation was imposed by high-throughput requirements and an anticipated lack of application demand for more detailed provenance information. In this work, we first demonstrate by means of well-chosen use cases that this is a misconception, i.e., coarse-grained provenance is in fact insufficient for many application domains. We then analyze the requirements and challenges involved in integrating support for fine-grained provenance into a streaming system and outline a scalable solution for supporting tuple-level provenance in DSMS.}, author = {Boris Glavic and Peter M. Fischer and Nesime Tatbul and Kyumars Sheykh Esmaili}, booktitle = {Proceedings of the Data Streams and Event Processing Workshop (co-located with BTW 2011), Kaiserslautern, Germany}, title = {The Case for Fine-Grained Stream Provenance}, year = {2011} }
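A minimal sketch of what tuple-level (fine-grained) provenance means for a streaming operator, with all names invented here: each output tuple carries the identifiers of exactly the input tuples it was derived from.

    def windowed_sum_with_provenance(stream, size):
        # Tumbling-window SUM that propagates fine-grained provenance:
        # each output records the ids of the inputs that contributed to it.
        # `stream` yields (tuple_id, value) pairs.
        window = []
        for tuple_id, value in stream:
            window.append((tuple_id, value))
            if len(window) == size:
                ids = [tid for tid, _ in window]
                yield {"sum": sum(v for _, v in window), "provenance": ids}
                window = []

    for out in windowed_sum_with_provenance([(1, 4), (2, 6), (3, 1), (4, 9)], 2):
        print(out)  # {'sum': 10, 'provenance': [1, 2]}, then {'sum': 10, 'provenance': [3, 4]}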
Proceedings of the XML Prague 2011 Conference, Prague, CZ, January 2011
Over the years, the HTML-based Web has become a platform for providing applications and dynamic pages that have little resemblance to the collection of static documents it had been in the beginning. This was made possible by the introduction of client-side programmable browsers. Because XML and HTML are cousins, XML technologies can be almost readily adapted for client-side programming. In the past, we suggested to do so with XQuery and implemented it as a plugin. However, using a plugin was seen as an insurmountable obstacle to a wider adoption of client-side XQuery.
In this paper, we present a version of XQuery in the Browser without any plugin, needing only JavaScript to interpret XQuery code. This enables use even on mobile devices, where plugins are not available. Even though our current version is still considered to be at an alpha stage, we were able to deploy it successfully on most major desktop and mobile browsers. The size of the JS code is about 700 KB. By activating compression on the web server (reducing the transferred data to less than 200 KB) as well as caching on the client, using the XQuery engine does not cause noticeable overhead after the initial loading.
@inproceedings{abc, abstract = {Over the years, the HTML-based Web has become a platform for providing applications and dynamic pages that have little resemblance to the collection of static documents it had been in the beginning. This was made possible by the introduction of client-side programmable browsers. Because XML and HTML are cousins, XML technologies can be almost readily adapted for client-side programming. In the past, we suggested to do so with XQuery and implemented it as a plugin. However, using a plugin was seen as an insurmountable obstacle to a wider adoption of client-side XQuery. In this paper, we present a version of XQuery in the Browser without any plugin, needing only JavaScript to interpret XQuery code. This enables use even on mobile devices, where plugins are not available. Even though our current version is still considered to be at an alpha stage, we were able to deploy it successfully on most major desktop and mobile browsers. The size of the JS code is about 700 KB. By activating compression on the web server (reducing the transferred data to less than 200 KB) as well as caching on the client, using the XQuery engine does not cause noticeable overhead after the initial loading.}, author = {Thomas Etter and Peter M. Fischer and Daniela Florescu and Ghislain Fourny and Donald Kossmann}, booktitle = {Proceedings of the XML Prague 2011 Conference, Prague, CZ}, title = {XQuery in the Browser reloaded: Riding on the coat-tails of JavaScript}, year = {2011} }
DejaVu: A Complex Event Processing System for Pattern Matching over Live and Historical Data Streams
Proceedings of the ACM International Conference on Distributed Event-Based Systems (DEBS'11), New York, NY, USA, January 2011
@inproceedings{abc, author = {Nihal Dindar and Peter M. Fischer and Nesime Tatbul}, booktitle = {Proceedings of the ACM International Conference on Distributed Event-Based Systems (DEBS{\textquoteright}11), New York, NY, USA}, title = {DejaVu: A Complex Event Processing System for Pattern Matching over Live and Historical Data Streams}, year = {2011} }
2010
Database and XML Technologies - 7th International XML Database Symposium, XSym 2010, Singapore, September 2010
@inproceedings{abc, author = {Peter M. Fischer and Aayush Garg and Kyumars Sheykh Esmaili}, booktitle = {Database and XML Technologies - 7th International XML Database Symposium, XSym 2010, Singapore}, title = {Extending XQuery with a Pattern Matching Facility.}, url = {http://dx.doi.org/10.1007/978-3-642-15684-7_5}, year = {2010} }
EDBT 2010, 13th International Conference on Extending Database Technology, Lausanne, Switzerland, January 2010
@inproceedings{abc, author = {Peter M. Fischer and Kyumars Sheykh Esmaili and Ren{\'e}e J. Miller}, booktitle = {EDBT 2010, 13th International Conference on Extending Database Technology, Lausanne, Switzerland}, title = {Stream schema: providing and exploiting static metadata for data stream processing.}, url = {http://doi.acm.org/10.1145/1739041.1739068}, year = {2010} }
Proceedings of the XML Prague 2010 Conference, Prague, CZ, January 2010
@inproceedings{abc, author = {Peter M. Fischer}, booktitle = {Proceedings of the XML Prague 2010 Conference, Prague, CZ}, title = {XQBench -- A XQuery Benchmarking Service}, year = {2010} }
2009
PVLDB, January 2009
@article{abc, author = {Roger Bamford and Vinayak R. Borkar and Matthias Brantner and Peter M. Fischer and Daniela Florescu and David A. Graf and Donald Kossmann and Tim Kraska and Dan Muresan and Sorin Nasoi and Markos Zacharioudaki}, journal = {PVLDB}, title = {XQuery Reloaded.}, url = {http://www.vldb.org/pvldb/2/vldb09-1078.pdf}, year = {2009} }
January 2009
@techreport{abc, author = {Ren{\'e}e J. Miller and Peter M. Fischer and Kyumars Sheykh Esmaili}, title = {Stream schema: providing and exploiting static metadata for data stream processing}, year = {2009} }
Proceedings of the 12th International Conference on Extending Database Technology, EDBT 2009, Saint Petersburg, Russia, January 2009
Data Stream Management Systems (DSMS) operate under strict performance requirements. Key to meeting such requirements is to efficiently handle time-critical tasks such as managing internal states of continuous query operators, traffic on the queues between operators, as well as providing storage support for shared computation and archived data. In this paper, we introduce a general purpose storage management framework for DSMSs that performs these tasks based on a clean, loosely-coupled, and flexible system design that also facilitates performance optimization. An important contribution of the framework is that, in analogy to buffer management techniques in relational database systems, it uses information about the access patterns of streaming applications to tune and customize the performance of the storage manager. In the paper, we first analyze typical application requirements at different granularities in order to identify important tunable parameters and their corresponding values. Based on these parameters, we define a general-purpose storage management interface. Using the interface, a developer can use our SMS (Storage Manager for Streams) to generate a customized storage manager for streaming applications. We explore the performance and potential of SMS through a set of experiments using the Linear Road benchmark.
@inproceedings{abc, abstract = {Data Stream Management Systems (DSMS) operate under strict performance requirements. Key to meeting such requirements is to efficiently handle time-critical tasks such as managing internal states of continuous query operators, traffic on the queues between operators, as well as providing storage support for shared computation and archived data. In this paper, we introduce a general purpose storage management framework for DSMSs that performs these tasks based on a clean, loosely-coupled, and flexible system design that also facilitates performance optimization. An important contribution of the framework is that, in analogy to buffer management techniques in relational database systems, it uses information about the access patterns of streaming applications to tune and customize the performance of the storage manager. In the paper, we first analyze typical application requirements at different granularities in order to identify important tunable parameters and their corresponding values. Based on these parameters, we define a general-purpose storage management interface. Using the interface, a developer can use our SMS (Storage Manager for Streams) to generate a customized storage manager for streaming applications. We explore the performance and potential of SMS through a set of experiments using the Linear Road benchmark.}, author = {Irina Botan and Gustavo Alonso and Nesime Tatbul and Donald Kossmann and Peter M. Fischer}, booktitle = {Proceedings of the 12th International Conference on Extending Database Technology, EDBT 2009, Saint Petersburg, Russia}, title = {Flexible and Scalable Storage Management for Data-intensive Stream Processing}, url = {http://doi.acm.org/10.1145/1516360.1516467}, year = {2009} }
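The tuning idea can be illustrated in miniature (a hypothetical sketch, not SMS's actual interface): the streaming application declares its access pattern, and the framework selects a matching storage implementation, e.g. a sequential store for inter-operator queues versus a keyed store for operator state.

    from collections import deque

    class SequentialStore:
        # Append / trim-at-head store suited to FIFO queue traffic between operators.
        def __init__(self):
            self.buf = deque()
        def append(self, item): self.buf.append(item)
        def pop_oldest(self): return self.buf.popleft()

    class KeyedStore:
        # Random-access store suited to operator state probed by key.
        def __init__(self):
            self.data = {}
        def put(self, key, item): self.data[key] = item
        def get(self, key): return self.data.get(key)

    def make_store(access_pattern):
        # The tuning idea in miniature: the declared access pattern of the
        # streaming application selects the storage implementation.
        return SequentialStore() if access_pattern == "fifo" else KeyedStore()

    queue_between_ops = make_store("fifo")
    join_state = make_store("keyed")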
2008
SIGMOD Record, vol. 37, no. 4, pp. 94--99, December 2008
@article{abc, author = {Gustavo Alonso and Donald Kossmann and Timothy Roscoe and Nesime Tatbul and Andrew Baumann and Carsten Binnig and Peter M. Fischer and Oriana Riva and Jens Teubner}, journal = {SIGMOD Record}, volume = {37}, number = {4}, pages = {94--99}, title = {The ETH Zurich systems group and enterprise computing center.}, url = {http://doi.acm.org/10.1145/1519103.1519120}, year = {2008} }
Towards Systematic Achievement of Compliance in Service-Oriented Architectures: The MASTER Approach.
Wirtschaftsinformatik, January 2008
@article{abc, author = {Volkmar Lotz and Emmanuel Pigout and Peter M. Fischer and Donald Kossmann and Fabio Massacci and Alexander Pretschner}, journal = {Wirtschaftsinformatik}, title = {Towards Systematic Achievement of Compliance in Service-Oriented Architectures: The MASTER Approach.}, url = {http://dx.doi.org/10.1007/s11576-008-0086-1}, year = {2008} }
2007
Digital Libraries: Research and Development, First International DELOS Conference, Pisa, Italy, Revised Selected Papers, January 2007
@inproceedings{abc, author = {Michael Springmann and Ludger Bischofs and Peter M. Fischer and Hans-J{\"o}rg Schek and Heiko Schuldt and Ulrike Steffens and R. Vogl}, booktitle = {Digital Libraries: Research and Development, First International DELOS Conference, Pisa, Italy}, title = {Management of and Access to Virtual Electronic Health Records.}, url = {http://dx.doi.org/10.1007/978-3-540-77088-6_33}, venue = {Revised Selected Papers}, year = {2007} }
Digital Libraries: Research and Development, First International DELOS Conference, Pisa, Italy, Revised Selected Papers, January 2007
@inproceedings{abc, author = {Gert Brettlecker and Heiko Schuldt and Peter M. Fischer and Hans-J{\"o}rg Schek}, booktitle = {Digital Libraries: Research and Development, First International DELOS Conference, Pisa, Italy}, title = {Integration of Reliable Sensor Data Stream Management into Digital Libraries.}, url = {http://dx.doi.org/10.1007/978-3-540-77088-6_7}, venue = {Revised Selected Papers}, year = {2007} }
Proceedings of the 33rd International Conference on Very Large Data Bases, University of Vienna, Austria, January 2007
@inproceedings{abc, author = {Irina Botan and Peter M. Fischer and Daniela Florescu and Donald Kossmann and Tim Kraska and Rokas Tamosevicius}, booktitle = {Proceedings of the 33rd International Conference on Very Large Data Bases, University of Vienna, Austria}, title = {Extending XQuery with Window Functions.}, url = {http://www.vldb.org/conf/2007/papers/research/p75-botan.pdf}, year = {2007} }
2006
Proceedings of the 3rd Workshop on Data Management for Sensor Networks, in conjunction with VLDB, DMSN 2006, Seoul, Korea, January 2006
@inproceedings{abc, author = {Peter M. Fischer and Donald Kossmann}, booktitle = {Proceedings of the 3rd Workshop on Data Management for Sensor Networks, in conjunction with VLDB, DMSN 2006, Seoul, Korea}, title = {Quality of service in stateful information filters.}, url = {http://doi.acm.org/10.1145/1315903.1315913}, year = {2006} }
ETH Zürich, Diss. Nr. 16671, January 2006
Supervised by: Prof. Donald Kossmann
In recent years, we have seen a shift in the way information is processed. Departing from the traditional paradigm in which information is first stored and then queried, we are quickly moving to a new paradigm in which new information is directly routed to the relevant recipients. This new paradigm is being adopted by several research communities, databases being only one of them. Information filters represent one of the key components of this new paradigm, as they loosely couple senders and receivers of data items. Receivers of information submit a profile of their interest to the information filter, while producers of information send messages to the information filter. The purpose of an information filter is then to match the messages to the profiles, so that the matching messages can be sent to the relevant receivers. Information filters are used in areas like application integration, personalized content delivery, networking monitoring and many other areas. Simpler versions of information filters are appearing as products on the market place, while research continues into several directions.
In order to enable the information filtering paradigm, techniques like profile indexing and stream processing are used. The main directions of research have been expressiveness of profiles, scalability in terms of profiles and distribution of information filters over networks.
This thesis contributes three new aspects to the area of information filtering: scalability in terms of message throughput, context-aware information filters that use state for the matching decision and a study of quality of service. Scalability in terms of message throughput is achieved by processing messages in batches instead of processing them one by one, thus reducing the cost of processing an individual message. Context-aware information filters augment existing, stateless information filters by including context state into the matching decision. Since this state receives updates, the key challenge in building such a context-aware information filter is to deal with high message rates and high update rates. The thesis addresses this challenge in two different ways: AGILE, a method to automatically adapt index accuracy to the workload parameters, and batched processing of updates, where a set of updates is processed at once in order to reduce the cost. Quality of service becomes more and more important as information filters are used in settings where the load is unpredictable and might exceed the available resources. This work examines how state of the art approaches to implement quality of service apply to information filters. For the three areas contributed by this thesis, a theoretical analysis and an extensive performance study is provided, illustrating the benefits and trade-offs.
To sum up, this thesis contributes work to improve information filters by increasing the message throughput, including context state in the matching process and studying quality of service. The results provide further support for the adoption of information filters into the mainstream of information processing.
@phdthesis{abc, abstract = {In recent years, we have seen a shift in the way information is processed. Departing from the traditional paradigm in which information is first stored and then queried, we are quickly moving to a new paradigm in which new information is directly routed to the relevant recipients. This new paradigm is being adopted by several research communities, databases being only one of them. Information filters represent one of the key components of this new paradigm, as they loosely couple senders and receivers of data items. Receivers of information submit a profile of their interest to the information filter, while producers of information send messages to the information filter. The purpose of an information filter is then to match the messages to the profiles, so that the matching messages can be sent to the relevant receivers. Information filters are used in areas like application integration, personalized content delivery, networking monitoring and many other areas. Simpler versions of information filters are appearing as products on the market place, while research continues into several directions. In order to enable the information filtering paradigm, techniques like profile indexing and stream processing are used. The main directions of research have been expressiveness of profiles, scalability in terms of profiles and distribution of information filters over networks. This thesis contributes three new aspects to the area of information filtering: scalability in terms of message throughput, context-aware information filters that use state for the matching decision and a study of quality of service. Scalability in terms of message throughput is achieved by processing messages in batches instead of processing them one by one, thus reducing the cost of processing an individual message. Context-aware information filters augment existing, stateless information filters by including context state into the matching decision. Since this state receives updates, the key challenge in building such a context-aware information filter is to deal with high message rates and high update rates. The thesis addresses this challenge in two different ways: AGILE, a method to automatically adapt index accuracy to the workload parameters, and batched processing of updates, where a set of updates is processed at once in order to reduce the cost. Quality of service becomes more and more important as information filters are used in settings where the load is unpredictable and might exceed the available resources. This work examines how state of the art approaches to implement quality of service apply to information filters. For the three areas contributed by this thesis, a theoretical analysis and an extensive performance study is provided, illustrating the benefits and trade-offs. To sum up, this thesis contributes work to improve information filters by increasing the message throughput, including context state in the matching process and studying quality of service. The results provide further support for the adoption of information filters into the mainstream of information processing.}, author = {Peter M. Fischer}, school = {ETH Z{\"u}rich}, note = {Diss. Nr. 16671}, title = {Adaptive Optimization Techniques for Context-Aware Information Filters}, year = {2006} }
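A toy rendering of two of the thesis's central notions, context-aware matching and batched processing (names invented here; AGILE's adaptive indexing is not modeled): profiles are predicates over both the message and a mutable context, and messages are matched in batches to amortize per-message overhead.

    class ContextAwareFilter:
        # Toy context-aware information filter: a profile matches a message
        # only if its predicate over the message AND the current context holds.
        def __init__(self):
            self.profiles = {}   # subscriber -> predicate(message, context)
            self.context = {}    # mutable state consulted at match time

        def subscribe(self, subscriber, predicate):
            self.profiles[subscriber] = predicate

        def update_context(self, key, value):
            self.context[key] = value

        def match_batch(self, messages):
            # Batched matching: evaluate a whole batch against the profile
            # set in one pass to amortize per-message overhead.
            out = []
            for msg in messages:
                for subscriber, pred in self.profiles.items():
                    if pred(msg, self.context):
                        out.append((subscriber, msg))
            return out

    f = ContextAwareFilter()
    f.update_context("location", "Zurich")
    f.subscribe("alice", lambda m, ctx: m["topic"] == "traffic" and ctx["location"] == "Zurich")
    print(f.match_batch([{"topic": "traffic"}, {"topic": "sports"}]))  # [('alice', {'topic': 'traffic'})]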
January 2006
@techreport{abc, author = {Peter M. Fischer and Donald Kossmann and Tim Kraska and Rokas Tamosevicius}, title = {Windows for XQuery - Use Cases}, year = {2006} }
January 2006
@techreport{abc, author = {Irina Botan and Peter M. Fischer and Daniela Florescu and Donald Kossmann and Tim Kraska and Rokas Tamosevicius}, title = {Extending XQuery with Window Functions}, year = {2006} }
2005
Proceedings of the ACM SIGMOD International Conference on Management of Data, Baltimore, Maryland, USA, January 2005
@inproceedings{abc, author = {Jens Dittrich and Peter M. Fischer and Donald Kossmann}, booktitle = {Proceedings of the ACM SIGMOD International Conference on Management of Data, Baltimore, Maryland, USA}, title = {AGILE: Adaptive Indexing for Context-Aware Information Filters.}, url = {http://doi.acm.org/10.1145/1066157.1066183}, year = {2005} }
Proceedings of the 21st International Conference on Data Engineering, ICDE 2005, Tokyo, Japan, January 2005
@inproceedings{abc, author = {Peter M. Fischer and Donald Kossmann}, booktitle = {Proceedings of the 21st International Conference on Data Engineering, ICDE 2005}, title = {Batched Processing for Information Filters.}, url = {http://csdl.computer.org/comp/proceedings/icde/2005/2285/00/22850174abs.htm}, venue = {Tokyo, Japan}, year = {2005} }
2004
Beitragsband zum Workshop "Grundlagen und Anwendungen mobiler Informationstechnologie" des GI-Arbeitskreises Mobile Datenbanken und Informationssysteme, Heidelberg, 23.-24. März 2004, January 2004
@inproceedings{abc, author = {Peter M. Fischer and Donald Kossmann}, booktitle = {Beitragsband zum Workshop "Grundlagen und Anwendungen mobiler Informationstechnologie" des GI-Arbeitskreises Mobile Datenbanken und Informationssysteme, Heidelberg, 23.-24. M{\"a}rz 2004}, title = {Kontextsensitive Informationsfilter.}, year = {2004} }
2003
ACM Trans. Database Syst., January 2003
@article{abc, author = {Yanlei Diao and Mehmet Altinel and Michael J. Franklin and Hao Zhang and Peter M. Fischer}, journal = {ACM Trans. Database Syst.}, title = {Path sharing and predicate evaluation for high-performance XML filtering.}, url = {http://doi.acm.org/10.1145/958942.958947}, year = {2003} }
2002
Proceedings of the 18th International Conference on Data Engineering, San Jose, CA, USA, January 2002
@inproceedings{abc, author = {Yanlei Diao and Peter M. Fischer and Michael J. Franklin and Raymond To}, booktitle = {Proceedings of the 18th International Conference on Data Engineering, San Jose, CA, USA}, title = {YFilter: Efficient and Scalable Filtering of XML Documents.}, url = {http://csdl.computer.org/comp/proceedings/icde/2002/1531/00/15310341.pdf}, year = {2002} }