Publications by Feng Niu

×

Status message

The Publications site is currently under construction, as a result some publications might be missing.

2016

Proceedings of the 2016 International Conference on Management of Data, SIGMOD Conference 2016, San Francisco, CA, USA, June 2016
DeepDive is a system for extracting relational databases from dark data: the mass of text, tables, and images that are widely collected and stored but which cannot be exploited by standard relational tools. If the information in dark data - scientific papers, Web classified ads, customer service notes, and so on - were instead in a relational database, it would give analysts a massive and valuable new set of "big data." DeepDive is distinctive when compared to previous information extraction systems in its ability to obtain very high precision and recall at reasonable engineering cost; in a number of applications, we have used DeepDive to create databases with accuracy that meets that of human annotators. To date we have successfully deployed DeepDive to create data-centric applications for insurance, materials science, genomics, paleontologists, law enforcement, and others. The data unlocked by DeepDive represents a massive opportunity for industry, government, and scientific researchers. DeepDive is enabled by an unusual design that combines large-scale probabilistic inference with a novel developer interaction cycle. This design is enabled by several core innovations around probabilistic training and inference.
% DeepDive system paper, SIGMOD 2016 (San Francisco, June 2016).
% NOTE(review): pages and publisher are not recorded in this entry; add them once verified.
@inproceedings{zhang2016deepdive,
	author = {Zhang, Ce and Shin, Jaeho and R{\'e}, Christopher and Cafarella, Michael J. and Niu, Feng},
	title = {Extracting Databases from Dark Data with {DeepDive}},
	booktitle = {Proceedings of the 2016 International Conference on Management of Data, {SIGMOD} Conference 2016},
	venue = {San Francisco, CA, USA},
	month = jun,
	year = {2016},
	doi = {10.1145/2882903.2904442},
	abstract = {DeepDive is a system for extracting relational databases from dark data: the mass of text, tables, and images that are widely collected and stored but which cannot be exploited by standard relational tools. If the information in dark data---scientific papers, Web classified ads, customer service notes, and so on---were instead in a relational database, it would give analysts a massive and valuable new set of ``big data.'' DeepDive is distinctive when compared to previous information extraction systems in its ability to obtain very high precision and recall at reasonable engineering cost; in a number of applications, we have used DeepDive to create databases with accuracy that meets that of human annotators. To date we have successfully deployed DeepDive to create data-centric applications for insurance, materials science, genomics, paleontologists, law enforcement, and others. The data unlocked by DeepDive represents a massive opportunity for industry, government, and scientific researchers. DeepDive is enabled by an unusual design that combines large-scale probabilistic inference with a novel developer interaction cycle. This design is enabled by several core innovations around probabilistic training and inference.},
}

2013

CIDR 2013, Sixth Biennial Conference on Innovative Data Systems Research, Asilomar, CA, USA, January 2013
% Brainwash feature-engineering system paper, CIDR 2013 (Asilomar, January 2013).
% NOTE(review): only a URL is recorded (no DOI); consider adding urldate when the link is next checked.
@inproceedings{anderson2013brainwash,
	author = {Anderson, Michael and Antenucci, Dolan and Bittorf, Victor and Burgess, Matthew and Cafarella, Michael J. and Kumar, Arun and Niu, Feng and Park, Yongjoo and R{\'e}, Christopher and Zhang, Ce},
	title = {Brainwash: A Data System for Feature Engineering},
	booktitle = {{CIDR} 2013, Sixth Biennial Conference on Innovative Data Systems Research},
	venue = {Asilomar, CA, USA},
	month = jan,
	year = {2013},
	url = {http://www.cidrdb.org/cidr2013/Papers/CIDR13_Paper82.pdf},
}

2012

12th IEEE International Conference on Data Mining, ICDM 2012, Brussels, Belgium, December 2012
% Dual-decomposition inference for Markov logic, ICDM 2012 (Brussels, December 2012).
% NOTE(review): pages are not recorded in this entry; add them once verified.
@inproceedings{niu2012dual,
	author = {Niu, Feng and Zhang, Ce and R{\'e}, Christopher and Shavlik, Jude W.},
	title = {Scaling Inference for {Markov} Logic via Dual Decomposition},
	booktitle = {12th {IEEE} International Conference on Data Mining, {ICDM} 2012},
	venue = {Brussels, Belgium},
	month = dec,
	year = {2012},
	doi = {10.1109/ICDM.2012.96},
}
Proceedings of The Twenty-First Text REtrieval Conference, TREC 2012, Gaithersburg, Maryland, USA, November 2012
% Stream-filtering test collection paper, TREC 2012 (Gaithersburg, November 2012).
% NOTE(review): only a URL is recorded (no DOI); consider adding urldate when the link is next checked.
@inproceedings{frank2012trec,
	author = {Frank, John R. and Kleiman-Weiner, Max and Roberts, Daniel A. and Niu, Feng and Zhang, Ce and R{\'e}, Christopher and Soboroff, Ian},
	title = {Building an Entity-Centric Stream Filtering Test Collection for {TREC} 2012},
	booktitle = {Proceedings of The Twenty-First Text {REtrieval} Conference, {TREC} 2012},
	venue = {Gaithersburg, Maryland, USA},
	month = nov,
	year = {2012},
	url = {http://trec.nist.gov/pubs/trec21/papers/KBA.OVERVIEW.pdf},
}
Proceedings of the Second International Workshop on Searching and Integrating New Web Data Sources, Istanbul, Turkey, August 2012
% DeepDive knowledge-base construction workshop paper (Istanbul, August 2012).
% The system name is braced so sentence-casing styles do not render it as "Deepdive".
@inproceedings{niu2012deepdive,
	author = {Niu, Feng and Zhang, Ce and R{\'e}, Christopher and Shavlik, Jude W.},
	title = {{DeepDive}: Web-scale Knowledge-base Construction using Statistical Learning and Inference},
	booktitle = {Proceedings of the Second International Workshop on Searching and Integrating New Web Data Sources},
	venue = {Istanbul, Turkey},
	month = aug,
	year = {2012},
	url = {http://ceur-ws.org/Vol-884/VLDS2012_p25_Niu.pdf},
}
The 50th Annual Meeting of the Association for Computational Linguistics, Proceedings of the Conference, Jeju Island, Korea - Volume 1: Long Papers, July 2012
% ACL 2012 long paper (Jeju Island, July 2012).
% "Volume 1: Long Papers" names the proceedings volume, so it lives in booktitle rather than venue.
@inproceedings{zhang2012bigdata,
	author = {Zhang, Ce and Niu, Feng and R{\'e}, Christopher and Shavlik, Jude W.},
	title = {Big Data versus the Crowd: Looking for Relationships in All the Right Places},
	booktitle = {The 50th Annual Meeting of the Association for Computational Linguistics, Proceedings of the Conference, Volume 1: Long Papers},
	venue = {Jeju Island, Korea},
	month = jul,
	year = {2012},
	url = {http://www.aclweb.org/anthology/P12-1087},
}
Int. J. Semantic Web Inf. Syst., January 2012
% Journal article, so it is typed as an article with a journal field rather than a proceedings entry.
% NOTE(review): volume, number and pages are not recorded in this entry; add them once verified.
@article{niu2012elementary,
	author = {Niu, Feng and Zhang, Ce and R{\'e}, Christopher and Shavlik, Jude W.},
	title = {Elementary: Large-Scale Knowledge-Base Construction via Machine Learning and Statistical Inference},
	journal = {International Journal on Semantic Web and Information Systems},
	year = {2012},
	doi = {10.4018/jswis.2012070103},
}

2011

CoRR, January 2011
% arXiv preprint listed under CoRR; the arXiv identifier is stored in eprint so eprint-aware styles can link it.
@article{niu2011felix,
	author = {Niu, Feng and Zhang, Ce and R{\'e}, Christopher and Shavlik, Jude W.},
	title = {Felix: Scaling Inference for {Markov} Logic with an Operator-based Approach},
	journal = {CoRR},
	year = {2011},
	eprint = {1108.0294},
	eprinttype = {arXiv},
	url = {http://arxiv.org/abs/1108.0294},
}