Publications by Hongyi Xin
2017
Bioinformatics, November 2017
Motivation
High throughput DNA sequencing (HTS) technologies generate an excessive number of small DNA segments—called short reads—that cause significant computational burden. To analyze the entire genome, each of the billions of short reads must be mapped to a reference genome based on the similarity between a read and ‘candidate’ locations in that reference genome. The similarity measurement, called alignment, formulated as an approximate string matching problem, is the computational bottleneck because: (i) it is implemented using quadratic-time dynamic programming algorithms and (ii) the majority of candidate locations in the reference genome do not align with a given read due to high dissimilarity. Calculating the alignment of such incorrect candidate locations consumes an overwhelming majority of a modern read mapper’s execution time. Therefore, it is crucial to develop a fast and effective filter that can detect incorrect candidate locations and eliminate them before invoking computationally costly alignment algorithms.
Results
We propose GateKeeper, a new hardware accelerator that functions as a pre-alignment step that quickly filters out most incorrect candidate locations. GateKeeper is the first design to accelerate pre-alignment using Field-Programmable Gate Arrays (FPGAs), which can perform pre-alignment much faster than software. When implemented on a single FPGA chip, GateKeeper maintains high accuracy (on average >96%) while providing, on average, 90-fold and 130-fold speedup over the state-of-the-art software pre-alignment techniques, Adjacency Filter and Shifted Hamming Distance (SHD), respectively. The addition of GateKeeper as a pre-alignment step can reduce the verification time of the mrFAST mapper by a factor of 10.
% GateKeeper journal article (Bioinformatics, vol. 33, 2017).
% The citation key is intentionally left as "abc": this is the first entry in
% the file, so any existing citation of that key already resolves here.
% month is taken from the "November 2017" heading above this entry.
@article{abc,
  author   = {Alser, Mohammed and Hassan, Hasan and Xin, Hongyi and Ergin, Oguz and Mutlu, Onur and Alkan, Can},
  title    = {{GateKeeper}: A New Hardware Architecture for Accelerating Pre-Alignment in {DNA} Short Read Mapping},
  journal  = {Bioinformatics},
  volume   = {33},
  pages    = {3355--3363},
  year     = {2017},
  month    = nov,
  abstract = {Motivation: High throughput DNA sequencing (HTS) technologies generate an excessive number of small DNA segments---called short reads---that cause significant computational burden. To analyze the entire genome, each of the billions of short reads must be mapped to a reference genome based on the similarity between a read and {\textquoteleft}candidate{\textquoteright} locations in that reference genome. The similarity measurement, called alignment, formulated as an approximate string matching problem, is the computational bottleneck because: (i) it is implemented using quadratic-time dynamic programming algorithms and (ii) the majority of candidate locations in the reference genome do not align with a given read due to high dissimilarity. Calculating the alignment of such incorrect candidate locations consumes an overwhelming majority of a modern read mapper{\textquoteright}s execution time. Therefore, it is crucial to develop a fast and effective filter that can detect incorrect candidate locations and eliminate them before invoking computationally costly alignment algorithms. Results: We propose GateKeeper, a new hardware accelerator that functions as a pre-alignment step that quickly filters out most incorrect candidate locations. GateKeeper is the first design to accelerate pre-alignment using Field-Programmable Gate Arrays (FPGAs), which can perform pre-alignment much faster than software. When implemented on a single FPGA chip, GateKeeper maintains high accuracy (on average {\textgreater}96\%) while providing, on average, 90-fold and 130-fold speedup over the state-of-the-art software pre-alignment techniques, Adjacency Filter and Shifted Hamming Distance (SHD), respectively. The addition of GateKeeper as a pre-alignment step can reduce the verification time of the mrFAST mapper by a factor of 10.},
}
2016
CoRR, April 2016
% arXiv preprint of the GateKeeper paper, listed under CoRR.
% Unique key replaces the placeholder "abc", which collided with other entries.
% month follows the arXiv identifier prefix 1604 (April 2016).
@article{alser2016gatekeeper,
  author        = {Alser, Mohammed and Hassan, Hasan and Xin, Hongyi and Ergin, Oguz and Mutlu, Onur and Alkan, Can},
  title         = {{GateKeeper}: Enabling Fast Pre-Alignment in {DNA} Short Read Mapping with a New Streaming Accelerator Architecture},
  journal       = {CoRR},
  volume        = {abs/1604.01789},
  year          = {2016},
  month         = apr,
  eprint        = {1604.01789},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1604.01789},
}
Bioinformatics, January 2016
% Optimal Seed Solver, journal version in Bioinformatics.
% Typed as an article (Bioinformatics is a journal, not a proceedings volume),
% given a unique key, and the DOI is stored bare instead of as a resolver URL.
@article{xin2016optimal,
  author  = {Xin, Hongyi and Nahar, Sunny and Zhu, Richard and Emmons, John and Pekhimenko, Gennady and Kingsford, Carl and Alkan, Can and Mutlu, Onur},
  title   = {Optimal Seed Solver: Optimizing Seed Selection in Read Mapping},
  journal = {Bioinformatics},
  year    = {2016},
  doi     = {10.1093/bioinformatics/btv670},
}
2015
Bioinformatics, January 2015
% Shifted Hamming Distance (SHD) filter, published in Bioinformatics.
% Typed as an article (Bioinformatics is a journal, not a proceedings volume),
% given a unique key, and the DOI is stored bare instead of as a resolver URL.
@article{xin2015shifted,
  author  = {Xin, Hongyi and Greth, John and Emmons, John and Pekhimenko, Gennady and Kingsford, Carl and Alkan, Can and Mutlu, Onur},
  title   = {Shifted {Hamming} Distance: A Fast and Accurate {SIMD}-Friendly Filter to Accelerate Alignment Verification in Read Mapping},
  journal = {Bioinformatics},
  year    = {2015},
  doi     = {10.1093/bioinformatics/btu856},
}
CoRR, June 2015
% arXiv preprint of the Optimal Seed Solver paper, listed under CoRR.
% Unique key replaces the placeholder "abc", which collided with other entries.
% month follows the arXiv identifier prefix 1506 (June 2015).
@article{xin2015optimal,
  author        = {Xin, Hongyi and Zhu, Richard and Nahar, Sunny and Emmons, John and Pekhimenko, Gennady and Kingsford, Carl and Alkan, Can and Mutlu, Onur},
  title         = {Optimal Seed Solver: Optimizing Seed Selection in Read Mapping},
  journal       = {CoRR},
  volume        = {abs/1506.08235},
  year          = {2015},
  month         = jun,
  eprint        = {1506.08235},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1506.08235},
}
2014
TACO, January 2014
% Prefetcher-pollution paper published in TACO.
% TACO is a journal, so this is an article with the full journal name;
% unique key replaces the placeholder "abc"; DOI is stored bare.
@article{seshadri2014mitigating,
  author  = {Seshadri, Vivek and Yedkar, Samihan and Xin, Hongyi and Mutlu, Onur and Gibbons, Phillip B. and Kozuch, Michael A. and Mowry, Todd C.},
  title   = {Mitigating Prefetcher-Caused Pollution Using Informed Caching Policies for Prefetched Blocks},
  journal = {{ACM} Transactions on Architecture and Code Optimization},
  year    = {2014},
  doi     = {10.1145/2677956},
}
2013
The 46th Annual IEEE/ACM International Symposium on Microarchitecture, MICRO-46, Davis, CA, USA, December 2013
% Linearly Compressed Pages, MICRO-46 conference paper.
% Unique key replaces the placeholder "abc"; DOI is stored bare;
% month is taken from the "December 2013" heading above this entry.
@inproceedings{pekhimenko2013linearly,
  author    = {Pekhimenko, Gennady and Seshadri, Vivek and Kim, Yoongu and Xin, Hongyi and Mutlu, Onur and Gibbons, Phillip B. and Kozuch, Michael A. and Mowry, Todd C.},
  title     = {Linearly Compressed Pages: A Low-Complexity, Low-Latency Main Memory Compression Framework},
  booktitle = {The 46th Annual {IEEE/ACM} International Symposium on Microarchitecture, {MICRO-46}, {Davis, CA, USA}},
  year      = {2013},
  month     = dec,
  doi       = {10.1145/2540708.2540724},
}
2013 IEEE International Symposium on Performance Analysis of Systems and Software, Austin, TX, USA, April 2013
% EMERALD workload characterization, ISPASS conference paper.
% Unique key replaces the placeholder "abc"; DOI is stored bare.
% booktitle: the leading year is set to 2013 to agree with the year field and
% the ISPASS.2013 DOI, and the "and" dropped from "Systems and Software" is
% restored. NOTE(review): confirm the exact proceedings title with the publisher.
@inproceedings{zhang2013emerald,
  author    = {Zhang, Chuanjun and Ko, Glenn G. and Choi, Jungwook and Tsai, Shang-nien and Kim, Minje and Guzm{\'a}n-Rivera, Abner and Rutenbar, Rob A. and Smaragdis, Paris and Park, Mi Sun and Vijaykrishnan, Narayanan and Xin, Hongyi and Mutlu, Onur and Li, Bin and Zhao, Li and Chen, Mei},
  title     = {{EMERALD}: Characterization of Emerging Applications and Algorithms for Low-Power Devices},
  booktitle = {2013 {IEEE} International Symposium on Performance Analysis of Systems and Software, {Austin, TX, USA}},
  year      = {2013},
  month     = apr,
  doi       = {10.1109/ISPASS.2013.6557154},
}
BMC Genomics, January 2013
% FastHASH read-mapping paper in BMC Genomics.
% Unique key replaces the placeholder "abc"; DOI is stored bare.
% NOTE(review): volume, number and pages are read off the DOI suffix
% (14-S1-S13); confirm against the publisher record.
@article{xin2013accelerating,
  author  = {Xin, Hongyi and Lee, Donghyuk and Hormozdiari, Farhad and Yedkar, Samihan and Mutlu, Onur and Alkan, Can},
  title   = {Accelerating Read Mapping with {FastHASH}},
  journal = {{BMC} Genomics},
  volume  = {14},
  number  = {Suppl 1},
  pages   = {S13},
  year    = {2013},
  doi     = {10.1186/1471-2164-14-S1-S13},
}