# Publications by Ji Liu

### 2017

Proceedings of the 34th International Conference on Machine Learning, ICML 2017, Sydney, NSW, Australia, August 2017

@inproceedings{abc, author = {Hantian Zhang and Jerry Li and Kaan Kara and Dan Alistarh and Ji Liu and Ce Zhang}, booktitle = {Proceedings of the 34th International Conference on Machine Learning, ICML 2017, Sydney, NSW, Australia}, title = {ZipML: Training Linear Models with End-to-End Low Precision, and a Little Bit of Deep Learning.}, url = {http://proceedings.mlr.press/v70/zhang17e.html}, year = {2017} }

CoRR, January 2017

@inproceedings{abc, author = {Xiangru Lian and Ce Zhang and Huan Zhang and Cho-Jui Hsieh and Wei Zhang and Ji Liu}, booktitle = {CoRR}, title = {Can Decentralized Algorithms Outperform Centralized Algorithms? A Case Study for Decentralized Parallel Stochastic Gradient Descent.}, url = {http://arxiv.org/abs/1705.09056}, year = {2017} }

### 2016

CoRR, January 2016

We present ZipML, the first framework for training dense generalized linear models using end-to-end low-precision representation--in ZipML, all movements of data, including those for input samples, model, and gradients, are represented using as little as two bits per component. Within our framework, we have successfully compressed, separately, the input data by 16x, gradient by 16x, and model by 16x while still getting the same training result. Even for the most challenging datasets, we find that robust convergence can be ensured using only an end-to-end 8-bit representation or a 6-bit representation if only samples are quantized. Our work builds on previous research on using low-precision representations for gradient and model in the context of stochastic gradient descent. Our main technical contribution is a new set of techniques which allow the training samples to be processed with low precision, without affecting the convergence of the algorithm. In turn, this leads to a system where all data items move in a quantized, low precision format. In particular, we first establish that randomized rounding, while sufficient when quantizing the model and the gradients, is biased when quantizing samples, and thus leads to a different training result. We propose two new data representations which converge to the same solution as in the original data representation both in theory and empirically and require as little as 2-bits per component. As a result, if the original data is stored as 32-bit floats, we decrease the bandwidth footprint for each training iteration by up to 16x. Our results hold for models such as linear regression and least squares SVM. ZipML raises interesting theoretical questions related to the robustness of SGD to approximate data, model, and gradient representations. We conclude this working paper by a description of ongoing work extending these preliminary results.

@article{abc, abstract = {We present ZipML, the first framework for training dense generalized linear models using end-to-end low-precision representation--in ZipML, all movements of data, including those for input samples, model, and gradients, are represented using as little as two bits per component. Within our framework, we have successfully compressed, separately, the input data by 16x, gradient by 16x, and model by 16x while still getting the same training result. Even for the most challenging datasets, we find that robust convergence can be ensured using only an end-to-end 8-bit representation or a 6-bit representation if only samples are quantized. Our work builds on previous research on using low-precision representations for gradient and model in the context of stochastic gradient descent. Our main technical contribution is a new set of techniques which allow the training samples to be processed with low precision, without affecting the convergence of the algorithm. In turn, this leads to a system where all data items move in a quantized, low precision format. In particular, we first establish that randomized rounding, while sufficient when quantizing the model and the gradients, is biased when quantizing samples, and thus leads to a different training result. We propose two new data representations which converge to the same solution as in the original data representation both in theory and empirically and require as little as 2-bits per component. As a result, if the original data is stored as 32-bit floats, we decrease the bandwidth footprint for each training iteration by up to 16x. Our results hold for models such as linear regression and least squares SVM. ZipML raises interesting theoretical questions related to the robustness of SGD to approximate data, model, and gradient representations. We conclude this working paper by a description of ongoing work extending these preliminary results. }, author = {Hantian Zhang and Kaan Kara and Jerry Li and Dan Alistarh and Ji Liu and Ce Zhang}, journal = {CoRR}, title = {ZipML: An End-to-end Bitwise Framework for Dense Generalized Linear Models.}, url = {http://arxiv.org/abs/1611.05402}, year = {2016} }

### 2013

Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural Information Processing Systems 2013. Proceedings of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States., Lake Tahoe, Nevada, United States., December 2013

@inproceedings{abc, author = {Srikrishna Sridhar and Stephen J. Wright and Christopher R{\'e} and Ji Liu and Victor Bittorf and Ce Zhang}, booktitle = {Advances in Neural Information Processing Systems 26: 27th Annual Conference on Neural Information Processing Systems 2013. Proceedings of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States.}, title = {An Approximate, Efficient LP Solver for LP Rounding.}, url = {http://papers.nips.cc/paper/4990-an-approximate-efficient-lp-solver-for-lp-rounding}, venue = {Lake Tahoe, Nevada, United States.}, year = {2013} }

CoRR, -, January 2013

Many problems in machine learning can be solved by rounding the solution of an appropriate linear program (LP). This paper shows that we can recover solutions of comparable quality by rounding an approximate LP solution instead of the ex- act one. These approximate LP solutions can be computed efficiently by applying a parallel stochastic-coordinate-descent method to a quadratic-penalty formulation of the LP. We derive worst-case runtime and solution quality guarantees of this scheme using novel perturbation and convergence analysis. Our experiments demonstrate that on such combinatorial problems as vertex cover, independent set and multiway-cut, our approximate rounding scheme is up to an order of magnitude faster than Cplex (a commercial LP solver) while producing solutions of similar quality.

@inproceedings{abc, abstract = {Many problems in machine learning can be solved by rounding the solution of an appropriate linear program (LP). This paper shows that we can recover solutions of comparable quality by rounding an approximate LP solution instead of the ex- act one. These approximate LP solutions can be computed efficiently by applying a parallel stochastic-coordinate-descent method to a quadratic-penalty formulation of the LP. We derive worst-case runtime and solution quality guarantees of this scheme using novel perturbation and convergence analysis. Our experiments demonstrate that on such combinatorial problems as vertex cover, independent set and multiway-cut, our approximate rounding scheme is up to an order of magnitude faster than Cplex (a commercial LP solver) while producing solutions of similar quality.}, author = {Srikrishna Sridhar and Victor Bittorf and Ji Liu and Ce Zhang and Christopher R{\'e} and Stephen J. Wright}, booktitle = {CoRR}, title = {An Approximate, Efficient Solver for LP Rounding.}, url = {http://arxiv.org/abs/1311.2661}, venue = {-}, year = {2013} }