BibTeX for publications

@article{Benson-preprint-direct-tsqr,
  author = {Austin Benson and David F. Gleich and James Demmel},
  title = {Direct Tall-and-skinny {QR} factorizations in MapReduce architectures},
  journal = {arXiv},
  year = {2013},
  volume = {cs.DC},
  pages = {1301.1071},
  note = {Submitted.},
  owner = {David F. Gleich},
  timestamp = {2012.10.04},
  url = {http://arxiv.org/abs/1301.1071}
}
@article{Gleich-preprint-dynamic-pagerank,
  author = {David F. Gleich and Ryan A. Rossi},
  title = {A Dynamical System for {PageRank} with Time-Dependent Teleportation},
  journal = {arXiv},
  year = {2012},
  volume = {cs.SI},
  pages = {1211.4266},
  owner = {David F. Gleich},
  timestamp = {2012.11.17},
  url = {http://arxiv.org/abs/1211.4266}
}
@article{Rossi-preprint-max-clique,
  author = {Ryan A. Rossi and David F. Gleich and Assefaw H. Gebremedhin and
	Md. Mostofa Ali Patwary},
  title = {A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
	and Temporal Strong Components},
  journal = {arXiv},
  year = {2013},
  volume = {cs.SI},
  pages = {1302.6256},
  owner = {dgleich},
  timestamp = {2012.11.17},
  url = {http://arxiv.org/abs/1302.6256}
}
@article{Gleich2011-block-circulant,
  author = {David F. Gleich and Chen Greif and James M. Varah},
  title = {The power and Arnoldi methods in an algebra of circulants},
  journal = {Numerical Linear Algebra with Applications},
  year = {2012},
  abstract = {Circulant matrices play a central role in a recently proposed formulation
	of three-way data computations. In this setting, a three-way table
	corresponds to a matrix where each "scalar" is a vector of parameters
	defining a circulant. This interpretation provides many generalizations
	of results from matrix or vector-space algebra. We derive the power
	and Arnoldi methods in this algebra. In the course of our derivation,
	we define inner products, norms, and other notions. These extensions
	are straightforward in an algebraic sense, but the implications are
	dramatically different from the standard matrix case. For example,
	a matrix of circulants has a polynomial number of eigenvalues in
	its dimension, although these can all be represented by a carefully
	chosen canonical set of eigenvalues and vectors. These results and
	algorithms are closely related to standard decoupling techniques
	on block-circulant matrices using the fast Fourier transform.},
  doi = {10.1002/nla.1845},
  file = {:Gleich 2012 - circulant.pdf},
  note = {Accepted for publication. Preprint: arXiv:1101.2173.},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.01.06}
}
@article{Bayati-2013-netalign,
  author = {Mohsen Bayati and David F. Gleich and Amin Saberi and Ying Wang},
  title = {Message-Passing Algorithms for Sparse Network Alignment},
  journal = {ACM Transactions on Knowledge Discovery from Data},
  year = {2013},
  volume = {7},
  pages = {3:1--3:31},
  number = {1},
  month = mar,
  acmid = {2435212},
  address = {New York, NY, USA},
  articleno = {3},
  doi = {10.1145/2435209.2435212},
  file = {:Bayati 2013 - sparse belief propagation.pdf},
  issn = {1556-4681},
  issue_date = {March 2013},
  keywords = {Network alignment, belief propagation, graph matching, message-passing},
  numpages = {31},
  owner = {David F. Gleich},
  publisher = {ACM},
  timestamp = {2011.07.01}
}
@article{Bonchi-2012-fast-katz,
  author = {Bonchi, Francesco and Esfandiar, Pooya and Gleich, David F. and Greif,
	Chen and Lakshmanan, Laks V. S.},
  title = {Fast Matrix Computations for Pairwise and Columnwise Commute Times
	and {Katz} Scores},
  journal = {Internet Mathematics},
  year = {2012},
  volume = {8},
  pages = {73--112},
  number = {1--2},
  abstract = {We explore methods for approximating the commute time and Katz score
	between a pair of nodes. These methods are based on the approach
	of matrices, moments, and quadrature developed in the numerical linear
	algebra community. They rely on the Lanczos process and provide upper
	and lower bounds on an estimate of the pairwise scores. We also explore
	methods to approximate the commute times and Katz scores from a node
	to all other nodes in the graph. Here, our approach for the commute
	times is based on a variation of the conjugate gradient algorithm,
	and it provides an estimate of all the diagonals of the inverse of
	a matrix. Our technique for the Katz scores is based on exploiting
	an empirical localization property of the Katz matrix. We adapt algorithms
	used for personalized PageRank computing to these Katz scores and
	theoretically show that this approach is convergent. We evaluate
	these methods on 17 real-world graphs ranging in size from 1000 to
	1,000,000 nodes. Our results show that our pairwise commute-time
	method and columnwise Katz algorithm both have attractive theoretical
	properties and empirical performance.},
  doi = {10.1080/15427951.2012.625256},
  file = {:Bonchi 2012 - fast katz.pdf},
  keywords = {self},
  owner = {dfgleic},
  timestamp = {2011.06.30}
}
@article{constantine2010-rapr,
  author = {Paul G. Constantine and David F. Gleich},
  title = {Random Alpha {PageRank}},
  journal = {Internet Mathematics},
  year = {2010},
  volume = {6},
  pages = {189--236},
  number = {2},
  month = {September},
  abstract = {We suggest a revision to the PageRank random surfer model that considers
	the influence of a population of random surfers on the PageRank vector.
	In the revised model, each member of the population has its own teleportation
	parameter chosen from a probability distribution, and consequently,
	the ranking vector is random. We propose three algorithms for computing
	the statistics of the random ranking vector based respectively on
	(i) random sampling, (ii) paths along the links of the underlying
	graph, and (iii) quadrature formulas. We find that the expectation
	of the random ranking vector produces similar rankings to its deterministic
	analogue, but the standard deviation gives uncorrelated information
	(under a Kendall-tau metric) with myriad potential uses. We examine
	applications of this model to web spam.},
  doi = {10.1080/15427951.2009.10129185},
  file = {:constantine 2010 - random alpha pagerank.pdf},
  keywords = {self},
  owner = {David Gleich},
  timestamp = {2010.01.07},
  url = {http://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.im/1285339073}
}
@article{Constantine-2011-gni,
  author = {Paul G. Constantine and David F. Gleich and Gianluca Iaccarino},
  title = {A factorization of the spectral {Galerkin} system for parameterized
	matrix equations: derivation and applications},
  journal = {SIAM Journal on Scientific Computing},
  year = {2011},
  volume = {33},
  pages = {2995--3009},
  number = {5},
  abstract = {Recent work has explored solver strategies for the linear system of
	equations arising from a spectral Galerkin approximation of the solution
	of PDEs with parameterized (or stochastic) inputs. We consider the
	related problem of a matrix equation whose matrix and right-hand
	side depend on a set of parameters (e.g., a PDE with stochastic inputs
	semidiscretized in space) and examine the linear system arising from
	a similar Galerkin approximation of the solution. We derive a useful
	factorization of this system of equations, which yields bounds on
	the eigenvalues, clues to preconditioning, and a flexible implementation
	method for a wide array of problems. We complement this analysis
	with (i) a numerical study of preconditioners on a standard elliptic
	PDE test problem and (ii) a fluids application using existing CFD
	codes; the MATLAB codes used in the numerical studies are available
	online.},
  doi = {10.1137/100799046},
  file = {:Constantine 2011 - spectral galerkin.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2010.09.15}
}
@article{constantine2010-parameterized,
  author = {Paul G. Constantine and David F. Gleich and Gianluca Iaccarino},
  title = {Spectral Methods for Parameterized Matrix Equations},
  journal = {SIAM Journal on Matrix Analysis and Applications},
  year = {2010},
  volume = {31},
  pages = {2681--2699},
  number = {5},
  abstract = {We apply polynomial approximation methods—known in the numerical PDEs
	context as spectral methods—to approximate the vector-valued function
	that satisfies a linear system of equations where the matrix and
	the right-hand side depend on a parameter. We derive both an interpolatory
	pseudospectral method and a residual-minimizing Galerkin method,
	and we show how each can be interpreted as solving a truncated infinite
	system of equations; the difference between the two methods lies
	in where the truncation occurs. Using classical theory, we derive
	asymptotic error estimates related to the region of analyticity of
	the solution, and we present a practical residual error estimate.
	We verify the results with two numerical examples.},
  doi = {10.1137/090755965},
  file = {:constantine 2010 - parameterized matrix equations.pdf},
  keywords = {parameterized systems; spectral methods; self},
  owner = {David F. Gleich},
  publisher = {SIAM},
  timestamp = {2010.09.24}
}
@article{gleich2010-inner-outer,
  author = {David F. Gleich and Andrew P. Gray and Chen Greif and Tracy Lau},
  title = {An inner-outer iteration for {PageRank}},
  journal = {SIAM Journal on Scientific Computing},
  year = {2010},
  volume = {32},
  pages = {349--371},
  number = {1},
  month = {February},
  abstract = {We present a new iterative scheme for PageRank computation. The algorithm
	is applied to the linear system formulation of the problem, using
	inner-outer stationary iterations. It is simple, can be easily implemented
	and parallelized, and requires minimal storage overhead. Our convergence
	analysis shows that the algorithm is effective for a crude inner
	tolerance and is not sensitive to the choice of the parameters involved.
	The same idea can be used as a preconditioning technique for nonstationary
	schemes. Numerical examples featuring matrices of dimensions exceeding
	100,000,000 in sequential and parallel environments demonstrate the
	merits of our technique. Our code is available online for viewing
	and testing, along with several large scale examples.},
  doi = {10.1137/080727397},
  file = {:gleich 2010 - inner-outer pagerank.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2009.07.10}
}
@article{Gleich-2012-kronecker,
  author = {David F. Gleich and Art B. Owen},
  title = {Moment-based estimation of stochastic {Kronecker} graph parameters},
  journal = {Internet Mathematics},
  year = {2012},
  volume = {8},
  pages = {232--256},
  number = {3},
  month = {August},
  abstract = {Stochastic Kronecker graphs supply a parsimonious model for large
	sparse real-world graphs. They can specify the distribution of a
	large random graph using only three or four parameters. Those parameters
	have, however, proved difficult to choose in specific applications.
	This article looks at method-of-moments estimators that are computationally
	much simpler than maximum likelihood. The estimators are fast, and
	in our examples, they typically yield Kronecker parameters with expected
	feature counts closer to a given graph than we get from KronFit.
	The improvement is especially prominent for the number of triangles
	in the graph.},
  doi = {10.1080/15427951.2012.680824},
  file = {:Gleich 2012 - Moment based Kronecker.pdf},
  owner = {David F. Gleich},
  timestamp = {2011.07.02}
}
@article{gleich2007-approximate,
  author = {David F. Gleich and Marzia Polito},
  title = {Approximating Personalized {PageRank} with Minimal Use of Webgraph
	Data},
  journal = {Internet Mathematics},
  year = {2007},
  volume = {3},
  pages = {257--294},
  number = {3},
  month = {December},
  abstract = {In this paper, we consider the problem of calculating fast and accurate
	approximations to the personalized PageRank score of a webpage. We
	focus on techniques to improve speed by limiting the amount of web
	graph data we need to access.

	Our algorithms provide both the approximation to the personalized
	PageRank score as well as guidance in using only the necessary information—and
	therefore sensibly reduce not only the computational cost of the
	algorithm but also the memory and memory bandwidth requirements.
	We report experiments with these algorithms on web graphs of up to
	118 million pages and prove a theoretical approximation bound for
	all. Finally, we propose a local, personalized web-search system
	for a future client system using our algorithms.},
  doi = {10.1080/15427951.2006.10129128},
  file = {:gleich 2007 - approximate personalized pagerank.pdf},
  key = {GP2007},
  keywords = {pagerank, self},
  owner = {David Gleich},
  timestamp = {2006.11.27}
}
@article{Gleich-2011-cads,
  author = {Gleich, David F. and Wang, Ying and Meng, Xiangrui and Ronaghi, Farnaz
	and Gerritsen, Margot and Saberi, Amin},
  title = {Some computational tools for digital archive and metadata maintenance},
  journal = {BIT Numerical Mathematics},
  year = {2011},
  volume = {51},
  pages = {127--154},
  abstract = {Computational tools are a mainstay of current search and recommendation
	technology. But modern digital archives are astonishingly diverse
	collections of older digitized material and newer born digital content.
	Finding interesting material in these archives is still challenging.
	The material often lacks appropriate annotation---or metadata---so
	that people can find the most interesting material. We describe four
	computational tools we developed to aid in the processing and maintenance
	of large digital archives. The first is an improvement to a graph
	layout algorithm for graphs with hundreds of thousands of nodes.
	The second is a new algorithm for matching databases with links among
	the objects, also known as a network alignment problem. The third
	is an optimization heuristic to disambiguate a set of geographic
	references in a book. And the fourth is a technique to automatically
	generate a title from a description.},
  affiliation = {Sandia National Laboratories, Livermore, CA 94550, USA},
  doi = {10.1007/s10543-011-0324-6},
  file = {:gleich 2011 - comptuational tools.pdf},
  issn = {0006-3835},
  number = {1},
  keywords = {self, Mathematics and Statistics},
  owner = {David F. Gleich},
  publisher = {Springer Netherlands},
  timestamp = {2011.03.25}
}
@article{wang2008-monte-carlo-for-adjoint,
  author = {Qiqi Wang and David F. Gleich and Amin Saberi and Nasrollah Etemadi
	and Parviz Moin},
  title = {A {Monte Carlo} method for solving unsteady adjoint equations},
  journal = {Journal of Computational Physics},
  year = {2008},
  volume = {227},
  pages = {6184--6205},
  number = {12},
  month = {June},
  abstract = {Traditionally, solving the adjoint equation for unsteady problems
	involves solving a large, structured linear system. This paper presents
	a variation on this technique and uses a Monte Carlo linear solver.
	The Monte Carlo solver yields a forward-time algorithm for solving
	unsteady adjoint equations. When applied to computing the adjoint
	associated with Burgers' equation, the Monte Carlo approach is
	faster for a large class of problems while preserving sufficient
	accuracy.},
  address = {San Diego, CA, USA},
  doi = {10.1016/j.jcp.2008.03.006},
  file = {:wang 2008 - monte carlo adjoint.pdf},
  issn = {0021-9991},
  keywords = {self},
  owner = {David Gleich},
  publisher = {Academic Press Professional, Inc.},
  timestamp = {2008.10.18}
}
@inproceedings{Andersen-2012-overlapping,
  author = {Andersen, Reid and Gleich, David F. and Mirrokni, Vahab},
  title = {Overlapping clusters for distributed computation},
  booktitle = {Proceedings of the fifth ACM international conference on Web search
	and data mining},
  year = {2012},
  series = {WSDM '12},
  pages = {273--282},
  address = {New York, NY, USA},
  publisher = {ACM},
  acmid = {2124330},
  doi = {10.1145/2124295.2124330},
  file = {:Andersen 2012 - overlapping.pdf},
  isbn = {978-1-4503-0747-5},
  keywords = {conductance, covering, diffusion, leave-time, pagerank},
  location = {Seattle, Washington, USA},
  numpages = {10},
  owner = {David F. Gleich},
  timestamp = {2011.07.01},
  url = {http://doi.acm.org/10.1145/2124295.2124330}
}
@inproceedings{Andersen-2011-overlapping-csc,
  author = {Andersen, Reid and Gleich, David F. and Mirrokni, Vahab S.},
  title = {Overlapping clusters for distributed computation},
  booktitle = {Poster proceedings of the SIAM Workshop on Combinatorial and Scientific
	Computing (CSC)},
  year = {2011},
  note = {Poster.},
  file = {:Andersen 2011 - overlapping poster.pdf},
  keywords = {self},
  owner = {dgleich},
  timestamp = {2011.10.26}
}
@inproceedings{bayati2009-network-alignment,
  author = {Mohsen Bayati and Margot Gerritsen and David F. Gleich and Amin Saberi
	and Ying Wang},
  title = {Algorithms for Large, Sparse Network Alignment Problems},
  booktitle = {Proceedings of the 9th IEEE International Conference on Data Mining},
  year = {2009},
  pages = {705--710},
  month = {December},
  abstract = {We propose a new distributed algorithm for sparse variants of the
	network alignment problem that occurs in a variety of data mining
	areas including systems biology, database matching, and computer
	vision. Our algorithm uses a belief propagation heuristic and provides
	near optimal solutions for an NP-hard combinatorial optimization
	problem. We show that our algorithm is faster and outperforms or
	nearly ties existing algorithms on synthetic problems, a problem
	in bioinformatics, and a problem in ontology matching. We also provide
	a unified framework for studying and comparing all network alignment
	solvers.},
  doi = {10.1109/ICDM.2009.135},
  eprint = {0907.3338},
  file = {:bayati 2009 - network alignment.pdf},
  owner = {David F. Gleich},
  timestamp = {2009.07.20}
}
@inproceedings{constantine2007-pagerank-pce,
  author = {Paul G. Constantine and David F. Gleich},
  title = {Using Polynomial Chaos to Compute the Influence of Multiple Random
	Surfers in the {PageRank} Model},
  booktitle = {Proceedings of the 5th Workshop on Algorithms and Models for the
	Web Graph ({WAW2007})},
  year = {2007},
  editor = {Anthony Bonato and Fan Chung Graham},
  volume = {4863},
  series = {Lecture Notes in Computer Science},
  pages = {82--95},
  publisher = {Springer},
  abstract = {The PageRank equation computes the importance of pages in a web graph
	relative to a single random surfer with a constant teleportation
	coefficient. To be globally relevant, the teleportation coefficient
	should account for the influence of all users. Therefore, we correct
	the PageRank formulation by modeling the teleportation coefficient
	as a random variable distributed according to user behavior. With
	this correction, the PageRank values themselves become random. We
	present two methods to quantify the uncertainty in the random PageRank:
	a Monte Carlo sampling algorithm and an algorithm based on the truncated
	polynomial chaos expansion of the random quantities. With each of
	these methods, we compute the expectation and standard deviation
	of the PageRanks. Our statistical analysis shows that the standard
	deviation of the PageRanks is uncorrelated with the PageRank vector.},
  doi = {10.1007/978-3-540-77004-6_7},
  file = {:constantine2007 - pagerank pce.pdf},
  key = {CG2007},
  keywords = {self, pagerank, polynomial chaos},
  owner = {David Gleich},
  timestamp = {2007.10.10}
}
@inproceedings{Constantine-2012-svd-signal,
  author = {Paul G. Constantine and David F. Gleich},
  title = {Distinguishing signal from noise in an {SVD} of simulation data},
  booktitle = {Proceedings of the IEEE International Conference on Acoustics,
	Speech, and Signal Processing},
  year = {2012},
  pages = {5333--5336},
  doi = {10.1109/ICASSP.2012.6289125},
  file = {:Constantine 2012 - signal svd.pdf},
  owner = {David F. Gleich},
  timestamp = {2011.11.28}
}
@inproceedings{Constantine-2011-MRTSQR,
  author = {Constantine, Paul G. and Gleich, David F.},
  title = {Tall and skinny {QR} factorizations in {MapReduce} architectures},
  booktitle = {Proceedings of the second international workshop on MapReduce and
	its applications},
  year = {2011},
  series = {MapReduce '11},
  pages = {43--50},
  address = {New York, NY, USA},
  publisher = {ACM},
  acmid = {1996103},
  doi = {10.1145/1996092.1996103},
  file = {:Constantine 2011 - TSQR.pdf},
  isbn = {978-1-4503-0700-0},
  keywords = {Hadoop, QR factorization, linear regression, matrix factorization,
	tsqr},
  location = {San Jose, California, USA},
  numpages = {8},
  owner = {David F. Gleich},
  timestamp = {2011.06.18}
}
@inproceedings{decoste2005-recommender,
  author = {Dennis DeCoste and David F. Gleich and Tejaswi Kasturi and Sathiya
	Keerthi and Omid Madani and Seung-Taek Park and David M. Pennock
	and Corey Porter and Sumit Sanghai and Farial Shahnaz and Leonid
	Zhukov},
  title = {Recommender Systems Research at {Yahoo! Research Labs}},
  booktitle = {Beyond Personalization},
  year = {2005},
  address = {San Diego, CA},
  month = {January},
  note = {Position Statement},
  abstract = {We describe some of the ongoing projects at Yahoo! Research Labs
	that involve recommender systems. We discuss recommender systems
	related problems and solutions relevant to Yahoo!’s business.},
  file = {:decoste2005 - yahoo recommender systems.pdf},
  key = {DGK+05},
  keywords = {self, recommender systems},
  owner = {David Gleich},
  timestamp = {2006.10.13}
}
@inproceedings{esfandiar2010-fast-katz,
  author = {Pooya Esfandiar and Francesco Bonchi and David F. Gleich and Chen
	Greif and Laks V. S. Lakshmanan and Byung-Won On},
  title = {Fast {Katz} and Commuters: Efficient Approximation of Social Relatedness
	over Large Networks},
  booktitle = {Algorithms and Models for the Web Graph},
  year = {2010},
  abstract = {Motivated by social network data mining problems such as link prediction
	and collaborative filtering, significant research effort has been
	devoted to computing topological measures including the Katz score
	and the commute time. Existing approaches typically approximate all
	pairwise relationships simultaneously. In this paper, we are interested
	in computing: the score for a single pair of nodes, and the top-k
	nodes with the best scores from a given source node. For the pairwise
	problem, we apply an iterative algorithm that computes upper and
	lower bounds for the measures we seek. This algorithm exploits a
	relationship between the Lanczos process and a quadrature rule. For
	the top-k problem, we propose an algorithm that only accesses a small
	portion of the graph and is related to techniques used in personalized
	PageRank computing. To test the scalability and accuracy of our algorithms
	we experiment with three real-world networks and find that these
	algorithms run in milliseconds to seconds without any preprocessing.},
  doi = {10.1007/978-3-642-18009-5_13},
  file = {:esfandiar 2010 - fast katz.pdf},
  keywords = {self},
  owner = {David Gleich},
  timestamp = {2010.04.08}
}
@inproceedings{constantine2010-teleportation,
  author = {David F. Gleich and Paul G. Constantine and Abraham Flaxman and Asela
	Gunawardana},
  title = {Tracking the random surfer: empirically measured teleportation parameters
	in {PageRank}},
  booktitle = {WWW '10: Proceedings of the 19th International Conference on World
	Wide Web},
  year = {2010},
  pages = {381--390},
  month = {April},
  abstract = {PageRank computes the importance of each node in a directed graph
	under a random surfer model governed by a teleportation parameter.
	Commonly denoted alpha, this parameter models the probability of
	following an edge inside the graph or, when the graph comes from
	a network of web pages and links, clicking a link on a web page.
	We empirically measure the teleportation parameter based on browser
	toolbar logs and a click trail analysis. For a particular user or
	machine, such analysis produces a value of alpha. We find that these
	values nicely fit a Beta distribution with mean edge-following probability
	between 0.3 and 0.7, depending on the site. Using these distributions,
	we compute PageRank scores where PageRank is computed with respect
	to a distribution as the teleportation parameter, rather than a constant
	teleportation parameter. These new metrics are evaluated on the graph
	of pages in Wikipedia.},
  doi = {10.1145/1772690.1772730},
  file = {:gleich 2010 - tracking the random surfer.pdf},
  isbn = {978-1-60558-799-8},
  keywords = {self},
  location = {Raleigh, North Carolina, USA},
  owner = {David F. Gleich},
  timestamp = {2009.12.03}
}
@inproceedings{Gleich-2011-skew-nuclear,
  author = {Gleich, David F. and Lim, Lek-Heng},
  title = {Rank aggregation via nuclear norm minimization},
  booktitle = {Proceedings of the 17th ACM SIGKDD international conference on Knowledge
	discovery and data mining},
  year = {2011},
  series = {KDD '11},
  pages = {60--68},
  address = {New York, NY, USA},
  publisher = {ACM},
  abstract = {The process of rank aggregation is intimately intertwined with the
	structure of skew-symmetric matrices. We apply recent advances in
	the theory and algorithms of matrix completion to skew-symmetric
	matrices. This combination of ideas produces a new method for ranking
	a set of items. The essence of our idea is that a rank aggregation
	describes a partially filled skew-symmetric matrix. We extend an
	algorithm for matrix completion to handle skew-symmetric data and
	use that to extract ranks for each item. Our algorithm applies to
	both pairwise comparison and rating data. Because it is based on
	matrix completion, it is robust to both noise and incomplete data.
	We show a formal recovery result for the noiseless case and present
	a detailed study of the algorithm on synthetic data and Netflix ratings.},
  acmid = {2020425},
  doi = {10.1145/2020408.2020425},
  file = {:Gleich 2011 - rank aggregation.pdf},
  isbn = {978-1-4503-0813-7},
  keywords = {self, nuclear norm, rank aggregation, skew symmetric},
  location = {San Diego, California, USA},
  numpages = {9},
  owner = {David F. Gleich},
  timestamp = {2010.01.30}
}
@inproceedings{Gleich-2012-neighborhoods,
  author = {David F. Gleich and C. Seshadhri},
  title = {Vertex neighborhoods, low conductance cuts, and good seeds for local
	community methods},
  booktitle = {Proceedings of the 18th ACM SIGKDD international conference on
	Knowledge discovery and data mining},
  series = {KDD '12},
  year = {2012},
  pages = {597--605},
  doi = {10.1145/2339530.2339628},
  file = {:Gleich 2012 - neighborhoods.pdf},
  owner = {David F. Gleich},
  timestamp = {2011.11.28}
}
@inproceedings{gleich2005-ppagerank,
  author = {David F. Gleich and Leonid Zhukov},
  title = {Scalable Computing with Power-Law Graphs: Experience with Parallel
	{PageRank}},
  booktitle = {Supercomputing 2005},
  year = {2005},
  month = {November},
  note = {Poster.},
  file = {:gleich2005 - parallel pagerank.pdf},
  key = {GZ2005},
  keywords = {self, pagerank},
  owner = {David Gleich},
  timestamp = {2006.10.13},
  url = {http://www.cs.purdue.edu/homes/dgleich/publications/gleich2005 - parallel pagerank.pdf}
}
@inproceedings{gleich2004-svd,
  author = {David F. Gleich and Leonid Zhukov},
  title = {An {SVD} Based Term Suggestion and Ranking System},
  booktitle = {ICDM '04: Proceedings of the Fourth IEEE International Conference
	on Data Mining (ICDM'04)},
  year = {2004},
  pages = {391--394},
  address = {Brighton, UK},
  month = {November},
  publisher = {IEEE Computer Society},
  abstract = {In this paper, we consider the application of the singular value decomposition
	(SVD) to a search term suggestion system in a pay-for-performance
	search market. We propose a novel positive and negative refinement
	method based on orthogonal subspace projections. We demonstrate that
	SVD subspace-based methods: 1) expand coverage by reordering the
	results, and 2) enhance the clustered structure of the data. The
	numerical experiments reported in this paper were performed on Overture's
	pay-per-performance search market data.},
  doi = {10.1109/ICDM.2004.10006},
  file = {:gleich 2004 - svd based term suggestion and ranking.pdf},
  isbn = {0-7695-2142-8},
  key = {GZ2004},
  keywords = {self},
  owner = {David Gleich},
  timestamp = {2006.10.13}
}
@inproceedings{gleich2005-wom,
  author = {David F. Gleich and Leonid Zhukov and Matthew Rasmussen and Kevin
	Lang},
  title = {The {World of Music}: {SDP} Embedding of High-Dimensional Data},
  booktitle = {Information Visualization 2005},
  year = {2005},
  note = {Interactive poster.},
  abstract = {In this paper we investigate the use of Semidefinite Programming (SDP)
	optimization for high dimensional data layout and graph visualization.
	We developed a set of interactive visualization tools and used them
	on music artist ratings data from Yahoo!. The computed layout preserves
	a natural grouping of the artists and provides visual assistance
	for browsing large music collections.},
  file = {:gleich2005 - world of music.pdf},
  key = {GZRL2005},
  keywords = {self, recommender systems},
  owner = {David Gleich},
  timestamp = {2006.10.13},
  url = {http://www.cs.purdue.edu/homes/dgleich/publications/gleich2005 - world of music.pdf}
}
@inproceedings{Khan-2012-netalign-multicore,
  author = {Arif Khan and David F. Gleich and Mahantesh Halappanavar and Alex
	Pothen},
  title = {A multicore algorithm for network alignment via approximate matching},
  booktitle = {Proceedings of the 2012 ACM/IEEE International Conference for High
	Performance Computing, Networking, Storage and Analysis},
  year = {2012},
  series = {SC '12},
  pages = {64:1--64:11},
  address = {Los Alamitos, CA, USA},
  publisher = {IEEE Computer Society Press},
  acmid = {2389083},
  articleno = {64},
  isbn = {978-1-4673-0804-5},
  location = {Salt Lake City, Utah},
  numpages = {11},
  owner = {David Gleich},
  timestamp = {2012.05.30},
  url = {http://conferences.computer.org/sc/2012/papers/1000a054.pdf}
}
@inproceedings{Rossi-2012-dynamicpr,
  author = {Ryan A. Rossi and David F. Gleich},
  title = {Dynamic {PageRank} using Evolving Teleportation},
  booktitle = {Algorithms and Models for the Web Graph},
  year = {2012},
  editor = {Bonato, Anthony and Janssen, Jeannette},
  volume = {7323},
  series = {Lecture Notes in Computer Science},
  pages = {126--137},
  publisher = {Springer Berlin Heidelberg},
  abstract = {The importance of nodes in a network constantly fluctuates based on
	changes in the network structure as well as changes in external interest.
	We propose an evolving teleportation adaptation of the PageRank method
	to capture how changes in external interest influence the importance
	of a node. This framework seamlessly generalizes PageRank because
	the importance of a node will converge to the PageRank values if
	the external influence stops changing. We demonstrate the effectiveness
	of the evolving teleportation on the Wikipedia graph and the Twitter
	social network. The external interest is given by the number of hourly
	visitors to each page and the number of monthly tweets for each user.},
  doi = {10.1007/978-3-642-30541-2_10},
  file = {:Rossi 2012 - dynamic-pr.pdf},
  isbn = {978-3-642-30540-5},
  owner = {dgleich},
  timestamp = {2012.02.29}
}
@misc{Andrew-2004-list-access,
  author = {Kevin Andrew and David F. Gleich},
  title = {MTF, BIT, and COMB: A Guide to Deterministic and Randomized Online
	Algorithms for the List Access Problem},
  howpublished = {Advanced Algorithms, Harvey Mudd College, Final Project},
  year = {2004},
  abstract = {In this survey, we discuss two randomized online algorithms for the
	list access problem. First, we review competitive analysis and show
	that the MTF algorithm is 2-competitive using a potential function.
	Then, we introduce randomized competitive analysis and the associated
	adversary models. We show that the randomized BIT algorithm is
	7/4-competitive using a potential function argument. We then introduce
	the pairwise property and the TIMESTAMP algorithm to show that the
	COMB algorithm, a COMBination of the BIT and TIMESTAMP algorithms,
	is 8/5-competitive. COMB is the best known randomized algorithm for
	the list access problem.},
  file = {:Andrew 2004 - List Access.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02}
}
@misc{gleich2003-clinic,
  author = {Erin Bodine and David F. Gleich and Cathy Kurata and Jordan Kwan
	and Lesley Ward and Daniel Fain},
  title = {Three Methods for Improving Relevance in Web Search},
  howpublished = {Clinic Report, Harvey Mudd College},
  month = {May},
  year = {2003},
  note = {102 pages. Includes fully documented program code on accompanying
	CD.},
  abstract = {The 2002–2003 Overture clinic project evaluated and implemented three
	different methods for improving relevance ordering in web search.
	The three methods were bottom up micro information unit (MIU) analysis,
	top down MIU analysis, and proximity scoring. We ran these three
	methods on the top 200 web pages returned for each of 58 queries
	by an already existing algorithmic search engine. We used two metrics,
	precision and relevance ordering, to evaluate the results.

	Precision deals with how relevant the web page is for a given query,
	while relevance ordering is how well-ordered the returned results
	are. We evaluated the precision of each method and of the algorithmic
	search engine by hand. For relevance ordering, we recruited other
	humans to compare pages and used their decisions to generate an
	ideal ranking for each query. The results of each of our methods
	and of the algorithmic search engine are then compared to this ideal
	ranking vector using Kendall’s Tau.

	Our bottom up MIU analysis method achieved the highest precision
	score of 0.78 out of 1.00. In addition, bottom up MIU analysis received
	the second highest correlation coefficient (or relevance ordering
	score) of 0.107 while the algorithmic search engine received the
	highest correlation coefficient of 0.121. Interestingly, our proximity
	scoring method received high relevance ordering scores when the
	algorithmic search engine received low relevance ordering scores.},
  file = {:bodine2003 - clinic, improved relevance.pdf},
  key = {BGK+2003},
  keywords = {self, web search},
  owner = {David Gleich},
  timestamp = {2006.10.13}
}
@misc{Gleich-2005-directed-spectral,
  author = {David F. Gleich},
  title = {Hierarchical Directed Spectral Graph Partitioning},
  howpublished = {Information Networks, Stanford University, Final Project},
  year = {2005},
  abstract = {In this report, we examine the generalization of the Laplacian of
	a graph due to Fan Chung. We show that Fan Chung’s generalization
	reduces to examining one particular symmetrization of the adjacency
	matrix for a directed graph. From this result, the directed Cheeger
	bounds trivially follow. Additionally, we implement and examine the
	benefits of directed hierarchical spectral clustering empirically
	on a dataset from Wikipedia. Finally, we examine a set of competing
	heuristic methods on the same dataset.},
  file = {:Gleich 2005 - hierarchical directed spectral.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02},
  url = {http://www.cs.purdue.edu/homes/dgleich/publications/Gleich 2005 - hierarchical directed spectral.pdf}
}
@misc{Gleich-2005-finite-calculus,
  author = {David F. Gleich},
  title = {Finite Calculus: A tutorial for solving nasty sums},
  howpublished = {Combinatorics, Final Paper, Stanford University, 2004.},
  year = {2005},
  file = {:Gleich 2005 - finite calculus.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02}
}
@misc{Gleich-2003-computer-chess,
  author = {David F. Gleich},
  title = {Machine Learning in Computer Chess: Genetic Programming and KRK},
  howpublished = {Independent Study Report, Harvey Mudd College},
  year = {2003},
  abstract = {In this paper, I describe genetic programming as a machine learning
	paradigm and evaluate its results in attempting to learn basic chess
	rules. Genetic programming exploits a simulation of Darwinian evolution
	to construct programs. When applied to the King-Rook-King (KRK) chess
	endgame problem, genetic programming shows promising results in spite
	of a lack of significant chess knowledge.},
  file = {:Gleich 2003 - Machine Learning in Computer Chess.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02}
}
@inproceedings{gleich2007-pagerank-deriv,
  author = {David F. Gleich and Peter Glynn and Gene H. Golub and Chen Greif},
  title = {Three results on the {PageRank} vector: eigenstructure, sensitivity,
	and the derivative},
  booktitle = {Web Information Retrieval and Linear Algebra Algorithms},
  year = {2007},
  editor = {Andreas Frommer and Michael W. Mahoney and Daniel B. Szyld},
  number = {07071},
  series = {Dagstuhl Seminar Proceedings},
  publisher = {Internationales Begegnungs- und Forschungszentrum fuer Informatik
	(IBFI), Schloss Dagstuhl, Germany},
  abstract = {The three results on the PageRank vector are preliminary but shed
	light on the eigenstructure of a PageRank modified Markov chain and
	what happens when changing the teleportation parameter in the PageRank
	model. Computations with the derivative of the PageRank vector with
	respect to the teleportation parameter show predictive ability and
	identify an interesting set of pages from Wikipedia.},
  file = {:gleich 2007 - pagerank derivative and sensitivity.pdf},
  issn = {1862-4405},
  key = {GGGG2007},
  keywords = {self, PageRank, PageRank derivative, PageRank sensitivity, PageRank
	eigenstructure},
  address = {Dagstuhl, Germany},
  owner = {David Gleich},
  timestamp = {2008.03.04},
  url = {http://drops.dagstuhl.de/opus/volltexte/2007/1061}
}
@misc{Gleich-2006-wom,
  author = {David F. Gleich and Matthew Rasmussen and Kevin Lang and Leonid Zhukov},
  title = {The World of Music: User Ratings; Spectral and Spherical Embeddings;
	Map Projections},
  howpublished = {Online report},
  year = {2006},
  abstract = {In this paper we present an algorithm for layout and visualization
	of music collections based on similarities between musical artists.
	The core of the algorithm consists of a non-linear low dimensional
	embedding of a similarity graph constrained to the surface of a
	hyper-sphere. This approach effectively uses additional dimensions
	in the embedding. We derive the algorithm using a simple energy
	minimization procedure and show the relationships to several well
	known eigenvector based methods. We also describe a method for
	constructing a similarity graph from user ratings, as well as
	procedures for mapping the layout from the hyper-sphere to a 2d
	display. We demonstrate our techniques on Yahoo! Music user ratings
	data and a MusicMatch artist similarity graph.},
  file = {:Gleich 2006 - wom.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02},
  url = {http://www.cs.purdue.edu/homes/dgleich/publications/Gleich 2006 - wom.pdf}
}
@techreport{gleich2004-parallel,
  author = {David F. Gleich and Leonid Zhukov and Pavel Berkhin},
  title = {Fast Parallel {PageRank}: A Linear System Approach},
  institution = {Yahoo! Research Labs},
  year = {2004},
  number = {YRL-2004-038},
  abstract = {In this paper we investigate the convergence of iterative stationary
	and Krylov subspace methods for the PageRank linear system, including
	the convergence dependency on teleportation. We demonstrate that
	linear system iterations converge faster than the simple power method
	and are less sensitive to the changes in teleportation. In order
	to perform this study we developed a framework for parallel PageRank
	computing. We describe the details of the parallel implementation
	and provide experimental results obtained on a 70-node Beowulf cluster.},
  file = {:gleich2004-parallel.pdf},
  key = {GZB2006},
  keywords = {self, pagerank},
  owner = {David Gleich},
  timestamp = {2006.11.29},
  url = {http://www.cs.purdue.edu/homes/dgleich/publications/gleich2004-parallel.pdf}
}
@misc{Zhukov-2004-soft-clustering,
  author = {Leonid Zhukov and David F. Gleich},
  title = {Topic Identification in Soft Clustering using PCA and ICA},
  howpublished = {Online report, Yahoo! Research Labs},
  year = {2004},
  abstract = {Many applications can benefit from soft clustering, where each datum
	is assigned to multiple clusters with membership weights that sum
	to one. In this paper we present a comparison of principal component
	analysis (PCA) and independent component analysis (ICA) when used
	for soft clustering. We provide a short mathematical background for
	these methods and demonstrate their application to a sponsored links
	search listings dataset. We present examples of the soft clusters
	generated by both methods and compare the results.},
  file = {:Zhukov 2004 - soft clustering pca and ica.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02}
}
@article{Gleich-2011-review-of-kamvar,
  author = {David F. Gleich},
  title = {Review of: {Numerical algorithms for personalized search in self-organizing
	information networks by Sep Kamvar, Princeton Univ. Press, 2010,
	160pp., ISBN13: 978-0-691-14503-7}},
  journal = {Linear Algebra and its Applications},
  year = {2011},
  volume = {435},
  pages = {908--909},
  number = {4},
  doi = {10.1016/j.laa.2011.01.013},
  file = {:Gleich 2011 - review of kamvar.pdf},
  issn = {0024-3795},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02}
}
@phdthesis{gleich2009-thesis,
  author = {David F. Gleich},
  title = {Models and Algorithms for {PageRank} Sensitivity},
  school = {Stanford University},
  year = {2009},
  month = {September},
  abstract = {The PageRank model helps evaluate the relative importance of nodes
	in a large graph, such as the graph of links on the world wide web.
	An important piece of the PageRank model is the teleportation parameter
	$\alpha$. We explore the interaction between $\alpha$ and PageRank
	through the lens of sensitivity analysis. Writing the PageRank vector
	as a function of $\alpha$ allows us to take a derivative, which is
	a simple sensitivity measure. As an alternative approach, we apply
	techniques from the field of uncertainty quantification. Regarding
	$\alpha$ as a random variable produces a new PageRank model in which
	each PageRank value is a random variable. We explore the standard
	deviation of these variables to get another measure of PageRank
	sensitivity. One interpretation of this new model shows that it
	corrects a small oversight in the original PageRank formulation.

	Both of the above techniques require solving multiple PageRank
	problems, and thus a robust PageRank solver is needed. We discuss
	an inner-outer iteration for this purpose. The method is low-memory,
	simple to implement, and has excellent performance for a range of
	teleportation parameters.

	We show empirical results with these techniques on graphs with over
	2 billion edges.},
  file = {:gleich 2009 - pagerank thesis.pdf},
  keywords = {self},
  owner = {David Gleich},
  timestamp = {2009.09.13},
  url = {http://www.stanford.edu/group/SOL/dissertations/pagerank-sensitivity-thesis-online.pdf}
}