BibTeX for publications
@article{Benson-preprint-direct-tsqr,
author = {Austin Benson and David F. Gleich and James Demmel},
title = {Direct Tall-and-skinny {QR} factorizations in MapReduce architectures},
journal = {arXiv},
year = {2013},
volume = {cs.DC},
pages = {1301.1071},
note = {Submitted.},
owner = {David F. Gleich},
timestamp = {2012.10.04},
url = {http://arxiv.org/abs/1301.1071}
}
@article{Gleich-preprint-dynamic-pagerank,
author = {David F. Gleich and Ryan A. Rossi},
title = {A Dynamical System for {PageRank} with Time-Dependent Teleportation},
journal = {arXiv},
year = {2012},
volume = {cs.SI},
pages = {1211.4266},
owner = {David F. Gleich},
timestamp = {2012.11.17},
url = {http://arxiv.org/abs/1211.4266}
}
@article{Rossi-preprint-max-clique,
author = {Ryan A. Rossi and David F. Gleich and Assefaw H. Gebremedhin and
Md. Mostofa Ali Patwary},
title = {A Fast Parallel Maximum Clique Algorithm for Large Sparse Graphs
and Temporal Strong Components},
journal = {arXiv},
year = {2013},
volume = {cs.SI},
pages = {1302.6256},
owner = {dgleich},
timestamp = {2012.11.17},
url = {http://arxiv.org/abs/1302.6256}
}
@article{Gleich2011-block-circulant,
author = {David F. Gleich and Chen Greif and James M. Varah},
title = {The power and Arnoldi methods in an algebra of circulants},
journal = {Numerical Linear Algebra with Applications},
year = {2012},
abstract = {Circulant matrices play a central role in a recently proposed formulation
of three-way data computations. In this setting, a three-way table
corresponds to a matrix where each "scalar" is a vector of parameters
defining a circulant. This interpretation provides many generalizations
of results from matrix or vector-space algebra. We derive the power
and Arnoldi methods in this algebra. In the course of our derivation,
we define inner products, norms, and other notions. These extensions
are straightforward in an algebraic sense, but the implications are
dramatically different from the standard matrix case. For example,
a matrix of circulants has a polynomial number of eigenvalues in
its dimension, although these can all be represented by a carefully
chosen canonical set of eigenvalues and vectors. These results and
algorithms are closely related to standard decoupling techniques
on block-circulant matrices using the fast Fourier transform.},
doi = {10.1002/nla.1845},
file = {:Gleich 2012 - circulant.pdf},
note = {Accepted for publication. Preprint arXiv:1101.2173.},
keywords = {self},
owner = {David F. Gleich},
timestamp = {2011.01.06}
}
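A minimal NumPy sketch of the FFT decoupling this abstract alludes to (names and storage layout are illustrative, not from the paper): an n-by-n matrix whose "scalars" are length-k circulant parameter vectors is block-diagonalized by a DFT along the parameter axis, so the power method can run independently on each Fourier slice.

import numpy as np

def circulant_power_method(A, iters=200):
    # A has shape (n, n, k): A[i, j, :] holds the parameters of the circulant
    # "scalar" in position (i, j).  An FFT along the last axis decouples the
    # block-circulant operator into k independent n-by-n complex matrices.
    n, _, k = A.shape
    Ahat = np.fft.fft(A, axis=2)
    vals = np.zeros(k, dtype=complex)
    vecs = np.zeros((n, k), dtype=complex)
    for f in range(k):
        M = Ahat[:, :, f]
        x = np.ones(n, dtype=complex)
        for _ in range(iters):              # classical power iteration per slice
            x = M.dot(x)
            x /= np.linalg.norm(x)
        vals[f] = np.vdot(x, M.dot(x))      # Rayleigh quotient (||x|| = 1)
        vecs[:, f] = x
    # inverse FFT maps the eigenvalue/eigenvector slices back to circulant entries
    return np.fft.ifft(vals), np.fft.ifft(vecs, axis=1)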
@article{Bayati-2013-netalign,
author = {Mohsen Bayati and David F. Gleich and Amin Saberi and Ying Wang},
title = {Message-Passing Algorithms for Sparse Network Alignment},
journal = {ACM Trans. Knowl. Discov. Data},
year = {2013},
volume = {7},
pages = {3:1--3:31},
number = {1},
month = mar,
acmid = {2435212},
address = {New York, NY, USA},
articleno = {3},
doi = {10.1145/2435209.2435212},
file = {:Bayati 2013 - sparse belief propagation.pdf},
issn = {1556-4681},
issue_date = {March 2013},
keywords = {Network alignment, belief propagation, graph matching, message-passing},
numpages = {31},
owner = {David F. Gleich},
publisher = {ACM},
timestamp = {2011.07.01}
}
@article{Bonchi-2012-fast-katz,
author = {Bonchi, Francesco and Esfandiar, Pooya and Gleich, David F. and Greif,
Chen and Lakshmanan, Laks V.S.},
title = {Fast Matrix Computations for Pairwise and Columnwise Commute Times
and {Katz} Scores},
journal = {Internet Mathematics},
year = {2012},
volume = {8},
pages = {73--112},
number = {1-2},
abstract = {We explore methods for approximating the commute time and Katz score
between a pair of nodes. These methods are based on the approach
of matrices, moments, and quadrature developed in the numerical linear
algebra community. They rely on the Lanczos process and provide upper
and lower bounds on an estimate of the pairwise scores. We also explore
methods to approximate the commute times and Katz scores from a node
to all other nodes in the graph. Here, our approach for the commute
times is based on a variation of the conjugate gradient algorithm,
and it provides an estimate of all the diagonals of the inverse of
a matrix. Our technique for the Katz scores is based on exploiting
an empirical localization property of the Katz matrix. We adapt algorithms
used for personalized PageRank computing to these Katz scores and
theoretically show that this approach is convergent. We evaluate
these methods on 17 real-world graphs ranging in size from 1000 to
1,000,000 nodes. Our results show that our pairwise commute-time
method and columnwise Katz algorithm both have attractive theoretical
properties and empirical performance.},
doi = {10.1080/15427951.2012.625256},
file = {:Bonchi 2012 - fast katz.pdf},
keywords = {self},
owner = {dfgleic},
timestamp = {2011.06.30}
}
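For the columnwise Katz scores discussed above, a plain conjugate-gradient solve makes a compact sketch (it stands in for the paper's Lanczos/quadrature machinery and its personalized-PageRank-style algorithm; the function and parameter names are assumptions for illustration):

import numpy as np
import scipy.sparse as sp
import scipy.sparse.linalg as spla

def katz_column(A, i, alpha=None):
    # Katz scores from node i to every node: (I - alpha*A)^{-1} e_i - e_i.
    # A: symmetric sparse adjacency matrix; alpha must stay below
    # 1/lambda_max(A) for the underlying Neumann series to converge.
    n = A.shape[0]
    if alpha is None:
        lam = spla.eigsh(A.asfptype(), k=1, return_eigenvectors=False)[0]
        alpha = 0.9 / lam                   # safely inside the convergence radius
    e_i = np.zeros(n)
    e_i[i] = 1.0
    M = sp.eye(n) - alpha * A               # symmetric positive definite here
    x, info = spla.cg(M, e_i)
    assert info == 0, "CG did not converge"
    return x - e_i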
@article{constantine2010-rapr,
author = {Paul G. Constantine and David F. Gleich},
title = {Random Alpha {PageRank}},
journal = {Internet Mathematics},
year = {2010},
volume = {6},
pages = {189--236},
number = {2},
month = {September},
abstract = {We suggest a revision to the PageRank random surfer model that considers
the influence of a population of random surfers on the PageRank vector.
In the revised model, each member of the population has its own teleportation
parameter chosen from a probability distribution, and consequently,
the ranking vector is random. We propose three algorithms for computing
the statistics of the random ranking vector based respectively on
(i) random sampling, (ii) paths along the links of the underlying
graph, and (iii) quadrature formulas. We find that the expectation
of the random ranking vector produces similar rankings to its deterministic
analogue, but the standard deviation gives uncorrelated information
(under a Kendall-tau metric) with myriad potential uses. We examine
applications of this model to web spam.},
doi = {10.1080/15427951.2009.10129185},
file = {:constantine 2010 - random alpha pagerank.pdf},
keywords = {self},
owner = {David Gleich},
timestamp = {2010.01.07},
url = {http://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.im/1285339073}
}
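A small sketch of the first of the three algorithms above, random sampling (the quadrature and path-following algorithms are not shown; the Beta parameters are placeholders, not the paper's fitted values):

import numpy as np

def pagerank(P, alpha, v, tol=1e-10):
    # power-method iteration for x = alpha*P*x + (1 - alpha)*v,
    # with P column-stochastic and v a probability vector
    x = v.copy()
    while True:
        x_next = alpha * P.dot(x) + (1 - alpha) * v
        if np.abs(x_next - x).sum() < tol:
            return x_next
        x = x_next

def random_alpha_pagerank(P, v, a=2.0, b=1.0, samples=200, seed=0):
    # Monte Carlo estimate of E[x(alpha)] and Std[x(alpha)], alpha ~ Beta(a, b)
    rng = np.random.default_rng(seed)
    X = np.array([pagerank(P, rng.beta(a, b), v) for _ in range(samples)])
    return X.mean(axis=0), X.std(axis=0)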
@article{Constantine-2011-gni,
author = {Paul G. Constantine and David F. Gleich and Gianluca Iaccarino},
title = {A factorization of the spectral {Galerkin} system for parameterized
matrix equations: derivation and applications},
journal = {SIAM Journal on Scientific Computing},
year = {2011},
volume = {33},
pages = {2995--3009},
number = {5},
abstract = {Recent work has explored solver strategies for the linear system of
equations arising from a spectral Galerkin approximation of the solution
of PDEs with parameterized (or stochastic) inputs. We consider the
related problem of a matrix equation whose matrix and right-hand
side depend on a set of parameters (e.g., a PDE with stochastic inputs
semidiscretized in space) and examine the linear system arising from
a similar Galerkin approximation of the solution. We derive a useful
factorization of this system of equations, which yields bounds on
the eigenvalues, clues to preconditioning, and a flexible implementation
method for a wide array of problems. We complement this analysis
with (i) a numerical study of preconditioners on a standard elliptic
PDE test problem and (ii) a fluids application using existing CFD
codes; the MATLAB codes used in the numerical studies are available
online.},
doi = {10.1137/100799046},
file = {:Constantine 2011 - spectral galerkin.pdf},
keywords = {self},
owner = {David F. Gleich},
timestamp = {2010.09.15}
}
@article{constantine2010-parameterized,
author = {Paul G. Constantine and David F. Gleich and Gianluca Iaccarino},
title = {Spectral Methods for Parameterized Matrix Equations},
journal = {SIAM Journal on Matrix Analysis and Applications},
year = {2010},
volume = {31},
pages = {2681--2699},
number = {5},
abstract = {We apply polynomial approximation methods—known in the numerical PDEs
context as spectral methods—to approximate the vector-valued function
that satisfies a linear system of equations where the matrix and
the right-hand side depend on a parameter. We derive both an interpolatory
pseudospectral method and a residual-minimizing Galerkin method,
and we show how each can be interpreted as solving a truncated infinite
system of equations; the difference between the two methods lies
in where the truncation occurs. Using classical theory, we derive
asymptotic error estimates related to the region of analyticity of
the solution, and we present a practical residual error estimate.
We verify the results with two numerical examples.},
doi = {10.1137/090755965},
file = {:constantine 2010 - parameterized matrix equations.pdf},
keywords = {parameterized systems; spectral methods; self},
owner = {David F. Gleich},
publisher = {SIAM},
timestamp = {2010.09.24}
}
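A toy version of the interpolatory pseudospectral method from this abstract, assuming a single scalar parameter on [-1, 1] (the paper treats the general setting; names here are illustrative):

import numpy as np
from numpy.polynomial import chebyshev as cheb

def pseudospectral_solve(A, b, degree=10):
    # A, b: callables mapping a parameter s in [-1, 1] to a matrix and a
    # right-hand side.  Solve exactly at Chebyshev points, then return a
    # polynomial interpolant of the parameterized solution x(s).
    nodes = np.cos(np.pi * (2.0 * np.arange(degree + 1) + 1) / (2.0 * degree + 2))
    X = np.array([np.linalg.solve(A(s), b(s)) for s in nodes])
    coeffs = cheb.chebfit(nodes, X, degree)     # one Chebyshev fit per component
    return lambda s: cheb.chebval(s, coeffs)

# usage sketch: x = pseudospectral_solve(lambda s: A0 + s * A1, lambda s: b0)
# with A0, A1, b0 given; x(0.3) evaluates the interpolant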
@article{gleich2010-inner-outer,
author = {David F. Gleich and Andrew P. Gray and Chen Greif and Tracy Lau},
title = {An inner-outer iteration for {PageRank}},
journal = {SIAM Journal on Scientific Computing},
year = {2010},
volume = {32},
pages = {349--371},
number = {1},
month = {February},
abstract = {We present a new iterative scheme for PageRank computation. The algorithm
is applied to the linear system formulation of the problem, using
inner-outer stationary iterations. It is simple, can be easily implemented
and parallelized, and requires minimal storage overhead. Our convergence
analysis shows that the algorithm is effective for a crude inner
tolerance and is not sensitive to the choice of the parameters involved.
The same idea can be used as a preconditioning technique for nonstationary
schemes. Numerical examples featuring matrices of dimensions exceeding
100,000,000 in sequential and parallel environments demonstrate the
merits of our technique. Our code is available online for viewing
and testing, along with several large scale examples.},
doi = {10.1137/080727397},
file = {:gleich 2010 - inner-outer pagerank.pdf},
keywords = {self},
owner = {David F. Gleich},
timestamp = {2009.07.10}
}
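A dense-matrix sketch of the inner-outer scheme from this abstract, assuming the linear-system formulation (I - alpha*P) x = (1 - alpha) v with column-stochastic P; tolerances and the choice beta = 0.5 are illustrative:

import numpy as np

def inner_outer_pagerank(P, v, alpha=0.85, beta=0.5,
                         outer_tol=1e-8, inner_tol=1e-2):
    # Repeatedly solve the better-conditioned system
    #   (I - beta*P) y = (alpha - beta) P x + (1 - alpha) v,  beta < alpha,
    # using a simple stationary inner iteration with a crude tolerance.
    x = v.copy()
    while True:
        Px = P.dot(x)
        if np.abs(alpha * Px + (1 - alpha) * v - x).sum() < outer_tol:
            return x
        f = (alpha - beta) * Px + (1 - alpha) * v
        y = x
        while True:                  # inner sweep: y <- beta*P*y + f
            y_next = beta * P.dot(y) + f
            if np.abs(y_next - y).sum() < inner_tol:
                break
            y = y_next
        x = y_next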
@article{Gleich-2012-kronecker,
author = {David F. Gleich and Art B. Owen},
title = {Moment-based estimation of stochastic {Kronecker} graph parameters},
journal = {Internet Mathematics},
year = {2012},
volume = {8},
pages = {232--256},
number = {3},
month = {August},
abstract = {Stochastic Kronecker graphs supply a parsimonious model for large
sparse real-world graphs. They can specify the distribution of a
large random graph using only three or four parameters. Those parameters
have, however, proved difficult to choose in specific applications.
This article looks at method-of-moments estimators that are computationally
much simpler than maximum likelihood. The estimators are fast, and
in our examples, they typically yield Kronecker parameters with expected
feature counts closer to a given graph than we get from KronFit.
The improvement is especially prominent for the number of triangles
in the graph.},
doi = {10.1080/15427951.2012.680824},
file = {:Gleich 2012 - Moment based Kronecker.pdf},
owner = {David F. Gleich},
timestamp = {2011.07.02}
}
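One concrete moment behind the method-of-moments idea above: with a two-by-two initiator K = [a, b; c, d] and k Kronecker levels, each edge probability factors over the bit representations of its endpoints, so the expected edge count has a closed form. (This is the standard calculation; the paper matches several such feature counts, including triangles, and its exact formulas should be taken from the text.)

\[
  \mathbb{E}[\#\mathrm{edges}]
  = \sum_{u,v} \prod_{\ell=1}^{k} K[u_\ell, v_\ell]
  = \Bigl( \sum_{i,j} K[i,j] \Bigr)^{\!k}
  = (a + b + c + d)^k .
\]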
@article{gleich2007-approximate,
author = {David F. Gleich and Marzia Polito},
title = {Approximating Personalized {PageRank} with Minimal Use of Webgraph
Data},
journal = {Internet Mathematics},
year = {2007},
volume = {3},
pages = {257--294},
number = {3},
month = {December},
abstract = {In this paper, we consider the problem of calculating fast and accurate
approximations to the personalized PageRank score of a webpage. We
focus on techniques to improve speed by limiting the amount of web
graph data we need to access.
Our algorithms provide both the approximation to the personalized
PageRank score as well as guidance in using only the necessary information—and
therefore sensibly reduce not only the computational cost of the
algorithm but also the memory and memory bandwidth requirements.
We report experiments with these algorithms on web graphs of up to
118 million pages and prove a theoretical approximation bound for
all. Finally, we propose a local, personalized web-search system
for a future client system using our algorithms.},
doi = {10.1080/15427951.2006.10129128},
file = {:gleich 2007 - approximate personalized pagerank.pdf},
key = {GP2007},
keywords = {pagerank, self},
owner = {David Gleich},
timestamp = {2006.11.27}
}
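A sketch only inspired by the limited-data theme above, not the paper's algorithms or approximation bound: truncating tiny entries after each iteration keeps the iterate (and the slice of the web graph it touches) sparse. The function and threshold are assumptions for illustration.

import numpy as np
import scipy.sparse as sp

def approx_ppr(P, page, alpha=0.85, drop_tol=1e-6, iters=100):
    # Personalized PageRank for one page; P is a sparse column-stochastic
    # matrix.  Entries below drop_tol are discarded after each sweep.
    n = P.shape[0]
    x = sp.csc_matrix(([1.0], ([page], [0])), shape=(n, 1))
    for _ in range(iters):
        x = alpha * P.dot(x)                     # follow links
        x.data[np.abs(x.data) < drop_tol] = 0.0  # truncate tiny mass
        x.eliminate_zeros()
        x[page, 0] += 1.0 - alpha                # teleport back to the page
    return np.asarray(x.todense()).ravel() / x.sum()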
@article{Gleich-2011-cads,
author = {Gleich, David F. and Wang, Ying and Meng, Xiangrui and Ronaghi, Farnaz
and Gerritsen, Margot and Saberi, Amin},
title = {Some computational tools for digital archive and metadata maintenance},
journal = {BIT Numerical Mathematics},
year = {2011},
volume = {51},
pages = {127--154},
abstract = {Computational tools are a mainstay of current search and recommendation
technology. But modern digital archives are astonishingly diverse
collections of older digitized material and newer born digital content.
Finding interesting material in these archives is still challenging.
The material often lacks appropriate annotation---or metadata---so
that people can find the most interesting material. We describe four
computational tools we developed to aid in the processing and maintenance
of large digital archives. The first is an improvement to a graph
layout algorithm for graphs with hundreds of thousands of nodes.
The second is a new algorithm for matching databases with links among
the objects, also known as a network alignment problem. The third
is an optimization heuristic to disambiguate a set of geographic
references in a book. And the fourth is a technique to automatically
generate a title from a description.},
affiliation = {Sandia National Laboratories, Livermore, CA 94550, USA},
doi = {10.1007/s10543-011-0324-6},
file = {:gleich 2011 - comptuational tools.pdf},
issn = {0006-3835},
issue = {1},
keyword = {Mathematics and Statistics},
keywords = {self},
owner = {David F. Gleich},
publisher = {Springer Netherlands},
timestamp = {2011.03.25}
}
@article{wang2008-monte-carlo-for-adjoint,
author = {Qiqi Wang and David F. Gleich and Amin Saberi and Nasrollah Etemadi
and Parviz Moin},
title = {A {Monte Carlo} method for solving unsteady adjoint equations},
journal = {Journal of Computational Physics},
year = {2008},
volume = {227},
pages = {6184--6205},
number = {12},
month = {June},
abstract = {Traditionally, solving the adjoint equation for unsteady problems
involves solving a large, structured linear system. This paper presents
a variation on this technique and uses a Monte Carlo linear solver.
The Monte Carlo solver yields a forward-time algorithm for solving
unsteady adjoint equations. When applied to computing the adjoint
associated with Burgers' equation, the Monte Carlo approach is
faster for a large class of problems while preserving sufficient
accuracy.},
address = {San Diego, CA, USA},
doi = {10.1016/j.jcp.2008.03.006},
file = {:wang 2008 - monte carlo adjoint.pdf},
issn = {0021-9991},
keywords = {self},
owner = {David Gleich},
publisher = {Academic Press Professional, Inc.},
timestamp = {2008.10.18}
}
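A generic von Neumann-Ulam forward-walk estimator gives the flavor of the Monte Carlo linear solver mentioned above (the paper builds a related estimator tailored to the unsteady adjoint equation; everything here is a hedged, self-contained sketch):

import numpy as np

def mc_neumann(H, b, i, walks=20000, cont=0.9, seed=0):
    # Unbiased Monte Carlo estimate of x_i for x = H x + b, i.e. of the
    # Neumann series sum_k (H^k b)_i, assuming the series converges.
    rng = np.random.default_rng(seed)
    n = H.shape[0]
    total = 0.0
    for _ in range(walks):
        state, w = i, 1.0
        while True:
            total += w * b[state]
            if rng.random() > cont:           # kill the walk
                break
            nxt = int(rng.integers(n))        # uniform proposal over states
            w *= H[state, nxt] * n / cont     # importance-weight correction
            state = nxt
    return total / walks

# tiny check: for H = 0.1 * np.ones((3, 3)), b = np.ones(3), the exact answer
# is np.linalg.solve(np.eye(3) - H, b)[i]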
@inproceedings{Andersen-2012-overlapping,
author = {Andersen, Reid and Gleich, David F. and Mirrokni, Vahab},
title = {Overlapping clusters for distributed computation},
booktitle = {Proceedings of the fifth ACM international conference on Web search
and data mining},
year = {2012},
series = {WSDM '12},
pages = {273--282},
address = {New York, NY, USA},
publisher = {ACM},
acmid = {2124330},
doi = {10.1145/2124295.2124330},
file = {:Andersen 2012 - overlapping.pdf},
isbn = {978-1-4503-0747-5},
keywords = {conductance, covering, diffusion, leave-time, pagerank},
location = {Seattle, Washington, USA},
numpages = {10},
owner = {David F. Gleich},
timestamp = {2011.07.01},
url = {http://doi.acm.org/10.1145/2124295.2124330}
}
@inproceedings{Andersen-2011-overlapping-csc,
author = {Andersen, Reid and Gleich, David F. and Mirrokni, Vahab S.},
title = {Overlapping clusters for distributed computation},
booktitle = {Poster proceedings of the SIAM Workshop on Combinatorial and Scientific
Computing (CSC)},
year = {2011},
note = {Poster.},
file = {:Andersen 2011 - overlapping poster.pdf},
keywords = {self},
owner = {dgleich},
timestamp = {2011.10.26}
}
@inproceedings{bayati2009-network-alignment,
author = {Mohsen Bayati and Margot Gerritsen and David F. Gleich and Amin Saberi
and Ying Wang},
title = {Algorithms for Large, Sparse Network Alignment Problems},
booktitle = {Proceedings of the 9th IEEE International Conference on Data Mining},
year = {2009},
pages = {705--710},
month = {December},
abstract = {We propose a new distributed algorithm for sparse variants of the
network alignment problem that occurs in a variety of data mining
areas including systems biology, database matching, and computer
vision. Our algorithm uses a belief propagation heuristic and provides
near optimal solutions for an NP-hard combinatorial optimization
problem. We show that our algorithm is faster and outperforms or
nearly ties existing algorithms on synthetic problems, a problem
in bioinformatics, and a problem in ontology matching. We also provide
a unified framework for studying and comparing all network alignment
solvers.},
doi = {10.1109/ICDM.2009.135},
eprint = {0907.3338},
file = {:bayati 2009 - network alignment.pdf},
owner = {David F. Gleich},
timestamp = {2009.07.20}
}
@inproceedings{constantine2007-pagerank-pce,
author = {Paul G. Constantine and David F. Gleich},
title = {Using Polynomial Chaos to Compute the Influence of Multiple Random
Surfers in the {PageRank} Model},
booktitle = {Proceedings of the 5th Workshop on Algorithms and Models for the
Web Graph ({WAW2007})},
year = {2007},
editor = {Anthony Bonato and Fan Chung Graham},
volume = {4863},
series = {Lecture Notes in Computer Science},
pages = {82--95},
publisher = {Springer},
abstract = {The PageRank equation computes the importance of pages in a web graph
relative to a single random surfer with a constant teleportation
coefficient. To be globally relevant, the teleportation coefficient
should account for the influence of all users. Therefore, we correct
the PageRank formulation by modeling the teleportation coefficient
as a random variable distributed according to user behavior. With
this correction, the PageRank values themselves become random. We
present two methods to quantify the uncertainty in the random PageRank:
a Monte Carlo sampling algorithm and an algorithm based on the truncated
polynomial chaos expansion of the random quantities. With each of
these methods, we compute the expectation and standard deviation
of the PageRanks. Our statistical analysis shows that the standard
deviation of the PageRanks is uncorrelated with the PageRank vector.},
doi = {10.1007/978-3-540-77004-6_7},
file = {:constantine2007 - pagerank pce.pdf},
key = {CG2007},
keywords = {self, pagerank, polynomial chaos},
owner = {David Gleich},
timestamp = {2007.10.10}
}
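The quadrature flavor of computing these statistics, in a hedged sketch: approximate E[x(alpha)] for a uniform alpha with Gauss-Legendre nodes, where `solve` is any routine mapping alpha to the PageRank vector. The paper's truncated polynomial chaos expansion reuses such solves to build a surrogate for the whole random vector, not just its mean.

import numpy as np

def expected_solution_quadrature(solve, lo=0.6, hi=0.9, order=8):
    # E[x(alpha)] for alpha ~ Uniform(lo, hi) by Gauss-Legendre quadrature
    nodes, weights = np.polynomial.legendre.leggauss(order)
    alphas = 0.5 * (hi - lo) * nodes + 0.5 * (hi + lo)
    X = [solve(a) for a in alphas]
    # weights sum to 2, so dividing by their sum averages against the
    # uniform density on [lo, hi]
    return sum(w * x for w, x in zip(weights, X)) / weights.sum()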
@inproceedings{Constantine-2012-svd-signal,
author = {Paul G. Constantine and David F. Gleich},
title = {Distinguishing signal from noise in an {SVD} of simulation data},
booktitle = {Proceedings of the IEEE Conference on Acoustics, Speech, and Signal
Processing},
year = {2012},
pages = {5333--5336},
doi = {10.1109/ICASSP.2012.6289125},
file = {:Constantine 2012 - signal svd.pdf},
owner = {David F. Gleich},
timestamp = {2011.11.28}
}
@inproceedings{Constantine-2011-MRTSQR,
author = {Constantine, Paul G. and Gleich, David F.},
title = {Tall and skinny {QR} factorizations in {MapReduce} architectures},
booktitle = {Proceedings of the second international workshop on MapReduce and
its applications},
year = {2011},
series = {MapReduce '11},
pages = {43--50},
address = {New York, NY, USA},
publisher = {ACM},
acmid = {1996103},
doi = {10.1145/1996092.1996103},
file = {:Constantine 2011 - TSQR.pdf},
isbn = {978-1-4503-0700-0},
keywords = {Hadoop, QR factorization, linear regression, matrix factorization,
tsqr},
location = {San Jose, California, USA},
numpages = {8},
owner = {David F. Gleich},
timestamp = {2011.06.18}
}
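The core TSQR idea in a few lines of NumPy (a single map/reduce round; the paper's Hadoop implementation arranges the reduce as a tree, and names here are illustrative):

import numpy as np

def tsqr_R(A, block_rows=1000):
    # Map: factor each row block A_i = Q_i R_i.
    # Reduce: stack the small R_i and factor once more; the result is the
    # R-factor of the full tall-skinny matrix A.
    Rs = []
    for start in range(0, A.shape[0], block_rows):
        Rs.append(np.linalg.qr(A[start:start + block_rows])[1])
    return np.linalg.qr(np.vstack(Rs))[1]

# A = np.random.randn(100000, 4); tsqr_R(A) matches np.linalg.qr(A)[1]
# up to the signs of its rows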
@inproceedings{decoste2005-recommender,
author = {Dennis Decoste and David F. Gleich and Tejaswi Kasturi and Sathiya
Keerthi and Omid Madani and Seung-Taek Park and David M. Pennock
and Corey Porter and Sumit Sanghai and Farial Shahnaz and Leonid
Zhukov},
title = {Recommender Systems Research at {Yahoo! Research Labs}},
booktitle = {Beyond Personalization},
year = {2005},
address = {San Diego, CA},
month = {January},
note = {Position Statement},
abstract = {We describe some of the ongoing projects at Yahoo! Research
Labs that involve recommender systems. We discuss
recommender systems related problems and solutions relevant
to Yahoo!’s business.},
file = {:decoste2005 - yahoo recommender systems.pdf},
key = {DGK+05},
keywords = {self, recommender systems},
owner = {David Gleich},
timestamp = {2006.10.13}
}
@inproceedings{esfandiar2010-fast-katz,
author = {Pooya Esfandiar and Francesco Bonchi and David F. Gleich and Chen
Greif and Laks V. S. Lakshmanan and Byung-Won On},
title = {Fast {Katz} and Commuters: Efficient Approximation of Social Relatedness
over Large Networks},
booktitle = {Algorithms and Models for the Web Graph},
year = {2010},
abstract = {Motivated by social network data mining problems such as link prediction
and collaborative filtering, significant research effort has been
devoted to computing topological measures including the Katz score
and the commute time. Existing approaches typically approximate all
pairwise relationships simultaneously. In this paper, we are interested
in computing: the score for a single pair of nodes, and the top-k
nodes with the best scores from a given source node. For the pairwise
problem, we apply an iterative algorithm that computes upper and
lower bounds for the measures we seek. This algorithm exploits a
relationship between the Lanczos process and a quadrature rule. For
the top-k problem, we propose an algorithm that only accesses a small
portion of the graph and is related to techniques used in personalized
PageRank computing. To test the scalability and accuracy of our algorithms
we experiment with three real-world networks and find that these
algorithms run in milliseconds to seconds without any preprocessing.},
doi = {10.1007/978-3-642-18009-5_13},
file = {:esfandiar 2010 - fast katz.pdf},
keywords = {self},
owner = {David Gleich},
timestamp = {2010.04.08}
}
@inproceedings{constantine2010-teleportation,
author = {David F. Gleich and Paul G. Constantine and Abraham Flaxman and Asela
Gunawardana},
title = {Tracking the random surfer: empirically measured teleportation parameters
in {PageRank}},
booktitle = {WWW '10: Proceedings of the 19th international conference on World
wide web},
year = {2010},
pages = {381--390},
month = {April},
abstract = {PageRank computes the importance of each node in a directed graph
under a random surfer model governed by a teleportation parameter.
Commonly denoted alpha, this parameter models the probability of
following an edge inside the graph or, when the graph comes from
a network of web pages and links, clicking a link on a web page.
We empirically measure the teleportation parameter based on browser
toolbar logs and a click trail analysis. For a particular user or
machine, such analysis produces a value of alpha. We find that these
values nicely fit a Beta distribution with mean edge-following probability
between 0.3 and 0.7, depending on the site. Using these distributions,
we compute PageRank scores where PageRank is computed with respect
to a distribution as the teleportation parameter, rather than a constant
teleportation parameter. These new metrics are evaluated on the graph
of pages in Wikipedia.},
doi = {10.1145/1772690.1772730},
file = {:gleich 2010 - tracking the random surfer.pdf},
isbn = {978-1-60558-799-8},
keywords = {self},
location = {Raleigh, North Carolina, USA},
owner = {David F. Gleich},
timestamp = {2009.12.03}
}
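Fitting the Beta law mentioned in this abstract is a one-liner with SciPy; the sample below is synthetic stand-in data, not the paper's toolbar measurements:

import numpy as np
from scipy import stats

alphas = np.random.default_rng(0).beta(5.0, 3.0, size=1000)   # stand-in data
a, b, loc, scale = stats.beta.fit(alphas, floc=0.0, fscale=1.0)
print(f"Beta({a:.2f}, {b:.2f}) with mean {a / (a + b):.2f}")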
@inproceedings{Gleich-2011-skew-nuclear,
author = {Gleich, David F. and Lim, Lek-Heng},
title = {Rank aggregation via nuclear norm minimization},
booktitle = {Proceedings of the 17th ACM SIGKDD international conference on Knowledge
discovery and data mining},
year = {2011},
series = {KDD '11},
pages = {60--68},
address = {New York, NY, USA},
publisher = {ACM},
abstract = {The process of rank aggregation is intimately intertwined with the
structure of skew-symmetric matrices. We apply recent advances in
the theory and algorithms of matrix completion to skew-symmetric
matrices. This combination of ideas produces a new method for ranking
a set of items. The essence of our idea is that a rank aggregation
describes a partially filled skew-symmetric matrix. We extend an
algorithm for matrix completion to handle skew-symmetric data and
use that to extract ranks for each item. Our algorithm applies to
both pairwise comparison and rating data. Because it is based on
matrix completion, it is robust to both noise and incomplete data.
We show a formal recovery result for the noiseless case and present
a detailed study of the algorithm on synthetic data and Netflix ratings.},
acmid = {2020425},
doi = {10.1145/2020408.2020425},
file = {:Gleich 2011 - rank aggregation.pdf},
isbn = {978-1-4503-0813-7},
keywords = {self, nuclear norm, rank aggregation, skew symmetric},
location = {San Diego, California, USA},
numpages = {9},
owner = {David F. Gleich},
timestamp = {2010.01.30}
}
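A simplified stand-in for the structure this abstract describes: if the aggregated comparisons y_ij estimate score differences s_i - s_j, a sparse least-squares fit recovers the rank-two skew-symmetric structure s e^T - e s^T in the noiseless case. The paper's skew-symmetric matrix-completion algorithm additionally handles very sparse, noisy data; this sketch is not that algorithm.

import numpy as np
import scipy.sparse as sp
import scipy.sparse.linalg as spla

def scores_from_pairwise(pairs, n):
    # pairs: list of (i, j, y_ij) aggregated pairwise differences
    rows, cols, vals, y = [], [], [], []
    for k, (i, j, yij) in enumerate(pairs):
        rows += [k, k]
        cols += [i, j]
        vals += [1.0, -1.0]
        y.append(yij)
    M = sp.csr_matrix((vals, (rows, cols)), shape=(len(pairs), n))
    s = spla.lsqr(M, np.array(y))[0]
    return s - s.mean()      # scores are defined only up to an additive shift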
@inproceedings{Gleich-2012-neighborhoods,
author = {David F. Gleich and C. Seshadhri},
title = {Vertex neighborhoods, low conductance cuts, and good seeds for local
community methods},
booktitle = {Proceedings of the 18th ACM SIGKDD international conference
	on Knowledge discovery and data mining},
year = {2012},
pages = {597--605},
doi = {10.1145/2339530.2339628},
file = {:Gleich 2012 - neighborhoods.pdf},
owner = {David F. Gleich},
timestamp = {2011.11.28}
}
@inproceedings{gleich2005-ppagerank,
author = {David F. Gleich and Leonid Zhukov},
title = {Scalable Computing with Power-Law Graphs: Experience with Parallel
{PageRank}},
booktitle = {Supercomputing 2005},
year = {2005},
month = {November},
note = {Poster.},
file = {:gleich2005 - parallel pagerank.pdf},
key = {GZ2005},
keywords = {self, pagerank},
owner = {David Gleich},
timestamp = {2006.10.13},
url = {http://www.cs.purdue.edu/homes/dgleich/publications/gleich2005 - parallel pagerank.pdf}
}
@inproceedings{gleich2004-svd,
author = {David F. Gleich and Leonid Zhukov},
title = {An {SVD} Based Term Suggestion and Ranking System},
booktitle = {ICDM '04: Proceedings of the Fourth IEEE International Conference
on Data Mining (ICDM'04)},
year = {2004},
pages = {391--394},
address = {Brighton, UK},
month = {November},
publisher = {IEEE Computer Society},
abstract = {In this paper, we consider the application of the singular value decomposition
(SVD) to a search term suggestion system in a pay-for-performance
search market. We propose a novel positive and negative refinement
method based on orthogonal subspace projections. We demonstrate that
SVD subspace-based methods: 1) expand coverage by reordering the
results, and 2) enhance the clustered structure of the data. The
numerical experiments reported in this paper were performed on Overture's
pay-per-performance search market data.},
doi = {10.1109/ICDM.2004.10006},
file = {:gleich 2004 - svd based term suggestion and ranking.pdf},
isbn = {0-7695-2142-8},
key = {GZ2004},
keywords = {self},
owner = {David Gleich},
timestamp = {2006.10.13}
}
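A latent-space sketch of SVD-based term suggestion (the paper's positive and negative refinement via orthogonal subspace projections is not reproduced; names and the cosine-similarity choice are assumptions):

import numpy as np

def suggest_terms(X, seed_terms, k=50, topn=10):
    # X: term-by-document matrix.  Rows of U_k S_k place terms in a rank-k
    # latent space; terms closest (by cosine) to the centroid of the seed
    # terms are suggested.
    U, S, _ = np.linalg.svd(X, full_matrices=False)
    T = U[:, :k] * S[:k]
    T = T / np.linalg.norm(T, axis=1, keepdims=True)
    q = T[seed_terms].mean(axis=0)
    q /= np.linalg.norm(q)
    return np.argsort(-T.dot(q))[:topn]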
@inproceedings{gleich2005-wom,
author = {David F. Gleich and Leonid Zhukov and Matthew Rasmussen and Kevin
Lang},
title = {The {World of Music}: {SDP} Embedding of High Dimensional Data},
booktitle = {Information Visualization 2005},
year = {2005},
note = {Interactive Poster},
abstract = {In this paper we investigate the use of Semidefinite Programming (SDP)
optimization for high dimensional data layout and graph visualization.
We developed a set of interactive visualization tools and used them
on music artist ratings data from Yahoo!. The computed layout preserves
a natural grouping of the artists and provides visual assistance
for browsing large music collections.},
file = {:gleich2005 - world of music.pdf},
key = {GZRL2005},
keywords = {self, recommender systems},
owner = {David Gleich},
timestamp = {2006.10.13},
url = {http://www.cs.purdue.edu/homes/dgleich/publications/gleich2005 - world of music.pdf}
}
@inproceedings{Khan-2012-netalign-multicore,
author = {Arif Khan and David F. Gleich and Mahantesh Halappanavar and Alex
Pothen},
title = {A multicore algorithm for network alignment via approximate matching},
booktitle = {Proceedings of the 2012 ACM/IEEE International Conference for High
Performance Computing, Networking, Storage and Analysis},
year = {2012},
series = {SC '12},
pages = {64:1--64:11},
address = {Los Alamitos, CA, USA},
publisher = {IEEE Computer Society Press},
acmid = {2389083},
articleno = {64},
isbn = {978-1-4673-0804-5},
location = {Salt Lake City, Utah},
numpages = {11},
owner = {David Gleich},
timestamp = {2012.05.30},
url = {http://conferences.computer.org/sc/2012/papers/1000a054.pdf}
}
@inproceedings{Rossi-2012-dynamicpr,
author = {Ryan A. Rossi and David F. Gleich},
title = {Dynamic {PageRank} using Evolving Teleportation},
booktitle = {Algorithms and Models for the Web Graph},
year = {2012},
editor = {Bonato, Anthony and Janssen, Jeannette},
volume = {7323},
series = {Lecture Notes in Computer Science},
pages = {126--137},
publisher = {Springer Berlin Heidelberg},
abstract = {The importance of nodes in a network constantly fluctuates based on
changes in the network structure as well as changes in external interest.
We propose an evolving teleportation adaptation of the PageRank method
to capture how changes in external interest influence the importance
of a node. This framework seamlessly generalizes PageRank because
the importance of a node will converge to the PageRank values if
the external influence stops changing. We demonstrate the effectiveness
of the evolving teleportation on the Wikipedia graph and the Twitter
social network. The external interest is given by the number of hourly
visitors to each page and the number of monthly tweets for each user.},
doi = {10.1007/978-3-642-30541-2_10},
file = {:Rossi 2012 - dynamic-pr.pdf},
isbn = {978-3-642-30540-5},
owner = {dgleich},
timestamp = {2012.02.29}
}
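A forward-Euler sketch of the evolving-teleportation idea in this abstract, under the assumption that the dynamics take the form x'(t) = (1 - alpha) v(t) + (alpha P - I) x(t), whose fixed point for constant v is the ordinary PageRank vector; the paper's precise formulation (e.g., how v(t) is normalized) should be taken from the text.

import numpy as np

def dynamic_pagerank(P, v_of_t, alpha=0.85, t_end=10.0, h=0.01):
    # v_of_t: callable mapping time t to a teleportation vector, e.g. built
    # from hourly page-view counts; P column-stochastic
    t = 0.0
    x = v_of_t(0.0).copy()
    while t < t_end:
        x = x + h * ((1 - alpha) * v_of_t(t) + alpha * P.dot(x) - x)
        t += h
    return x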
@misc{Andrew-2004-list-access,
author = {Kevin Andrew and David F. Gleich},
title = {MTF, BIT, and COMB: A Guide to Deterministic and Randomized Online
	Algorithms for the List Access Problem},
howpublished = {Advanced Algorithms, Harvey Mudd College, Final Project},
year = {2004},
abstract = {In this survey, we discuss two randomized online algorithms for
	the list access problem. First, we review competitive analysis and
	show that the MTF algorithm is 2-competitive using a potential function.
	Then, we introduce randomized competitive analysis and the associated
	adversary models. We show that the randomized BIT algorithm is
	7/4-competitive using a potential function argument. We then introduce
	the pairwise property and the TIMESTAMP algorithm to show that the
	COMB algorithm, a COMBination of the BIT and TIMESTAMP algorithms, is
	8/5-competitive. COMB is the best known randomized algorithm for the
	list access problem.},
file = {:Andrew 2004 - List Access.pdf},
keywords = {self},
owner = {David F. Gleich},
timestamp = {2011.07.02}
}
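The MTF rule from this survey is easy to state in code; here is a minimal cost-accounting sketch under the standard full-cost model (accessing the item at 1-based position p costs p):

def mtf_cost(requests, universe):
    # Total cost of serving `requests` with Move-To-Front: pay the item's
    # position, then move it to the front.  The survey shows MTF is
    # 2-competitive against the optimal offline algorithm.
    lst = list(universe)
    cost = 0
    for item in requests:
        pos = lst.index(item)
        cost += pos + 1
        lst.insert(0, lst.pop(pos))
    return cost

# e.g. mtf_cost("cbcbcb", "abc"): repeated hits keep the hot items in front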
@misc{gleich2003-clinic,
author = {Erin Bodine and David F. Gleich and Cathy Kurata and Jordan Kwan
and Lesley Ward and Daniel Fain},
title = {Three Methods for Improving Relevance in Web Search},
howpublished = {Clinic Report, Harvey Mudd College},
month = {May 9},
year = {2003},
note = {102 pages. Includes fully documented program code on accompanying
CD.},
abstract = {The 2002–2003 Overture clinic project evaluated and implemented three
different methods
for improving relevance ordering in web search. The three methods
were bottom up
micro information unit (MIU) analysis, top down MIU analysis, and
proximity scoring. We
ran these three methods on the top 200 web pages returned for each
of 58 queries by an
already existing algorithmic search engine. We used two metrics, precision
and relevance
ordering, to evaluate the results.
Precision deals with how relevant the web page is for a given query,
while relevance
ordering is how well-ordered the returned results are. We evaluated
the precision of each
method and of the algorithmic search engine by hand. For relevance
ordering, we recruited
other humans to compare pages and used their decisions to generate
an ideal ranking for
each query. The results of each of our methods and of the algorithmic
search engine are then
compared to this ideal ranking vector using Kendall’s Tau.
Our bottom up MIU analysis method achieved the highest precision score
of 0.78 out of
1.00. In addition, bottom up MIU analysis received the second highest
correlation coefficient
(or relevance ordering score) of 0.107 while the algorithmic search
engine received the highest
correlation coefficient of 0.121. Interestingly, our proximity scoring
method received high
relevance ordering scores when the algorithmic search engine received
low relevance ordering
scores.},
file = {:bodine2003 - clinic, improved relevance.pdf},
key = {BGK+2003},
keywords = {self, web search},
owner = {David Gleich},
timestamp = {2006.10.13}
}
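The relevance-ordering metric above is Kendall's tau between a method's ranking and the human-derived ideal ranking; SciPy computes it directly. The rankings below are made up for illustration, not the clinic's data.

from scipy import stats

ideal = [1, 2, 3, 4, 5]            # human-derived ordering of five pages
method = [2, 1, 3, 4, 5]           # ordering produced by one method
tau, pvalue = stats.kendalltau(ideal, method)
print(f"Kendall tau = {tau:.2f}")  # +1 perfect agreement, -1 full reversal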
@misc{Gleich-2005-directed-spectral,
author = {David F. Gleich},
title = {Hierarchical Directed Spectral Graph Partitioning},
howpublished = {Information Networks, Stanford University, Final Project, 2005},
year = {2005},
abstract = {In this report, we examine the generalization of the Laplacian of
a graph due to Fan
Chung. We show that Fan Chung’s generalization reduces to examining
one particular
symmetrization of the adjacency matrix for a directed graph. From
this result, the
directed Cheeger bounds trivially follow. Additionally, we implement
and examine
the benefits of directed hierarchical spectral clustering empirically
on a dataset from
Wikipedia. Finally, we examine a set of competing heuristic methods
on the same
dataset.},
file = {:Gleich 2005 - hierarchical directed spectral.pdf},
keywords = {self},
owner = {David F. Gleich},
timestamp = {2011.07.02},
url = {http://www.cs.purdue.edu/homes/dgleich/publications/Gleich 2005 - hierarchical directed spectral.pdf}
}
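A dense sketch of the construction this report examines: Fan Chung's directed Laplacian reduces to one particular symmetrization of the walk matrix. The teleportation smoothing and iteration counts below are illustrative assumptions; every node is assumed to have at least one out-edge.

import numpy as np

def chung_directed_laplacian(A, teleport=0.99, iters=10000):
    # Build the random-walk matrix P (smoothed so a stationary distribution
    # phi exists), then form
    #   I - (Phi^{1/2} P Phi^{-1/2} + Phi^{-1/2} P^T Phi^{1/2}) / 2 .
    n = A.shape[0]
    P = A / A.sum(axis=1, keepdims=True)
    P = teleport * P + (1.0 - teleport) / n
    phi = np.ones(n) / n
    for _ in range(iters):                 # power iteration: phi <- phi P
        phi = phi.dot(P)
    d = np.sqrt(phi)
    M = d[:, None] * P / d[None, :]        # Phi^{1/2} P Phi^{-1/2}
    return np.eye(n) - (M + M.T) / 2.0

# the eigenvector of the second-smallest eigenvalue gives the spectral cut,
# and the directed Cheeger bound mentioned in the abstract controls it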
@misc{Gleich-2005-finite-calculus,
author = {David F. Gleich},
title = {Finite Calculus: A tutorial for solving nasty sums},
howpublished = {Combinatorics, Final Paper, Stanford University, 2004.},
year = {2005},
file = {:Gleich 2005 - finite calculus.pdf},
keywords = {self},
owner = {David F. Gleich},
timestamp = {2011.07.02}
}
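A worked instance of the tutorial's machinery (standard finite-calculus identities, stated here independently of the paper): with falling powers $x^{\underline{m}} = x(x-1)\cdots(x-m+1)$, summation behaves like integration, which dispatches a classic "nasty sum":

\[
  \sum_{k=0}^{n-1} k^{\underline{m}} = \frac{n^{\underline{m+1}}}{m+1},
  \qquad
  k^2 = k^{\underline{2}} + k^{\underline{1}}
  \;\Longrightarrow\;
  \sum_{k=0}^{n-1} k^2
  = \frac{n^{\underline{3}}}{3} + \frac{n^{\underline{2}}}{2}
  = \frac{n(n-1)(2n-1)}{6}.
\]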
@misc{Gleich-2003-computer-chess,
author = {David F. Gleich},
title = {Machine Learning in Computer Chess: Genetic Programming and KRK},
howpublished = {Independent Study Report, Harvey Mudd College},
year = {2003},
abstract = {In this paper, I describe genetic programming as a machine learning
paradigm and
evaluate its results in attempting to learn basic chess rules. Genetic
programming
exploits a simulation of Darwinian evolution to construct programs.
When applied
to the King-Rook-King (KRK) chess endgame problem, genetic programming
shows
promising results in spite of a lack of significant chess knowledge.},
file = {:Gleich 2003 - Machine Learning in Computer Chess.pdf},
keywords = {self},
owner = {David F. Gleich},
timestamp = {2011.07.02}
}
@inproceedings{gleich2007-pagerank-deriv,
author = {David F. Gleich and Peter Glynn and Gene H. Golub and Chen Greif},
title = {Three results on the {PageRank} vector: eigenstructure, sensitivity,
and the derivative},
booktitle = {Web Information Retrieval and Linear Algebra Algorithms},
year = {2007},
editor = {Andreas Frommer and Michael W. Mahoney and Daniel B. Szyld},
number = {07071},
series = {Dagstuhl Seminar Proceedings},
publisher = {Internationales Begegnungs- und Forschungszentrum fuer Informatik
(IBFI), Schloss Dagstuhl, Germany},
abstract = {The three results on the PageRank vector are preliminary but shed
light on the eigenstructure of a PageRank modified Markov chain and
what happens when changing the teleportation parameter in the PageRank
model. Computations with the derivative of the PageRank vector with
respect to the teleportation parameter show predictive ability and
identify an interesting set of pages from Wikipedia.},
file = {:gleich 2007 - pagerank derivative and sensitivity.pdf},
issn = {1862-4405},
key = {GGGG2007},
keywords = {self, PageRank, PageRank derivative, PageRank sensitivity, PageRank
	eigenstructure},
address = {Dagstuhl, Germany},
owner = {David Gleich},
timestamp = {2008.03.04},
url = {http://drops.dagstuhl.de/opus/volltexte/2007/1061}
}
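One standard route to the derivative mentioned in this abstract (a textbook differentiation, so the paper's exact development may differ): differentiate the PageRank linear system with respect to the teleportation parameter,

\[
  (I - \alpha P)\, x(\alpha) = (1-\alpha)\, v
  \;\Longrightarrow\;
  (I - \alpha P)\, x'(\alpha) = P\, x(\alpha) - v,
\]

so once $x(\alpha)$ is known, the derivative costs a single additional PageRank-type solve with right-hand side $P x(\alpha) - v$.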
@misc{Gleich-2006-wom,
author = {David F. Gleich and Matthew Rasmussen and Kevin Lang and Leonid Zhukov},
title = {The World of Music: User Ratings; Spectral and Spherical Embeddings;
Map Projections},
howpublished = {Online report},
year = {2006},
abstract = {In this paper we present an algorithm for layout and visualization
of music collections based on similarities between musical artists.
The core of the algorithm consists
of a non-linear low dimensional embedding of a similarity graph constrained
to the
surface of a hyper-sphere. This approach effectively uses additional
dimensions in the
embedding. We derive the algorithm using a simple energy minimization
procedure
and show the relationships to several well known eigenvector based
methods.
We also describe a method for constructing a similarity graph from
user ratings,
as well as procedures for mapping the layout from the hyper-sphere
to a 2d display.
We demonstrate our techniques on Yahoo! Music user ratings data and
a MusicMatch
artist similarity graph.},
file = {:Gleich 2006 - wom.pdf},
keywords = {self},
owner = {David F. Gleich},
timestamp = {2011.07.02},
url = {http://www.cs.purdue.edu/homes/dgleich/publications/Gleich 2006 - wom.pdf}
}
@techreport{gleich2004-parallel,
author = {David F. Gleich and Leonid Zhukov and Pavel Berkhin},
title = {Fast Parallel {PageRank}: A Linear System Approach},
institution = {Yahoo! Research Labs},
year = {2004},
number = {YRL-2004-038},
abstract = {In this paper we investigate the convergence of iterative stationary
and Krylov subspace methods for the PageRank linear system, including
the convergence dependency on teleportation. We demonstrate that
linear system iterations
converge faster than the simple power method and are less
sensitive to the changes in teleportation.
In order to perform this study we developed a framework
for parallel PageRank computing. We describe the details
of the parallel implementation and provide experimental results obtained
on a 70-node Beowulf cluster.},
file = {:gleich2004-parallel.pdf},
key = {GZB2006},
keywords = {self, pagerank},
owner = {David Gleich},
timestamp = {2006.11.29},
url = {http://www.cs.purdue.edu/homes/dgleich/publications/gleich2004-parallel.pdf}
}
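A sketch of the linear-system route this report studies, with BiCGSTAB standing in for the Krylov methods compared there (solver choice and normalization are illustrative assumptions):

import numpy as np
import scipy.sparse as sp
import scipy.sparse.linalg as spla

def pagerank_krylov(P, v, alpha=0.85):
    # Solve (I - alpha P) x = (1 - alpha) v; the report's observation is
    # that such solvers converge faster than the power method and are less
    # sensitive to the teleportation parameter.
    n = P.shape[0]
    A = sp.eye(n) - alpha * P
    x, info = spla.bicgstab(A, (1.0 - alpha) * v)
    assert info == 0, "solver did not converge"
    return x / x.sum()       # renormalize (dangling nodes can leak mass)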
@misc{Zhukov-2004-soft-clustering,
author = {Leonid Zhukov and David F. Gleich},
title = {Topic Identification in Soft Clustering using PCA and ICA},
howpublished = {Online report, Yahoo! Research Labs},
year = {2004},
abstract = {Many applications can benefit from soft clustering, where
each datum is assigned to multiple clusters with membership
weights that sum to one. In this paper we present
a comparison of principal component analysis (PCA) and
independent component analysis (ICA) when used for soft
clustering. We provide a short mathematical background for
these methods and demonstrate their application to a sponsored
links search listings dataset. We present examples of
the soft clusters generated by both methods and compare
the results.},
file = {:Zhukov 2004 - soft clustering pca and ica.pdf},
keywords = {self},
owner = {David F. Gleich},
timestamp = {2011.07.02}
}
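A small scikit-learn sketch of the comparison this report describes; normalizing absolute component loadings into weights that sum to one is one simple reading of "soft clustering" chosen here for illustration, and the report's own construction may differ.

import numpy as np
from sklearn.decomposition import PCA, FastICA

def soft_memberships(X, k=5, method="ica"):
    # Each datum receives k nonnegative membership weights summing to one,
    # derived from its PCA or ICA component loadings.
    model = FastICA(n_components=k) if method == "ica" else PCA(n_components=k)
    W = np.abs(model.fit_transform(X))
    return W / W.sum(axis=1, keepdims=True)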
@article{Gleich-2011-review-of-kamvar,
author = {David F. Gleich},
title = {Review of: {Numerical algorithms for personalized search in self-organizing
information networks by Sep Kamvar, Princeton Univ. Press, 2010,
160pp., ISBN13: 978-0-691-14503-7}},
journal = {Linear Algebra and its Applications},
year = {2011},
volume = {435},
pages = {908--909},
number = {4},
doi = {10.1016/j.laa.2011.01.013},
file = {:Gleich 2011 - review of kamvar.pdf},
issn = {0024-3795},
keywords = {self},
owner = {David F. Gleich},
timestamp = {2011.07.02}
}
@phdthesis{gleich2009-thesis,
author = {David F. Gleich},
title = {Models and Algorithms for {PageRank} Sensitivity},
school = {Stanford University},
year = {2009},
month = {September},
abstract = {The PageRank model helps evaluate the relative importance of nodes
in a
large graph, such as the graph of links on the world wide web. An
important
piece of the PageRank model is the teleportation parameter $\alpha$.
We explore the
interaction between $\alpha$ and PageRank through the lens of sensitivity
analysis.
Writing the PageRank vector as a function of $\alpha$ allows us to
take a derivative,
which is a simple sensitivity measure. As an alternative approach,
we apply
techniques from the field of uncertainty quantification. Regarding
$\alpha$ as a random variable
produces a new PageRank model in which each PageRank value
is a random variable. We explore the standard deviation of these variables
to get another measure of PageRank sensitivity. One interpretation
of this
new model shows that it corrects a small oversight in the original
PageRank
formulation.
Both of the above techniques require solving multiple PageRank problems,
and thus a robust PageRank solver is needed. We discuss an inner-outer
iteration for this purpose. The method is low-memory, simple to implement,
and has excellent performance for a range of teleportation parameters.
We show empirical results with these techniques on graphs with over
	2 billion edges.},
file = {:gleich 2009 - pagerank thesis.pdf},
keywords = {self},
owner = {David Gleich},
timestamp = {2009.09.13},
url = {http://www.stanford.edu/group/SOL/dissertations/pagerank-sensitivity-thesis-online.pdf}
}