David Gleich

@article{Huang-preprint-mucond-cheeger,
  author = {Yufan Huang and David F. Gleich},
  journal = {arXiv},
  title = {A Cheeger inequality for size-specific conductance},
  year = {2023},
  pages = {2303.11452},
  volume = {cs.DM},
  arxiv = {http://arxiv.org/abs/2303.11452},
  owner = {dgleich}
}

@article{Eldaghar-preprint-epidemics,
  author = {Omar Eldaghar and Michael W. Mahoney and David F. Gleich},
  journal = {arXiv},
  title = {Multi-scale Local Network Structure Critically Impacts Epidemic Spread and Interventions},
  year = {2023},
  pages = {2312.17351},
  volume = {cs.SI},
  arxiv = {https://arxiv.org/abs/2312.17351},
  owner = {dgleich},
  url = {https://arxiv.org/abs/2312.17351}
}

@article{Gleich-2024-better-than-best-svd,
  author = {David F. Gleich},
  journal = {arXiv},
  title = {Better than best low-rank approximation with the singular value decomposition},
  year = {2024},
  pages = {2402.18427},
  volume = {math.NA},
  owner = {dgleich}
}

@article{Huang-preprint-SDPLR,
  author = {Yufan Huang and David F. Gleich},
  journal = {arXiv},
  title = {Suboptimality bounds for trace-bounded SDPs enable a faster and scalable low-rank SDP solver SDPLR+},
  year = {2024},
  volume = {math.OC},
  arxiv = {https://arxiv.org/abs/2406.10407},
  eprint = {2406.10407},
  mysoftware = {https://github.com/luotuoqingshan/SDPLRPlus.jl},
  owner = {dgleich}
}

@article{Le-2025-peptide,
  author = {Le, MyPhuong and Zhu, Yu and Dziekonski, Eric and Holden, Dylan and Gleich, David and Cooks, R. Graham},
  journal = {chemRxiv},
  title = {Framework for De novo Sequencing of Peptide Mixtures via Network Analysis and Two-Dimensional Tandem Mass Spectrometry},
  year = {2025},
  month = mar,
  doi = {10.26434/chemrxiv-2025-83d8x},
  publisher = {American Chemical Society (ACS)},
  url = {http://dx.doi.org/10.26434/chemrxiv-2025-83d8x}
}

@article{Eldaghar-2025-spatial-hypergraphs,
  author = {Omar Eldaghar and Yu Zhu and David F. Gleich},
  journal = {arXiv},
  title = {A spatial hypergraph model to smoothly interpolate betweenpairwise graphs and hypergraphs to study higher-orderstructures},
  year = {2025},
  note = {Expanded journal version of 2024 Complex Networks paper},
  arxiv = {https://arxiv.org/abs/2410.12688},
  mysoftware = {https://github.com/oeldaghar/spatial-hypergraph-epidemics}
}

@article{Huang-2025-magnetic-laplacian,
  author = {Yinan Huang and David F. Gleich and Pan Li},
  journal = {arXiv},
  title = {Powers of Magnetic Graph Matrix: Fourier Spectrum, Walk Compression, and Applications},
  year = {2025},
  pages = {2506.07343},
  arxiv = {https://arxiv.org/abs/2506.07343},
  owner = {dgleich}
}

@article{Tunnell-preprint-preconditioners-cg,
  author = {Marc Tunnell and David F. Gleich},
  journal = {arXiv},
  title = {An Empirical Study of Conjugate Gradient Preconditioners for Solving Symmetric Positive Definite Systems of Linear Equations},
  year = {2025},
  arxiv = {https://arxiv.org/abs/2505.20696},
  mysoftware = {https://github.com/tunnellm/CAPPABenchmarkTools.jl},
  owner = {dgleich}
}

@article{Wang-preprint-fine-grained,
  author = {Yuqing Wang and Charles Colley and Brian Wheatman and Jiya Su and David F. Gleich and Andrew A. Chien},
  journal = {arXiv},
  title = {How Fast Can Graph Computations Go on Fine-grained Parallel Architectures},
  year = {20},
  pages = {2507.00949},
  volume = {cs.DC},
  arxiv = {https://arxiv.org/abs/2507.00949},
  owner = {dgleich}
}

@inproceedings{Andersen-2012-overlapping,
  title = {Overlapping clusters for distributed computation},
  author = {Andersen, Reid and Gleich, David F. and Mirrokni, Vahab},
  booktitle = {Proceedings of the fifth ACM international conference on Web search and data mining},
  year = {2012},
  address = {New York, NY, USA},
  month = feb,
  pages = {273--282},
  publisher = {ACM},
  series = {WSDM '12},
  acmid = {2124330},
  doi = {10.1145/2124295.2124330},
  file = {:Andersen 2012 - overlapping.pdf},
  isbn = {978-1-4503-0747-5},
  keywords = {conductance, covering, diffusion, leave-time, pagerank},
  location = {Seattle, Washington, USA},
  mynote = {Acceptance rate 20.7\% = 75/362 (and also presentation = 30/362 = 8.2%)},
  numpages = {10},
  owner = {David F. Gleich},
  timestamp = {2011.07.01},
  url = {http://doi.acm.org/10.1145/2124295.2124330}
}

@inproceedings{Andersen-2011-overlapping-csc,
  title = {Overlapping clusters for distributed computation},
  author = {Andersen, Reid and Gleich, David F. and Mirrokni, Vahab S},
  booktitle = {Poster proceedings of the SIAM Workshop on Combinatorial and Scientific Computing (CSC)},
  year = {2011},
  note = {Poster.},
  file = {:Andersen 2011 - overlapping poster.pdf},
  keywords = {self},
  owner = {dgleich},
  timestamp = {2011.10.26}
}

@inproceedings{bayati2009-network-alignment,
  title = {Algorithms for Large, Sparse Network Alignment Problems},
  author = {Mohsen Bayati and Margot Gerritsen and David F. Gleich and Amin Saberi and Ying Wang},
  booktitle = {Proceedings of the 9th IEEE International Conference on Data Mining},
  year = {2009},
  month = {December},
  pages = {705-710},
  abstract = {We propose a new distributed algorithm for sparse variants of the network alignment problem that occurs in a variety of data mining areas including systems biology, database matching, and computer vision. Our algorithm uses a belief propagation heuristic and provides near optimal solutions for an NP-hard combinatorial optimization problem. We show that our algorithm is faster and outperforms or nearly ties existing algorithms on synthetic problems, a problem in bioinformatics, and a problem in ontology matching. We also provide a unified framework for studying and comparing all network alignment solvers.},
  doi = {10.1109/ICDM.2009.135},
  eprint = {0907.3338},
  file = {:bayati 2009 - network alignment.pdf},
  mynote = {Acceptance rate 140/786 = 17.8\%},
  owner = {David F. Gleich},
  timestamp = {2009.07.20}
}

@article{Bayati-2013-netalign,
  title = {Message-Passing Algorithms for Sparse Network Alignment},
  author = {Mohsen Bayati and David F. Gleich and Amin Saberi and Ying Wang},
  journal = {ACM Trans. Knowl. Discov. Data},
  pages = {3:1--3:31},
  volume = {7},
  year = {2013},
  month = mar,
  number = {1},
  abstract = {Network alignment generalizes and unifies several approaches for forming a matching or alignment between the vertices of two graphs. We study a mathematical programming framework for network alignment problem and a sparse variation of it where only a small number of matches between the vertices of the two graphs are possible. We propose a new message passing algorithm that allows us to compute, very efficiently, approximate solutions to the sparse network alignment problems with graph sizes as large as hundreds of thousands of vertices. We also provide extensive simulations comparing our algorithms with two of the best solvers for network alignment problems on two synthetic matching problems, two bioinformatics problems, and three large ontology alignment problems including a multilingual problem with a known labeled alignment.},
  acmid = {2435212},
  address = {New York, NY, USA},
  articleno = {3},
  doi = {10.1145/2435209.2435212},
  file = {:Bayati 2013 - sparse belief propagation.pdf},
  issn = {1556-4681},
  issue_date = {March 2013},
  keywords = {Network alignment, belief propagation, graph matching, message-passing},
  mysoftware = {https://www.cs.purdue.edu/homes/dgleich/codes/netalign/},
  numpages = {31},
  owner = {David F. Gleich},
  publisher = {ACM},
  timestamp = {2011.07.01}
}

@inproceedings{Benson-2013-direct-tsqr,
  title = {Direct Tall-and-skinny {QR} factorizations in {MapReduce} architectures},
  author = {Benson, A.R. and Gleich, D.F. and Demmel, J.},
  booktitle = {Big Data, 2013 IEEE International Conference on},
  year = {2013},
  month = oct,
  pages = {264--272},
  abstract = {The QR factorization and the SVD are two fundamental matrix decompositions with applications throughout scientific computing and data analysis. For matrices with many more rows than columns, so-called ``tall-and-skinny matrices,'' there is a numerically stable, efficient, communication-avoiding algorithm for computing the QR factorization. It has been used in traditional high performance computing and grid computing environments. For MapReduce environments, existing methods to compute the QR decomposition use a numerically unstable approach that relies on indirectly computing the Q factor. In the best case, these methods require only two passes over the data. In this paper, we describe how to compute a stable tall-and-skinny QR factorization on a MapReduce architecture in only slightly more than 2 passes over the data. We can compute the SVD with only a small change and no difference in performance. We present a performance comparison between our new direct TSQR method, indirect TSQR methods that use the communication-avoiding TSQR algorithm, and a standard unstable implementation for MapReduce (Cholesky QR). We find that our new stable method is competitive with unstable methods for matrices with a modest number of columns. This holds both in a theoretical performance model as well as in an actual implementation.},
  doi = {10.1109/BigData.2013.6691583},
  file = {:Benson 2013 - direct-tsqr.pdf},
  keywords = {parallel programming;singular value decomposition;Cholesky QR;MapReduce architectures;Q factor;SVD;communication-avoiding TSQR algorithm;data analysis;direct QR factorizations;grid computing;high performance computing;matrix decompositions;quick response factorizations;scientific computing;singular value decomposition;tall-and-skinny matrices;Algorithm design and analysis;Clustering algorithms;Matrix decomposition;Prediction algorithms;Q-factor;Standards;Symmetric matrices;Hadoop;MapReduce;QR;SVD;TSQR;matrix factorization},
  mynote = {Acceptance rate 45/259 = 17\%},
  mysoftware = {https://github.com/arbenson/mrtsqr},
  owner = {David F. Gleich},
  timestamp = {2012.10.04}
}

@article{Benson-2019-dynsys,
  title = {Computing tensor Z-eigenvectors with dynamical systems},
  author = {Austin Benson and David F. Gleich},
  journal = { {SIAM} Journal on Matrix Analysis and Applications},
  pages = {1311--1324},
  volume = {40},
  year = {2019},
  month = jan,
  number = {4},
  arxiv = {https://arxiv.org/abs/1805.00903},
  doi = {10.1137/18M1229584},
  file = {:Benson 2019 - dynamical systems for z-eigenvectors.pdf},
  mysoftware = {https://github.com/arbenson/TZE-dynsys},
  owner = {David Gleich},
  publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})},
  timestamp = {2018.05.07}
}

@article{Benson-2016-motif-spectral,
  author = {Austin Benson and David F. Gleich and Jure Leskovec},
  journal = {Science},
  title = {Higher-order organization of complex networks},
  year = {2016},
  number = {6295},
  pages = {163--166},
  volume = {353},
  arxiv = {https://arxiv.org/abs/1612.08447},
  doi = {10.1126/science.aad9029},
  file = {:Benson 2016 - spectral motifs.pdf},
  mysoftware = {http://snap.stanford.edu/higher-order/},
  owner = {dgleich},
  supplementary = {http://science.sciencemag.org/content/suppl/2016/07/07/353.6295.163.DC1?_ga=2.30502463.1893086197.1534945246-1536034508.1533753067},
  timestamp = {2015.05.19}
}

@article{Benson-2017-srw,
  author = {Austin Benson and David F. Gleich and Lek-Heng Lim},
  journal = {SIAM Review},
  title = {The Spacey Random Walk: a Stochastic Process for Higher-order Data},
  year = {2017},
  month = {May},
  number = {2},
  pages = {321-345},
  volume = {59},
  arxiv = {http://arxiv.org/abs/1602.02102},
  doi = {10.1137/16M1074023},
  file = {:Benson 2017 - spacey.pdf},
  howpublished = {arXiv},
  mysoftware = {https://github.com/arbenson/spacey-random-walks},
  owner = {David F. Gleich},
  timestamp = {2014.08.21},
  url = {http://arxiv.org/abs/1602.02102}
}

@inproceedings{Benson-2015-tensor,
  author = {Austin R. Benson and David F. Gleich and Jure Leskovec},
  booktitle = {Proceedings of the 2015 SIAM International Conference on Data Mining},
  title = {Tensor Spectral Clustering for Partitioning Higher-order Network Structures},
  year = {2015},
  pages = {118-126},
  abstract = {Spectral graph theory-based methods represent an important class of tools for studying the structure of networks. Spectral methods are based on a first-order Markov chain derived from a random walk on the graph and thus they cannot take advantage of important higher-order network substructures such as triangles, cycles, and feed-forward loops. Here we propose a Tensor Spectral Clustering (TSC) algorithm that allows for modeling higher-order network structures in a graph partitioning framework. Our TSC algorithm allows the user to specify which higher-order network structures (cycles, feed-forward loops, etc.) should be preserved by the network clustering. Higher-order network structures of interest are represented using a tensor, which we then partition by developing a multilinear spectral method. Our framework can be applied to discovering layered flows in networks as well as graph anomaly detection, which we illustrate on synthetic networks. In directed networks, a higher-order structure of particular interest is the directed 3-cycle, which captures feedback loops in networks. We demonstrate that our TSC algorithm produces large partitions that cut fewer directed 3-cycles than standard spectral clustering algorithms.},
  arxiv = {https://arxiv.org/abs/1502.05058},
  doi = {10.1137/1.9781611974010.14},
  file = {:Benson 2015 - tensor-sc.pdf},
  location = {Vancouver, BC},
  mynote = {Acceptance rate 22\%},
  owner = {dgleich},
  timestamp = {2015.01.19}
}

@inproceedings{Benson-2014-tsnmf,
  title = {Scalable methods for nonnegative matrix factorizations of near-separable tall-and-skinny matrices},
  author = {Austin R. Benson and Jason D. Lee and Bartek Rajwa and David F. Gleich},
  booktitle = {Proceedings of Neural Information Processing Systems},
  year = {2014},
  note = {Selected for Spotlight Presentation.},
  pages = {945--953},
  journal = {arXiv},
  mynote = {Acceptance rate for spotlight 3.6\% of 1678 submissions. Includes all software.},
  owner = {David F. Gleich},
  timestamp = {2014.02.23},
  url = {http://arxiv.org/abs/1402.6964}
}

@inproceedings{Bonato-2016-character,
  title = {Mining and modeling character networks},
  author = {Anthony Bonato and David Ryan D'Angelo and Ethan R. Elenberg and David F. Gleich and Yangyang Hou},
  booktitle = {International Workshop on Algorithms and Models for the Web-Graph},
  year = {2016},
  editor = {Bonato, Anthony and Graham, Fan Chung and Pra{\l}at, Pawe{\l} },
  pages = {100-114},
  publisher = {Springer International Publishing},
  series = {WAW},
  arxiv = {http://arxiv.org/abs/1608.00646},
  doi = {10.1007/978-3-319-49787-7_9},
  file = {:Bonato 2016 - character networks.pdf},
  isbn = {978-3-319-49787-7},
  journal = {arXiv},
  owner = {dgleich},
  timestamp = {2016.08.08}
}

@inproceedings{Eikmeier-2018-dynamic-competition,
  title = {Dynamic Competition Networks: Detecting Alliances and Leaders},
  author = {Bonato, Anthony
and Eikmeier, Nicole
and Gleich, David F.
and Malik, Rehan},
  booktitle = {Algorithms and Models for the Web Graph},
  year = {2018},
  editor = {Bonato, Anthony
and Pra{\l}at, Pawe{\l}
and Raigorodskii, Andrei},
  pages = {115--144},
  abstract = {We consider social networks of competing agents that evolve dynamically over time. Such dynamic competition networks are directed, where a directed edge from nodes u to v corresponds a negative social interaction. We present a novel hypothesis that serves as a predictive tool to uncover alliances and leaders within dynamic competition networks. Our focus is in the present study is to validate it on competitive networks arising from social game shows such as Survivor and Big Brother.},
  arxiv = {https://arxiv.org/abs/1803.01783},
  doi = {10.1007/978-3-319-92871-5_9},
  file = {:Eikmeier 2018 - dynamic competition.pdf},
  isbn = {978-3-319-92871-5},
  owner = {dgleich},
  timestamp = {2018.06.06}
}

@inproceedings{Bonato-2019-centrality,
  author = {Anthony Bonato and Nicole Eikmeier and David F. Gleich and Rehan Malik},
  booktitle = {Complex Networks and Their Applications VIII},
  title = {Centrality in dynamic competition networks},
  year = {2019},
  pages = {105--116},
  publisher = {Springer International Publishing},
  arxiv = {https://arxiv.org/abs/1909.06810},
  doi = {10.1007/978-3-030-36683-4_9},
  file = {:Bonato 2019 centrality.PDF},
  journal = {arXiv},
  owner = {dgleich},
  timestamp = {2019.09.20}
}

@article{Bonato-2014-dimmatch,
  title = {Dimensionality of Social Networks Using Motifs and Eigenvalues},
  author = {Anthony Bonato and David F. Gleich and Myunghwan Kim and Dieter Mitsche and {Pawe\l} {Pra\l{}at} and Amanda Tian and Stephen J. Young},
  journal = {PLoS ONE},
  pages = {e106052},
  volume = {9},
  year = {2014},
  month = {September},
  number = {9},
  abstract = {We consider the dimensionality of social networks, and develop experiments aimed at predicting that dimension. We find that a social network model with nodes and links sampled from an m-dimensional metric space with power-law distributed influence regions best fits samples from real-world networks when m scales logarithmically with the number of nodes of the network. This supports a logarithmic dimension hypothesis, and we provide evidence with two different social networks, Facebook and LinkedIn. Further, we employ two different methods for confirming the hypothesis: the first uses the distribution of motif counts, and the second exploits the eigenvalue distribution.},
  doi = {10.1371/journal.pone.0106052},
  file = {:Bonato 2014 - dimensionality.pdf},
  howpublished = {Submitted.},
  keywords = {self},
  mysoftware = {https://www.cs.purdue.edu/homes/dgleich/codes/geop-dim/geop-dim.tar.gz},
  owner = {dgleich},
  publisher = {Public Library of Science},
  timestamp = {2014.02.13}
}

@article{Bonchi-2012-fast-katz,
  title = {Fast Matrix Computations for Pairwise and Columnwise Commute Times and {Katz} Scores},
  author = {Bonchi, Francesco and Esfandiar, Pooya and Gleich, David F. and Greif, Chen and Lakshmanan, Laks V.S.},
  journal = {Internet Mathematics},
  pages = {73--112},
  volume = {8},
  year = {2012},
  number = {1-2},
  abstract = {We explore methods for approximating the commute time and Katz score between a pair of nodes. These methods are based on the approach of matrices, moments, and quadrature developed in the numerical linear algebra community. They rely on the Lanczos process and provide upper and lower bounds on an estimate of the pairwise scores. We also explore methods to approximate the commute times and Katz scores from a node to all other nodes in the graph. Here, our approach for the commute times is based on a variation of the conjugate gradient algorithm, and it provides an estimate of all the diagonals of the inverse of a matrix. Our technique for the Katz scores is based on exploiting an empirical localization property of the Katz matrix. We adapt algorithms used for personalized PageRank computing to these Katz scores and theoretically show that this approach is convergent. We evaluate these methods on 17 real-world graphs ranging in size from 1000 to 1,000,000 nodes. Our results show that our pairwise commute-time method and columnwise Katz algorithm both have attractive theoretical properties and empirical performance.},
  doi = {10.1080/15427951.2012.625256},
  file = {:Bonchi 2012 - fast katz.pdf},
  keywords = {self},
  owner = {dfgleic},
  timestamp = {2011.06.30}
}

@inproceedings{constantine2007-pagerank-pce,
  author = {Paul G. Constantine and David F. Gleich},
  booktitle = {Proceedings of the 5th Workshop on Algorithms and Models for the Web Graph ({WAW2007})},
  title = {Using Polynomial Chaos to Compute the Influence of Multiple Random Surfers in the {PageRank} Model},
  year = {2007},
  editor = {Anthony Bonato and Fan Chung Graham},
  pages = {82--95},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  volume = {4863},
  abstract = {The PageRank equation computes the importance of pages in a web graph relative to a single random surfer with a constant teleportation coefficient. To be globally relevant, the teleportation coefficient should account for the influence of all users. Therefore, we correct the PageRank formulation by modeling the teleportation coefficient as a random variable distributed according to user behavior. With this correction, the PageRank values themselves become random. We present two methods to quantify the uncertainty in the random PageRank: a Monte Carlo sampling algorithm and an algorithm based the truncated polynomial chaos expansion of the random quantities. With each of these methods, we compute the expectation and standard deviation of the PageRanks. Our statistical analysis shows that the standard deviation of the PageRanks are uncorrelated with the PageRank vector.},
  doi = {10.1007/978-3-540-77004-6_7},
  file = {:constantine2007 - pagerank pce.pdf},
  key = {CG2007},
  keywords = {self, pagerank, polynomial chaos},
  owner = {David Gleich},
  timestamp = {2007.10.10}
}

@inproceedings{Constantine-2012-svd-signal,
  title = {Distinguishing signal from noise in an {SVD} of simulation data},
  author = {Paul G. Constantine and David F. Gleich},
  booktitle = {Proceedings of the IEEE Conference on Acoustics, Speech, and Signal Processing},
  year = {2012},
  pages = {5333--5336},
  doi = {10.1109/ICASSP.2012.6289125},
  file = {:Constantine 2012 - signal svd.pdf},
  howpublished = {Preprint},
  owner = {David F. Gleich},
  timestamp = {2011.11.28}
}

@conference{Constantine-2011-MRTSQR,
  title = {Tall and skinny {QR} factorizations in {MapReduce} architectures},
  author = {Constantine, Paul G. and Gleich, David F.},
  booktitle = {Proceedings of the second international workshop on MapReduce and its applications},
  year = {2011},
  address = {New York, NY, USA},
  month = jun,
  pages = {43--50},
  publisher = {ACM},
  series = {MapReduce '11},
  acmid = {1996103},
  doi = {10.1145/1996092.1996103},
  file = {:Constantine 2011 - TSQR.pdf},
  isbn = {978-1-4503-0700-0},
  keywords = {Hadoop, QR factorization, linear regression, matrix factorization, tsqr},
  location = {San Jose, California, USA},
  numpages = {8},
  owner = {David F. Gleich},
  timestamp = {2011.06.18}
}

@article{constantine2010-rapr,
  title = {Random Alpha {PageRank} },
  author = {Paul G. Constantine and David F. Gleich},
  journal = {Internet Mathematics},
  pages = {189--236},
  volume = {6},
  year = {2010},
  month = {September},
  number = {2},
  abstract = {We suggest a revision to the PageRank random surfer model that considers the influence of a population of random surfers on the PageRank vector. In the revised model, each member of the population has its own teleportation parameter chosen from a probability distribution, and consequently, the ranking vector is random. We propose three algorithms for computing the statistics of the random ranking vector based respectively on (i) random sampling, (ii) paths along the links of the underlying graph, and (iii) quadrature formulas. We find that the expectation of the random ranking vector produces similar rankings to its deterministic analogue, but the standard deviation gives uncorrelated information (under a Kendall-tau metric) with myriad potential uses. We examine applications of this model to web spam.},
  doi = {10.1080/15427951.2009.10129185},
  file = {:constantine 2010 - random alpha pagerank.pdf},
  keywords = {self},
  owner = {David Gleich},
  timestamp = {2010.01.07},
  url = {http://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.im/1285339073}
}

@article{Constantine-2014-mrmr,
  title = {Model Reduction with {Map-Reduce}-enabled Tall and Skinny Singular Value Decomposition},
  author = {Paul G. Constantine and David F. Gleich and Yangyang Hou and Jeremy Templeton},
  journal = {SIAM J. Sci. Comput.},
  pages = {S166-S191},
  volume = {36},
  year = {2014},
  month = {November},
  number = {5},
  doi = {10.1137/130925219},
  file = {:Constantine 2014 - mapreduce model reduction.pdf},
  owner = {dgleich},
  timestamp = {2013.06.25},
  url = {http://arxiv.org/abs/1306.4690}
}

@article{Constantine-2011-gni,
  title = {A factorization of the spectral {Galerkin} system for parameterized matrix equations: derivation and applications},
  author = {Paul G. Constantine and David F. Gleich and Gianluca Iaccarino},
  journal = {SIAM Journal of Scientific Computing},
  pages = {2995--3009},
  volume = {33},
  year = {2011},
  number = {5},
  abstract = {Recent work has explored solver strategies for the linear system of equations arising from a spectral Galerkin approximation of the solution of PDEs with parameterized (or stochastic) inputs. We consider the related problem of a matrix equation whose matrix and right-hand side depend on a set of parameters (e.g., a PDE with stochastic inputs semidiscretized in space) and examine the linear system arising from a similar Galerkin approximation of the solution. We derive a useful factorization of this system of equations, which yields bounds on the eigenvalues, clues to preconditioning, and a flexible implementation method for a wide array of problems. We complement this analysis with (i) a numerical study of preconditioners on a standard elliptic PDE test problem and (ii) a fluids application using existing CFD codes; the MATLAB codes used in the numerical studies are available online.},
  doi = {10.1137/100799046},
  file = {:Constantine 2011 - spectral galerkin.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2010.09.15}
}

@article{constantine2010-parameterized,
  title = {Spectral Methods for Parameterized Matrix Equations},
  author = {Paul G. Constantine and David F. Gleich and Gianluca Iaccarino},
  journal = {SIAM Journal on Matrix Analysis and Applications},
  pages = {2681-2699},
  volume = {31},
  year = {2010},
  number = {5},
  abstract = {We apply polynomial approximation methods—known in the numerical PDEs context as spectral methods—to approximate the vector-valued function that satisfies a linear system of equations where the matrix and the right-hand side depend on a parameter. We derive both an interpolatory pseudospectral method and a residual-minimizing Galerkin method, and we show how each can be interpreted as solving a truncated infinite system of equations; the difference between the two methods lies in where the truncation occurs. Using classical theory, we derive asymptotic error estimates related to the region of analyticity of the solution, and we present a practical residual error estimate. We verify the results with two numerical examples.},
  doi = {10.1137/090755965},
  file = {:constantine 2010 - parameterized matrix equations.pdf},
  keywords = {parameterized systems; spectral methods; self},
  owner = {David F. Gleich},
  publisher = {SIAM},
  timestamp = {2010.09.24}
}

@inproceedings{decoste2005-recommender,
  title = {Recommender Systems Research at {Yahoo! Research Labs} },
  author = {Dennis Decoste and David F. Gleich and Tejaswi Kasturi and Sathiya Keerthi and Omid Madani and Seung-Taek Park and David M. Pennock and Corey Porter and Sumit Sanghai and Farial Shahnaz and Leonid Zhukov},
  booktitle = {Beyond Personalization},
  year = {2005},
  address = {San Diego, CA},
  month = {January},
  note = {Position Statement},
  abstract = {We describe some of the ongoing projects at Yahoo! Research Labs that involve recommender systems. We discuss recommender systems related problems and solutions relevant to Yahoo!’s business.},
  file = {:decoste2005 - yahoo recommender systems.pdf},
  key = {DGK+05},
  keywords = {self, recommender systems},
  owner = {David Gleich},
  timestamp = {2006.10.13}
}

@article{Eikmeier-2020-triangle-pa,
  author = {Nicole Eikmeier and David F. Gleich},
  journal = {Journal of Complex Networks},
  title = {Classes of Preferential Attachment and Triangle Preferential Attachment Models with Power-law Spectra},
  year = {2020},
  month = aug,
  number = {4},
  pages = {cnz040},
  volume = {8},
  arxiv = {https://arxiv.org/abs/1904.12989},
  doi = {10.1093/comnet/cnz040},
  file = {:Eikmeier 2020 - triangle pa.pdf},
  mysoftware = {https://github.com/eikmeier/TGPA},
  owner = {dgleich},
  timestamp = {2019-05-01}
}

@inproceedings{Eikmeier-2017-power-laws,
  title = {Revisiting Power-law Distributions in Spectra of Real World Networks},
  author = {Eikmeier, Nicole and Gleich, David F.},
  booktitle = {Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  year = {2017},
  address = {New York, NY, USA},
  pages = {817--826},
  publisher = {ACM},
  series = {KDD '17},
  acmid = {3098128},
  doi = {10.1145/3097983.3098128},
  file = {:Eikmeier 2017 - power-laws.pdf},
  isbn = {978-1-4503-4887-4},
  keywords = {degree distributions, graph spectrum, power-law distributions, real world networks},
  location = {Halifax, NS, Canada},
  mynote = {Acceptance rate: 131/748 = 17.5%},
  mysoftware = {https://github.com/eikmeier/powerlaw-spectra},
  numpages = {10},
  owner = {dgleich},
  timestamp = {2017.03.22},
  url = {http://doi.acm.org/10.1145/3097983.3098128}
}

@inproceedings{Eikmeier-2018-hyperkron,
  author = {Nicole Eikmeier and Arjun S. Ramani and David F. Gleich},
  booktitle = {Proceedings of the International Conference on Data Mining (ICDM)},
  title = {The HyperKron Graph Model for higher-order features},
  year = {2018},
  month = nov,
  pages = {941-946},
  abstract = {In this manuscript we present the HyperKron Graph model: an extension of the Kronecker Model, but with a distribution over hyperedges. We prove that we can efficiently generate graphs from this model in time proportional to the number of edges times a small log-factor, and find that in practice the runtime is linear with respect to the number of edges. We illustrate a number of useful features of the HyperKron model including non-trivial clustering and highly skewed degree distributions. Finally, we fit the HyperKron model to real-world networks, and demonstrate the model's flexibility with a complex application of the HyperKron model to networks with coherent feed-forward loops.},
  arxiv = {https://arxiv.org/abs/1809.03488},
  doi = {10.1109/ICDM.2018.00115},
  file = {:Eikmeier 2018 - hyperkron.pdf},
  issn = {2374-8486},
  keywords = {graph theory;network theory (graphs);pattern clustering;Kronecker model;HyperKron graph model;nontrivial clustering;coherent feed-forward loops;higher-order features;highly skewed degree distributions;Computational modeling;Data models;Symmetric matrices;Indexes;Generators;Stochastic processes;kronecker graph, graph model, hyperedges},
  mynote = {Acceptance rate 189/948 (19.9\%)},
  mysoftware = {https://www.cs.purdue.edu/homes/dgleich/codes/hyperkron/},
  owner = {dgleich},
  timestamp = {2018.07.27}
}

@inproceedings{esfandiar2010-fast-katz,
  title = {Fast {Katz} and Commuters: Efficient Approximation of Social Relatedness over Large Networks},
  author = {Pooya Esfandiar and Francesco Bonchi and David F. Gleich and Chen Greif and Laks V. S. Lakshmanan and Byung-Won On},
  booktitle = {Algorithms and Models for the Web Graph},
  year = {2010},
  abstract = {Motivated by social network data mining problems such as link prediction and collaborative filtering, significant research effort has been devoted to computing topological measures including the Katz score and the commute time. Existing approaches typically approximate all pairwise relationships simultaneously. In this paper, we are interested in computing: the score for a single pair of nodes, and the top-k nodes with the best scores from a given source node. For the pairwise problem, we apply an iterative algorithm that computes upper and lower bounds for the measures we seek. This algorithm exploits a relationship between the Lanczos process and a quadrature rule. For the top-k problem, we propose an algorithm that only accesses a small portion of the graph and is related to techniques used in personalized PageRank computing. To test the scalability and accuracy of our algorithms we experiment with three real-world networks and find that these algorithms run in milliseconds to seconds without any preprocessing.},
  doi = {10.1007/978-3-642-18009-5_13},
  file = {:esfandiar 2010 - fast katz.pdf},
  keywords = {self},
  owner = {David Gleich},
  timestamp = {2010.04.08}
}

@article{Fountoulakis-2017-locally-biased,
  title = {An Optimization Approach to Locally-Biased Graph Algorithms},
  author = {Kimon Fountoulakis and David F. Gleich and Michael W. Mahoney},
  journal = {Proceedings of the IEEE},
  pages = {256-272},
  volume = {105},
  year = {2017},
  month = {February},
  number = {2},
  abstract = {Locally-biased graph algorithms are algorithms that attempt to find local or small-scale structure in a large data graph. In some cases, this can be accomplished by adding some sort of locality constraint and calling a traditional graph algorithm; but more interesting are locally-biased graph algorithms that compute answers by running a procedure that does not even look at most of the input graph. This corresponds more closely to what practitioners from various data science domains do, but it does not correspond well with the way that algorithmic and statistical theory is typically formulated. Recent work from several research communities has focused on developing locally-biased graph algorithms that come with strong complementary algorithmic and statistical theory and that are useful in practice in downstream data science applications. We provide a review and overview of this work, highlighting commonalities between seemingly different approaches, and highlighting promising directions for future work.},
  arxiv = {https://arxiv.org/abs/1607.04940},
  doi = {10.1109/JPROC.2016.2637349},
  file = {:Fountoulakis 2017 - locally biased.pdf},
  issn = {0018-9219},
  keywords = {data analysis;graph theory;learning (artificial intelligence);network theory (graphs);optimisation;pattern clustering;clustering;data science application;locally-biased graph algorithm;network flow;optimization approach;semisupervised learning;Algorithm design and analysis;Approximation algorithms;Clustering algorithms;Linear programming;Optimization;Partitioning algorithms;Clustering;graph algorithms;local algorithms;network flow;partitioning;semi-supervised learning;spectral graph theory},
  owner = {dgleich},
  timestamp = {2016.02.29}
}

@inproceedings{Gan-2020-LambdaPrime,
  author = {Junhao Gan and David F. Gleich and Nate Veldt and Anthony Wirth and Xin Zhang},
  booktitle = {45th International Symposium on Mathematical Foundations of Computer Science (MFCS 2020)},
  title = {Graph Clustering in All Parameter Regimes},
  year = {2020},
  address = {Dagstuhl, Germany},
  editor = {Javier Esparza and Daniel Kr{\'a}l},
  pages = {39:1--39:15},
  publisher = {Schloss Dagstuhl--Leibniz-Zentrum f{\"u}r Informatik},
  series = {Leibniz International Proceedings in Informatics (LIPIcs)},
  volume = {170},
  annote = {Keywords: Graph Clustering, Algorithms, Parametric Linear Programming},
  arxiv = {https://arxiv.org/abs/1910.06435},
  doi = {10.4230/LIPIcs.MFCS.2020.39},
  file = {:Gan 2020 - all parameters.pdf},
  isbn = {978-3-95977-159-7},
  issn = {1868-8969},
  url = {https://drops.dagstuhl.de/opus/volltexte/2020/12706},
  urn = {urn:nbn:de:0030-drops-127065}
}

@article{Gleich-2015-prbeyond,
  title = { {PageRank} beyond the Web},
  author = {David F. Gleich},
  journal = {SIAM Review},
  pages = {321--363},
  volume = {57},
  year = {2015},
  month = {August},
  number = {3},
  abstract = {Google's PageRank method was developed to evaluate the importance of web-pages via their link structure. The mathematics of PageRank, however, are entirely general and apply to any graph or network in any domain. Thus, PageRank is now regularly used in bibliometrics, social and information network analysis, and for link prediction and recommendation. It's even used for systems analysis of road networks, as well as biology, chemistry, neuroscience, and physics. We'll see the mathematics and ideas that unite these diverse applications.},
  doi = {10.1137/140976649},
  file = {:Gleich 2015 - prbeyond.pdf},
  owner = {David F. Gleich},
  timestamp = {2014.08.08}
}

@article{Gleich-2013-expanders,
  title = {Expanders, tropical semi-rings, and nuclear norms: oh my!},
  author = {Gleich, David F.},
  journal = {XRDS},
  pages = {32--36},
  volume = {19},
  year = {2013},
  month = {March},
  number = {3},
  acmid = {2425688},
  address = {New York, NY, USA},
  doi = {10.1145/2425676.2425688},
  file = {:Gleich 2013 - expanders.pdf},
  issn = {1528-4972},
  issue_date = {Spring 2013},
  numpages = {5},
  owner = {dgleich},
  publisher = {ACM},
  timestamp = {2013.04.06}
}

@inproceedings{constantine2010-teleportation,
  title = {Tracking the random surfer: empirically measured teleportation parameters in {PageRank} },
  author = {David F. Gleich and Paul G. Constantine and Abraham Flaxman and Asela Gunawardana},
  booktitle = {WWW '10: Proceedings of the 19th international conference on World wide web},
  year = {2010},
  month = {April},
  pages = {381--390},
  abstract = {PageRank computes the importance of each node in a directed graph under a random surfer model governed by a teleportation parameter. Commonly denoted alpha, this parameter models the probability of following an edge inside the graph or, when the graph comes from a network of web pages and links, clicking a link on a web page. We empirically measure the teleportation parameter based on browser toolbar logs and a click trail analysis. For a particular user or machine, such analysis produces a value of alpha. We find that these values nicely fit a Beta distribution with mean edge-following probability between 0.3 and 0.7, depending on the site. Using these distributions, we compute PageRank scores where PageRank is computed with respect to a distribution as the teleportation parameter, rather than a constant teleportation parameter. These new metrics are evaluated on the graph of pages in Wikipedia.},
  doi = {10.1145/1772690.1772730},
  file = {:gleich 2010 - tracking the random surfer.pdf},
  isbn = {978-1-60558-799-8},
  keywords = {self},
  location = {Raleigh, North Carolina, USA},
  mynote = {Acceptance rate = 14%, 104/743 submissions},
  owner = {David F. Gleich},
  timestamp = {2009.12.03}
}

@article{gleich2010-inner-outer,
  title = {An inner-outer iteration for {PageRank} },
  author = {David F. Gleich and Andrew P. Gray and Chen Greif and Tracy Lau},
  journal = {SIAM Journal of Scientific Computing},
  pages = {349--371},
  volume = {32},
  year = {2010},
  month = {February},
  number = {1},
  abstract = {We present a new iterative scheme for PageRank computation. The algorithm is applied to the linear system formulation of the problem, using inner-outer stationary iterations. It is simple, can be easily implemented and parallelized, and requires minimal storage overhead. Our convergence analysis shows that the algorithm is effective for a crude inner tolerance and is not sensitive to the choice of the parameters involved. The same idea can be used as a preconditioning technique for nonstationary schemes. Numerical examples featuring matrices of dimensions exceeding 100,000,000 in sequential and parallel environments demonstrate the merits of our technique. Our code is available online for viewing and testing, along with several large scale examples.},
  doi = {10.1137/080727397},
  file = {:gleich 2010 - inner-outer pagerank.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2009.07.10}
}

@article{Gleich2011-block-circulant,
  title = {The power and {Arnoldi} methods in an algebra of circulants},
  author = {David F. Gleich and Chen Greif and James M. Varah},
  journal = {Numerical Linear Algebra with Applications},
  pages = {809-831},
  volume = {20},
  year = {2013},
  month = {October},
  abstract = {Circulant matrices play a central role in a recently proposed formulation of three-way data computations. In this setting, a three-way table corresponds to a matrix where each "scalar" is a vector of parameters defining a circulant. This interpretation provides many generalizations of results from matrix or vector-space algebra. We derive the power and Arnoldi methods in this algebra. In the course of our derivation, we define inner products, norms, and other notions. These extensions are straightforward in an algebraic sense, but the implications are dramatically different from the standard matrix case. For example, a matrix of circulants has a polynomial number of eigenvalues in its dimension; although, these can all be represented by a carefully chosen canonical set of eigenvalues and vectors. These results and algorithms are closely related to standard decoupling techniques on block-circulant matrices using the fast Fourier transform.},
  doi = {10.1002/nla.1845},
  file = {:Gleich 2013 - circulant.pdf},
  howpublished = {Submitted. Preprint arXiv:1101.2173.},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.01.06}
}

@article{Gleich-2015-sublinear,
  title = {Sublinear Column-wise Actions of the Matrix Exponential on Social Networks},
  author = {David F. Gleich and Kyle Kloster},
  journal = {Internet Mathematics},
  pages = {352--384},
  volume = {11},
  year = {2015},
  number = {4--5},
  abstract = {We consider stochastic transition matrices from large social and information networks. For these matrices, we describe and evaluate three fast methods to estimate one column of the matrix exponential. The methods are designed to exploit the properties inherent in social networks, such as a power-law degree distribution. Using only this property, we prove that one of our three algorithms has a sublinear runtime. We present further experimental evidence showing that all three of them run quickly on social networks with billions of edges, and they accurately identify the largest elements of the column.},
  doi = {10.1080/15427951.2014.971203},
  file = {:Gleich 2015 - sublinear columns.pdf},
  owner = {David Gleich},
  timestamp = {2015.04.15}
}

@article{Gleich-2017-seeded-localization,
  title = {Localization in Seeded {PageRank} },
  author = {David F. Gleich and Kyle Kloster and Huda Nassar},
  journal = {Internet Mathematics},
  pages = {Online},
  year = {2017},
  arxiv = {https://arxiv.org/abs/1509.00016},
  doi = {10.24166/im.001.2017},
  file = {:Nassar 2017 - localization.pdf},
  howpublished = {Submitted.},
  mysoftware = {https://github.com/nassarhuda/pprlocal},
  owner = {David Gleich},
  timestamp = {2016.05.24}
}

@inproceedings{Gleich-2011-skew-nuclear,
  title = {Rank aggregation via nuclear norm minimization},
  author = {Gleich, David F. and Lim, Lek-Heng},
  booktitle = {Proceedings of the 17th ACM SIGKDD international conference on Knowledge discovery and data mining},
  year = {2011},
  address = {New York, NY, USA},
  pages = {60--68},
  publisher = {ACM},
  series = {KDD '11},
  abstract = {The process of rank aggregation is intimately intertwined with the structure of skew-symmetric matrices. We apply recent advances in the theory and algorithms of matrix completion to skew-symmetric matrices. This combination of ideas produces a new method for ranking a set of items. The essence of our idea is that a rank aggregation describes a partially filled skew-symmetric matrix. We extend an algorithm for matrix completion to handle skew-symmetric data and use that to extract ranks for each item. Our algorithm applies to both pairwise comparison and rating data. Because it is based on matrix completion, it is robust to both noise and incomplete data. We show a formal recovery result for the noiseless case and present a detailed study of the algorithm on synthetic data and Netflix ratings.},
  acmid = {2020425},
  doi = {10.1145/2020408.2020425},
  file = {:Gleich 2011 - rank aggregation.pdf},
  isbn = {978-1-4503-0813-7},
  keywords = {self, nuclear norm, rank aggregation, skew symmetric},
  location = {San Diego, California, USA},
  mynote = {Accepted for oral presentation 56/714 (overall accept 56+69 of 714 = 17.5%)},
  numpages = {9},
  owner = {David F. Gleich},
  timestamp = {2010.01.30}
}

@article{Gleich-2015-mlpr,
  author = {David F. Gleich and Lek-Heng Lim and Yongyang Yu},
  journal = {SIAM Journal on Matrix Analysis and Applications},
  title = {Multilinear {PageRank} },
  year = {2015},
  number = {4},
  pages = {1507-1541},
  volume = {36},
  abstract = {In this paper, we first extend the celebrated PageRank modification to a higher-order Markov chain. Although this system has attractive theoretical properties, it is computationally intractable for many interesting problems. We next study a computationally tractable approximation to the higher-order PageRank vector that involves a system of polynomial equations called multilinear PageRank. This is motivated by a novel ``spacey random surfer'' model, where the surfer remembers bits and pieces of history and is influenced by this information. The underlying stochastic process is an instance of a vertex-reinforced random walk. We develop convergence theory for a simple fixed-point method, a shifted fixed-point method, and a Newton iteration in a particular parameter regime. In marked contrast to the case of the PageRank vector of a Markov chain where the solution is always unique and easy to compute, there are parameter regimes of multilinear PageRank where solutions are not unique and simple algorithms do not converge. We provide a repository of these nonconvergent cases that we encountered through exhaustive enumeration and randomly sampling that we believe is useful for future study of the problem.},
  arxiv = {http://arxiv.org/abs/1409.1465},
  doi = {10.1137/140985160},
  file = {:Gleich 2015 - multilinear pagerank.pdf},
  mysoftware = {https://github.com/dgleich/mlpagerank},
  owner = {David F. Gleich},
  timestamp = {2014.09.04}
}

@inproceedings{Gleich-2014-alg-anti-diff,
  author = {David F. Gleich and Michael M. Mahoney},
  booktitle = {Proceedings of the International Conference on Machine Learning (ICML)},
  title = {Anti-differentiating approximation algorithms: A case study with min-cuts, spectral, and flow},
  year = {2014},
  pages = {1018-1025},
  comment = {25\% Acceptance Ratio},
  file = {:Gleich 2014 - anti-diff.pdf},
  mynote = {Acceptance rate 310/1238 = 25.0\%},
  owner = {dgleich},
  timestamp = {2014.02.05},
  url = {http://proceedings.mlr.press/v32/gleich14.html}
}

@incollection{Gleich-2016-mining,
  author = {David F. Gleich and Michael W. Mahoney},
  booktitle = {Handbook of Big Data},
  publisher = {CRC Press},
  title = {Mining large graphs},
  year = {2016},
  editor = {Peter B\"{u}hlmann and Petros Drineas and Michael Kane and Mark van de Laan},
  isbn = {978-1-4822-4907-1},
  pages = {191-220},
  series = {Handbooks of modern statistical methods},
  doi = {10.1201/b19567-20},
  file = {:Gleich 2016 - graph mining.pdf},
  owner = {David Gleich},
  timestamp = {2015.09.02}
}

@inproceedings{Gleich-2015-robustifying,
  title = {Using Local Spectral Methods to Robustify Graph-Based Learning Algorithms},
  author = {Gleich, David F. and Mahoney, Michael W.},
  booktitle = {Proceedings of the 21th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  year = {2015},
  address = {New York, NY, USA},
  pages = {359--368},
  publisher = {ACM},
  series = {KDD '15},
  abstract = {Graph-based learning methods have a variety of names including semi-supervised and transductive learning. They typically use a diffusion to propagate labels from a small set of nodes with known class labels to the remaining nodes of the graph. While popular, these algorithms, when implemented in a straightforward fashion, are extremely sensitive to the details of the graph construction. Here, we provide four procedures to help make them more robust: recognizing implicit regularization in the diffusion, using a scalable push method to evaluate the diffusion, using rank-based rounding, and densifying the graph through a matrix polynomial. We study robustness with respect to the details of graph constructions, errors in node labeling, degree variability, and a variety of other real-world heterogeneities, studying these methods through a precise relationship with mincut problems. For instance, the densification strategy explicitly adds new weighted edges to a sparse graph. We find that this simple densification creates a graph where multiple diffusion methods are robust to several types of errors. This is demonstrated by a study with predicting product categories from an Amazon co-purchasing network.},
  acmid = {2783376},
  doi = {10.1145/2783258.2783376},
  file = {:Gleich 2015 - robustifying.pdf},
  isbn = {978-1-4503-3664-2},
  keywords = {clustering, diffusions, robustifying, semi-supervised learning},
  location = {Sydney, NSW, Australia},
  mysoftware = {https://www.cs.purdue.edu/homes/dgleich/codes/robust-diffusions/},
  numpages = {10},
  owner = {David Gleich},
  timestamp = {2015.08.19}
}

@article{Gleich-2012-kronecker,
  title = {Moment based estimation of stochastic {Kronecker} graph parameters},
  author = {David F. Gleich and Art B. Owen},
  journal = {Internet Mathematics},
  pages = {232-256},
  volume = {8},
  year = {2012},
  month = {August},
  number = {3},
  abstract = {Stochastic Kronecker graphs supply a parsimonious model for large sparse real-world graphs. They can specify the distribution of a large random graph using only three or four parameters. Those parameters have, however, proved difficult to choose in specific applications. This article looks at method-of-moments estimators that are computationally much simpler than maximum likelihood. The estimators are fast, and in our examples, they typically yield Kronecker parameters with expected feature counts closer to a given graph than we get from KronFit. The improvement is especially prominent for the number of triangles in the graph.},
  doi = {10.1080/15427951.2012.680824},
  file = {:Gleich 2012 - Moment based Kronecker.pdf},
  owner = {David F. Gleich},
  timestamp = {2011.07.02}
}

@article{gleich2007-approximate,
  title = {Approximating Personalized {PageRank} with Minimal Use of Webgraph Data},
  author = {David F. Gleich and Marzia Polito},
  journal = {Internet Mathematics},
  pages = {257--294},
  volume = {3},
  year = {2007},
  month = {December},
  number = {3},
  abstract = {In this paper, we consider the problem of calculating fast and accurate approximations to the personalized PageRank score of a webpage. We focus on techniques to improve speed by limiting the amount of web graph data we need to access. Our algorithms provide both the approximation to the personalized PageRank score as well as guidance in using only the necessary information—and therefore sensibly reduce not only the computational cost of the algorithm but also the memory and memory bandwidth requirements. We report experiments with these algorithms on web graphs of up to 118 million pages and prove a theoretical approximation bound for all. Finally, we propose a local, personalized web-search system for a future client system using our algorithms.},
  doi = {10.1080/15427951.2006.10129128},
  file = {:gleich 2007 - approximate personalized pagerank.pdf},
  key = {GP2007},
  keywords = {pagerank, self},
  owner = {David Gleich},
  timestamp = {2006.11.27}
}

@article{Gleich-2014-dynamic-pagerank,
  title = {A Dynamical System for {PageRank} with Time-Dependent Teleportation},
  author = {David F. Gleich and Ryan A. Rossi},
  journal = {Internet Mathematics},
  pages = {188-217},
  volume = {10},
  year = {2014},
  month = {June},
  number = {1--2},
  abstract = {We propose a dynamical system that captures changes to the network centrality of nodes as external interest in those nodes varies. We derive this system by adding time-dependent teleportation to the PageRank score. The result is not a single set of importance scores, but rather a time-dependent set. These can be converted into ranked lists in a variety of ways, for instance, by taking the largest change in the importance score. For an interesting class of dynamic teleportation functions, we derive closed-form solutions for the dynamic PageRank vector. The magnitude of the deviation from a static PageRank vector is given by a PageRank problem with complex-valued teleportation parameters. Moreover, these dynamical systems are easy to evaluate. We demonstrate the utility of dynamic teleportation on both the article graph of Wikipedia, where the external interest information is given by the number of hourly visitors to each page, and the Twitter social network, where external interest is the number of tweets per month. For these problems, we show that using information from the dynamical system helps improve a prediction task and identify trends in the data.},
  doi = {10.1080/15427951.2013.814092},
  file = {:Gleich 2014 - dynamical system.pdf},
  owner = {David F. Gleich},
  timestamp = {2012.11.17}
}

@inproceedings{Gleich-2012-neighborhoods,
  title = {Vertex neighborhoods, low conductance cuts, and good seeds for local community methods},
  author = {David F. Gleich and C. Seshadhri},
  booktitle = {KDD2012},
  year = {2012},
  month = aug,
  pages = {597--605},
  doi = {10.1145/2339530.2339628},
  file = {:Gleich 2012 - neighborhoods.pdf},
  howpublished = {Preprint},
  mynote = {Acceptance rate 133/755 = 17.6\%},
  owner = {David F. Gleich},
  timestamp = {2011.11.28}
}

@inproceedings{Gleich-2018-cc-generalized,
  title = {Correlation Clustering Generalized},
  author = {David F. Gleich and Nate Veldt and Anthony Wirth},
  booktitle = {Proceedings of 29th International Symposium on Algorithms and Computation},
  year = {2018},
  note = {Authors in Alphabetical Order following CS Theory Convention.},
  pages = {44:1--44:13},
  arxiv = {https://arxiv.org/abs/1809.09493},
  doi = {10.4230/LIPIcs.ISAAC.2018.44},
  file = {:Gleich 2018 - generalized-cc.pdf},
  owner = {dgleich},
  timestamp = {2018.08.27},
  url = {https://arxiv.org/abs/1809.09493}
}

@article{Gleich-2011-cads,
  title = {Some computational tools for digital archive and metadata maintenance},
  author = {Gleich, David F. and Wang, Ying and Meng, Xiangrui and Ronaghi, Farnaz and Gerritsen, Margot and Saberi, Amin},
  journal = {BIT Numerical Mathematics},
  pages = {127-154},
  volume = {51},
  year = {2011},
  abstract = {Computational tools are a mainstay of current search and recommendation technology. But modern digital archives are astonishingly diverse collections of older digitized material and newer born digital content. Finding interesting material in these archives is still challenging. The material often lacks appropriate annotation---or metadata---so that people can find the most interesting material. We describe four computational tools we developed to aid in the processing and maintenance of large digital archives. The first is an improvement to a graph layout algorithm for graphs with hundreds of thousands of nodes. The second is a new algorithm for matching databases with links among the objects, also known as a network alignment problem. The third is an optimization heuristic to disambiguate a set of geographic references in a book. And the fourth is a technique to automatically generate a title from a description.},
  affiliation = {Sandia National Laboratories, Livermore, CA 94550, USA},
  doi = {10.1007/s10543-011-0324-6},
  file = {:gleich 2011 - comptuational tools.pdf},
  issn = {0006-3835},
  issue = {1},
  keyword = {Mathematics and Statistics},
  keywords = {self},
  owner = {David F. Gleich},
  publisher = {Springer Netherlands},
  timestamp = {2011.03.25}
}

@inproceedings{gleich2005-ppagerank,
  title = {Scalable Computing with Power-Law Graphs: Experience with Parallel {PageRank} },
  author = {David F. Gleich and Leonid Zhukov},
  booktitle = {SuperComputing 2005},
  year = {2005},
  month = {November},
  note = {Poster.},
  file = {:gleich2005 - parallel pagerank.pdf},
  key = {GZ2005},
  keywords = {self, pagerank},
  owner = {David Gleich},
  timestamp = {2006.10.13},
  url = {http://www.cs.purdue.edu/homes/dgleich/publications/gleich2005 - parallel pagerank.pdf}
}

@inproceedings{gleich2004-svd,
  title = {An {SVD} Based Term Suggestion and Ranking System},
  author = {David F. Gleich and Leonid Zhukov},
  booktitle = {ICDM '04: Proceedings of the Fourth IEEE International Conference on Data Mining (ICDM'04)},
  year = {2004},
  address = {Brighton, UK},
  month = {November},
  pages = {391--394},
  publisher = {IEEE Computer Society},
  abstract = {In this paper, we consider the application of the singular value decomposition (SVD) to a search term suggestion system in a pay-for-performance search market. We propose a novel positive and negative refinement method based on orthogonal subspace projections. We demonstrate that SVD subspace-based methods: 1) expand coverage by reordering the results, and 2) enhance the clustered structure of the data. The numerical experiments reported in this paper were performed on Overture's pay-per-performance search market data.},
  doi = {10.1109/ICDM.2004.10006},
  file = {:gleich 2004 - svd based term suggestion and ranking.pdf},
  isbn = {0-7695-2142-8},
  key = {GZ2004},
  keywords = {self},
  owner = {David Gleich},
  timestamp = {2006.10.13}
}

@inproceedings{gleich2005-wom,
  title = {The {World of Music}: {SDP} Embedding of High Dimensional data},
  author = {David F. Gleich and Leonid Zhukov and Matthew Rasmussen and Kevin Lang},
  booktitle = {Information Visualization 2005},
  year = {2005},
  note = {Interactive Poster},
  abstract = {In this paper we investigate the use of Semidefinite Programming (SDP) optimization for high dimensional data layout and graph visualization. We developed a set of interactive visualization tools and used them on music artist ratings data from Yahoo!. The computed layout preserves a natural grouping of the artists and provides visual assistance for browsing large music collections.},
  file = {:gleich2005 - world of music.pdf},
  key = {GZRL2005},
  keywords = {self, recommender systems},
  owner = {David Gleich},
  timestamp = {2006.10.13},
  url = {http://www.cs.purdue.edu/homes/dgleich/publications/gleich2005 - world of music.pdf}
}

@inproceedings{Hou-2016-palm,
  author = {Yangyang Hou and Joyce Jiyoung Whang and David F. Gleich and Inderjit Dhillon},
  booktitle = {Proceedings of the 2016 SIAM International Conference on Data Mining},
  title = {Fast Multiplier Methods to Optimize Non-exhaustive, Overlapping Clustering},
  year = {2016},
  pages = {297--305},
  arxiv = {http://arxiv.org/abs/1602.01910},
  doi = {10.1137/1.9781611974348.34},
  howpublished = {Submitted},
  mynote = {96 of 374 submissions, 25\%},
  owner = {David Gleich},
  timestamp = {2015.10.21}
}

@inproceedings{Hou-2015-low-rank,
  title = {Non-exhaustive, Overlapping Clustering via Low-Rank Semidefinite Programming},
  author = {Hou, Yangyang and Whang, Joyce Jiyoung and Gleich, David F. and Dhillon, Inderjit S.},
  booktitle = {Proceedings of the 21th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  year = {2015},
  address = {New York, NY, USA},
  pages = {427--436},
  publisher = {ACM},
  series = {KDD '15},
  acmid = {2783398},
  doi = {10.1145/2783258.2783398},
  file = {:Hou 2015 - low-rank.pdf},
  isbn = {978-1-4503-3664-2},
  keywords = {community detection, overlapping clustering, semidefinite programming},
  location = {Sydney, NSW, Australia},
  mynote = {Acceptance rate 159/819 = 19\%},
  numpages = {10},
  owner = {David Gleich},
  timestamp = {2015.07.28}
}

@article{Ibrahim-2020-hocrd,
  author = {Rania Ibrahim and David F. Gleich},
  journal = {PLoS ONE},
  title = {Local Hypergraph Clustering using Capacity Releasing Diffusion},
  year = {2020},
  month = dec,
  number = {12},
  pages = {e0243485},
  volume = {15},
  arxiv = {https://arxiv.org/abs/2003.04213},
  doi = {10.1371/journal.pone.0243485},
  file = {:Ibrahim 2020 - hyper-graph CRD.pdf},
  mysoftware = {https://www.dropbox.com/s/acbcg9adupd26no/Higher_order_capacity_releasing_diffusion-master%20%281%29.zip?dl=0},
  owner = {dgleich},
  timestamp = {2019.08.20}
}

@inproceedings{Ibrahim-2019-nonlinear,
  title = {Nonlinear Diffusion for Community Detection and Semi-Supervised Learning},
  author = {Rania Ibrahim and David F. Gleich},
  booktitle = {The World Wide Web Conference},
  year = {2019},
  address = {New York, NY, USA},
  pages = {739--750},
  publisher = {ACM},
  series = {WWW '19},
  abstract = {Diffusions, such as the heat kernel diffusion and the PageRank vector, and their relatives are widely used graph mining primitives that have been successful in a variety of contexts including community detection and semi-supervised learning. The majority of existing methods and methodology involves linear diffusions, which then yield simple algorithms involving repeated matrix-vector operations. Recent work, however, has shown that sophisticated and complicated techniques based on network embeddings and neural networks can give empirical results superior to those based on linear diffusions. In this paper, we illustrate a class of nonlinear graph diffusions that are competitive with state of the art embedding techniques and outperform classic diffusions. Our new methods enjoy much of the simplicity underlying classic diffusion methods as well. Formally, they are based on nonlinear dynamical systems that can be realized with an implementation akin to applying a nonlinear function after each matrix-vector product in a classic diffusion. This framework also enables us to easily integrate results from multiple data representations in a principled fashion. Furthermore, we have some theoretical relationships that suggest choices of the nonlinear term. We demonstrate the benefits of these techniques on a variety of synthetic and real-world data.},
  acmid = {3313483},
  doi = {10.1145/3308558.3313483},
  file = {:Ibrahim 2019 - nonlinear diffusion.pdf},
  isbn = {978-1-4503-6674-8},
  keywords = {Community Detection, Graph-based Semi-Supervised Learning, Nonlinear Diffusion},
  location = {San Francisco, CA, USA},
  mynote = {225/1247 (18%) of submissions},
  mysoftware = {https://github.com/RaniaSalama/Nonlinear_Diffusion},
  numpages = {12},
  owner = {dgleich},
  timestamp = {2019.01.21}
}

@inproceedings{Jiang-2015-flux,
  title = {Differential Flux Balance Analysis of Quantitative Proteomic Data on Protein Interaction Networks},
  author = {Biaobin Jiang and David F. Gleich and Michael Gribskov},
  booktitle = {Symposium on Signal Processing and Mathematical Modeling of Biological Processes with Applications to Cyber-Physical Systems for Precise Medicine},
  year = {2015},
  organization = {IEEE},
  pages = {977--981},
  series = {GlobalSIP},
  doi = {10.1109/GlobalSIP.2015.7418343},
  file = {:Jiang 2015 - differential flux balance.pdf},
  owner = {David Gleich},
  timestamp = {2015.09.02}
}

@article{Jiang-2017-aptrank,
  title = { {AptRank}: an adaptive {PageRank} model for protein function prediction on bi-relational graphs},
  author = {Biaobin Jiang and Kyle Kloster and David F. Gleich and Michael Gribskov},
  journal = {Bioinformatics},
  pages = {1829--1836},
  volume = {33},
  year = {2017},
  month = {June},
  number = {12},
  arxiv = {https://arxiv.org/abs/1601.05506},
  doi = {10.1093/bioinformatics/btx029},
  file = {:Jiang 2017 - aptrank.pdf},
  mysoftware = {https://github.rcac.purdue.edu/mgribsko/aptrank},
  owner = {David Gleich},
  publisher = {Oxford},
  timestamp = {2016.01.21}
}

@article{Kang-2021-adaptive,
  author = {Kang, Xuejiao and Gleich, David F. and Sameh, Ahmed and Grama, Ananth},
  journal = {ACM Trans. Parallel Comput.},
  title = {Adaptive Erasure Coded Fault Tolerant Linear System Solver},
  year = {2021},
  issn = {2329-4949},
  month = {dec},
  number = {4},
  volume = {8},
  abstract = {As parallel and distributed systems scale, fault tolerance is an increasingly important problem---particularly on systems with limited I/O capacity and bandwidth. Erasure coded computations address this problem by augmenting a given problem instance with redundant data and then solving the augmented problem in a fault oblivious manner in a faulty parallel environment. In the event of faults, a computationally inexpensive procedure is used to compute the true solution from a potentially fault-prone solution. These techniques are significantly more efficient than conventional solutions to the fault tolerance problem.In this article, we show how we can minimize, to optimality, the overhead associated with our problem augmentation techniques for linear system solvers. Specifically, we present a technique that adaptively augments the problem only when faults are detected. At any point in execution, we only solve a system whose size is identical to the original input system. This has several advantages in terms of maintaining the size and conditioning of the system, as well as in only adding the minimal amount of computation needed to tolerate observed faults. We present, in detail, the augmentation process, the parallel formulation, and evaluation of performance of our technique. Specifically, we show that the proposed adaptive fault tolerance mechanism has minimal overhead in terms of FLOP counts with respect to the original solver executing in a non-faulty environment, has good convergence properties, and yields excellent parallel performance. We also demonstrate that our approach significantly outperforms an optimized application-level checkpointing scheme that only checkpoints needed data structures.},
  address = {New York, NY, USA},
  articleno = {21},
  doi = {10.1145/3490557},
  file = {:Kang 2021 - adaptive.pdf},
  issue_date = {December 2021},
  keywords = {adaptive fault tolerance, linear solver, Fault tolerance},
  numpages = {19},
  publisher = {Association for Computing Machinery}
}

@inproceedings{Kang-2017-distributed-erasure,
  title = {Distributed Fault Tolerant Linear System Solvers Based on Erasure Coding},
  author = {Xuejiao Kang and David F. Gleich and Ahmed Sameh and Ananth Grama},
  booktitle = {2017 IEEE 37th International Conference on Distributed Computing Systems (ICDCS)},
  year = {2017},
  pages = {2478-2485},
  abstract = {We present efficient coding schemes and distributed implementations of erasure coded linear system solvers. Erasure coded computations belong to the class of algorithmic fault tolerance schemes. They are based on augmenting an input dataset, executing the algorithm on the augmented dataset, and in the event of a fault, recovering the solution from the corresponding augmented solution. This process can be viewed as the computational analog of erasure coded storage schemes. The proposed technique has a number of important benefits: (i) as the hardware platform scales in size and number of faults, our scheme yields increasing improvement in resource utilization, compared to traditional schemes; (ii) the proposed scheme is easy to code - the core algorithms remain the same; and (iii) the general scheme is flexible - accommodating a range of computation and communication tradeoffs. We present new coding schemes for augmenting the input matrix that satisfy the recovery equations of erasure coding with high probability in the event of random failures. These coding schemes also minimize fill (non-zero elements introduced by the coding block), while being amenable to efficient partitioning across processing nodes. We demonstrate experimentally that our scheme adds minimal overhead for fault tolerance, yields excellent parallel efficiency and scalability, and is robust to different fault arrival models.},
  doi = {10.1109/ICDCS.2017.261},
  file = {:Kang 2017 - erasure.pdf},
  issn = {1063-6927},
  keywords = {matrix algebra;minimisation;parallel processing;probability;resource allocation;software fault tolerance;storage management;algorithmic fault tolerance schemes;distributed fault tolerant linear system solvers;erasure coded storage schemes;erasure coding;fill minimization;input matrix augmentation;parallel systems;probability;random failures;resource utilization;Computational modeling;Encoding;Fault tolerance;Fault tolerant systems;Linear systems;Program processors;Sparse matrices;Erasure Coding;Fault Tolerance;Kruskal rank;Linear System Solvers},
  owner = {dgleich},
  timestamp = {2017.03.22}
}

@inproceedings{Khan-2012-netalign-multicore,
  title = {A multicore algorithm for network alignment via approximate matching},
  author = {Arif Khan and David F. Gleich and Mahantesh Halappanavar and Alex Pothen},
  booktitle = {Proceedings of the 2012 ACM/IEEE International Conference for High Performance Computing, Networking, Storage and Analysis},
  year = {2012},
  address = {Los Alamitos, CA, USA},
  month = nov,
  pages = {64:1--64:11},
  publisher = {IEEE Computer Society Press},
  series = {SC '12},
  acmid = {2389083},
  articleno = {64},
  file = {:Khan 2012 - netalign multicore.pdf},
  isbn = {978-1-4673-0804-5},
  location = {Salt Lake City, Utah},
  mynote = {Acceptance rate 100/472 = 21\%},
  numpages = {11},
  owner = {David Gleich},
  timestamp = {2012.05.30},
  url = {http://conferences.computer.org/sc/2012/papers/1000a054.pdf}
}

@inproceedings{Kloster-2013-nexpokit,
  title = {A Nearly-Sublinear Method for Approximating a Column of the Matrix Exponential for Matrices from Large, Sparse Networks},
  author = {Kyle Kloster and David F. Gleich},
  booktitle = {Algorithms and Models for the Web Graph},
  year = {2013},
  editor = {Bonato, Anthony and Mitzenmacher, Michael and Pra\l{}at, Pawe\l{} },
  month = dec,
  pages = {68-79},
  publisher = {Springer International Publishing},
  series = {Lecture Notes in Computer Science},
  volume = {8305},
  abstract = {We consider random-walk transition matrices from large social and information networks. For these matrices, we describe and evaluate a fast method to estimate one column of the matrix exponential. Our method runs in sublinear time on networks where the maximum degree grows doubly logarithmic with respect to the number of nodes. For collaboration networks with over 5 million edges, we find it runs in less than a second on a standard desktop machine.},
  doi = {10.1007/978-3-319-03536-9_6},
  file = {:Kloster 2013 - nexpokit.pdf},
  isbn = {978-3-319-03535-2},
  keywords = {Matrix exponential; Gauss-Southwell; local algorithms},
  owner = {David Gleich},
  timestamp = {2013.09.06}
}

@article{Kloster-2016-ppr-paths,
  title = {Seeded {PageRank} Solution Paths},
  author = {Kyle Kloster and David F. Gleich},
  journal = {European Journal of Applied Mathematics},
  pages = {812--845},
  volume = {27},
  year = {2016},
  number = {6},
  abstract = {We study the behaviour of network diffusions based on the PageRank random walk from a set of seed nodes. These diffusions are known to reveal small, localized clusters (or communities), and also large macro-scale clusters by varying a parameter that has a dual-interpretation as an accuracy bound and as a regularization level. We propose a new method that quickly approximates the result of the diffusion for all values of this parameter. Our method efficiently generates an approximate solution path or regularization path associated with a PageRank diffusion, and it reveals cluster structures at multiple size-scales between small and large. We formally prove a runtime bound on this method that is independent of the size of the network, and we investigate multiple optimizations to our method that can be more practical in some settings. We demonstrate that these methods identify refined clustering structure on a number of real-world networks with up to 2 billion edges.},
  arxiv = {https://arxiv.org/abs/1503.00322},
  doi = {10.1017/S0956792516000280},
  file = {:Kloster 2016 - seeded pagerank.pdf},
  mysoftware = {https://github.com/kkloste/pagerank-paths},
  owner = {dgleich},
  publisher = {Cambridge University Press},
  timestamp = {2015.08.20}
}

@inproceedings{Kloster-2014-hkrelax,
  title = {Heat Kernel Based Community Detection},
  author = {Kloster, Kyle and Gleich, David F.},
  booktitle = {Proceedings of the 20th {ACM SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  year = {2014},
  address = {New York, NY, USA},
  pages = {1386--1395},
  publisher = {ACM},
  series = {KDD '14},
  acmid = {2623706},
  doi = {10.1145/2623330.2623706},
  file = {:Kloster 2014 - hkrelax.pdf},
  isbn = {978-1-4503-2956-9},
  keywords = {heat kernel, local clustering},
  mynote = {Includes all software. Acceptance rate 151/1036 = 14.6\%},
  mysoftware = {https://www.cs.purdue.edu/homes/dgleich/codes/hkgrow/},
  numpages = {10},
  owner = {David F. Gleich},
  timestamp = {2014.02.23}
}

@inproceedings{Klymko-2014-dircomm,
  author = {Christine Klymko and David F. Gleich and Tamara G. Kolda},
  booktitle = {Proceedings of the ASE BigData Conference},
  title = {Using Triangles to Improve Community Detection in Directed Networks},
  year = {2014},
  address = {Stanford, CA},
  note = {Full version on arXiv \url{http://arxiv.org/abs/1404.5874} },
  arxiv = {http://arxiv.org/abs/1404.5874},
  doi = {http://www.ase360.org/handle/123456789/104},
  keywords = {self},
  owner = {David Gleich},
  timestamp = {2014.06.27},
  url = {http://arxiv.org/abs/1404.5874}
}

@article{Lin-2018-diffusion,
  title = {Multimodal network diffusion predicts future disease-gene-chemical associations},
  author = {Chih-Hsu Lin and Daniel M Konecki and Meng Liu and Stephen J Wilson and Huda Nassar and Angela D Wilkins and David F Gleich and Olivier Lichtarge},
  journal = {Bioinformatics},
  pages = {bty858},
  year = {2018},
  month = {October},
  doi = {10.1093/bioinformatics/bty858},
  editor = {Jonathan Wren},
  file = {:Lin 2018 - multimodal aptrank.pdf},
  owner = {David Gleich},
  publisher = {Oxford University Press ({OUP})},
  timestamp = {2019.01.14}
}

@article{Mohammadi-2017-tame,
  title = {Triangular Alignment ({TAME}): A tensor-based approach for higher-order network alignment},
  author = {Shahin Mohammadi and David F. Gleich and Tamara G. Kolda and Ananth Grama},
  journal = {Transactions on Computational Biology and Bioinformatics},
  pages = {1446--1458},
  volume = {14},
  year = {2017},
  month = {November},
  note = {Published online (July 2016) ahead of print.},
  number = {6},
  arxiv = {http://arxiv.org/abs/1510.06482},
  doi = {10.1109/TCBB.2016.2595583},
  file = {:Mohammadi 2016 - tame.pdf},
  mysoftware = {https://github.com/shmohammadi86/TAME},
  owner = {David Gleich},
  timestamp = {2015.10.21}
}

@article{Mohammadi-2018-action,
  title = {A geometric approach to characterize the functional identity of single cells},
  author = {Shahin Mohammadi and Vikram Ravindra and David F. Gleich and Ananth Grama},
  journal = {Nature Communications},
  pages = {1516},
  volume = {9},
  year = {2018},
  month = {April},
  number = {1},
  doi = {10.1038/s41467-018-03933-2},
  file = {:Mohammadi 2018 - action.pdf},
  mysoftware = {http://compbio.mit.edu/ACTION/},
  owner = {dgleich},
  publisher = {Springer Nature},
  timestamp = {2018.04.18}
}

@inproceedings{Nassar-2019-pairwise-prediction,
  author = {Huda Nassar and Austin R. Benson and David F. Gleich},
  booktitle = {Proceedings of the 2019 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining},
  title = {Pairwise Link Prediction},
  year = {2019},
  pages = {386-393},
  series = {ASONAM 19},
  arxiv = {https://arxiv.org/abs/1907.04503},
  doi = {10.1145/3341161.3342897},
  file = {:Nassar 2019 - pairwise.pdf},
  keywords = {link prediction, higher order methods, PageRank},
  location = {Vancouver, British Columbia, Canada},
  mynote = {Acceptance rate 14\% (41/286) for full papers},
  mysoftware = {https://github.com/nassarhuda/pairseed},
  numpages = {8},
  owner = {dgleich},
  timestamp = {2019.05.01}
}

@inproceedings{Nassar-2017-multimodal-netalign,
  title = {Multimodal Network Alignment},
  author = {Huda Nassar and David F. Gleich},
  booktitle = {Proceedings of the 2017 SIAM International Conference on Data Mining},
  year = {2017},
  pages = {615-623},
  arxiv = {https://arxiv.org/abs/1703.10511},
  doi = {10.1137/1.9781611974973.69},
  file = {:Nassar 2017 - multimodal network alignment.pdf},
  mynote = {acceptance rate 26\% (around 88/338)},
  mysoftware = {https://github.com/nassarhuda/MSD},
  owner = {dgleich},
  timestamp = {2017.01.03}
}

@inproceedings{Nassar-2020-glance,
  author = {Nassar, Huda and Kennedy, Caitlin and Jain, Shweta and Benson, Austin R. and Gleich, David F.},
  booktitle = {Proceedings of The Web Conference 2020},
  title = {Using Cliques with Higher-Order Spectral Embeddings Improves Graph Visualizations},
  year = {2020},
  address = {New York, NY, USA},
  pages = {2927-2933},
  publisher = {Association for Computing Machinery},
  series = {WWW '20},
  doi = {10.1145/3366423.3380059},
  file = {:Nassar 2020 - glance.pdf},
  isbn = {9781450370233},
  keywords = {graph visualization, cliques sampling, graph layout, spectral methods, higher-order methods},
  location = {Taipei, Taiwan},
  mynote = {Acceptance rate 98/397},
  mysoftware = {https://github.com/nassarhuda/GLANCE},
  numpages = {7},
  owner = {dgleich},
  timestamp = {2020.03.11}
}

@inproceedings{Nassar-2015-prlocal,
  title = {Strong localization in personalized {PageRank} },
  author = {Huda Nassar and Kyle Kloster and David F. Gleich},
  booktitle = {Proceedings of the 2015 Workshop on Algorithms for the Webgraph},
  year = {2015},
  number = {9479},
  pages = {190--202},
  series = {LNCS},
  abstract = {The personalized PageRank diffusion is a fundamental tool in network analysis tasks like community detection and link prediction. It models the spread of a quantity from a set of seed nodes, and it has been observed to stay localized near this seed set. We derive an upper-bound on the number of entries necessary to approximate a personalized PageRank vector in graphs with skewed degree sequences. This bound shows localization under mild assumptions on the maximum and minimum degrees. Experimental results on random graphs with these degree sequences show the bound is loose and support a conjectured bound.},
  doi = {10.1007/978-3-319-26784-5_15},
  file = {:Nassar 2015 - strong localization.pdf},
  mysoftware = {https://github.com/nassarhuda/pprlocal/tree/a53f98da293455d1e9dd723cb272a3fcc800c1bd},
  owner = {David Gleich},
  timestamp = {2015.07.26},
  url = {http://arxiv.org/abs/1509.00016}
}

@article{Nassar-2021-multinetwork,
  author = {Huda Nassar and Georgios Kollias and Ananth Grama and David F. Gleich},
  journal = {SIAM J. Scientific Computing},
  title = {Scalable Algorithms for Multiple Network Alignment},
  year = {2021},
  number = {5},
  pages = {S592--S611},
  volume = {43},
  arxiv = {https://arxiv.org/abs/1809.0819},
  doi = {10.1137/20m1345876},
  file = {:Nassar 2021 - multiple alignment.pdf},
  mysoftware = {https://github.com/nassarhuda/NetworkAlign.jl},
  owner = {dgleich},
  timestamp = {2018.07.27}
}

@inproceedings{Nassar-2018-low-rank-eigenalign,
  title = {Low Rank Spectral Network Alignment},
  author = {Nassar, Huda and Veldt, Nate and Mohammadi, Shahin and Grama, Ananth and Gleich, David F.},
  booktitle = {Proceedings of the 2018 World Wide Web Conference},
  year = {2018},
  pages = {619--628},
  publisher = {International World Wide Web Conferences Steering Committee},
  series = {WWW '18},
  acmid = {3186128},
  doi = {10.1145/3178876.3186128},
  file = {:Nassar 2018 - low-rank net-align.pdf},
  isbn = {978-1-4503-5639-8},
  keywords = {graph matching, low rank matrix, low-rank bipartite matching, network alignment},
  location = {Lyon, France},
  mynote = {Acceptance rate 171/1155 (around 14.8%)},
  mysoftware = {https://github.com/nassarhuda/lowrank_spectral},
  numpages = {10},
  owner = {dgleich},
  timestamp = {2018.01.15}
}

@inproceedings{Rainey-2016-nano-pagerank,
  title = {Massive graph processing on nanocomputers},
  author = {Bryan P. Rainey and David F. Gleich},
  booktitle = {IEEE International Conference on Big Data},
  year = {2016},
  note = {Third Workshop on High Performance Big Graph Data Management, Analysis, and Mining},
  pages = {3326--3335},
  doi = {10.1109/BigData.2016.7840992},
  file = {:Rainey 2016 - nanocomputers.pdf},
  mynote = {Acceptance rate: Workshop},
  mysoftware = {https://github.com/brainey421/badjgraph/},
  owner = {dgleich},
  timestamp = {2016.04.29}
}

@article{Ramani-2019-fast-graph-sampling,
  title = {Coin-flipping, ball-dropping, and grass-hopping for generating random graphs from matrices of probabilities},
  author = {Arjun S. Ramani and Nicole Eikmeier and David F. Gleich},
  journal = {SIAM Review},
  pages = {549-595},
  volume = {61},
  year = {2019},
  number = {3},
  arxiv = {https://arxiv.org/abs/1709.03438},
  doi = {10.1137/17M1127132},
  file = {:Ramani 2019 - coin flipping.pdf},
  mysoftware = {https://github.com/dgleich/grass-hopping-graphs},
  owner = {dgleich},
  timestamp = {2017.08.23}
}

@inproceedings{Ravindra-2019-rigid,
  title = {Rigid Graph Alignment},
  author = {Ravindra, Vikram
and Nassar, Huda
and Gleich, David F.
and Grama, Ananth},
  booktitle = {Complex Networks and Their Applications VIII},
  year = {2019},
  pages = {621--632},
  publisher = {Springer International Publishing},
  abstract = {An increasingly important class of networks is derived from physical systems that have a spatial basis. Specifically, nodes in the network have spatial coordinates associated with them, and conserved edges in two networks being aligned have correlated distance measures. An example of such a network is the human brain connectome -- a network of co-activity of different regions of the brain, as observed in a functional MRI (fMRI). Here, the problem of identifying conserved patterns corresponds to the alignment of connectomes. In this context, one may structurally align the brains through co-registration to a common coordinate system. Alternately, one may align the networks, ignoring the structural basis of co-activity. In this paper, we formulate a novel problem -- rigid graph alignment, which simultaneously aligns the network, as well as the underlying structure. We formally specify the problem and present a method based on expectation maximization, which alternately aligns the network and the structure via rigid body transformations. We demonstrate that our method significantly improves the quality of network alignment in synthetic graphs. We also apply rigid graph alignment to functional brain networks derived from 20 subjects drawn from the Human Connectome Project (HCP), and show over a two-fold increase in quality of alignment. Our results are broadly applicable to other applications and abstracted networks that can be embedded in metric spaces -- e.g., through spectral embeddings.},
  arxiv = {https://arxiv.org/abs/1908.03201},
  doi = {10.1007/978-3-030-36687-2_52},
  file = {:Ravindra 2020 - rigid alignment.pdf},
  isbn = {978-3-030-36687-2},
  journal = {arXiv},
  owner = {dgleich},
  timestamp = {2019.08.20}
}

@incollection{Ren-2020-parallelism,
  author = {Yanfei Ren and David F. Gleich},
  booktitle = {Parallel Algorithms in Computational Science and Engineering},
  publisher = {Birkh\"{a}user},
  title = {A simple study of pleasing parallelism on multicore computers},
  year = {2020},
  editor = {A. Grama and A. H. Sameh},
  note = {Report version on this website differs slightly from final published chapter.},
  pages = {325--346},
  doi = {10.1007/978-3-030-43736-7_11},
  file = {:Ren 2020 - simple parallel.pdf},
  mysoftware = {https://github.com/YanfeiRen/pagerank},
  owner = {David Gleich},
  timestamp = {2019.11.15}
}

@inproceedings{Rossi-2012-dynamicpr,
  title = {Dynamic {PageRank} using Evolving Teleportation},
  author = {Ryan A. Rossi and David F. Gleich},
  booktitle = {Algorithms and Models for the Web Graph},
  year = {2012},
  editor = {Bonato, Anthony and Janssen, Jeannette},
  pages = {126-137},
  publisher = {Springer Berlin Heidelberg},
  series = {Lecture Notes in Computer Science},
  volume = {7323},
  abstract = {The importance of nodes in a network constantly fluctuates based on changes in the network structure as well as changes in external interest. We propose an evolving teleportation adaptation of the PageRank method to capture how changes in external interest influence the importance of a node. This framework seamlessly generalizes PageRank because the importance of a node will converge to the PageRank values if the external influence stops changing. We demonstrate the effectiveness of the evolving teleportation on the Wikipedia graph and the Twitter social network. The external interest is given by the number of hourly visitors to each page and the number of monthly tweets for each user.},
  doi = {10.1007/978-3-642-30541-2_10},
  file = {:Rossi 2012 - dynamic-pr.pdf},
  isbn = {978-3-642-30540-5},
  owner = {dgleich},
  timestamp = {2012.02.29}
}

@article{Rossi-2015-max-clique,
  title = {Parallel Maximum Clique Algorithms with Applications to Network Analysis},
  author = {Ryan A. Rossi and David F. Gleich and Assefaw H. Gebremedhin},
  journal = {SIAM Journal on Scientific Computing},
  pages = {C589-C616},
  volume = {37},
  year = {2015},
  number = {5},
  abstract = {We present a fast, parallel maximum clique algorithm for large sparse graphs that is designed to exploit characteristics of social and information networks. The method exhibits a roughly linear runtime scaling over real-world networks ranging from a thousand to a hundred million nodes. In a test on a social network with 1.8 billion edges, the algorithm finds the largest clique in about 20 minutes. At its heart the algorithm employs a branch-and-bound strategy with novel and aggressive pruning techniques. The pruning techniques include the combined use of core numbers of vertices along with a good initial heuristic solution to remove the vast majority of the search space. In addition, the exploration of the search tree is parallelized. During the search, processes immediately communicate changes to upper and lower bounds on the size of the maximum clique. This exchange of information occasionally results in a superlinear speedup because tasks with large search spaces can be pruned by other processes. We demonstrate the impact of the algorithm on applications using two different network analysis problems: computation of temporal strong components in dynamic networks and determination of compression-friendly ordering of nodes of massive networks.},
  arxiv = {http://arxiv.org/abs/1302.6256},
  doi = {10.1137/14100018X},
  file = {:Rossi 2015 - max-clique.pdf},
  owner = {dgleich},
  timestamp = {2012.11.17}
}

@inproceedings{Rossi-2014-max-clique-poster,
  title = {Fast Maximum Clique Algorithms for Large Graphs},
  author = {Ryan A. Rossi and David F. Gleich and Assefaw H. Gebremedhin and Md. Mostofa Ali Patwary},
  booktitle = {Poster Proceedings of WWW2014},
  year = {2014},
  pages = {365-366},
  doi = {10.1145/2567948.2577283},
  file = {:Rossi 2014 - max-clique.pdf},
  owner = {David F. Gleich},
  timestamp = {2014.01.08}
}

@inproceedings{Ruggles-2020-parallel,
  author = {Cameron Ruggles and Nate Veldt and David F. Gleich},
  booktitle = {Proceedings of the SIAM Workshop on Combinatorial Scientific Computing 2020 (CSC20)},
  title = {A Parallel Projection Method for Metric Constrained Optimization},
  year = {2020},
  pages = {43-53},
  publisher = {SIAM},
  arxiv = {http://arxiv.org/abs/1901.10084},
  doi = {10.1137/1.9781611976229.5},
  file = {:Ruggles 2020 parallel.PDF},
  mysoftware = {https://github.com/camruggles/ParallelDykstras},
  owner = {dgleich},
  timestamp = {2019.01.25}
}

@inproceedings{Sinha-2016-deconvolving,
  title = {Deconvolving Feedback loops in Recommender Systems},
  author = {Ayan Sinha and David F. Gleich and Karthik Ramani},
  booktitle = {Neural Information Processing Systems (NIPS)},
  year = {2016},
  editor = {D. D. Lee and M. Sugiyama and U. V. Luxburg and I. Guyon and R. Garnett},
  pages = {3243--3251},
  publisher = {Curran Associates, Inc.},
  file = {:Sinha 2016 - deconvolving.pdf},
  mynote = {Acceptance rate 568/2500 = 23\%},
  mysoftware = {https://github.com/sinhayan/Deconvolving_Feedback_Loops},
  owner = {David Gleich},
  timestamp = {2015.10.21},
  url = {https://papers.nips.cc/paper/6283-deconvolving-feedback-loops-in-recommender-systems}
}

@article{Sinha-2018-gauss,
  title = {Gauss's law for networks directly reveals community boundaries},
  author = {Ayan Sinha and David F. Gleich and Karthik Ramani},
  journal = {Scientific Reports},
  pages = {11909},
  volume = {8},
  year = {2018},
  month = {August},
  number = {1},
  doi = {10.1038/s41598-018-30401-0},
  file = {:Sinha 2018 - gauss for networks.pdf},
  owner = {dgleich},
  publisher = {Springer Nature America, Inc},
  supplementary = {https://static-content.springer.com/esm/art%3A10.1038%2Fs41598-018-30401-0/MediaObjects/41598_2018_30401_MOESM1_ESM.pdf},
  timestamp = {2016.01.20}
}

@inproceedings{Veldt-2016-simple-local-flow,
  author = {Nate Veldt and David F. Gleich and Michael W. Mahoney},
  booktitle = {International Conference on Machine Learning},
  title = {A Simple and Strongly-Local Flow-Based Method for Cut Improvement},
  year = {2016},
  pages = {1938-1947},
  arxiv = {https://arxiv.org/abs/1605.08490},
  file = {:Veldt 2016 - simple local.pdf},
  mynote = {Acceptance rate 322/1327 = 24\%},
  mysoftware = {https://github.com/nveldt/SimpleLocal},
  owner = {David Gleich},
  timestamp = {2016.01.22},
  url = {http://jmlr.org/proceedings/papers/v48/veldt16.html}
}

@article{Veldt-2019-metric-lps,
  title = {Metric-Constrained Optimization for Graph Clustering Algorithms},
  author = {Nate Veldt and David Gleich and Anthony Wirth and James Saunderson},
  journal = {SIAM J. Mathematics of Data Science},
  pages = {333-355},
  volume = {1},
  year = {2019},
  number = {2},
  abstract = {We outline a new approach for solving linear programming relaxations of NP-hard graph clustering problems that enforce triangle inequality constraints on output variables. Extensive previous research has shown that solutions to these relaxations can be used to obtain good approximation algorithms for clustering objectives. However, these are rarely solved in practice due to their high memory requirements. We first prove that the linear programming relaxation of the correlation clustering objective is equivalent to a special case of a well-known problem in machine learning called metric nearness. We then develop a general solver for metric-constrained linear and quadratic programs by generalizing and improving a simple projection algorithm, originally developed for metric nearness. We give several novel approximation guarantees for using our approach to find lower bounds for challenging graph clustering tasks such as sparsest cut, maximum modularity, and correlation clustering. We demonstrate the power of our framework by solving relaxations of these problems involving up to  $10^7$  variables and  $10^{11}$  constraints.},
  arxiv = {https://arxiv.org/abs/1806.01678},
  doi = {10.1137/18M1217152},
  file = {:Veldt 2019 - metric optimization.pdf},
  mysoftware = {https://github.com/nveldt/MetricOptimization},
  owner = {dgleich},
  timestamp = {2018.06.06}
}

@inproceedings{Veldt-2018-lambda-cc,
  title = {A Correlation Clustering Framework for Community Detection},
  author = {Veldt, Nate and Gleich, David F. and Wirth, Anthony},
  booktitle = {Proceedings of the 2018 World Wide Web Conference},
  year = {2018},
  address = {Republic and Canton of Geneva, Switzerland},
  pages = {439--448},
  publisher = {International World Wide Web Conferences Steering Committee},
  series = {WWW '18},
  acmid = {3186110},
  arxiv = {https://arxiv.org/abs/1712.05825},
  doi = {10.1145/3178876.3186110},
  file = {:Veldt 2018 - lambda-CC.pdf},
  isbn = {978-1-4503-5639-8},
  keywords = {cluster deletion, community detection, correlation clustering, graph clustering, network analysis, sparsest cut},
  location = {Lyon, France},
  mynote = {Acceptance Rate 171/1155 (14.5\%)},
  mysoftware = {https://github.com/nveldt/LamCC},
  numpages = {10},
  owner = {David Gleich},
  timestamp = {2017.11.14}
}

@inproceedings{Veldt-2019-flow,
  title = {Flow-Based Local Graph Clustering with Better Seed Set Inclusion},
  author = {Nate Veldt and Christine Klymko and David F. Gleich},
  booktitle = {Proceedings of the SIAM International Conference on Data Mining},
  year = {2019},
  pages = {378-386},
  abstract = {Flow-based methods for local graph clustering have received significant recent attention for their theoretical cut improvement and runtime guarantees. In this work we present two improvements for using flow-based methods in real-world semi-supervised clustering problems. Our first contribution is a generalized objective function that allows practitioners to place strict and soft penalties on excluding specific seed nodes from the output set. This feature allows us to avoid the tendency, often exhibited by previous flow-based methods, to contract a large seed set into a small set of nodes that does not contain all or even most of the seed nodes. Our second contribution is a fast algorithm for minimizing our generalized objective function, based on a variant of the push-relabel algorithm for computing preflows. We make our approach very fast in practice by implementing a global relabeling heuristic and employing a warm-start procedure to quickly solve related cut problems. In practice our algorithm is faster than previous related flow-based methods, and is also more robust in detecting ground truth target regions in a graph thanks to its ability to better incorporate semi-supervised information about target clusters.},
  arxiv = {https://arxiv.org/abs/1811.12280},
  doi = {10.1137/1.9781611975673.43},
  file = {:Veldt 2019 - flowseed.pdf},
  mynote = {90/397 submissions (22.7\%)},
  mysoftware = {https://github.com/nveldt/FlowSeed},
  owner = {dgleich},
  timestamp = {2019.01.25}
}

@inproceedings{Veldt-2020-bipartite,
  author = {Nate Veldt and Anthony Wirth and David F. Gleich},
  booktitle = {Proceeding of KDD2020},
  title = {Parameterized Objectives and Algorithms for Clustering Bipartite Graphs and Hypergraphs},
  year = {2020},
  pages = {1868-1876},
  series = {KDD '20},
  arxiv = {https://arxiv.org/abs/2002.09460},
  doi = {10.1145/3394486.3403238},
  file = {:Veldt 2020 - bipartite lambda-cc.pdf},
  journal = {arXiv},
  mynote = {216 of 1279 papers accepted (16.9%)},
  mysoftware = {https://github.com/nveldt/ParamCC},
  owner = {dgleich},
  timestamp = {2020.03.11}
}

@inproceedings{Veldt-2019-resolution,
  title = {Learning Resolution Parameters for Graph Clustering},
  author = {Nate Veldt and Anthony Wirth and David F. Gleich},
  booktitle = {The World Wide Web Conference},
  year = {2019},
  address = {New York, NY, USA},
  pages = {1909--1919},
  publisher = {ACM},
  series = {WWW '19},
  abstract = {Finding clusters of well-connected nodes in a graph is an extensively studied problem in graph-based data analysis. Because of its many applications, a large number of distinct graph clustering objective functions and algorithms have already been proposed and analyzed. To aid practitioners in determining the best clustering approach to use in different applications, we present new techniques for automatically learning how to set clustering resolution parameters. These parameters control the size and structure of communities that are formed by optimizing a generalized objective function. We begin by formalizing the notion of a parameter fitness function, which measures how well a fixed input clustering approximately solves a generalized clustering objective for a specific resolution parameter value. Under reasonable assumptions, which suit two key graph clustering applications, such a parameter fitness function can be efficiently minimized using a bisection-like method, yielding a resolution parameter that fits well with the example clustering. We view our framework as a type of single-shot hyperparameter tuning, as we are able to learn a good resolution parameter with just a single example. Our general approach can be applied to learn resolution parameters for both local and global graph clustering objectives. We demonstrate its utility in several experiments on real-world data where it is helpful to learn resolution parameters from a given example clustering.},
  acmid = {3313471},
  arxiv = {https://arxiv.org/abs/1903.05246},
  doi = {10.1145/3308558.3313471},
  file = {:Veldt 2019 - resolution parameters.pdf},
  isbn = {978-1-4503-6674-8},
  keywords = {Graph clustering, community detection, resolution parameters},
  location = {San Francisco, CA, USA},
  mynote = {225/1247 (18%) of submissions accepted for full paper},
  mysoftware = {https://github.com/nveldt/LearnResParams},
  numpages = {11},
  owner = {dgleich},
  timestamp = {2019.01.21}
}

@inproceedings{Veldt-2017-low-rank-cc,
  title = {Correlation Clustering with Low-Rank Matrices},
  author = {Veldt, Nate and Wirth, Anthony I. and Gleich, David F.},
  booktitle = {Proceedings of the 26th International Conference on World Wide Web},
  year = {2017},
  pages = {1025--1034},
  series = {WWW '17},
  acmid = {3052586},
  arxiv = {https://arxiv.org/abs/1611.07305},
  doi = {10.1145/3038912.3052586},
  file = {:Veldt 2017 - Low-rank CC.pdf},
  isbn = {978-1-4503-4913-0},
  keywords = {correlation clustering, low-rank matrices},
  location = {Perth, Australia},
  mynote = {Acceptance rate 164/966 (17%)},
  mysoftware = {https://github.com/nveldt/PSDCC},
  numpages = {10},
  owner = {dgleich},
  timestamp = {2016.11.13},
  url = {https://arxiv.org/abs/1611.07305}
}

@article{wang2008-monte-carlo-for-adjoint,
  title = {A {Monte Carlo} method for solving unsteady adjoint equations},
  author = {Qiqi Wang and David F. Gleich and Amin Saberi and Nasrollah Etemadi and Parviz Moin},
  journal = {Journal of Computational Physics},
  pages = {6184--6205},
  volume = {227},
  year = {2008},
  month = {June},
  number = {12},
  abstract = {Traditionally, solving the adjoint equation for unsteady problems involves solving a large, structured linear system. This paper presents a variation on this technique and uses a Monte Carlo linear solver. The Monte Carlo solver yields a forward-time algorithm for solving unsteady adjoint equations. When applied to computing the adjoint associated with Burgersï¿½ equation, the Monte Carlo approach is faster for a large class of problems while preserving sufficient accuracy.},
  address = {San Diego, CA, USA},
  doi = {10.1016/j.jcp.2008.03.006},
  file = {:wang 2008 - monte carlo adjoint.pdf},
  issn = {0021-9991},
  keywords = {self},
  owner = {David Gleich},
  publisher = {Academic Press Professional, Inc.},
  timestamp = {2008.10.18}
}

@inproceedings{Whang-2015-neokm,
  title = {Non-exhaustive, overlapping K-means},
  author = {Joyce Jiyoung Whang and Inderjit S. Dhillon and David F. Gleich},
  booktitle = {Proceedings of the 2015 SIAM International Conference on Data Mining},
  year = {2015},
  pages = {936-944},
  abstract = {Traditional clustering algorithms, such as k-means, output a clustering that is disjoint and exhaustive, that is, every single data point is assigned to exactly one cluster. However, in real datasets, clusters can overlap and there are often outliers that do not belong to any cluster. This is a well recognized problem that has received much attention in the past, and several algorithms, such as fuzzy k-means have been proposed for overlapping clustering. However, most existing algorithms address either overlap or outlier detection and do not tackle the problem in a unified way. In this paper, we propose a simple and intuitive objective function that captures the issues of overlap and non-exhaustiveness in a unified manner. Our objective function can be viewed as a reformulation of the traditional k-means objective, with easy-to-understand parameters that capture the degrees of overlap and non-exhaustiveness. By studying the objective, we are able to obtain a simple iterative algorithm which we call NEO-K-Means (Non-Exhaustive Overlapping K-Means). Furthermore, by considering an extension to weighted kernel k-means, we can tackle the case of non-exhaustive and overlapping graph clustering. This extension allows us to apply our NEO-K-Means algorithm to the community detection problem, which is an important task in network analysis. Our experimental results show that the new objective and algorithm are effective in finding ground-truth clusterings that have varied overlap and non-exhaustiveness; for the case of graphs, we show that our algorithm outperforms state-of-the-art overlapping community detection methods.},
  doi = {10.1137/1.9781611974010.105},
  file = {:Whang 2015 - neokm.pdf},
  howpublished = {Submitted},
  location = {Vancouver, BC},
  mynote = {Acceptance rate 22\%},
  owner = {David F. Gleich},
  timestamp = {2014.02.23}
}

@article{Whang-2016-community,
  author = {Joyce Jiyoung Whang and David F. Gleich and Inderjit S. Dhillon},
  journal = {Transactions on Knowledge and Data Engineering},
  title = {Overlapping Community Detection Using Neighborhood-Inflated Seed Expansion},
  year = {2016},
  month = {May},
  number = {5},
  pages = {1272--1284},
  volume = {28},
  abstract = {Community detection is an important task in network analysis. A community (also referred to as a cluster) is a set of cohesive vertices that have more connections inside the set than outside. In many social and information networks, these communities naturally overlap. For instance, in a social network, each vertex in a graph corresponds to an individual who usually participates in multiple communities. In this paper, we propose an efficient overlapping community detection algorithm using a seed expansion approach. The key idea of our algorithm is to find good seeds, and then greedily expand these seeds based on a community metric. Within this seed expansion method, we investigate the problem of how to determine good seed nodes in a graph. In particular, we develop new seeding strategies for a personalized PageRank clustering scheme that optimizes the conductance community score. An important step in our method is the neighborhood inflation step where seeds are modified to represent their entire vertex neighborhood. Experimental results show that our seed expansion algorithm outperforms other state-of-the-art overlapping community detection methods in terms of producing cohesive clusters and identifying ground-truth communities. We also show that our new seeding strategies are better than existing strategies, and are thus effective in finding good overlapping communities in real-world networks.},
  doi = {10.1109/TKDE.2016.2518687},
  file = {:Whang 2016 - overlapping.pdf},
  keywords = {Clustering algorithms;Collaboration;Image edge detection;Kernel;MySpace;Clustering;Community Detection;Community detection;Overlapping Communities;Personalized PageRank;Seed Expansion;Seeds;clustering;overlapping communities;personalized PageRank;seed expansion;seeds},
  mysoftware = {We propose an efficient overlapping community detection algorithm using a seed expansion approach. In particular, we develop effective seeding strategies for a personalized PageRank scheme that optimizes the conductance community score. The key idea of our algorithm is to find good seeds, and then greedily expand these seeds based on a community metric. We name our algorithm 'NISE' by abbreviating our main idea, Neighborhood-Inflated Seed Expansion.},
  owner = {David F. Gleich},
  timestamp = {2015.05.19},
  url = {http://arxiv.org/abs/1503.07439}
}

@inproceedings{Whang-2013-overlapping,
  title = {Overlapping community detection using seed set expansion},
  author = {Whang, Joyce Jiyoung and Gleich, David F. and Dhillon, Inderjit S.},
  booktitle = {Proceedings of the 22nd ACM international conference on Conference on information and knowledge management},
  year = {2013},
  address = {New York, NY, USA},
  month = oct,
  pages = {2099--2108},
  publisher = {ACM},
  series = {CIKM '13},
  abstract = {Community detection is an important task in network analysis. A community (also referred to as a cluster) is a set of cohesive vertices that have more connections inside the set than outside. In many social and information networks, these communities naturally overlap. For instance, in a social network, each vertex in a graph corresponds to an individual who usually participates in multiple communities. One of the most successful techniques for finding overlapping communities is based on local optimization and expansion of a community metric around a seed set of vertices. In this paper, we propose an efficient overlapping community detection algorithm using a seed set expansion approach. In particular, we develop new seeding strategies for a personalized PageRank scheme that optimizes the conductance community score. The key idea of our algorithm is to find good seeds, and then expand these seed sets using the personalized PageRank clustering procedure. Experimental results show that this seed set expansion approach outperforms other state-of-the-art overlapping community detection methods. We also show that our new seeding strategies are better than previous strategies, and are thus effective in finding good overlapping clusters in a graph.},
  acmid = {2505535},
  doi = {10.1145/2505515.2505535},
  file = {:Whang 2013 - overlapping.pdf},
  isbn = {978-1-4503-2263-8},
  keywords = {clustering, community detection, overlapping clusters, seed set expansion, seeds},
  location = {San Francisco, California, USA},
  mynote = {Acceptance rate 143/848 16.8\%},
  mysoftware = {https://www.cs.utexas.edu/~joyce/codes/cikm2013/nise.html},
  numpages = {10},
  owner = {David Gleich},
  timestamp = {2013.06.08}
}

@article{Whang-2019-neo-k-means,
  title = {Non-exhaustive, Overlapping Clustering},
  author = {Joyce Jiyoung Whang and Yangyang Hou and David F. Gleich and Inderjit Dhillon},
  journal = {Transactions on Pattern Analysis and Machine Intelligence},
  volume = {41},
  year = {2019},
  month = nov,
  number = {11},
  abstract = {Traditional clustering algorithms, such as K-Means, output a clustering that is disjoint and exhaustive, i.e., every single data point is assigned to exactly one cluster. However, in many real-world datasets, clusters can overlap and there are often outliers that do not belong to any cluster. While this is a well-recognized problem, most existing algorithms address either overlap or outlier detection and do not tackle the problem in a unified way. In this paper, we propose an intuitive objective function, which we call the NEO-K-Means (Non-Exhaustive, Overlapping K-Means) objective, that captures the issues of overlap and non-exhaustiveness in a unified manner. Our objective function can be viewed as a reformulation of the traditional K-Means objective, with easy-to-understand parameters that capture the degrees of overlap and non-exhaustiveness. By considering an extension to weighted kernel K-Means, we show that we can also apply our NEO-K-Means idea to overlapping community detection, which is an important task in network analysis. To optimize the NEO-K-Means objective, we develop not only fast iterative algorithms but also more sophisticated algorithms using low-rank semidefinite programming techniques. Our experimental results show that the new objective and algorithms are effective in finding ground-truth clusterings that have varied overlap and non-exhaustiveness.},
  doi = {10.1109/TPAMI.2018.2863278},
  keywords = {Clustering algorithms;Linear programming;Kernel;Iterative methods;Computer science;Anomaly detection;Task analysis;Overlapping clustering;K-Means;outlier;semidefinite programming;graph clustering;community detection},
  owner = {dgleich},
  timestamp = {2016.12.02}
}

@inproceedings{Wu-2016-tensor-co-clustering,
  author = {Tao Wu and Austin Benson and David F. Gleich},
  booktitle = {Advances in Neural Information Processing Systems 29},
  title = {General tensor spectral co-clustering for higher-order data},
  year = {2016},
  note = {http://arxiv.org/abs/1603.00395},
  pages = {2559--2567},
  arxiv = {https://arxiv.org/abs/1603.00395},
  file = {:Wu 2016 - general-tensor-co-clustering.pdf},
  mynote = {Acceptance rate 568/2500 = 23\%},
  mysoftware = {https://github.com/wutao27/GtensorSC},
  owner = {dgleich},
  timestamp = {2016.02.29},
  url = {http://papers.nips.cc/paper/6376-general-tensor-spectral-co-clustering-for-higher-order-data}
}

@article{Wu-2019-mwmc,
  title = {Multi-way Monte Carlo Method for Linear Systems},
  author = {Tao Wu and David F. Gleich},
  journal = { {SIAM} Journal on Scientific Computing},
  pages = {A3449--A3475},
  volume = {41},
  year = {2019},
  month = jan,
  number = {6},
  arxiv = {http://arxiv.org/abs/1608.04361},
  doi = {10.1137/18M121527X},
  file = {:Wu 2019 - multiway mc.pdf},
  mysoftware = {https://github.com/wutao27/multi-way-MC},
  owner = {dgleich},
  publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})},
  timestamp = {2016.08.16}
}

@inproceedings{Wu-2017-retrospective-higher-order,
  title = {Retrospective Higher-Order Markov Processes for User Trails},
  author = {Wu, Tao and Gleich, David F.},
  booktitle = {Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  year = {2017},
  address = {New York, NY, USA},
  pages = {1185--1194},
  publisher = {ACM},
  series = {KDD '17},
  acmid = {3098127},
  arxiv = {https://arxiv.org/abs/1704.05982},
  doi = {10.1145/3097983.3098127},
  file = {:Wu 2017 - rhomp.pdf},
  isbn = {978-1-4503-4887-4},
  keywords = {higher-order markov chains, tensor factorization, user models},
  location = {Halifax, NS, Canada},
  mynote = {Acceptance rate: 131/748 = 17.5%},
  mysoftware = {https://github.com/wutao27/RHOMP},
  numpages = {10},
  owner = {dgleich},
  timestamp = {2017.03.22}
}

@inproceedings{Yin-2017-local-motif,
  title = {Local Higher-Order Graph Clustering},
  author = {Yin, Hao and Benson, Austin R. and Leskovec, Jure and Gleich, David F.},
  booktitle = {Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  year = {2017},
  address = {New York, NY, USA},
  pages = {555--564},
  publisher = {ACM},
  series = {KDD '17},
  acmid = {3098069},
  doi = {10.1145/3097983.3098069},
  file = {:Yin 2017 - local clustering.pdf},
  isbn = {978-1-4503-4887-4},
  keywords = {clustering, community detection, higher-order structure, motifs},
  location = {Halifax, NS, Canada},
  mynote = {Acceptance rate: 131/748 = 17.5%; oral = 64/748 = 8.5%},
  mysoftware = {http://snap.stanford.edu/mappr/},
  numpages = {10},
  owner = {dgleich},
  timestamp = {2017.03.22}
}

@article{Zhu-2016-parallel-mincut,
  title = {A Parallel Min-Cut Algorithm using Iteratively Reweighted Least Squares},
  author = {Yao Zhu and David F. Gleich},
  journal = {Parallel Computing},
  pages = {43--59},
  volume = {59},
  year = {2016},
  month = {November},
  arxiv = {http://arxiv.org/abs/1501.03105},
  doi = {10.1016/j.parco.2016.02.003},
  file = {:Zhu 2016 - pirmcut.pdf},
  mysoftware = {https://www.cs.purdue.edu/homes/dgleich/codes/pirmcut/pirmcut-data.7z},
  owner = {David Gleich},
  timestamp = {2014.10.17}
}

@article{Gleich-2017-erasure,
  title = {Erasure coding for fault oblivious linear system solvers},
  author = {Yao Zhu and Ananth Grama and David F. Gleich},
  journal = {SIAM J. of Scientific Computing},
  pages = {C48-C64},
  volume = {39},
  year = {2017},
  number = {1},
  doi = {10.1137/15M1041511},
  file = {:Zhu 2017 - Erasure Coded.pdf},
  mysoftware = {https://github.com/dgleich/erasure-coded-cg},
  owner = {dgleich},
  timestamp = {2015.01.26},
  url = {http://arxiv.org/abs/1412.7364}
}

@article{Fountoulakis-2023-flow,
  author = {Kimon Fountoulakis and Meng Liu and David F. Gleich and Michael W. Mahoney},
  journal = { {SIAM} Review},
  title = {Flow-Based Algorithms for Improving Clusters: A Unifying Framework, Software, and Performance},
  year = {2023},
  month = feb,
  number = {1},
  pages = {59--143},
  volume = {65},
  arxiv = {https://arxiv.org/abs/2004.09608},
  doi = {10.1137/20m1333055},
  file = {:Fountoulakis 2023 - flow-based.pdf},
  mysoftware = {https://github.com/dgleich/flowpaper-code},
  publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})}
}

@inproceedings{Liu-2020-slq,
  author = {Meng Liu and David F. Gleich},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  title = {Strongly local p-norm-cut algorithms for semi-supervised learning and local graph clustering},
  year = {2020},
  pages = {5023--5035},
  volume = {33},
  arxiv = {http://arxiv.org/abs/2006.08569},
  file = {:Liu 2020 - strongly local p-norm.pdf},
  mynote = {Publication rate 1900/9454 (20\%)},
  mysoftware = {https://github.com/MengLiuPurdue/SLQ},
  url = {https://proceedings.neurips.cc//paper/2020/file/3501672ebc68a5524629080e3ef60aef-Paper.pdf}
}

@article{Nassar-2020-neighborhood,
  author = {Huda Nassar and Austin R. Benson and David F. Gleich},
  journal = {Social Network Analysis and Mining},
  title = {Neighborhood and PageRank Methods for Pairwise Link Prediction},
  year = {2020},
  pages = {63},
  volume = {10},
  arxiv = {https://arxiv.org/abs/1907.04503},
  doi = {10.1007/s13278-020-00671-6},
  file = {:Nassar 2020 - pairwise prediction.pdf},
  mysoftware = {https://github.com/nassarhuda/pairseed}
}

@article{Colley-2023-tensor-kron,
  author = {Colley, Charles and Nassar, Huda and Gleich, David F.},
  journal = {SIAM Journal on Matrix Analysis and Applications},
  title = {Dominant Z-Eigenpairs of Tensor Kronecker Products Decouple},
  year = {2023},
  number = {3},
  pages = {1006-1031},
  volume = {44},
  arxiv = {https://arxiv.org/abs/2011.08837},
  doi = {10.1137/22M1502008},
  file = {:Colley 2023 - tensor kronecker.pdf},
  mysoftware = {https://www.cs.purdue.edu/homes/ccolley/project_pages/TensorKroneckerProducts.html},
  owner = {dgleich},
  timestamp = {2020-12-01}
}

@inproceedings{Liu-2021-localhyper,
  author = {Meng Liu and Nate Veldt and Haoyu Song and Pan Li and David F. Gleich},
  booktitle = {Proceedings of the Web Conference 2021},
  title = {Strongly Local Hypergraph Diffusions for Clustering and Semi-supervised Learning},
  year = {2021},
  pages = {2092-2103},
  series = {WWW '21},
  arxiv = {https://arxiv.org/abs/2011.07752},
  doi = {10.1145/3442381.3449887},
  file = {:Liu 2021 localhyper.PDF},
  keywords = {PageRank, hypergraph, community detection, local clustering},
  location = {Ljubljana, Slovenia},
  mynote = {357 of 1736 papers (20.6\%)},
  mysoftware = {https://github.com/MengLiuPurdue/LHQD},
  numpages = {12},
  owner = {dgleich},
  timestamp = {2020-12-01}
}

@article{Benson-2021-higher-order,
  author = {Austin R. Benson and David F. Gleich and Desmond J. Higham},
  journal = {SIAM News},
  title = {Higher-order Network Analysis Takes Off, Fueled by Classical Ideas and New Data},
  year = {2021},
  note = {Abbreviated version published on SIAM News, full version on arXiv},
  pages = {2103.05031},
  volume = {cs.SI},
  arxiv = {https://arxiv.org/abs/2103.05031},
  howpublished = {Abbreviated version published on SIAM News \url{https://sinews.siam.org/Details-Page/higher-order-network-analysis-takes-off-fueled-by-old-ideas-and-new-data} },
  owner = {dgleich},
  timestamp = {2021-05-17},
  url = {https://sinews.siam.org/Details-Page/higher-order-network-analysis-takes-off-fueled-by-old-ideas-and-new-data}
}

@article{Jones-2022-cliques,
  author = {Landon R. Jones and Robert K. Swihart and David F. Gleich and Geriann Albers and Scott A. Johnson and Cassie M. Hudson and Patrick A. Zollner},
  journal = {Landscape Ecology},
  title = {Estimating statewide carrying capacity of bobcats (Lynx rufus) using improved maximum clique algorithms},
  year = {2022},
  month = {jul},
  doi = {10.1007/s10980-022-01460-6},
  file = {:Jones 2022 - Carrying Capacity with Cliques.pdf},
  owner = {dgleich},
  publisher = {Springer Science and Business Media {LLC} }
}

@article{Benson-2022-fauci-email,
  author = {Benson, Austin R. and Veldt, Nate and Gleich, David F.},
  journal = {Proceedings of the International AAAI Conference on Web and Social Media},
  title = {Fauci-Email: A {JSON} Digest of {Anthony} {Fauci}'s Released Emails},
  year = {2022},
  month = may,
  number = {1},
  pages = {1208-1217},
  volume = {16},
  abstractnote = {A collection of over 3000 pages of emails sent by Anthony Fauci and his staff were released in an effort to understand the United States government response to the COVID-19 pandemic. In this paper, we describe how the original PDF document of emails was translated into a resource consisting of json files that make many future studies easy. We include examples for how to convert this email information into a network, a hypergraph, a temporal sequence, and a tensor for subsequent analysis, and discuss use cases and benefits in analyzing the data in these different derived formats. These resources are broadly useful for future research and pedagogical uses in terms of human and system behavioral interactions.},
  arxiv = {https://arxiv.org/abs/2108.01239},
  file = {:Benson 2022 - fauci-email.pdf},
  mysoftware = {https://github.com/nveldt/fauci-email},
  owner = {dgleich},
  url = {https://ojs.aaai.org/index.php/ICWSM/article/view/19371}
}

@article{Shur-2023-flexible-pagerank,
  author = {Shur, Disha and Huang, Yufan and Gleich, David F.},
  journal = {Journal of Applied and Computational Topology},
  title = {A flexible PageRank-based graph embedding framework closely related to spectral eigenvector embeddings},
  year = {2023},
  arxiv = {https://arxiv.org/abs/2207.11321},
  doi = {https://doi.org/10.1007/s41468-023-00129-6},
  file = {:Shur 2023 - fleible PageRank embedding.pdf},
  mysoftware = {https://github.com/dishashur/log-pagerank},
  owner = {dgleich}
}

@article{Liu-2023-topological,
  author = {Liu, Meng and Dey, Tamal K. and Gleich, David F.},
  journal = {Nature Machine Intelligence},
  title = {Topological structure of complex predictions},
  year = {2023},
  pages = {1382-1389},
  volume = {5},
  arxiv = {https://arxiv.org/abs/2207.14358},
  file = {:Liu 2023 - topological.pdf},
  mysoftware = {https://github.com/MengLiuPurdue/Graph-Topological-Data-Analysis},
  owner = {dgleich},
  url = {https://arxiv.org/abs/2207.14358}
}

@article{Ravindra-2022-tkde,
  author = {Vikram Ravindra and Huda Nassar and David F. Gleich and Ananth Grama},
  journal = {Transactions on Knowledge and Data Engineering},
  title = {Aligning Spatially Constrained Graphs},
  year = {2022},
  pages = {1-13},
  doi = {10.1109/TKDE.2022.3206823},
  file = {:Ravindra 2022 - spatial alignment.pdf},
  owner = {dgleich}
}

@inproceedings{Huang-2023-mucond-lrsdp,
  author = {Yufan Huang and C. Seshadhri and David F. Gleich},
  booktitle = {Proceedings of the 40th International Conference on Machine Learning},
  title = {Theoretical bounds on the network community profile from low-rank semi-definite programming},
  year = {2023},
  pages = {13976-13992},
  series = {ICML 2023},
  arxiv = {https://arxiv.org/abs/2303.14550},
  file = {:Huang 2023 - ncp low-rank sdp.pdf},
  mysoftware = {https://github.com/luotuoqingshan/mu-conductance-low-rank-sdp},
  owner = {dgleich}
}

@inproceedings{Huang-2024-densest-subhypergraph,
  author = {Yufan Huang and David F. Gleich and Nate Veldt},
  booktitle = {Proceedings of the ACM on Web Conference 2024},
  title = {Densest Subhypergraph: Negative Supermodular Functions and Strongly Localized Methods},
  year = {2024},
  month = may,
  pages = {881?892},
  publisher = {Association for Computing Machinery},
  abstract = {Dense subgraph discovery is a fundamental primitive in graph and hypergraph analysis which among other applications has been used for real-time story detection on social media and improving access to data stores of social networking systems. We present several contributions for localized densest subgraph discovery, which seeks dense subgraphs located nearby given seed sets of nodes. We first introduce a generalization of a recent anchored densest subgraph problem, extending this previous objective to hypergraphs and also adding a tunable locality parameter that controls the extent to which the output set overlaps with seed nodes. Our primary technical contribution is to prove when it is possible to obtain a strongly-local algorithm for solving this problem, meaning that the runtime depends only on the size of the input set. We provide a strongly-local algorithm that applies whenever the locality parameter is not too small, and show via counterexample why strongly-local algorithms are impossible below a certain threshold. Along the way to proving our results for localized densest subgraph discovery, we also provide several advances in solving global dense subgraph discovery objectives. This includes the first strongly polynomial time algorithm for the densest supermodular set problem and a flow-based exact algorithm for a heavy and dense subgraph discovery problem in graphs with arbitrary node weights. We demonstrate our algorithms on several web-based data analysis tasks.},
  arxiv = {https://arxiv.org/abs/2310.13792},
  doi = {10.1145/3589334.3645624},
  file = {:Huang 2024 - densest subhypergraph.pdf},
  isbn = {9798400701719},
  mysoftware = {https://github.com/luotuoqingshan/local-DHSG},
  owner = {dgleich}
}

@inproceedings{Eldaghar-2024-hypergraph-epi,
  author = {Omar Eldaghar and Yu Zhu and David F. Gleich},
  booktitle = {The 13th International Conference on Complex Networks and their Applications},
  title = {A spatial hypergraph model where epidemic spread demonstrates clear higher-order effects},
  year = {2024},
  abstract = {We demonstrate a spatial hypergraph model that allows us to vary the amount of higher-order structure in the generated hypergraph. Specifically, we can vary from a model that is a pure pairwise graph into a model that is almost a pure hypergraph. We use this spatial hypergraph model to study higher-order effects in epidemic spread. We use a susceptible-infected-recovered-susceptible (SIRS) epidemic model designed to mimic the spread of an airborne pathogen. We study three types of airborne effects that emulate airborne dilution effects. For the scenario of linear dilution, which roughly corresponds to constant ventilation per person as required in many building codes, we see essentially no impact from introducing small hyperedges up to size 15 whereas we do see effects when the hyperedge set is dominated by large hyperedges. Specifically, we track the mean infections after the SIRS epidemic has run for a while so it is in a ?steady state? and find the mean is higher in the large hyperedge regime whereas it is unchanged from pairwise to small hyperedge regime.},
  arxiv = {https://arxiv.org/abs/2410.12688},
  mysoftware = {https://github.com/oeldaghar/spatial-hypergraph-epidemics}
}

@misc{Andrew-2004-list-access,
  author = {Kevin Andrew and David F. Gleich},
  howpublished = {Advanced Algorithms, Harvey Mudd College, Final Project},
  title = {MTF , BIT , and COMB: A Guide to Deterministic and Randomized Online Algorithms for the List Access Problem},
  year = {2004},
  abstract = {In this survey, we discuss two randomized online algorithms for the list access problem. First, we review competitive analysis and show that the MTF algorithm is 2-competitive using a potential function. Then, we introduce randomized competitive analysis and the associated adversary models. We show that the randomized BIT algorithm is 7/4-competitive using a potential function argument. We then introduce the pairwise property and the TIMESTAMP algorithm to show that the COMB algorithm, a COMBination of the BIT and TIMEST AMP algorithms, is 8/5-competitive. COMB is the best known randomized algorithm for the list access program.},
  file = {:Andrew 2004 - List Access.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02}
}

@misc{gleich2003-clinic,
  title = {Three Methods for Improving Relevance in Web Search.},
  author = {Erin Bodine and David F. Gleich and Cathy Kurata and Jordan Kwan and Lesley Ward and Daniel Fain},
  howpublished = {Clinic Report, Harvey Mudd College},
  month = {May 9},
  note = {102 pages. Includes fully documented program code on accompanying CD.},
  year = {2003},
  abstract = {The 2002–2003 Overture clinic project evaluated and implemented three different methods for improving relevance ordering in web search. The three methods were bottom up micro information unit (MIU) analysis, top down MIU analysis, and proximity scoring. We ran these three methods on the top 200 web pages returned for each of 58 queries by an already existing algorithmic search engine. We used two metrics, precision and relevance ordering, to evaluate the results. Precision deals with how relevant the web page is for a given query, while relevance ordering is how well-ordered the returned results are. We evaluated the precision of each method and of the algorithmic search engine by hand. For relevance ordering, we recruited other humans to compare pages and used their decisions to generate an ideal ranking for each query. The results of each of our methods and of the algorithmic search engine are then compared to this ideal ranking vector using Kendall’s Tau. Our bottom up MIU analysis method achieved the highest precision score of 0.78 out of 1.00. In addition, bottom up MIU analysis received the second highest correlation coefficient (or relevance ordering score) of 0.107 while the algorithmic search engine received the highest correlation coefficient of 0.121. Interestingly, our proximity scoring method received high relevance ordering scores when the algorithmic search engine received low relevance ordering scores.},
  file = {:bodine2003 - clinic, improved relevance.pdf},
  key = {BGK+2003},
  keywords = {self, web search},
  owner = {David Gleich},
  timestamp = {2006.10.13}
}

@article{Constantine-preprint-active,
  author = {Paul G. Constantine and David F. Gleich},
  journal = {arXiv},
  title = {Computing Active Subspaces},
  year = {2014},
  pages = {1408.0545},
  volume = {math.NA},
  arxiv = {http://arxiv.org/abs/1408.0545},
  owner = {dgleich},
  timestamp = {2014.08.19},
  url = {http://arxiv.org/abs/1408.0545}
}

@misc{Gleich-2005-directed-spectral,
  title = {Hierarchical Directed Spectral Graph Partitioning},
  author = {David F. Gleich},
  howpublished = {Information Networks, Stanford University, Final Project, 2005},
  year = {2006},
  abstract = {In this report, we examine the generalization of the Laplacian of a graph due to Fan Chung. We show that Fan Chung’s generalization reduces to examining one particular symmetrization of the adjacency matrix for a directed graph. From this result, the directed Cheeger bounds trivially follow. Additionally, we implement and examine the beneﬁts of directed hierarchical spectral clustering empirically on a dataset from Wikipedia. Finally, we examine a set of competing heuristic methods on the same dataset.},
  file = {:Gleich 2005 - hierarchical directed spectral.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02},
  url = {http://www.cs.purdue.edu/homes/dgleich/publications/Gleich 2005 - hierarchical directed spectral.pdf}
}

@misc{Gleich-2005-finite-calculus,
  title = {Finite Calculus: A tutorial for solving nasty sums},
  author = {David F. Gleich},
  howpublished = {Combinatorics, Final Paper, Stanford University, 2004.},
  year = {2005},
  file = {:Gleich 2005 - finite calculus.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02}
}

@misc{Gleich-2003-computer-chess,
  title = {Machine Learning in Computer Chess: Genetic Programming and KRK},
  author = {David F. Gleich},
  howpublished = {Independent Study Report, Harvey Mudd College},
  year = {2003},
  abstract = {In this paper, I describe genetic programming as a machine learning paradigm and evaluate its results in attempting to learn basic chess rules. Genetic programming exploits a simulation of Darwinian evolution to construct programs. When applied to the King-Rook-King (KRK) chess endgame problem, genetic programming shows promising results in spite of a lack of signiﬁcant chess knowledge.},
  file = {:Gleich 2003 - Machine Learning in Computer Chess.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02}
}

@inproceedings{gleich2007-pagerank-deriv,
  title = {Three results on the {PageRank} vector: eigenstructure, sensitivity, and the derivative},
  author = {David F. Gleich and Peter Glynn and Gene H. Golub and Chen Greif},
  booktitle = {Web Information Retrieval and Linear Algebra Algorithms},
  year = {2007},
  editor = {Andreas Frommer and Michael W. Mahoney and Daniel B. Szyld},
  number = {07071},
  publisher = {Internationales Begegnungs- und Forschungszentrum fuer Informatik (IBFI), Schloss Dagstuhl, Germany},
  series = {Dagstuhl Seminar Proceedings},
  abstract = {The three results on the PageRank vector are preliminary but shed light on the eigenstructure of a PageRank modified Markov chain and what happens when changing the teleportation parameter in the PageRank model. Computations with the derivative of the PageRank vector with respect to the teleportation parameter show predictive ability and identify an interesting set of pages from Wikipedia.},
  file = {:gleich 2007 - pagerank derivative and sensitivity.pdf},
  issn = {1862-4405},
  key = {GGGG2007},
  keywords = {self, PageRank, PageRank derivative, PageRank sensitivity, PageRankeigenstructure},
  optaddress = {Dagstuhl, Germany},
  optannote = {Keywords: PageRank, PageRank derivative, PageRank sensitivity, PageRank eigenstructure},
  opturl = {http://drops.dagstuhl.de/opus/volltexte/2007/1061 [date of citation: 2007-01-01]},
  owner = {David Gleich},
  timestamp = {2008.03.04},
  url = {http://drops.dagstuhl.de/opus/volltexte/2007/1061}
}

@misc{Gleich-2006-wom,
  title = {The World of Music: User Ratings; Spectral and Spherical Embeddings; Map Projections},
  author = {David F. Gleich and Matthew Rasmussen and Kevin Lang and Leonid Zhukov},
  howpublished = {Online report},
  year = {2006},
  abstract = {In this paper we present an algorithm for layout and visualization of music collections based on similarities between musical artists. The core of the algorithm consists of a non-linear low dimensional embedding of a similarity graph constrained to the surface of a hyper-sphere. This approach effectively uses additional dimensions in the embedding. We derive the algorithm using a simple energy minimization procedure and show the relationships to several well known eigenvector based methods. We also describe a method for constructing a similarity graph from user ratings, as well as procedures for mapping the layout from the hyper-sphere to a 2d display. We demonstrate our techniques on Yahoo! Music user ratings data and a MusicMatch artist similarity graph.},
  file = {:Gleich 2006 - wom.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02},
  url = {http://www.cs.purdue.edu/homes/dgleich/publications/Gleich 2006 - wom.pdf}
}

@techreport{gleich2004-parallel,
  title = {Fast Parallel {PageRank}: A Linear System Approach},
  author = {David F. Gleich and Leonid Zhukov and Pavel Berkhin},
  institution = {Yahoo! Research Labs},
  year = {2004},
  number = {YRL-2004-038},
  abstract = {In this paper we investigate the convergence of iterative stationary and Krylov subspace methods for the PageRank linear system, including the convergence dependency on teleportation. We demonstrate that linear system iterations converge faster than the simple power method and are less sensitive to the changes in teleportation. In order to perform this study we developed a framework for parallel PageRank computing. We describe the details of the parallel implementation and provide experimental results obtained on a 70-node Beowulf cluster.},
  file = {:gleich2004-parallel.pdf},
  key = {GZB2006},
  keywords = {self, pagerank},
  mysoftware = {https://github.com/dgleich/ppagerank},
  owner = {David Gleich},
  timestamp = {2006.11.29},
  url = {http://www.cs.purdue.edu/homes/dgleich/publications/gleich2004-parallel.pdf}
}

@article{Stinson-preprint-zonotopes,
  title = {A randomized algorithm for enumerating zonotope vertices},
  author = {Kerrek Stinson and David F. Gleich and Paul G. Constantine},
  journal = {arXiv},
  pages = {1602.06620},
  volume = {math.NA},
  year = {2016},
  owner = {dgleich},
  timestamp = {2016.02.22},
  url = {http://arxiv.org/abs/1602.06620}
}

@misc{Zhukov-2004-soft-clustering,
  title = {Topic Identiﬁcation in Soft Clustering using PCA and ICA},
  author = {Leonid Zhukov and David F. Gleich},
  howpublished = {Online report, Yahoo! research labs},
  year = {2004},
  abstract = {Many applications can benefit from soft clustering, where each datum is assigned to multiple clusters with membership weights that sum to one. In this paper we present a comparison of principal component analysis (PCA) and independent component analysis (ICA) when used for soft clustering. We provide a short mathematical background for these methods and demonstrate their application to a sponsored links search listings dataset. We present examples of the soft clusters generated by both methods and compare the results.},
  file = {:Zhukov 2004 - soft clustering pca and ica.pdf},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02}
}

@article{Gleich-2011-review-of-kamvar,
  title = {Review of: {Numerical algorithms for personalized search in self-organizing information networks by Sep Kamvar, Princeton Univ. Press, 2010, 160pp., ISBN13: 978-0-691-14503-7} },
  author = {David F. Gleich},
  journal = {Linear Algebra and its Applications},
  pages = {908 - 909},
  volume = {435},
  year = {2011},
  number = {4},
  doi = {10.1016/j.laa.2011.01.013},
  file = {:Gleich 2011 - review of kamvar.pdf},
  issn = {0024-3795},
  keywords = {self},
  owner = {David F. Gleich},
  timestamp = {2011.07.02}
}

@incollection{Constantine-preprint-pcam,
  title = {Ranking Web Pages},
  author = {David F. Gleich and Paul G. Constantine},
  booktitle = {The Princeton Companion to Applied Mathematics},
  publisher = {Princeton University Press},
  year = {2015},
  address = {Princeton, NJ, USA},
  editor = {Nicholas J. Higham and Mark R. Dennis and Paul Glendinning and Paul A. Martin and Fadil Santosa and Jared Tanner},
  pages = {755-757},
  owner = {David Gleich},
  timestamp = {2013.09.09}
}

@phdthesis{gleich2009-thesis,
  title = {Models and Algorithms for {PageRank} Sensitivity},
  author = {David F. Gleich},
  school = {Stanford University},
  year = {2009},
  month = {September},
  abstract = {The PageRank model helps evaluate the relative importance of nodes in a large graph, such as the graph of links on the world wide web. An important piece of the PageRank model is the teleportation parameter  $\alpha$ . We explore the interaction between  $\alpha$  and PageRank through the lens of sensitivity analysis. Writing the PageRank vector as a function of  $\alpha$  allows us to take a derivative, which is a simple sensitivity measure. As an alternative approach, we apply techniques from the field of uncertainty quantification. Regarding  $\alpha$  as a random variable produces a new PageRank model in which each PageRank value is a random variable. We explore the standard deviation of these variables to get another measure of PageRank sensitivity. One interpretation of this new model shows that it corrects a small oversight in the original PageRank formulation. Both of the above techniques require solving multiple PageRank problems, and thus a robust PageRank solver is needed. We discuss an inner-outer iteration for this purpose. The method is low-memory, simple to implement, and has excellent performance for a range of teleportation parameters. We show empirical results with these techniques on graphs with over 2 billion edges.},
  file = {:gleich 2009 - pagerank thesis.pdf},
  keywords = {self},
  owner = {David Gleich},
  timestamp = {2009.09.13},
  url = {http://www.stanford.edu/group/SOL/dissertations/pagerank-sensitivity-thesis-online.pdf}
}

Social sites

Older social sites