@article{Benson-preprint-dynsys, title = {Computing tensor Z-eigenvectors with dynamical systems}, author = {Austin Benson and David F. Gleich}, journal = {arXiv}, pages = {1805.00903}, volume = {math.NA}, year = {2018}, owner = {David Gleich}, timestamp = {2018.05.07}, url = {https://arxiv.org/abs/1805.00903} }

@article{Constantine-preprint-active, title = {Computing Active Subspaces}, author = {Paul G. Constantine and David F. Gleich}, journal = {arXiv}, pages = {1408.0545}, volume = {math.NA}, year = {2014}, owner = {dgleich}, timestamp = {2014.08.19}, url = {http://arxiv.org/abs/1408.0545} }

@article{Ramani-preprint-fast-graph-sampling, title = {Coin-flipping, ball-dropping, and grass-hopping for generating random graphs from matrices of probabilities}, author = {Arjun S. Ramani and Nicole Eikmeier and David F. Gleich}, journal = {arXiv}, pages = {1709.03438}, volume = {cs.SI}, year = {2017}, howpublished = {Submitted}, owner = {dgleich}, timestamp = {2017.08.23}, url = {https://arxiv.org/abs/1709.03438} }

@article{Stinson-preprint-zonotopes, title = {A randomized algorithm for enumerating zonotope vertices}, author = {Kerrek Stinson and David F. Gleich and Paul G. Constantine}, journal = {arXiv}, pages = {1602.06620}, volume = {math.NA}, year = {2016}, owner = {dgleich}, timestamp = {2016.02.22}, url = {http://arxiv.org/abs/1602.06620} }

@article{Veldt-preprint-metric-lps, title = {A Projection Method for Metric-Constrained Optimization}, author = {Nate Veldt and David Gleich and Anthony Wirth and James Saunderson}, journal = {arXiv}, pages = {1806.01678}, volume = {cs.NA}, year = {2018}, arxiv = {https://arxiv.org/abs/1806.01678}, owner = {dgleich}, timestamp = {2018.06.06}, url = {https://arxiv.org/abs/1806.01678} }

@article{Wu-preprint-mwmc, title = {Multi-way Monte Carlo Method for Linear Systems}, author = {Tao Wu and David F. Gleich}, journal = {arXiv}, pages = {1608.04361}, volume = {cs.NA}, year = {2016}, mysoftware = {https://github.com/wutao27/multi-way-MC}, owner = {dgleich}, timestamp = {2016.08.16}, url = {http://arxiv.org/abs/1608.04361} }

@article{Whang-2019-neo-k-means, title = {Non-exhaustive, Overlapping Clustering}, author = {Joyce Jiyoung Whang and Yangyang Hou and David F. Gleich and Inderjit Dhillon}, journal = {Transactions on Pattern Analysis and Machine Intelligence}, year = {2019}, note = {Accepted.}, abstract = {Traditional clustering algorithms, such as K-Means, output a clustering that is disjoint and exhaustive, i.e., every single data point is assigned to exactly one cluster. However, in many real-world datasets, clusters can overlap and there are often outliers that do not belong to any cluster. While this is a well-recognized problem, most existing algorithms address either overlap or outlier detection and do not tackle the problem in a unified way. In this paper, we propose an intuitive objective function, which we call the NEO-K-Means (Non-Exhaustive, Overlapping K-Means) objective, that captures the issues of overlap and non-exhaustiveness in a unified manner. Our objective function can be viewed as a reformulation of the traditional K-Means objective, with easy-to-understand parameters that capture the degrees of overlap and non-exhaustiveness. By considering an extension to weighted kernel K-Means, we show that we can also apply our NEO-K-Means idea to overlapping community detection, which is an important task in network analysis. To optimize the NEO-K-Means objective, we develop not only fast iterative algorithms but also more sophisticated algorithms using low-rank semidefinite programming techniques. Our experimental results show that the new objective and algorithms are effective in finding ground-truth clusterings that have varied overlap and non-exhaustiveness.}, doi = {10.1109/TPAMI.2018.2863278}, keywords = {Clustering algorithms;Linear programming;Kernel;Iterative methods;Computer science;Anomaly detection;Task analysis;Overlapping clustering;K-Means;outlier;semidefinite programming;graph clustering;community detection}, owner = {dgleich}, timestamp = {2016.12.02} }

@inproceedings{Andersen-2012-overlapping, title = {Overlapping clusters for distributed computation}, author = {Andersen, Reid and Gleich, David F. and Mirrokni, Vahab}, booktitle = {Proceedings of the fifth ACM international conference on Web search and data mining}, year = {2012}, address = {New York, NY, USA}, month = feb, pages = {273--282}, publisher = {ACM}, series = {WSDM '12}, acmid = {2124330}, doi = {10.1145/2124295.2124330}, file = {:Andersen 2012 - overlapping.pdf}, isbn = {978-1-4503-0747-5}, keywords = {conductance, covering, diffusion, leave-time, pagerank}, location = {Seattle, Washington, USA}, mynote = {Acceptance rate 20.7\% = 75/362 (and also presentation = 30/362 = 8.2%)}, numpages = {10}, owner = {David F. Gleich}, timestamp = {2011.07.01}, url = {http://doi.acm.org/10.1145/2124295.2124330} }

@inproceedings{Andersen-2011-overlapping-csc, title = {Overlapping clusters for distributed computation}, author = {Andersen, Reid and Gleich, David F. and Mirrokni, Vahab S}, booktitle = {Poster proceedings of the SIAM Workshop on Combinatorial and Scientific Computing (CSC)}, year = {2011}, note = {Poster.}, file = {:Andersen 2011 - overlapping poster.pdf}, keywords = {self}, owner = {dgleich}, timestamp = {2011.10.26} }

@inproceedings{bayati2009-network-alignment, title = {Algorithms for Large, Sparse Network Alignment Problems}, author = {Mohsen Bayati and Margot Gerritsen and David F. Gleich and Amin Saberi and Ying Wang}, booktitle = {Proceedings of the 9th IEEE International Conference on Data Mining}, year = {2009}, month = {December}, pages = {705-710}, abstract = {We propose a new distributed algorithm for sparse variants of the network alignment problem that occurs in a variety of data mining areas including systems biology, database matching, and computer vision. Our algorithm uses a belief propagation heuristic and provides near optimal solutions for an NP-hard combinatorial optimization problem. We show that our algorithm is faster and outperforms or nearly ties existing algorithms on synthetic problems, a problem in bioinformatics, and a problem in ontology matching. We also provide a unified framework for studying and comparing all network alignment solvers.}, doi = {10.1109/ICDM.2009.135}, eprint = {0907.3338}, file = {:bayati 2009 - network alignment.pdf}, mynote = {Acceptance rate 140/786 = 17.8\%}, owner = {David F. Gleich}, timestamp = {2009.07.20} }

@article{Bayati-2013-netalign, title = {Message-Passing Algorithms for Sparse Network Alignment}, author = {Mohsen Bayati and David F. Gleich and Amin Saberi and Ying Wang}, journal = {ACM Trans. Knowl. Discov. Data}, pages = {3:1--3:31}, volume = {7}, year = {2013}, month = mar, number = {1}, abstract = {Network alignment generalizes and unifies several approaches for forming a matching or alignment between the vertices of two graphs. We study a mathematical programming framework for network alignment problem and a sparse variation of it where only a small number of matches between the vertices of the two graphs are possible. We propose a new message passing algorithm that allows us to compute, very efficiently, approximate solutions to the sparse network alignment problems with graph sizes as large as hundreds of thousands of vertices. We also provide extensive simulations comparing our algorithms with two of the best solvers for network alignment problems on two synthetic matching problems, two bioinformatics problems, and three large ontology alignment problems including a multilingual problem with a known labeled alignment.}, acmid = {2435212}, address = {New York, NY, USA}, articleno = {3}, doi = {10.1145/2435209.2435212}, file = {:Bayati 2013 - sparse belief propagation.pdf}, issn = {1556-4681}, issue_date = {March 2013}, keywords = {Network alignment, belief propagation, graph matching, message-passing}, mysoftware = {https://www.cs.purdue.edu/homes/dgleich/codes/netalign/}, numpages = {31}, owner = {David F. Gleich}, publisher = {ACM}, timestamp = {2011.07.01} }

@inproceedings{Benson-2013-direct-tsqr, title = {Direct Tall-and-skinny {QR} factorizations in {MapReduce} architectures}, author = {Benson, A.R. and Gleich, D.F. and Demmel, J.}, booktitle = {Big Data, 2013 IEEE International Conference on}, year = {2013}, month = oct, pages = {264--272}, abstract = {The QR factorization and the SVD are two fundamental matrix decompositions with applications throughout scientific computing and data analysis. For matrices with many more rows than columns, so-called ``tall-and-skinny matrices,'' there is a numerically stable, efficient, communication-avoiding algorithm for computing the QR factorization. It has been used in traditional high performance computing and grid computing environments. For MapReduce environments, existing methods to compute the QR decomposition use a numerically unstable approach that relies on indirectly computing the Q factor. In the best case, these methods require only two passes over the data. In this paper, we describe how to compute a stable tall-and-skinny QR factorization on a MapReduce architecture in only slightly more than 2 passes over the data. We can compute the SVD with only a small change and no difference in performance. We present a performance comparison between our new direct TSQR method, indirect TSQR methods that use the communication-avoiding TSQR algorithm, and a standard unstable implementation for MapReduce (Cholesky QR). We find that our new stable method is competitive with unstable methods for matrices with a modest number of columns. This holds both in a theoretical performance model as well as in an actual implementation.}, doi = {10.1109/BigData.2013.6691583}, file = {:Benson 2013 - direct-tsqr.pdf}, keywords = {parallel programming;singular value decomposition;Cholesky QR;MapReduce architectures;Q factor;SVD;communication-avoiding TSQR algorithm;data analysis;direct QR factorizations;grid computing;high performance computing;matrix decompositions;quick response factorizations;scientific computing;singular value decomposition;tall-and-skinny matrices;Algorithm design and analysis;Clustering algorithms;Matrix decomposition;Prediction algorithms;Q-factor;Standards;Symmetric matrices;Hadoop;MapReduce;QR;SVD;TSQR;matrix factorization}, mynote = {Acceptance rate 45/259 = 17\%}, mysoftware = {https://github.com/arbenson/mrtsqr}, owner = {David F. Gleich}, timestamp = {2012.10.04} }

@article{Benson-2016-motif-spectral, title = {Higher-order organization of complex networks}, author = {Austin Benson and David F. Gleich and Jure Leskovec}, journal = {Science}, pages = {163--166}, volume = {353}, year = {2016}, number = {6295}, doi = {10.1126/science.aad9029}, file = {:Benson 2016 - spectral motifs.pdf}, howpublished = {Submitted}, mysoftware = {http://snap.stanford.edu/higher-order/}, owner = {dgleich}, supplementary = {http://science.sciencemag.org/content/suppl/2016/07/07/353.6295.163.DC1?_ga=2.30502463.1893086197.1534945246-1536034508.1533753067}, timestamp = {2015.05.19} }

@article{Benson-2017-srw, title = {The Spacey Random Walk: a Stochastic Process for Higher-order Data}, author = {Austin Benson and David F. Gleich and Lek-Heng Lim}, journal = {SIAM Review}, pages = {321-345}, volume = {59}, year = {2017}, month = {May}, number = {2}, arxiv = {http://arxiv.org/abs/1602.02102}, doi = {10.1137/16M1074023}, howpublished = {arXiv}, mysoftware = {https://github.com/arbenson/spacey-random-walks}, owner = {David F. Gleich}, timestamp = {2014.08.21}, url = {http://arxiv.org/abs/1602.02102} }

@inproceedings{Benson-2015-tensor, title = {Tensor Spectral Clustering for Partitioning Higher-order Network Structures}, author = {Austin R. Benson and David F. Gleich and Jure Leskovec}, booktitle = {Proceedings of the 2015 SIAM International Conference on Data Mining}, year = {2015}, pages = {118-126}, abstract = {Spectral graph theory-based methods represent an important class of tools for studying the structure of networks. Spectral methods are based on a first-order Markov chain derived from a random walk on the graph and thus they cannot take advantage of important higher-order network substructures such as triangles, cycles, and feed-forward loops. Here we propose a Tensor Spectral Clustering (TSC) algorithm that allows for modeling higher-order network structures in a graph partitioning framework. Our TSC algorithm allows the user to specify which higher-order network structures (cycles, feed-forward loops, etc.) should be preserved by the network clustering. Higher-order network structures of interest are represented using a tensor, which we then partition by developing a multilinear spectral method. Our framework can be applied to discovering layered flows in networks as well as graph anomaly detection, which we illustrate on synthetic networks. In directed networks, a higher-order structure of particular interest is the directed 3-cycle, which captures feedback loops in networks. We demonstrate that our TSC algorithm produces large partitions that cut fewer directed 3-cycles than standard spectral clustering algorithms.}, doi = {10.1137/1.9781611974010.14}, file = {:Benson 2015 - tensor-sc.pdf}, location = {Vancouver, BC}, mynote = {Acceptance rate 22\%}, owner = {dgleich}, timestamp = {2015.01.19} }

@inproceedings{Benson-2014-tsnmf, title = {Scalable methods for nonnegative matrix factorizations of near-separable tall-and-skinny matrices}, author = {Austin R. Benson and Jason D. Lee and Bartek Rajwa and David F. Gleich}, booktitle = {Proceedings of Neural Information Processing Systems}, year = {2014}, note = {Selected for Spotlight Presentation.}, pages = {945--953}, journal = {arXiv}, mynote = {Acceptance rate for spotlight 3.6\% of 1678 submissions. Includes all software.}, owner = {David F. Gleich}, timestamp = {2014.02.23}, url = {http://arxiv.org/abs/1402.6964} }

@inproceedings{Bonato-2016-character, title = {Mining and modeling character networks}, author = {Anthony Bonato and David Ryan D'Angelo and Ethan R. Elenberg and David F. Gleich and Yangyang Hou}, booktitle = {International Workshop on Algorithms and Models for the Web-Graph}, year = {2016}, editor = {Bonato, Anthony and Graham, Fan Chung and Pra{\l}at, Pawe{\l} }, pages = {100-114}, publisher = {Springer International Publishing}, series = {WAW}, arxiv = {http://arxiv.org/abs/1608.00646}, doi = {10.1007/978-3-319-49787-7_9}, file = {:Bonato 2016 - character networks.pdf}, isbn = {978-3-319-49787-7}, journal = {arXiv}, owner = {dgleich}, timestamp = {2016.08.08} }

@inproceedings{Eikmeier-2018-dynamic-competition, title = {Dynamic Competition Networks: Detecting Alliances and Leaders}, author = {Bonato, Anthony and Eikmeier, Nicole and Gleich, David F. and Malik, Rehan}, booktitle = {Algorithms and Models for the Web Graph}, year = {2018}, editor = {Bonato, Anthony and Pra{\l}at, Pawe{\l} and Raigorodskii, Andrei}, pages = {115--144}, abstract = {We consider social networks of competing agents that evolve dynamically over time. Such dynamic competition networks are directed, where a directed edge from nodes u to v corresponds a negative social interaction. We present a novel hypothesis that serves as a predictive tool to uncover alliances and leaders within dynamic competition networks. Our focus is in the present study is to validate it on competitive networks arising from social game shows such as Survivor and Big Brother.}, arxiv = {https://arxiv.org/abs/1803.01783}, doi = {10.1007/978-3-319-92871-5_9}, file = {:Eikmeier 2018 - dynamic competition.pdf}, isbn = {978-3-319-92871-5}, owner = {dgleich}, timestamp = {2018.06.06} }

@article{Bonato-2014-dimmatch, title = {Dimensionality of Social Networks Using Motifs and Eigenvalues}, author = {Anthony Bonato and David F. Gleich and Myunghwan Kim and Dieter Mitsche and {Pawe\l} {Pra\l{}at} and Amanda Tian and Stephen J. Young}, journal = {PLoS ONE}, pages = {e106052}, volume = {9}, year = {2014}, month = {September}, number = {9}, abstract = {We consider the dimensionality of social networks, and develop experiments aimed at predicting that dimension. We find that a social network model with nodes and links sampled from an

}, doi = {10.1371/journal.pone.0106052}, file = {:Bonato 2014 - dimensionality.pdf}, howpublished = {Submitted.}, keywords = {self}, mysoftware = {https://www.cs.purdue.edu/homes/dgleich/codes/geop-dim/geop-dim.tar.gz}, owner = {dgleich}, publisher = {Public Library of Science}, timestamp = {2014.02.13} }m -dimensional metric space with power-law distributed influence regions best fits samples from real-world networks whenm scales logarithmically with the number of nodes of the network. This supports a logarithmic dimension hypothesis, and we provide evidence with two different social networks, Facebook and LinkedIn. Further, we employ two different methods for confirming the hypothesis: the first uses the distribution of motif counts, and the second exploits the eigenvalue distribution.

@article{Bonchi-2012-fast-katz, title = {Fast Matrix Computations for Pairwise and Columnwise Commute Times and {Katz} Scores}, author = {Bonchi, Francesco and Esfandiar, Pooya and Gleich, David F. and Greif, Chen and Lakshmanan, Laks V.S.}, journal = {Internet Mathematics}, pages = {73--112}, volume = {8}, year = {2012}, number = {1-2}, abstract = {We explore methods for approximating the commute time and Katz score between a pair of nodes. These methods are based on the approach of matrices, moments, and quadrature developed in the numerical linear algebra community. They rely on the Lanczos process and provide upper and lower bounds on an estimate of the pairwise scores. We also explore methods to approximate the commute times and Katz scores from a node to all other nodes in the graph. Here, our approach for the commute times is based on a variation of the conjugate gradient algorithm, and it provides an estimate of all the diagonals of the inverse of a matrix. Our technique for the Katz scores is based on exploiting an empirical localization property of the Katz matrix. We adapt algorithms used for personalized PageRank computing to these Katz scores and theoretically show that this approach is convergent. We evaluate these methods on 17 real-world graphs ranging in size from 1000 to 1,000,000 nodes. Our results show that our pairwise commute-time method and columnwise Katz algorithm both have attractive theoretical properties and empirical performance.}, doi = {10.1080/15427951.2012.625256}, file = {:Bonchi 2012 - fast katz.pdf}, keywords = {self}, owner = {dfgleic}, timestamp = {2011.06.30} }

@inproceedings{constantine2007-pagerank-pce, title = {Using Polynomial Chaos to Compute the Influence of Multiple Random Surfers in the {PageRank} Model}, author = {Paul G. Constantine and David F. Gleich}, booktitle = {Proceedings of the 5th Workshop on Algorithms and Models for the Web Graph ({WAW2007})}, year = {2007}, editor = {Anthony Bonato and Fan Chung Graham}, pages = {82--95}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, volume = {4863}, abstract = {The PageRank equation computes the importance of pages in a web graph relative to a single random surfer with a constant teleportation coefficient. To be globally relevant, the teleportation coefficient should account for the influence of all users. Therefore, we correct the PageRank formulation by modeling the teleportation coefficient as a random variable distributed according to user behavior. With this correction, the PageRank values themselves become random. We present two methods to quantify the uncertainty in the random PageRank: a Monte Carlo sampling algorithm and an algorithm based the truncated polynomial chaos expansion of the random quantities. With each of these methods, we compute the expectation and standard deviation of the PageRanks. Our statistical analysis shows that the standard deviation of the PageRanks are uncorrelated with the PageRank vector.}, doi = {10.1007/978-3-540-77004-6_7}, file = {:constantine2007 - pagerank pce.pdf}, key = {CG2007}, keywords = {self, pagerank, polynomial chaos,}, owner = {David Gleich}, timestamp = {2007.10.10} }

@inproceedings{Constantine-2012-svd-signal, title = {Distinguishing signal from noise in an {SVD} of simulation data}, author = {Paul G. Constantine and David F. Gleich}, booktitle = {Proceedings of the IEEE Conference on Acoustics, Speech, and Signal Processing}, year = {2012}, pages = {5333--5336}, doi = {10.1109/ICASSP.2012.6289125}, file = {:Constantine 2012 - signal svd.pdf}, howpublished = {Preprint}, owner = {David F. Gleich}, timestamp = {2011.11.28} }

@conference{Constantine-2011-MRTSQR, title = {Tall and skinny {QR} factorizations in {MapReduce} architectures}, author = {Constantine, Paul G. and Gleich, David F.}, booktitle = {Proceedings of the second international workshop on MapReduce and its applications}, year = {2011}, address = {New York, NY, USA}, month = jun, pages = {43--50}, publisher = {ACM}, series = {MapReduce '11}, acmid = {1996103}, doi = {10.1145/1996092.1996103}, file = {:Constantine 2011 - TSQR.pdf}, isbn = {978-1-4503-0700-0}, keywords = {Hadoop, QR factorization, linear regression, matrix factorization, tsqr}, location = {San Jose, California, USA}, numpages = {8}, owner = {David F. Gleich}, timestamp = {2011.06.18} }

@article{constantine2010-rapr, title = {Random Alpha {PageRank} }, author = {Paul G. Constantine and David F. Gleich}, journal = {Internet Mathematics}, pages = {189--236}, volume = {6}, year = {2010}, month = {September}, number = {2}, abstract = {We suggest a revision to the PageRank random surfer model that considers the influence of a population of random surfers on the PageRank vector. In the revised model, each member of the population has its own teleportation parameter chosen from a probability distribution, and consequently, the ranking vector is random. We propose three algorithms for computing the statistics of the random ranking vector based respectively on (i) random sampling, (ii) paths along the links of the underlying graph, and (iii) quadrature formulas. We find that the expectation of the random ranking vector produces similar rankings to its deterministic analogue, but the standard deviation gives uncorrelated information (under a Kendall-tau metric) with myriad potential uses. We examine applications of this model to web spam.}, doi = {10.1080/15427951.2009.10129185}, file = {:constantine 2010 - random alpha pagerank.pdf}, keywords = {self}, owner = {David Gleich}, timestamp = {2010.01.07}, url = {http://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.im/1285339073} }

@article{Constantine-2014-mrmr, title = {Model Reduction with {Map-Reduce}-enabled Tall and Skinny Singular Value Decomposition}, author = {Paul G. Constantine and David F. Gleich and Yangyang Hou and Jeremy Templeton}, journal = {SIAM J. Sci. Comput.}, pages = {S166-S191}, volume = {36}, year = {2014}, month = {November}, number = {5}, doi = {10.1137/130925219}, file = {:Constantine 2014 - mapreduce model reduction.pdf}, owner = {dgleich}, timestamp = {2013.06.25}, url = {http://arxiv.org/abs/1306.4690} }

@article{Constantine-2011-gni, title = {A factorization of the spectral {Galerkin} system for parameterized matrix equations: derivation and applications}, author = {Paul G. Constantine and David F. Gleich and Gianluca Iaccarino}, journal = {SIAM Journal of Scientific Computing}, pages = {2995--3009}, volume = {33}, year = {2011}, number = {5}, abstract = {Recent work has explored solver strategies for the linear system of equations arising from a spectral Galerkin approximation of the solution of PDEs with parameterized (or stochastic) inputs. We consider the related problem of a matrix equation whose matrix and right-hand side depend on a set of parameters (e.g., a PDE with stochastic inputs semidiscretized in space) and examine the linear system arising from a similar Galerkin approximation of the solution. We derive a useful factorization of this system of equations, which yields bounds on the eigenvalues, clues to preconditioning, and a flexible implementation method for a wide array of problems. We complement this analysis with (i) a numerical study of preconditioners on a standard elliptic PDE test problem and (ii) a fluids application using existing CFD codes; the MATLAB codes used in the numerical studies are available online.}, doi = {10.1137/100799046}, file = {:Constantine 2011 - spectral galerkin.pdf}, keywords = {self}, owner = {David F. Gleich}, timestamp = {2010.09.15} }

@article{constantine2010-parameterized, title = {Spectral Methods for Parameterized Matrix Equations}, author = {Paul G. Constantine and David F. Gleich and Gianluca Iaccarino}, journal = {SIAM Journal on Matrix Analysis and Applications}, pages = {2681-2699}, volume = {31}, year = {2010}, number = {5}, abstract = {We apply polynomial approximation methods—known in the numerical PDEs context as spectral methods—to approximate the vector-valued function that satisfies a linear system of equations where the matrix and the right-hand side depend on a parameter. We derive both an interpolatory pseudospectral method and a residual-minimizing Galerkin method, and we show how each can be interpreted as solving a truncated infinite system of equations; the difference between the two methods lies in where the truncation occurs. Using classical theory, we derive asymptotic error estimates related to the region of analyticity of the solution, and we present a practical residual error estimate. We verify the results with two numerical examples.}, doi = {10.1137/090755965}, file = {:constantine 2010 - parameterized matrix equations.pdf}, keywords = {parameterized systems; spectral methods; self}, owner = {David F. Gleich}, publisher = {SIAM}, timestamp = {2010.09.24} }

@inproceedings{decoste2005-recommender, title = {Recommender Systems Research at {Yahoo! Research Labs} }, author = {Dennis Decoste and David F. Gleich and Tejaswi Kasturi and Sathiya Keerthi and Omid Madani and Seung-Taek Park and David M. Pennock and Corey Porter and Sumit Sanghai and Farial Shahnaz and Leonid Zhukov}, booktitle = {Beyond Personalization}, year = {2005}, address = {San Diego, CA}, month = {January}, note = {Position Statement}, abstract = {We describe some of the ongoing projects at Yahoo! Research Labs that involve recommender systems. We discuss recommender systems related problems and solutions relevant to Yahoo!’s business.}, file = {:decoste2005 - yahoo recommender systems.pdf}, key = {DGK+05}, keywords = {self, recommender systems}, owner = {David Gleich}, timestamp = {2006.10.13} }

@inproceedings{Eikmeier-2017-power-laws, title = {Revisiting Power-law Distributions in Spectra of Real World Networks}, author = {Eikmeier, Nicole and Gleich, David F.}, booktitle = {Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, year = {2017}, address = {New York, NY, USA}, pages = {817--826}, publisher = {ACM}, series = {KDD '17}, acmid = {3098128}, doi = {10.1145/3097983.3098128}, file = {:Eikmeier 2017 - power-laws.pdf}, isbn = {978-1-4503-4887-4}, keywords = {degree distributions, graph spectrum, power-law distributions, real world networks}, location = {Halifax, NS, Canada}, mynote = {Acceptance rate: 131/748 = 17.5%}, mysoftware = {https://github.com/eikmeier/powerlaw-spectra}, numpages = {10}, owner = {dgleich}, timestamp = {2017.03.22}, url = {http://doi.acm.org/10.1145/3097983.3098128} }

@inproceedings{esfandiar2010-fast-katz, title = {Fast {Katz} and Commuters: Efficient Approximation of Social Relatedness over Large Networks}, author = {Pooya Esfandiar and Francesco Bonchi and David F. Gleich and Chen Greif and Laks V. S. Lakshmanan and Byung-Won On}, booktitle = {Algorithms and Models for the Web Graph}, year = {2010}, abstract = {Motivated by social network data mining problems such as link prediction and collaborative filtering, significant research effort has been devoted to computing topological measures including the Katz score and the commute time. Existing approaches typically approximate all pairwise relationships simultaneously. In this paper, we are interested in computing: the score for a single pair of nodes, and the top-k nodes with the best scores from a given source node. For the pairwise problem, we apply an iterative algorithm that computes upper and lower bounds for the measures we seek. This algorithm exploits a relationship between the Lanczos process and a quadrature rule. For the top-k problem, we propose an algorithm that only accesses a small portion of the graph and is related to techniques used in personalized PageRank computing. To test the scalability and accuracy of our algorithms we experiment with three real-world networks and find that these algorithms run in milliseconds to seconds without any preprocessing.}, doi = {10.1007/978-3-642-18009-5_13}, file = {:esfandiar 2010 - fast katz.pdf}, keywords = {self}, owner = {David Gleich}, timestamp = {2010.04.08} }

@article{Fountoulakis-2017-locally-biased, title = {An Optimization Approach to Locally-Biased Graph Algorithms}, author = {Kimon Fountoulakis and David F. Gleich and Michael W. Mahoney}, journal = {Proceedings of the IEEE}, pages = {256-272}, volume = {105}, year = {2017}, month = {February}, number = {2}, abstract = {Locally-biased graph algorithms are algorithms that attempt to find local or small-scale structure in a large data graph. In some cases, this can be accomplished by adding some sort of locality constraint and calling a traditional graph algorithm; but more interesting are locally-biased graph algorithms that compute answers by running a procedure that does not even look at most of the input graph. This corresponds more closely to what practitioners from various data science domains do, but it does not correspond well with the way that algorithmic and statistical theory is typically formulated. Recent work from several research communities has focused on developing locally-biased graph algorithms that come with strong complementary algorithmic and statistical theory and that are useful in practice in downstream data science applications. We provide a review and overview of this work, highlighting commonalities between seemingly different approaches, and highlighting promising directions for future work.}, arxiv = {https://arxiv.org/abs/1607.04940}, doi = {10.1109/JPROC.2016.2637349}, file = {:Fountoulakis 2017 - locally biased.pdf}, howpublished = {Submitted.}, issn = {0018-9219}, keywords = {data analysis;graph theory;learning (artificial intelligence);network theory (graphs);optimisation;pattern clustering;clustering;data science application;locally-biased graph algorithm;network flow;optimization approach;semisupervised learning;Algorithm design and analysis;Approximation algorithms;Clustering algorithms;Linear programming;Optimization;Partitioning algorithms;Clustering;graph algorithms;local algorithms;network flow;partitioning;semi-supervised learning;spectral graph theory}, owner = {dgleich}, timestamp = {2016.02.29} }

@article{Gleich-2015-prbeyond, title = { {PageRank} beyond the Web}, author = {David F. Gleich}, journal = {SIAM Review}, pages = {321--363}, volume = {57}, year = {2015}, month = {August}, number = {3}, abstract = {Google's PageRank method was developed to evaluate the importance of web-pages via their link structure. The mathematics of PageRank, however, are entirely general and apply to any graph or network in any domain. Thus, PageRank is now regularly used in bibliometrics, social and information network analysis, and for link prediction and recommendation. It's even used for systems analysis of road networks, as well as biology, chemistry, neuroscience, and physics. We'll see the mathematics and ideas that unite these diverse applications.}, doi = {10.1137/140976649}, file = {:Gleich 2015 - prbeyond.pdf}, owner = {David F. Gleich}, timestamp = {2014.08.08} }

@article{Gleich-2013-expanders, title = {Expanders, tropical semi-rings, and nuclear norms: oh my!}, author = {Gleich, David F.}, journal = {XRDS}, pages = {32--36}, volume = {19}, year = {2013}, month = {March}, number = {3}, acmid = {2425688}, address = {New York, NY, USA}, doi = {10.1145/2425676.2425688}, file = {:Gleich 2013 - expanders.pdf}, issn = {1528-4972}, issue_date = {Spring 2013}, numpages = {5}, owner = {dgleich}, publisher = {ACM}, timestamp = {2013.04.06} }

@inproceedings{constantine2010-teleportation, title = {Tracking the random surfer: empirically measured teleportation parameters in {PageRank} }, author = {David F. Gleich and Paul G. Constantine and Abraham Flaxman and Asela Gunawardana}, booktitle = {WWW '10: Proceedings of the 19th international conference on World wide web}, year = {2010}, month = {April}, pages = {381--390}, abstract = {PageRank computes the importance of each node in a directed graph under a random surfer model governed by a teleportation parameter. Commonly denoted alpha, this parameter models the probability of following an edge inside the graph or, when the graph comes from a network of web pages and links, clicking a link on a web page. We empirically measure the teleportation parameter based on browser toolbar logs and a click trail analysis. For a particular user or machine, such analysis produces a value of alpha. We find that these values nicely fit a Beta distribution with mean edge-following probability between 0.3 and 0.7, depending on the site. Using these distributions, we compute PageRank scores where PageRank is computed with respect to a distribution as the teleportation parameter, rather than a constant teleportation parameter. These new metrics are evaluated on the graph of pages in Wikipedia.}, doi = {10.1145/1772690.1772730}, file = {:gleich 2010 - tracking the random surfer.pdf}, isbn = {978-1-60558-799-8}, keywords = {self}, location = {Raleigh, North Carolina, USA}, mynote = {Acceptance rate = 14%, 104/743 submissions}, owner = {David F. Gleich}, timestamp = {2009.12.03} }

@article{gleich2010-inner-outer, title = {An inner-outer iteration for {PageRank} }, author = {David F. Gleich and Andrew P. Gray and Chen Greif and Tracy Lau}, journal = {SIAM Journal of Scientific Computing}, pages = {349--371}, volume = {32}, year = {2010}, month = {February}, number = {1}, abstract = {We present a new iterative scheme for PageRank computation. The algorithm is applied to the linear system formulation of the problem, using inner-outer stationary iterations. It is simple, can be easily implemented and parallelized, and requires minimal storage overhead. Our convergence analysis shows that the algorithm is effective for a crude inner tolerance and is not sensitive to the choice of the parameters involved. The same idea can be used as a preconditioning technique for nonstationary schemes. Numerical examples featuring matrices of dimensions exceeding 100,000,000 in sequential and parallel environments demonstrate the merits of our technique. Our code is available online for viewing and testing, along with several large scale examples.}, doi = {10.1137/080727397}, file = {:gleich 2010 - inner-outer pagerank.pdf}, keywords = {self}, owner = {David F. Gleich}, timestamp = {2009.07.10} }

@article{Gleich2011-block-circulant, title = {The power and {Arnoldi} methods in an algebra of circulants}, author = {David F. Gleich and Chen Greif and James M. Varah}, journal = {Numerical Linear Algebra with Applications}, pages = {809-831}, volume = {20}, year = {2013}, month = {October}, abstract = {Circulant matrices play a central role in a recently proposed formulation of three-way data computations. In this setting, a three-way table corresponds to a matrix where each "scalar" is a vector of parameters defining a circulant. This interpretation provides many generalizations of results from matrix or vector-space algebra. We derive the power and Arnoldi methods in this algebra. In the course of our derivation, we define inner products, norms, and other notions. These extensions are straightforward in an algebraic sense, but the implications are dramatically different from the standard matrix case. For example, a matrix of circulants has a polynomial number of eigenvalues in its dimension; although, these can all be represented by a carefully chosen canonical set of eigenvalues and vectors. These results and algorithms are closely related to standard decoupling techniques on block-circulant matrices using the fast Fourier transform.}, doi = {10.1002/nla.1845}, file = {:Gleich 2013 - circulant.pdf}, howpublished = {Submitted. Preprint arXiv:1101.2173.}, keywords = {self}, owner = {David F. Gleich}, timestamp = {2011.01.06} }

@article{Gleich-2015-sublinear, title = {Sublinear Column-wise Actions of the Matrix Exponential on Social Networks}, author = {David F. Gleich and Kyle Kloster}, journal = {Internet Mathematics}, pages = {352--384}, volume = {11}, year = {2015}, number = {4--5}, abstract = {We consider stochastic transition matrices from large social and information networks. For these matrices, we describe and evaluate three fast methods to estimate one column of the matrix exponential. The methods are designed to exploit the properties inherent in social networks, such as a power-law degree distribution. Using only this property, we prove that one of our three algorithms has a sublinear runtime. We present further experimental evidence showing that all three of them run quickly on social networks with billions of edges, and they accurately identify the largest elements of the column.}, doi = {10.1080/15427951.2014.971203}, file = {:Gleich 2015 - sublinear columns.pdf}, owner = {David Gleich}, timestamp = {2015.04.15} }

@article{Gleich-2017-seeded-localization, title = {Localization in Seeded {PageRank} }, author = {David F. Gleich and Kyle Kloster and Huda Nassar}, journal = {Internet Mathematics}, pages = {Online}, year = {2017}, arxiv = {https://arxiv.org/abs/1509.00016}, doi = {10.1017/s0956792516000280}, file = {:Nassar 2017 - localization.pdf}, howpublished = {Submitted.}, mysoftware = {https://github.com/nassarhuda/pprlocal}, owner = {David Gleich}, timestamp = {2016.05.24} }

@inproceedings{Gleich-2011-skew-nuclear, title = {Rank aggregation via nuclear norm minimization}, author = {Gleich, David F. and Lim, Lek-Heng}, booktitle = {Proceedings of the 17th ACM SIGKDD international conference on Knowledge discovery and data mining}, year = {2011}, address = {New York, NY, USA}, pages = {60--68}, publisher = {ACM}, series = {KDD '11}, abstract = {The process of rank aggregation is intimately intertwined with the structure of skew-symmetric matrices. We apply recent advances in the theory and algorithms of matrix completion to skew-symmetric matrices. This combination of ideas produces a new method for ranking a set of items. The essence of our idea is that a rank aggregation describes a partially filled skew-symmetric matrix. We extend an algorithm for matrix completion to handle skew-symmetric data and use that to extract ranks for each item. Our algorithm applies to both pairwise comparison and rating data. Because it is based on matrix completion, it is robust to both noise and incomplete data. We show a formal recovery result for the noiseless case and present a detailed study of the algorithm on synthetic data and Netflix ratings.}, acmid = {2020425}, doi = {10.1145/2020408.2020425}, file = {:Gleich 2011 - rank aggregation.pdf}, isbn = {978-1-4503-0813-7}, keywords = {self, nuclear norm, rank aggregation, skew symmetric}, location = {San Diego, California, USA}, mynote = {Accepted for oral presentation 56/714 (overall accept 56+69 of 714 = 17.5%)}, numpages = {9}, owner = {David F. Gleich}, timestamp = {2010.01.30} }

@article{Gleich-2015-mlpr, title = {Multilinear {PageRank} }, author = {David F. Gleich and Lek-Heng Lim and Yongyang Yu}, journal = {SIAM Journal on Matrix Analysis and Applications}, pages = {1507-1541}, volume = {36}, year = {2015}, number = {4}, abstract = {In this paper, we first extend the celebrated PageRank modification to a higher-order Markov chain. Although this system has attractive theoretical properties, it is computationally intractable for many interesting problems. We next study a computationally tractable approximation to the higher-order PageRank vector that involves a system of polynomial equations called multilinear PageRank. This is motivated by a novel ``spacey random surfer'' model, where the surfer remembers bits and pieces of history and is influenced by this information. The underlying stochastic process is an instance of a vertex-reinforced random walk. We develop convergence theory for a simple fixed-point method, a shifted fixed-point method, and a Newton iteration in a particular parameter regime. In marked contrast to the case of the PageRank vector of a Markov chain where the solution is always unique and easy to compute, there are parameter regimes of multilinear PageRank where solutions are not unique and simple algorithms do not converge. We provide a repository of these nonconvergent cases that we encountered through exhaustive enumeration and randomly sampling that we believe is useful for future study of the problem.}, doi = {10.1137/140985160}, file = {:Gleich 2015 - multilinear pagerank.pdf}, mysoftware = {https://github.com/dgleich/mlpagerank}, owner = {David F. Gleich}, timestamp = {2014.09.04}, url = {http://arxiv.org/abs/1409.1465} }

@inproceedings{Gleich-2014-alg-anti-diff, title = {Anti-differentiating approximation algorithms: A case study with min-cuts, spectral, and flow}, author = {David F. Gleich and Michael M. Mahoney}, booktitle = {Proceedings of the International Conference on Machine Learning (ICML)}, year = {2014}, pages = {1018-1025}, comment = {25\% Acceptance Ratio}, file = {:Gleich 2014 - anti-diff.pdf}, mynote = {Acceptance rate 310/1238 = 25.0\%}, owner = {dgleich}, timestamp = {2014.02.05}, url = {http://machinelearning.wustl.edu/mlpapers/papers/icml2014c2_gleich14} }

@incollection{Gleich-2016-mining, title = {Mining large graphs}, author = {David F. Gleich and Michael W. Mahoney}, booktitle = {Handbook of Big Data}, publisher = {CRC Press}, year = {2016}, editor = {Peter B\"{u}hlmann and Petros Drineas and Michael Kane and Mark van de Laan}, pages = {191-220}, series = {Handbooks of modern statistical methods}, doi = {10.1201/b19567-17}, file = {:Gleich 2016 - graph mining.pdf}, isbn = {978-1-4822-4907-1}, owner = {David Gleich}, timestamp = {2015.09.02} }

@inproceedings{Gleich-2015-robustifying, title = {Using Local Spectral Methods to Robustify Graph-Based Learning Algorithms}, author = {Gleich, David F. and Mahoney, Michael W.}, booktitle = {Proceedings of the 21th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, year = {2015}, address = {New York, NY, USA}, pages = {359--368}, publisher = {ACM}, series = {KDD '15}, abstract = {Graph-based learning methods have a variety of names including semi-supervised and transductive learning. They typically use a diffusion to propagate labels from a small set of nodes with known class labels to the remaining nodes of the graph. While popular, these algorithms, when implemented in a straightforward fashion, are extremely sensitive to the details of the graph construction. Here, we provide four procedures to help make them more robust: recognizing implicit regularization in the diffusion, using a scalable push method to evaluate the diffusion, using rank-based rounding, and densifying the graph through a matrix polynomial. We study robustness with respect to the details of graph constructions, errors in node labeling, degree variability, and a variety of other real-world heterogeneities, studying these methods through a precise relationship with mincut problems. For instance, the densification strategy explicitly adds new weighted edges to a sparse graph. We find that this simple densification creates a graph where multiple diffusion methods are robust to several types of errors. This is demonstrated by a study with predicting product categories from an Amazon co-purchasing network.}, acmid = {2783376}, doi = {10.1145/2783258.2783376}, file = {:Gleich 2015 - robustifying.pdf}, isbn = {978-1-4503-3664-2}, keywords = {clustering, diffusions, robustifying, semi-supervised learning}, location = {Sydney, NSW, Australia}, mysoftware = {https://www.cs.purdue.edu/homes/dgleich/codes/robust-diffusions/}, numpages = {10}, owner = {David Gleich}, timestamp = {2015.08.19} }

@article{Gleich-2012-kronecker, title = {Moment based estimation of stochastic {Kronecker} graph parameters}, author = {David F. Gleich and Art B. Owen}, journal = {Internet Mathematics}, pages = {232-256}, volume = {8}, year = {2012}, month = {August}, number = {3}, abstract = {Stochastic Kronecker graphs supply a parsimonious model for large sparse real-world graphs. They can specify the distribution of a large random graph using only three or four parameters. Those parameters have, however, proved difficult to choose in specific applications. This article looks at method-of-moments estimators that are computationally much simpler than maximum likelihood. The estimators are fast, and in our examples, they typically yield Kronecker parameters with expected feature counts closer to a given graph than we get from KronFit. The improvement is especially prominent for the number of triangles in the graph.}, doi = {10.1080/15427951.2012.680824}, file = {:Gleich 2012 - Moment based Kronecker.pdf}, owner = {David F. Gleich}, timestamp = {2011.07.02} }

@article{gleich2007-approximate, title = {Approximating Personalized {PageRank} with Minimal Use of Webgraph Data}, author = {David F. Gleich and Marzia Polito}, journal = {Internet Mathematics}, pages = {257--294}, volume = {3}, year = {2007}, month = {December}, number = {3}, abstract = {In this paper, we consider the problem of calculating fast and accurate approximations to the personalized PageRank score of a webpage. We focus on techniques to improve speed by limiting the amount of web graph data we need to access. Our algorithms provide both the approximation to the personalized PageRank score as well as guidance in using only the necessary information—and therefore sensibly reduce not only the computational cost of the algorithm but also the memory and memory bandwidth requirements. We report experiments with these algorithms on web graphs of up to 118 million pages and prove a theoretical approximation bound for all. Finally, we propose a local, personalized web-search system for a future client system using our algorithms.}, doi = {10.1080/15427951.2006.10129128}, file = {:gleich 2007 - approximate personalized pagerank.pdf}, key = {GP2007}, keywords = {pagerank, self}, owner = {David Gleich}, timestamp = {2006.11.27} }

@article{Gleich-2014-dynamic-pagerank, title = {A Dynamical System for {PageRank} with Time-Dependent Teleportation}, author = {David F. Gleich and Ryan A. Rossi}, journal = {Internet Mathematics}, pages = {188-217}, volume = {10}, year = {2014}, month = {June}, number = {1--2}, abstract = {We propose a dynamical system that captures changes to the network centrality of nodes as external interest in those nodes varies. We derive this system by adding time-dependent teleportation to the PageRank score. The result is not a single set of importance scores, but rather a time-dependent set. These can be converted into ranked lists in a variety of ways, for instance, by taking the largest change in the importance score. For an interesting class of dynamic teleportation functions, we derive closed-form solutions for the dynamic PageRank vector. The magnitude of the deviation from a static PageRank vector is given by a PageRank problem with complex-valued teleportation parameters. Moreover, these dynamical systems are easy to evaluate. We demonstrate the utility of dynamic teleportation on both the article graph of Wikipedia, where the external interest information is given by the number of hourly visitors to each page, and the Twitter social network, where external interest is the number of tweets per month. For these problems, we show that using information from the dynamical system helps improve a prediction task and identify trends in the data.}, doi = {10.1080/15427951.2013.814092}, file = {:Gleich 2014 - dynamical system.pdf}, owner = {David F. Gleich}, timestamp = {2012.11.17} }

@inproceedings{Gleich-2012-neighborhoods, title = {Vertex neighborhoods, low conductance cuts, and good seeds for local community methods}, author = {David F. Gleich and C. Seshadhri}, booktitle = {KDD2012}, year = {2012}, month = aug, pages = {597--605}, doi = {10.1145/2339530.2339628}, file = {:Gleich 2012 - neighborhoods.pdf}, howpublished = {Preprint}, mynote = {Acceptance rate 133/755 = 17.6\%}, owner = {David F. Gleich}, timestamp = {2011.11.28} }

@article{Gleich-2011-cads, title = {Some computational tools for digital archive and metadata maintenance}, author = {Gleich, David F. and Wang, Ying and Meng, Xiangrui and Ronaghi, Farnaz and Gerritsen, Margot and Saberi, Amin}, journal = {BIT Numerical Mathematics}, pages = {127-154}, volume = {51}, year = {2011}, abstract = {Computational tools are a mainstay of current search and recommendation technology. But modern digital archives are astonishingly diverse collections of older digitized material and newer born digital content. Finding interesting material in these archives is still challenging. The material often lacks appropriate annotation---or metadata---so that people can find the most interesting material. We describe four computational tools we developed to aid in the processing and maintenance of large digital archives. The first is an improvement to a graph layout algorithm for graphs with hundreds of thousands of nodes. The second is a new algorithm for matching databases with links among the objects, also known as a network alignment problem. The third is an optimization heuristic to disambiguate a set of geographic references in a book. And the fourth is a technique to automatically generate a title from a description.}, affiliation = {Sandia National Laboratories, Livermore, CA 94550, USA}, doi = {10.1007/s10543-011-0324-6}, file = {:gleich 2011 - comptuational tools.pdf}, issn = {0006-3835}, issue = {1}, keyword = {Mathematics and Statistics}, keywords = {self}, owner = {David F. Gleich}, publisher = {Springer Netherlands}, timestamp = {2011.03.25} }

@inproceedings{gleich2005-ppagerank, title = {Scalable Computing with Power-Law Graphs: Experience with Parallel {PageRank} }, author = {David F. Gleich and Leonid Zhukov}, booktitle = {SuperComputing 2005}, year = {2005}, month = {November}, note = {Poster.}, file = {:gleich2005 - parallel pagerank.pdf}, key = {GZ2005}, keywords = {self, pagerank}, owner = {David Gleich}, timestamp = {2006.10.13}, url = {http://www.cs.purdue.edu/homes/dgleich/publications/gleich2005 - parallel pagerank.pdf} }

@inproceedings{gleich2004-svd, title = {An {SVD} Based Term Suggestion and Ranking System}, author = {David F. Gleich and Leonid Zhukov}, booktitle = {ICDM '04: Proceedings of the Fourth IEEE International Conference on Data Mining (ICDM'04)}, year = {2004}, address = {Brighton, UK}, month = {November}, pages = {391--394}, publisher = {IEEE Computer Society}, abstract = {In this paper, we consider the application of the singular value decomposition (SVD) to a search term suggestion system in a pay-for-performance search market. We propose a novel positive and negative refinement method based on orthogonal subspace projections. We demonstrate that SVD subspace-based methods: 1) expand coverage by reordering the results, and 2) enhance the clustered structure of the data. The numerical experiments reported in this paper were performed on Overture's pay-per-performance search market data.}, doi = {10.1109/ICDM.2004.10006}, file = {:gleich 2004 - svd based term suggestion and ranking.pdf}, isbn = {0-7695-2142-8}, key = {GZ2004}, keywords = {self}, owner = {David Gleich}, timestamp = {2006.10.13} }

@inproceedings{gleich2005-wom, title = {The {World of Music}: {SDP} Embedding of High Dimensional data}, author = {David F. Gleich and Leonid Zhukov and Matthew Rasmussen and Kevin Lang}, booktitle = {Information Visualization 2005}, year = {2005}, note = {Interactive Poster}, abstract = {In this paper we investigate the use of Semidefinite Programming (SDP) optimization for high dimensional data layout and graph visualization. We developed a set of interactive visualization tools and used them on music artist ratings data from Yahoo!. The computed layout preserves a natural grouping of the artists and provides visual assistance for browsing large music collections.}, file = {:gleich2005 - world of music.pdf}, key = {GZRL2005}, keywords = {self, recommender systems}, owner = {David Gleich}, timestamp = {2006.10.13}, url = {http://www.cs.purdue.edu/homes/dgleich/publications/gleich2005 - world of music.pdf} }

@inproceedings{Hou-2016-palm, title = {Fast Multiplier Methods to Optimize Non-exhaustive, Overlapping Clustering}, author = {Yangyang Hou and Joyce Jiyoung Whang and David F. Gleich and Inderjit Dhillon}, booktitle = {Proceedings of the 2016 SIAM International Conference on Data Mining}, year = {2016}, pages = {297--305}, doi = {10.1137/1.9781611974348.34}, howpublished = {Submitted}, mynote = {96 of 374 submissions, 25\%}, owner = {David Gleich}, timestamp = {2015.10.21}, url = {http://arxiv.org/abs/1602.01910} }

@inproceedings{Hou-2015-low-rank, title = {Non-exhaustive, Overlapping Clustering via Low-Rank Semidefinite Programming}, author = {Hou, Yangyang and Whang, Joyce Jiyoung and Gleich, David F. and Dhillon, Inderjit S.}, booktitle = {Proceedings of the 21th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, year = {2015}, address = {New York, NY, USA}, pages = {427--436}, publisher = {ACM}, series = {KDD '15}, acmid = {2783398}, doi = {10.1145/2783258.2783398}, file = {:Hou 2015 - low-rank.pdf}, isbn = {978-1-4503-3664-2}, keywords = {community detection, overlapping clustering, semidefinite programming}, location = {Sydney, NSW, Australia}, mynote = {Acceptance rate 159/819 = 19\%}, numpages = {10}, owner = {David Gleich}, timestamp = {2015.07.28} }

@inproceedings{Jiang-2015-flux, title = {Differential Flux Balance Analysis of Quantitative Proteomic Data on Protein Interaction Networks}, author = {Biaobin Jiang and David F. Gleich and Michael Gribskov}, booktitle = {Symposium on Signal Processing and Mathematical Modeling of Biological Processes with Applications to Cyber-Physical Systems for Precise Medicine}, year = {2015}, organization = {IEEE}, pages = {977--981}, series = {GlobalSIP}, doi = {10.1109/GlobalSIP.2015.7418343}, file = {:Jiang 2015 - differential flux balance.pdf}, owner = {David Gleich}, timestamp = {2015.09.02} }

@article{Jiang-2017-aptrank, title = { {AptRank}: an adaptive {PageRank} model for protein function prediction on bi-relational graphs}, author = {Biaobin Jiang and Kyle Kloster and David F. Gleich and Michael Gribskov}, journal = {Bioinformatics}, pages = {1829--1836}, volume = {33}, year = {2017}, month = {June}, number = {12}, arxiv = {https://arxiv.org/abs/1601.05506}, doi = {10.1093/bioinformatics/btx029}, file = {:Jiang 2017 - aptrank.pdf}, mysoftware = {https://github.rcac.purdue.edu/mgribsko/aptrank}, owner = {David Gleich}, publisher = {Oxford}, timestamp = {2016.01.21} }

@inproceedings{Kang-2017-distributed-erasure, title = {Distributed Fault Tolerant Linear System Solvers Based on Erasure Coding}, author = {Xuejiao Kang and David F. Gleich and Ahmed Sameh and Ananth Grama}, booktitle = {2017 IEEE 37th International Conference on Distributed Computing Systems (ICDCS)}, year = {2017}, pages = {2478-2485}, abstract = {We present efficient coding schemes and distributed implementations of erasure coded linear system solvers. Erasure coded computations belong to the class of algorithmic fault tolerance schemes. They are based on augmenting an input dataset, executing the algorithm on the augmented dataset, and in the event of a fault, recovering the solution from the corresponding augmented solution. This process can be viewed as the computational analog of erasure coded storage schemes. The proposed technique has a number of important benefits: (i) as the hardware platform scales in size and number of faults, our scheme yields increasing improvement in resource utilization, compared to traditional schemes; (ii) the proposed scheme is easy to code - the core algorithms remain the same; and (iii) the general scheme is flexible - accommodating a range of computation and communication tradeoffs. We present new coding schemes for augmenting the input matrix that satisfy the recovery equations of erasure coding with high probability in the event of random failures. These coding schemes also minimize fill (non-zero elements introduced by the coding block), while being amenable to efficient partitioning across processing nodes. We demonstrate experimentally that our scheme adds minimal overhead for fault tolerance, yields excellent parallel efficiency and scalability, and is robust to different fault arrival models.}, doi = {10.1109/ICDCS.2017.261}, file = {:Kang 2017 - erasure.pdf}, issn = {1063-6927}, keywords = {matrix algebra;minimisation;parallel processing;probability;resource allocation;software fault tolerance;storage management;algorithmic fault tolerance schemes;distributed fault tolerant linear system solvers;erasure coded storage schemes;erasure coding;fill minimization;input matrix augmentation;parallel systems;probability;random failures;resource utilization;Computational modeling;Encoding;Fault tolerance;Fault tolerant systems;Linear systems;Program processors;Sparse matrices;Erasure Coding;Fault Tolerance;Kruskal rank;Linear System Solvers}, owner = {dgleich}, timestamp = {2017.03.22} }

@inproceedings{Khan-2012-netalign-multicore, title = {A multicore algorithm for network alignment via approximate matching}, author = {Arif Khan and David F. Gleich and Mahantesh Halappanavar and Alex Pothen}, booktitle = {Proceedings of the 2012 ACM/IEEE International Conference for High Performance Computing, Networking, Storage and Analysis}, year = {2012}, address = {Los Alamitos, CA, USA}, month = nov, pages = {64:1--64:11}, publisher = {IEEE Computer Society Press}, series = {SC '12}, acmid = {2389083}, articleno = {64}, file = {:Khan 2012 - netalign multicore.pdf}, isbn = {978-1-4673-0804-5}, location = {Salt Lake City, Utah}, mynote = {Acceptance rate 100/472 = 21\%}, numpages = {11}, owner = {David Gleich}, timestamp = {2012.05.30}, url = {http://conferences.computer.org/sc/2012/papers/1000a054.pdf} }

@inproceedings{Kloster-2013-nexpokit, title = {A Nearly-Sublinear Method for Approximating a Column of the Matrix Exponential for Matrices from Large, Sparse Networks}, author = {Kyle Kloster and David F. Gleich}, booktitle = {Algorithms and Models for the Web Graph}, year = {2013}, editor = {Bonato, Anthony and Mitzenmacher, Michael and Pra\l{}at, Pawe\l{} }, month = dec, pages = {68-79}, publisher = {Springer International Publishing}, series = {Lecture Notes in Computer Science}, volume = {8305}, abstract = {We consider random-walk transition matrices from large social and information networks. For these matrices, we describe and evaluate a fast method to estimate one column of the matrix exponential. Our method runs in sublinear time on networks where the maximum degree grows doubly logarithmic with respect to the number of nodes. For collaboration networks with over 5 million edges, we find it runs in less than a second on a standard desktop machine.}, doi = {10.1007/978-3-319-03536-9_6}, file = {:Kloster 2013 - nexpokit.pdf}, isbn = {978-3-319-03535-2}, keywords = {Matrix exponential; Gauss-Southwell; local algorithms}, owner = {David Gleich}, timestamp = {2013.09.06} }

@article{Kloster-2016-ppr-paths, title = {Seeded {PageRank} Solution Paths}, author = {Kyle Kloster and David F. Gleich}, journal = {European Journal of Applied Mathematics}, pages = {812--845}, volume = {27}, year = {2016}, number = {6}, abstract = {We study the behaviour of network diffusions based on the PageRank random walk from a set of seed nodes. These diffusions are known to reveal small, localized clusters (or communities), and also large macro-scale clusters by varying a parameter that has a dual-interpretation as an accuracy bound and as a regularization level. We propose a new method that quickly approximates the result of the diffusion for all values of this parameter. Our method efficiently generates an approximate solution path or regularization path associated with a PageRank diffusion, and it reveals cluster structures at multiple size-scales between small and large. We formally prove a runtime bound on this method that is independent of the size of the network, and we investigate multiple optimizations to our method that can be more practical in some settings. We demonstrate that these methods identify refined clustering structure on a number of real-world networks with up to 2 billion edges.}, arxiv = {https://arxiv.org/abs/1503.00322}, doi = {10.1017/S0956792516000280}, file = {:Kloster 2016 - seeded pagerank.pdf}, mysoftware = {https://github.com/kkloste/pagerank-paths}, owner = {dgleich}, publisher = {Cambridge University Press}, timestamp = {2015.08.20} }

@inproceedings{Kloster-2014-hkrelax, title = {Heat Kernel Based Community Detection}, author = {Kloster, Kyle and Gleich, David F.}, booktitle = {Proceedings of the 20th {ACM SIGKDD} International Conference on Knowledge Discovery and Data Mining}, year = {2014}, address = {New York, NY, USA}, pages = {1386--1395}, publisher = {ACM}, series = {KDD '14}, acmid = {2623706}, doi = {10.1145/2623330.2623706}, file = {:Kloster 2014 - hkrelax.pdf}, isbn = {978-1-4503-2956-9}, keywords = {heat kernel, local clustering}, mynote = {Includes all software. Acceptance rate 151/1036 = 14.6\%}, mysoftware = {https://www.cs.purdue.edu/homes/dgleich/codes/hkgrow/}, numpages = {10}, owner = {David F. Gleich}, timestamp = {2014.02.23} }

@inproceedings{Klymko-2014-dircomm, title = {Using Triangles to Improve Community Detection in Directed Networks}, author = {Christine Klymko and David F. Gleich and Tamara G. Kolda}, booktitle = {Proceedings of the ASE BigData Conference}, year = {2014}, address = {Stanford, CA}, note = {Full version on arXiv \url{http://arxiv.org/abs/1404.5874} }, doi = {http://www.ase360.org/handle/123456789/104}, keywords = {self}, owner = {David Gleich}, timestamp = {2014.06.27}, url = {http://arxiv.org/abs/1404.5874} }

@article{Mohammadi-2017-tame, title = {Triangular Alignment ({TAME}): A tensor-based approach for higher-order network alignment}, author = {Shahin Mohammadi and David F. Gleich and Tamara G. Kolda and Ananth Grama}, journal = {Transactions on Computational Biology and Bioinformatics}, pages = {1446--1458}, volume = {14}, year = {2016}, month = {November}, note = {Published online (July 2016) ahead of print.}, number = {6}, arxiv = {http://arxiv.org/abs/1510.06482}, doi = {10.1109/TCBB.2016.2595583}, file = {:Mohammadi 2016 - tame.pdf}, mysoftware = {https://github.com/shmohammadi86/TAME}, owner = {David Gleich}, timestamp = {2015.10.21} }

@article{Mohammadi-2018-action, title = {A geometric approach to characterize the functional identity of single cells}, author = {Shahin Mohammadi and Vikram Ravindra and David F. Gleich and Ananth Grama}, journal = {Nature Communications}, pages = {1516}, volume = {9}, year = {2018}, month = {April}, number = {1}, doi = {10.1038/s41467-018-03933-2}, file = {:Mohammadi 2018 - action.pdf}, mysoftware = {http://compbio.mit.edu/ACTION/}, owner = {dgleich}, publisher = {Springer Nature}, timestamp = {2018.04.18} }

@inproceedings{Nassar-2017-multimodal-netalign, title = {Multimodal Network Alignment}, author = {Huda Nassar and David F. Gleich}, booktitle = {Proceedings of the 2017 SIAM International Conference on Data Mining}, year = {2017}, pages = {615-623}, arxiv = {https://arxiv.org/abs/1703.10511}, doi = {10.1137/1.9781611974973.69}, file = {:Nassar 2017 - multimodal network alignment.pdf}, mynote = {acceptance rate 26\% (around 88/338)}, mysoftware = {https://github.com/nassarhuda/MSD}, owner = {dgleich}, timestamp = {2017.01.03} }

@inproceedings{Nassar-2015-prlocal, title = {Strong localization in personalized {PageRank} }, author = {Huda Nassar and Kyle Kloster and David F. Gleich}, booktitle = {Proceedings of the 2015 Workshop on Algorithms for the Webgraph}, year = {2015}, number = {9479}, pages = {190--202}, series = {LNCS}, abstract = {The personalized PageRank diffusion is a fundamental tool in network analysis tasks like community detection and link prediction. It models the spread of a quantity from a set of seed nodes, and it has been observed to stay localized near this seed set. We derive an upper-bound on the number of entries necessary to approximate a personalized PageRank vector in graphs with skewed degree sequences. This bound shows localization under mild assumptions on the maximum and minimum degrees. Experimental results on random graphs with these degree sequences show the bound is loose and support a conjectured bound.}, doi = {10.1007/978-3-319-26784-5_15}, file = {:Nassar 2015 - strong localization.pdf}, howpublished = {Submitted}, journal = {arXiv}, mysoftware = {https://github.com/nassarhuda/pprlocal/tree/a53f98da293455d1e9dd723cb272a3fcc800c1bd}, owner = {David Gleich}, timestamp = {2015.07.26}, url = {http://arxiv.org/abs/1509.00016} }

@inproceedings{Nassar-2018-low-rank-eigenalign, title = {Low Rank Spectral Network Alignment}, author = {Nassar, Huda and Veldt, Nate and Mohammadi, Shahin and Grama, Ananth and Gleich, David F.}, booktitle = {Proceedings of the 2018 World Wide Web Conference}, year = {2018}, pages = {619--628}, publisher = {International World Wide Web Conferences Steering Committee}, series = {WWW '18}, acmid = {3186128}, doi = {10.1145/3178876.3186128}, file = {:Nassar 2018 - low-rank net-align.pdf}, isbn = {978-1-4503-5639-8}, keywords = {graph matching, low rank matrix, low-rank bipartite matching, network alignment}, location = {Lyon, France}, mynote = {Acceptance rate 171/1155 (around 14.8%)}, mysoftware = {https://github.com/nassarhuda/lowrank_spectral}, numpages = {10}, owner = {dgleich}, timestamp = {2018.01.15} }

@inproceedings{Rainey-2016-nano-pagerank, title = {Massive graph processing on nanocomputers}, author = {Bryan P. Rainey and David F. Gleich}, booktitle = {IEEE International Conference on Big Data}, year = {2016}, note = {Third Workshop on High Performance Big Graph Data Management, Analysis, and Mining}, pages = {3326--3335}, doi = {10.1109/BigData.2016.7840992}, file = {:Rainey 2016 - nanocomputers.pdf}, mynote = {Acceptance rate: Workshop}, mysoftware = {https://github.com/brainey421/badjgraph/}, owner = {dgleich}, timestamp = {2016.04.29} }

@inproceedings{Rossi-2012-dynamicpr, title = {Dynamic {PageRank} using Evolving Teleportation}, author = {Ryan A. Rossi and David F. Gleich}, booktitle = {Algorithms and Models for the Web Graph}, year = {2012}, editor = {Bonato, Anthony and Janssen, Jeannette}, pages = {126-137}, publisher = {Springer Berlin Heidelberg}, series = {Lecture Notes in Computer Science}, volume = {7323}, abstract = {The importance of nodes in a network constantly fluctuates based on changes in the network structure as well as changes in external interest. We propose an evolving teleportation adaptation of the PageRank method to capture how changes in external interest influence the importance of a node. This framework seamlessly generalizes PageRank because the importance of a node will converge to the PageRank values if the external influence stops changing. We demonstrate the effectiveness of the evolving teleportation on the Wikipedia graph and the Twitter social network. The external interest is given by the number of hourly visitors to each page and the number of monthly tweets for each user.}, doi = {10.1007/978-3-642-30541-2_10}, file = {:Rossi 2012 - dynamic-pr.pdf}, isbn = {978-3-642-30540-5}, owner = {dgleich}, timestamp = {2012.02.29} }

@article{Rossi-2015-max-clique, title = {Parallel Maximum Clique Algorithms with Applications to Network Analysis}, author = {Ryan A. Rossi and David F. Gleich and Assefaw H. Gebremedhin}, journal = {SIAM Journal on Scientific Computing}, pages = {C589-C616}, volume = {37}, year = {2015}, number = {5}, abstract = {We present a fast, parallel maximum clique algorithm for large sparse graphs that is designed to exploit characteristics of social and information networks. The method exhibits a roughly linear runtime scaling over real-world networks ranging from a thousand to a hundred million nodes. In a test on a social network with 1.8 billion edges, the algorithm finds the largest clique in about 20 minutes. At its heart the algorithm employs a branch-and-bound strategy with novel and aggressive pruning techniques. The pruning techniques include the combined use of core numbers of vertices along with a good initial heuristic solution to remove the vast majority of the search space. In addition, the exploration of the search tree is parallelized. During the search, processes immediately communicate changes to upper and lower bounds on the size of the maximum clique. This exchange of information occasionally results in a superlinear speedup because tasks with large search spaces can be pruned by other processes. We demonstrate the impact of the algorithm on applications using two different network analysis problems: computation of temporal strong components in dynamic networks and determination of compression-friendly ordering of nodes of massive networks.}, arxiv = {http://arxiv.org/abs/1302.6256}, doi = {10.1137/14100018X}, file = {:Rossi 2015 - max-clique.pdf}, owner = {dgleich}, timestamp = {2012.11.17} }

@inproceedings{Rossi-2014-max-clique-poster, title = {Fast Maximum Clique Algorithms for Large Graphs}, author = {Ryan A. Rossi and David F. Gleich and Assefaw H. Gebremedhin and Md. Mostofa Ali Patwary}, booktitle = {Poster Proceedings of WWW2014}, year = {2014}, pages = {365-366}, doi = {10.1145/2567948.2577283}, file = {:Rossi 2014 - max-clique.pdf}, owner = {David F. Gleich}, timestamp = {2014.01.08} }

@inproceedings{Sinha-2016-deconvolving, title = {Deconvolving Feedback loops in Recommender Systems}, author = {Ayan Sinha and David F. Gleich and Karthik Ramani}, booktitle = {Neural Information Processing Systems (NIPS)}, year = {2016}, editor = {D. D. Lee and M. Sugiyama and U. V. Luxburg and I. Guyon and R. Garnett}, pages = {3243--3251}, publisher = {Curran Associates, Inc.}, file = {:Sinha 2016 - deconvolving.pdf}, mynote = {Acceptance rate 568/2500 = 23\%}, mysoftware = {https://github.com/sinhayan/Deconvolving_Feedback_Loops}, owner = {David Gleich}, timestamp = {2015.10.21}, url = {https://papers.nips.cc/paper/6283-deconvolving-feedback-loops-in-recommender-systems} }

@article{Sinha-2018-gauss, title = {Gauss's law for networks directly reveals community boundaries}, author = {Ayan Sinha and David F. Gleich and Karthik Ramani}, journal = {Scientific Reports}, volume = {8}, year = {2018}, month = {aug}, number = {1}, doi = {10.1038/s41598-018-30401-0}, file = {:Sinha 2018 - gauss for networks.pdf}, owner = {dgleich}, publisher = {Springer Nature America, Inc}, supplementary = {https://static-content.springer.com/esm/art%3A10.1038%2Fs41598-018-30401-0/MediaObjects/41598_2018_30401_MOESM1_ESM.pdf}, timestamp = {2016.01.20} }

@inproceedings{Veldt-2016-simple-local-flow, title = {A Simple and Strongly-Local Flow-Based Method for Cut Improvement}, author = {Luke N. Veldt and David F. Gleich and Michael W. Mahoney}, booktitle = {International Conference on Machine Learning}, year = {2016}, pages = {1938-1947}, file = {:Veldt 2016 - simple local.pdf}, mynote = {Acceptance rate 322/1327 = 24\%}, mysoftware = {https://github.com/nveldt/SimpleLocal}, owner = {David Gleich}, timestamp = {2016.01.22}, url = {http://jmlr.org/proceedings/papers/v48/veldt16.html} }

@inproceedings{Veldt-2018-lambda-cc, title = {A Correlation Clustering Framework for Community Detection}, author = {Veldt, Nate and Gleich, David F. and Wirth, Anthony}, booktitle = {Proceedings of the 2018 World Wide Web Conference}, year = {2018}, address = {Republic and Canton of Geneva, Switzerland}, pages = {439--448}, publisher = {International World Wide Web Conferences Steering Committee}, series = {WWW '18}, acmid = {3186110}, arxiv = {https://arxiv.org/abs/1712.05825}, doi = {10.1145/3178876.3186110}, file = {:Veldt 2018 - lambda-CC.pdf}, isbn = {978-1-4503-5639-8}, keywords = {cluster deletion, community detection, correlation clustering, graph clustering, network analysis, sparsest cut}, location = {Lyon, France}, mynote = {Acceptance Rate 171/1155 (14.5\%)}, mysoftware = {https://github.com/nveldt/LamCC}, numpages = {10}, owner = {David Gleich}, timestamp = {2017.11.14} }

@inproceedings{Veldt-2017-low-rank-cc, title = {Correlation Clustering with Low-Rank Matrices}, author = {Veldt, Nate and Wirth, Anthony I. and Gleich, David F.}, booktitle = {Proceedings of the 26th International Conference on World Wide Web}, year = {2017}, pages = {1025--1034}, series = {WWW '17}, acmid = {3052586}, doi = {10.1145/3038912.3052586}, file = {:Veldt 2017 - Low-rank CC.pdf}, isbn = {978-1-4503-4913-0}, keywords = {correlation clustering, low-rank matrices}, location = {Perth, Australia}, mynote = {Acceptance rate 164/966 (17%)}, mysoftware = {https://github.com/nveldt/PSDCC}, numpages = {10}, owner = {dgleich}, timestamp = {2016.11.13}, url = {https://arxiv.org/abs/1611.07305} }

@article{wang2008-monte-carlo-for-adjoint, title = {A {Monte Carlo} method for solving unsteady adjoint equations}, author = {Qiqi Wang and David F. Gleich and Amin Saberi and Nasrollah Etemadi and Parviz Moin}, journal = {Journal of Computational Physics}, pages = {6184--6205}, volume = {227}, year = {2008}, month = {June}, number = {12}, abstract = {Traditionally, solving the adjoint equation for unsteady problems involves solving a large, structured linear system. This paper presents a variation on this technique and uses a Monte Carlo linear solver. The Monte Carlo solver yields a forward-time algorithm for solving unsteady adjoint equations. When applied to computing the adjoint associated with Burgersï¿½ equation, the Monte Carlo approach is faster for a large class of problems while preserving sufficient accuracy.}, address = {San Diego, CA, USA}, doi = {10.1016/j.jcp.2008.03.006}, file = {:wang 2008 - monte carlo adjoint.pdf}, issn = {0021-9991}, keywords = {self}, owner = {David Gleich}, publisher = {Academic Press Professional, Inc.}, timestamp = {2008.10.18} }

@inproceedings{Whang-2015-neokm, title = {Non-exhaustive, overlapping K-means}, author = {Joyce Jiyoung Whang and Inderjit S. Dhillon and David F. Gleich}, booktitle = {Proceedings of the 2015 SIAM International Conference on Data Mining}, year = {2015}, pages = {936-944}, abstract = {Traditional clustering algorithms, such as k-means, output a clustering that is disjoint and exhaustive, that is, every single data point is assigned to exactly one cluster. However, in real datasets, clusters can overlap and there are often outliers that do not belong to any cluster. This is a well recognized problem that has received much attention in the past, and several algorithms, such as fuzzy k-means have been proposed for overlapping clustering. However, most existing algorithms address either overlap or outlier detection and do not tackle the problem in a unified way. In this paper, we propose a simple and intuitive objective function that captures the issues of overlap and non-exhaustiveness in a unified manner. Our objective function can be viewed as a reformulation of the traditional k-means objective, with easy-to-understand parameters that capture the degrees of overlap and non-exhaustiveness. By studying the objective, we are able to obtain a simple iterative algorithm which we call NEO-K-Means (Non-Exhaustive Overlapping K-Means). Furthermore, by considering an extension to weighted kernel k-means, we can tackle the case of non-exhaustive and overlapping graph clustering. This extension allows us to apply our NEO-K-Means algorithm to the community detection problem, which is an important task in network analysis. Our experimental results show that the new objective and algorithm are effective in finding ground-truth clusterings that have varied overlap and non-exhaustiveness; for the case of graphs, we show that our algorithm outperforms state-of-the-art overlapping community detection methods.}, doi = {10.1137/1.9781611974010.105}, file = {:Whang 2015 - neokm.pdf}, howpublished = {Submitted}, location = {Vancouver, BC}, mynote = {Acceptance rate 22\%}, owner = {David F. Gleich}, timestamp = {2014.02.23} }

@article{Whang-2016-community, title = {Overlapping Community Detection Using Neighborhood-Inflated Seed Expansion}, author = {Joyce Jiyoung Whang and David F. Gleich and Inderjit S. Dhillon}, journal = {Transactions on Knowledge and Data Engineering}, pages = {1272--1284}, volume = {28}, year = {2016}, month = {May}, number = {5}, abstract = {Community detection is an important task in network analysis. A community (also referred to as a cluster) is a set of cohesive vertices that have more connections inside the set than outside. In many social and information networks, these communities naturally overlap. For instance, in a social network, each vertex in a graph corresponds to an individual who usually participates in multiple communities. In this paper, we propose an efficient overlapping community detection algorithm using a seed expansion approach. The key idea of our algorithm is to find good seeds, and then greedily expand these seeds based on a community metric. Within this seed expansion method, we investigate the problem of how to determine good seed nodes in a graph. In particular, we develop new seeding strategies for a personalized PageRank clustering scheme that optimizes the conductance community score. An important step in our method is the neighborhood inflation step where seeds are modified to represent their entire vertex neighborhood. Experimental results show that our seed expansion algorithm outperforms other state-of-the-art overlapping community detection methods in terms of producing cohesive clusters and identifying ground-truth communities. We also show that our new seeding strategies are better than existing strategies, and are thus effective in finding good overlapping communities in real-world networks.}, doi = {10.1109/TKDE.2016.2518687}, file = {:Whang 2016 - overlapping.pdf}, keywords = {Clustering algorithms;Collaboration;Image edge detection;Kernel;MySpace;Clustering;Community Detection;Community detection;Overlapping Communities;Personalized PageRank;Seed Expansion;Seeds;clustering;overlapping communities;personalized PageRank;seed expansion;seeds}, owner = {David F. Gleich}, timestamp = {2015.05.19}, url = {http://arxiv.org/abs/1503.07439} }

@inproceedings{Whang-2013-overlapping, title = {Overlapping community detection using seed set expansion}, author = {Whang, Joyce Jiyoung and Gleich, David F. and Dhillon, Inderjit S.}, booktitle = {Proceedings of the 22nd ACM international conference on Conference on information and knowledge management}, year = {2013}, address = {New York, NY, USA}, month = oct, pages = {2099--2108}, publisher = {ACM}, series = {CIKM '13}, abstract = {Community detection is an important task in network analysis. A community (also referred to as a cluster) is a set of cohesive vertices that have more connections inside the set than outside. In many social and information networks, these communities naturally overlap. For instance, in a social network, each vertex in a graph corresponds to an individual who usually participates in multiple communities. One of the most successful techniques for finding overlapping communities is based on local optimization and expansion of a community metric around a seed set of vertices. In this paper, we propose an efficient overlapping community detection algorithm using a seed set expansion approach. In particular, we develop new seeding strategies for a personalized PageRank scheme that optimizes the conductance community score. The key idea of our algorithm is to find good seeds, and then expand these seed sets using the personalized PageRank clustering procedure. Experimental results show that this seed set expansion approach outperforms other state-of-the-art overlapping community detection methods. We also show that our new seeding strategies are better than previous strategies, and are thus effective in finding good overlapping clusters in a graph.}, acmid = {2505535}, doi = {10.1145/2505515.2505535}, file = {:Whang 2013 - overlapping.pdf}, isbn = {978-1-4503-2263-8}, keywords = {clustering, community detection, overlapping clusters, seed set expansion, seeds}, location = {San Francisco, California, USA}, mynote = {Acceptance rate 143/848 16.8\%}, mysoftware = {https://www.cs.utexas.edu/~joyce/codes/cikm2013/nise.html}, numpages = {10}, owner = {David Gleich}, timestamp = {2013.06.08} }

@inproceedings{Wu-2016-tensor-co-clustering, title = {General tensor spectral co-clustering for higher-order data}, author = {Tao Wu and Austin Benson and David F. Gleich}, booktitle = {Advances in Neural Information Processing Systems 29}, year = {2016}, note = {http://arxiv.org/abs/1603.00395}, pages = {2559--2567}, file = {:Wu 2016 - general-tensor-co-clustering.pdf}, howpublished = {Submitted.}, mynote = {Acceptance rate 568/2500 = 23\%}, mysoftware = {https://github.com/wutao27/GtensorSC}, owner = {dgleich}, timestamp = {2016.02.29}, url = {http://papers.nips.cc/paper/6376-general-tensor-spectral-co-clustering-for-higher-order-data} }

@inproceedings{Wu-2017-retrospective-higher-order, title = {Retrospective Higher-Order Markov Processes for User Trails}, author = {Wu, Tao and Gleich, David F.}, booktitle = {Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, year = {2017}, address = {New York, NY, USA}, pages = {1185--1194}, publisher = {ACM}, series = {KDD '17}, acmid = {3098127}, arxiv = {https://arxiv.org/abs/1704.05982}, doi = {10.1145/3097983.3098127}, file = {:Wu 2017 - rhomp.pdf}, isbn = {978-1-4503-4887-4}, keywords = {higher-order markov chains, tensor factorization, user models}, location = {Halifax, NS, Canada}, mynote = {Acceptance rate: 131/748 = 17.5%}, mysoftware = {https://github.com/wutao27/RHOMP}, numpages = {10}, owner = {dgleich}, timestamp = {2017.03.22} }

@inproceedings{Yin-2017-local-motif, title = {Local Higher-Order Graph Clustering}, author = {Yin, Hao and Benson, Austin R. and Leskovec, Jure and Gleich, David F.}, booktitle = {Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, year = {2017}, address = {New York, NY, USA}, pages = {555--564}, publisher = {ACM}, series = {KDD '17}, acmid = {3098069}, doi = {10.1145/3097983.3098069}, file = {:Yin 2017 - local clustering.pdf}, isbn = {978-1-4503-4887-4}, keywords = {clustering, community detection, higher-order structure, motifs}, location = {Halifax, NS, Canada}, mynote = {Acceptance rate: 131/748 = 17.5%; oral = 64/748 = 8.5%}, mysoftware = {http://snap.stanford.edu/mappr/}, numpages = {10}, owner = {dgleich}, timestamp = {2017.03.22} }

@article{Zhu-2016-parallel-mincut, title = {A Parallel Min-Cut Algorithm using Iteratively Reweighted Least Squares}, author = {Yao Zhu and David F. Gleich}, journal = {Parallel Computing}, pages = {43--59}, volume = {59}, year = {2016}, month = {November}, arxiv = {http://arxiv.org/abs/1501.03105}, doi = {10.1016/j.parco.2016.02.003}, file = {:Zhu 2016 - pirmcut.pdf}, mysoftware = {https://www.cs.purdue.edu/homes/dgleich/codes/pirmcut/pirmcut-data.7z}, owner = {David Gleich}, timestamp = {2014.10.17} }

@article{Gleich-2017-erasure, title = {Erasure coding for fault oblivious linear system solvers}, author = {Yao Zhu and Ananth Grama and David F. Gleich}, journal = {SIAM J. of Scientific Computing}, pages = {C48-C64}, volume = {39}, year = {2017}, number = {1}, doi = {10.1137/15M1041511}, file = {:Zhu 2017 - Erasure Coded.pdf}, mysoftware = {https://github.com/dgleich/erasure-coded-cg}, owner = {dgleich}, timestamp = {2015.01.26}, url = {http://arxiv.org/abs/1412.7364} }

@misc{Andrew-2004-list-access, title = {MTF , BIT , and COMB: A Guide to Deterministic and Randomized Online Algorithms for the List Access Problem}, author = {Kevin Andrew and David F. Gleich}, howpublished = {Advanced Algorithms, Harvey Mudd College, Final Project}, year = {2004}, abstract = {In this survey, we discuss two randomized online algorithms for the list access problem. First, we review competitive analysis and show that the MTF algorithm is 2-competitive using a potential function. Then, we introduce randomized competitive analysis and the associated adversary models. We show that the randomized BIT algorithm is 7/4-competitive using a potential function argument. We then introduce the pairwise property and the TIMESTAMP algorithm to show that the COMB algorithm, a COMBination of the BIT and TIMEST AMP algorithms, is 8/5-competitive. COMB is the best known randomized algorithm for the list access program.}, file = {:Andrew 2004 - List Access.pdf}, keywords = {self,}, owner = {David F. Gleich}, timestamp = {2011.07.02} }

@misc{gleich2003-clinic, title = {Three Methods for Improving Relevance in Web Search.}, author = {Erin Bodine and David F. Gleich and Cathy Kurata and Jordan Kwan and Lesley Ward and Daniel Fain}, howpublished = {Clinic Report, Harvey Mudd College}, month = {May 9}, note = {102 pages. Includes fully documented program code on accompanying CD.}, year = {2003}, abstract = {The 2002–2003 Overture clinic project evaluated and implemented three different methods for improving relevance ordering in web search. The three methods were bottom up micro information unit (MIU) analysis, top down MIU analysis, and proximity scoring. We ran these three methods on the top 200 web pages returned for each of 58 queries by an already existing algorithmic search engine. We used two metrics, precision and relevance ordering, to evaluate the results. Precision deals with how relevant the web page is for a given query, while relevance ordering is how well-ordered the returned results are. We evaluated the precision of each method and of the algorithmic search engine by hand. For relevance ordering, we recruited other humans to compare pages and used their decisions to generate an ideal ranking for each query. The results of each of our methods and of the algorithmic search engine are then compared to this ideal ranking vector using Kendall’s Tau. Our bottom up MIU analysis method achieved the highest precision score of 0.78 out of 1.00. In addition, bottom up MIU analysis received the second highest correlation coefficient (or relevance ordering score) of 0.107 while the algorithmic search engine received the highest correlation coefficient of 0.121. Interestingly, our proximity scoring method received high relevance ordering scores when the algorithmic search engine received low relevance ordering scores.}, file = {:bodine2003 - clinic, improved relevance.pdf}, key = {BGK+2003}, keywords = {self, web search}, owner = {David Gleich}, timestamp = {2006.10.13} }

@misc{Gleich-2005-directed-spectral, title = {Hierarchical Directed Spectral Graph Partitioning}, author = {David F. Gleich}, howpublished = {Information Networks, Stanford University, Final Project, 2005}, note = {Cited over 6 times.}, year = {2006}, abstract = {In this report, we examine the generalization of the Laplacian of a graph due to Fan Chung. We show that Fan Chung’s generalization reduces to examining one particular symmetrization of the adjacency matrix for a directed graph. From this result, the directed Cheeger bounds trivially follow. Additionally, we implement and examine the beneﬁts of directed hierarchical spectral clustering empirically on a dataset from Wikipedia. Finally, we examine a set of competing heuristic methods on the same dataset.}, file = {:Gleich 2005 - hierarchical directed spectral.pdf}, keywords = {self}, owner = {David F. Gleich}, timestamp = {2011.07.02}, url = {http://www.cs.purdue.edu/homes/dgleich/publications/Gleich 2005 - hierarchical directed spectral.pdf} }

@misc{Gleich-2005-finite-calculus, title = {Finite Calculus: A tutorial for solving nasty sums}, author = {David F. Gleich}, howpublished = {Combinatorics, Final Paper, Stanford University, 2004.}, year = {2005}, file = {:Gleich 2005 - finite calculus.pdf}, keywords = {self}, owner = {David F. Gleich}, timestamp = {2011.07.02} }

@misc{Gleich-2003-computer-chess, title = {Machine Learning in Computer Chess: Genetic Programming and KRK}, author = {David F. Gleich}, howpublished = {Independent Study Report, Harvey Mudd College}, year = {2003}, abstract = {In this paper, I describe genetic programming as a machine learning paradigm and evaluate its results in attempting to learn basic chess rules. Genetic programming exploits a simulation of Darwinian evolution to construct programs. When applied to the King-Rook-King (KRK) chess endgame problem, genetic programming shows promising results in spite of a lack of signiﬁcant chess knowledge.}, file = {:Gleich 2003 - Machine Learning in Computer Chess.pdf}, keywords = {self}, owner = {David F. Gleich}, timestamp = {2011.07.02} }

@inproceedings{gleich2007-pagerank-deriv, title = {Three results on the {PageRank} vector: eigenstructure, sensitivity, and the derivative}, author = {David F. Gleich and Peter Glynn and Gene H. Golub and Chen Greif}, booktitle = {Web Information Retrieval and Linear Algebra Algorithms}, year = {2007}, editor = {Andreas Frommer and Michael W. Mahoney and Daniel B. Szyld}, number = {07071}, publisher = {Internationales Begegnungs- und Forschungszentrum fuer Informatik (IBFI), Schloss Dagstuhl, Germany}, series = {Dagstuhl Seminar Proceedings}, abstract = {The three results on the PageRank vector are preliminary but shed light on the eigenstructure of a PageRank modified Markov chain and what happens when changing the teleportation parameter in the PageRank model. Computations with the derivative of the PageRank vector with respect to the teleportation parameter show predictive ability and identify an interesting set of pages from Wikipedia.}, file = {:gleich 2007 - pagerank derivative and sensitivity.pdf}, issn = {1862-4405}, key = {GGGG2007}, keywords = {self, PageRank, PageRank derivative, PageRank sensitivity, PageRankeigenstructure}, optaddress = {Dagstuhl, Germany}, optannote = {Keywords: PageRank, PageRank derivative, PageRank sensitivity, PageRank eigenstructure}, opturl = {http://drops.dagstuhl.de/opus/volltexte/2007/1061 [date of citation: 2007-01-01]}, owner = {David Gleich}, timestamp = {2008.03.04}, url = {http://drops.dagstuhl.de/opus/volltexte/2007/1061} }

@misc{Gleich-2006-wom, title = {The World of Music: User Ratings; Spectral and Spherical Embeddings; Map Projections}, author = {David F. Gleich and Matthew Rasmussen and Kevin Lang and Leonid Zhukov}, howpublished = {Online report}, year = {2006}, abstract = {In this paper we present an algorithm for layout and visualization of music collections based on similarities between musical artists. The core of the algorithm consists of a non-linear low dimensional embedding of a similarity graph constrained to the surface of a hyper-sphere. This approach effectively uses additional dimensions in the embedding. We derive the algorithm using a simple energy minimization procedure and show the relationships to several well known eigenvector based methods. We also describe a method for constructing a similarity graph from user ratings, as well as procedures for mapping the layout from the hyper-sphere to a 2d display. We demonstrate our techniques on Yahoo! Music user ratings data and a MusicMatch artist similarity graph.}, file = {:Gleich 2006 - wom.pdf}, keywords = {self}, owner = {David F. Gleich}, timestamp = {2011.07.02}, url = {http://www.cs.purdue.edu/homes/dgleich/publications/Gleich 2006 - wom.pdf} }

@techreport{gleich2004-parallel, title = {Fast Parallel {PageRank}: A Linear System Approach}, author = {David F. Gleich and Leonid Zhukov and Pavel Berkhin}, institution = {Yahoo! Research Labs}, year = {2004}, number = {YRL-2004-038}, abstract = {In this paper we investigate the convergence of iterative stationary and Krylov subspace methods for the PageRank linear system, including the convergence dependency on teleportation. We demonstrate that linear system iterations converge faster than the simple power method and are less sensitive to the changes in teleportation. In order to perform this study we developed a framework for parallel PageRank computing. We describe the details of the parallel implementation and provide experimental results obtained on a 70-node Beowulf cluster.}, file = {:gleich2004-parallel.pdf}, key = {GZB2006}, keywords = {self, pagerank}, mysoftware = {https://github.com/dgleich/ppagerank}, owner = {David Gleich}, timestamp = {2006.11.29}, url = {http://www.cs.purdue.edu/homes/dgleich/publications/gleich2004-parallel.pdf} }

@misc{Zhukov-2004-soft-clustering, title = {Topic Identiﬁcation in Soft Clustering using PCA and ICA}, author = {Leonid Zhukov and David F. Gleich}, howpublished = {Online report, Yahoo! research labs}, year = {2004}, abstract = {Many applications can benefit from soft clustering, where each datum is assigned to multiple clusters with membership weights that sum to one. In this paper we present a comparison of principal component analysis (PCA) and independent component analysis (ICA) when used for soft clustering. We provide a short mathematical background for these methods and demonstrate their application to a sponsored links search listings dataset. We present examples of the soft clusters generated by both methods and compare the results.}, file = {:Zhukov 2004 - soft clustering pca and ica.pdf}, keywords = {self}, owner = {David F. Gleich}, timestamp = {2011.07.02} }

@article{Gleich-2011-review-of-kamvar, title = {Review of: {Numerical algorithms for personalized search in self-organizing information networks by Sep Kamvar, Princeton Univ. Press, 2010, 160pp., ISBN13: 978-0-691-14503-7} }, author = {David F. Gleich}, journal = {Linear Algebra and its Applications}, pages = {908 - 909}, volume = {435}, year = {2011}, number = {4}, doi = {10.1016/j.laa.2011.01.013}, file = {:Gleich 2011 - review of kamvar.pdf}, issn = {0024-3795}, keywords = {self}, owner = {David F. Gleich}, timestamp = {2011.07.02} }

@incollection{Constantine-preprint-pcam, title = {Ranking Web Pages}, author = {David F. Gleich and Paul G. Constantine}, booktitle = {The Princeton Companion to Applied Mathematics}, publisher = {Princeton University Press}, year = {2015}, address = {Princeton, NJ, USA}, editor = {Nicholas J. Higham and Mark R. Dennis and Paul Glendinning and Paul A. Martin and Fadil Santosa and Jared Tanner}, pages = {755-757}, owner = {David Gleich}, timestamp = {2013.09.09} }

@phdthesis{gleich2009-thesis, title = {Models and Algorithms for {PageRank} Sensitivity}, author = {David F. Gleich}, school = {Stanford University}, year = {2009}, month = {September}, abstract = {The PageRank model helps evaluate the relative importance of nodes in a large graph, such as the graph of links on the world wide web. An important piece of the PageRank model is the teleportation parameter . We explore the interaction between and PageRank through the lens of sensitivity analysis. Writing the PageRank vector as a function of allows us to take a derivative, which is a simple sensitivity measure. As an alternative approach, we apply techniques from the field of uncertainty quantification. Regarding as a random variable produces a new PageRank model in which each PageRank value is a random variable. We explore the standard deviation of these variables to get another measure of PageRank sensitivity. One interpretation of this new model shows that it corrects a small oversight in the original PageRank formulation. Both of the above techniques require solving multiple PageRank problems, and thus a robust PageRank solver is needed. We discuss an inner-outer iteration for this purpose. The method is low-memory, simple to implement, and has excellent performance for a range of teleportation parameters. We show empirical results with these techniques on graphs with over 2 billion edges.}, file = {:gleich 2009 - pagerank thesis.pdf}, keywords = {self}, owner = {David Gleich}, timestamp = {2009.09.13}, url = {http://www.stanford.edu/group/SOL/dissertations/pagerank-sensitivity-thesis-online.pdf} }