Automatically generated by Mendeley Desktop 1.19.5 Any changes to this file will be lost if it is regenerated by Mendeley. BibTeX export options can be customized via Preferences -> BibTeX in Mendeley Desktop @inproceedings{degenne2020gamification, abstract = {We investigate an active pure-exploration setting, that includes best-arm identification, in the context of linear stochastic bandits. While asymptotically optimal algorithms exist for standard multi-arm bandits, the existence of such algorithms for the best-arm identification in linear bandits has been elusive despite several attempts to address it. First, we provide a thorough comparison and new insight over different notions of optimality in the linear case, including G-optimality, transductive optimality from optimal experimental design and asymptotic optimality. Second, we design the first asymptotically optimal algorithm for fixed-confidence pure exploration in linear bandits. As a consequence, our algorithm naturally bypasses the pitfall caused by a simple but difficult instance, that most prior algorithms had to be engineered to deal with explicitly. Finally, we avoid the need to fully solve an optimal design problem by providing an approach that entails an efficient implementation.}, author = {Degenne, R{\'{e}}my and M{\'{e}}nard, Pierre and Shang, Xuedong and Valko, Michal}, booktitle = {International Conference on Machine Learning}, title = {{Gamification of pure exploration for linear bandits}}, year = {2020} } @inproceedings{gillenwater2012discovering, abstract = {We propose a novel probabilistic technique for modeling and extracting salient struc-ture from large document collections. As in clustering and topic modeling, our goal is to provide an organizing perspective into otherwise overwhelming amounts of infor-mation. We are particularly interested in revealing and exploiting relationships be-tween documents. 
To this end, we focus on extracting diverse sets of threads—singly-linked, coherent chains of important doc-uments. To illustrate, we extract research threads from citation graphs and construct timelines from news articles. Our method is highly scalable, running on a corpus of over 30 million words in about four minutes, more than 75 times faster than a dynamic topic model. Finally, the results from our model more closely resemble human news summaries according to several metrics and are also preferred by human judges.}, author = {Gillenwater, Jennifer and Kulesza, Alex and Taskar, Ben}, booktitle = {Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning}, file = {::}, pages = {710--720}, title = {{Discovering Diverse and Salient Threads in Document Collections}}, year = {2012} } @unpublished{poulson2019high-performance, abstract = {Determinantal Point Processes (DPPs) were introduced by Mac-chi [1] as a model for repulsive (fermionic) particle distributions. But their recent popularization is largely due to their usefulness for encouraging diversity in the final stage of a recommender system [2]. The standard sampling scheme for finite DPPs is a spectral decomposition followed by an equivalent of a randomly diagonally-pivoted Cholesky factorization of an orthogonal projection, which is only applicable to Hermitian kernels and has an expensive setup cost. Researchers have begun to connect DPP sampling to LDL H factoriza-tions as a means of avoiding the initial spectral decomposition, but existing approaches have only outperformed the spectral decomposition approach in special circumstances, where the number of kept modes is a small percentage of the ground set size. This article proves that trivial modifications of LU and LDL H fac-torizations yield efficient direct sampling schemes for non-Hermitian and Hermitian DPP kernels, respectively. 
Further, it is experimentally shown that even dynamically-scheduled, shared-memory paralleliza-tions of high-performance dense and sparse-direct factorizations can be trivially modified to yield DPP sampling schemes with essentially identical performance. The software developed as part of this research, Catamari [hodges-tar.com/catamari] is released under the Mozilla Public License v2.0. It contains header-only, C++14 plus OpenMP 4.0 implementations of dense and sparse-direct, Hermitian and non-Hermitian DPP samplers. * jack@hodgestar.com, Hodge Star Scientific Computing}, archivePrefix = {arXiv}, arxivId = {1905.00165v1}, author = {Poulson, Jack}, eprint = {1905.00165v1}, title = {{High-performance sampling of generic determinantal point processes}}, url = {https://arxiv.org/pdf/1905.00165.pdf}, year = {2019} } @article{GoChGr14, abstract = {Video summarization is a challenging problem with great application potential. Whereas prior approaches, largely unsupervised in nature, focus on sampling use-ful frames and assembling them as summaries, we consider video summarization as a supervised subset selection problem. Our idea is to teach the system to learn from human-created summaries how to select informative and diverse subsets, so as to best meet evaluation metrics derived from human-perceived quality. To this end, we propose the sequential determinantal point process (seqDPP), a proba-bilistic model for diverse sequential subset selection. Our novel seqDPP heeds the inherent sequential structures in video data, thus overcoming the deficiency of the standard DPP, which treats video frames as randomly permutable items. Mean-while, seqDPP retains the power of modeling diverse subsets, essential for summa-rization. 
Our extensive results of summarizing videos from 3 datasets demonstrate the superior performance of our method, compared to not only existing unsuper-vised methods but also naive applications of the standard DPP model.}, author = {Gong, Boqing and Chao, Wl and Grauman, Kristen and Sha, F}, issn = {10495258}, journal = {Nips}, keywords = {erse sequential subset selection,for,supervised video summarization}, pages = {1--9}, title = {{Diverse Sequential Subset Selection for Supervised Video Summarization}}, url = {http://papers.nips.cc/paper/5413-large-scale-l-bfgs-using-mapreduce}, year = {2014} } @article{BaHa16, abstract = {We show that repulsive random variables can yield Monte Carlo methods with faster convergence rates than the typical {\$}N{\^{}}{\{}-1/2{\}}{\$}, where {\$}N{\$} is the number of integrand evaluations. More precisely, we propose stochastic numerical quadratures involving determinantal point processes associated with multivariate orthogonal polynomials, and we obtain root mean square errors that decrease as {\$}N{\^{}}{\{}-(1+1/d)/2{\}}{\$}, where {\$}d{\$} is the dimension of the ambient space. First, we prove a central limit theorem (CLT) for the linear statistics of a class of determinantal point processes, when the reference measure is a product measure supported on a hypercube, which satisfies the Nevai-class regularity condition, a result which may be of independent interest. Next, we introduce a Monte Carlo method based on these determinantal point processes, and prove a CLT with explicit limiting variance for the quadrature error, when the reference measure satisfies a stronger regularity condition. As a corollary, by taking a specific reference measure and using a construction similar to importance sampling, we obtain a general Monte Carlo method, which applies to any measure with continuously derivable density. 
Loosely speaking, our method can be interpreted as a stochastic counterpart to Gaussian quadrature, which, at the price of some convergence rate, is easily generalizable to any dimension and has a more explicit error term.}, annote = {NULL}, archivePrefix = {arXiv}, arxivId = {1605.00361}, author = {Bardenet, R{\'{e}}mi and Hardy, Adrien}, eprint = {1605.00361}, file = {::}, month = {may}, pages = {48}, title = {{Monte Carlo with Determinantal Point Processes}}, url = {http://arxiv.org/abs/1605.00361}, year = {2016} } @article{KuTa12, abstract = {Determinantal point processes (DPPs) are elegant probabilistic models of repulsion that arise in quantum physics and random matrix theory. In contrast to traditional structured models like Markov random fields, which become intractable and hard to approximate in the presence of negative correlations, DPPs offer efficient and exact algorithms for sampling, marginalization, conditioning, and other inference tasks. We provide a gentle introduction to DPPs, focusing on the intuitions, algorithms, and extensions that are most relevant to the machine learning community, and show how DPPs can be applied to real-world applications like finding diverse sets of high-quality search results, building informative summaries by selecting diverse sentences from documents, modeling non-overlapping human poses in images or video, and automatically building timelines of important news stories.}, annote = {NULL}, archivePrefix = {arXiv}, arxivId = {1207.6083}, author = {Kulesza, Alex and Taskar, Ben}, doi = {10.1561/2200000044}, eprint = {1207.6083}, file = {::}, issn = {1935-8237}, journal = {Foundations and Trends in Machine Learning}, month = {jul}, number = {2-3}, pages = {123--286}, title = {{Determinantal Point Processes for Machine Learning}}, url = {http://arxiv.org/abs/1207.6083}, volume = {5}, year = {2012} } @article{AvGa13, abstract = {Consider a finite weighted oriented graph. 
We study a probability measure on the set of spanning rooted oriented forests on the graph. We prove that the set of roots sampled from this measure is a determinantal process, characterized by a possibly non-symmetric kernel with complex eigenvalues. We then derive several results relating this measure to the Markov process associated with the starting graph, to the spectrum of its generator and to hitting times of subsets of the graph. In particular, the mean hitting time of the set of roots turns out to be independent of the starting point, conditioning or not to a given number of roots. Wilson's algorithm provides a way to sample this measure and, in absence of complex eigenvalues of the generator, we explain how to get samples with a number of roots approximating a prescribed integer. We also exploit the properties of this measure to give some probabilistic insight into the proof of an algebraic result due to Micchelli and Willoughby [13]. Further, we present two different related coalescence and fragmentation processes.}, author = {Avena, L and Gaudilli{\`{e}}re, A}, file = {::}, journal = {e-prints}, keywords = {05C81,05C85 Keywords,15A15,15A18,60J20,Finite networks,MSC 2010,Wilson's algorithm,coalescence and frag-mentation,determinantal processes,hit-ting times,local equilibria,primary,random partitions,random sets,secondary,spanning forests}, title = {{On some random forests with determinantal roots}}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.740.6173{\&}rep=rep1{\&}type=pdf}, year = {2013} } @article{GaPaKo16, abstract = {Determinantal point processes (DPPs) have garnered attention as an elegant probabilistic model of set diversity. They are useful for a number of subset selection tasks, including product recommendation. DPPs are parametrized by a positive semi-definite kernel matrix. In this work we present a new method for learning the DPP kernel from observed data using a low-rank factorization of this kernel. 
We show that this low-rank factorization enables a learning algorithm that is nearly an order of magnitude faster than previous approaches, while also providing for a method for computing product recommendation predictions that is far faster (up to 20x faster or more for large item catalogs) than previous techniques that involve a full-rank DPP kernel. Furthermore, we show that our method provides equivalent or sometimes better predictive performance than prior full-rank DPP approaches, and better performance than several other competing recommendation methods in many cases. We conduct an extensive experimental evaluation using several real-world datasets in the domain of product recommendation to demonstrate the utility of our method, along with its limitations.}, archivePrefix = {arXiv}, arxivId = {1602.05436}, author = {Gartrell, Mike and Paquet, Ulrich and Koenigstein, Noam}, eprint = {1602.05436}, file = {::}, pages = {10}, title = {{Low-Rank Factorization of Determinantal Point Processes for Recommendation}}, url = {http://arxiv.org/abs/1602.05436}, year = {2016} } @inproceedings{anari2016monte, abstract = {Strongly Rayleigh distributions are natural generalizations of product and determinantal probability distributions and satisfy strongest form of negative dependence properties. We show that the "natural" Monte Carlo Markov Chain (MCMC) is rapidly mixing in the support of a {\{}$\backslash$em homogeneous{\}} strongly Rayleigh distribution. As a byproduct, our proof implies Markov chains can be used to efficiently generate approximate samples of a {\$}k{\$}-determinantal point process. 
This answers an open question raised by Deshpande and Rademacher.}, archivePrefix = {arXiv}, arxivId = {1602.05242}, author = {Anari, Nima and Gharan, Shayan Oveis and Rezaei, Alireza}, booktitle = {Conference on Learning Theory}, eprint = {1602.05242}, file = {::}, keywords = {1,13,2016,algorithms for sampling,determinantal point processes,monte carlo markov chain,r,strongly rayleigh distributions and,vol 49,workshop and conference proceedings}, month = {feb}, title = {{Monte Carlo Markov Chains for sampling Strongly Rayleigh distributions and Determinantal Point Processes}}, url = {http://arxiv.org/abs/1602.05242}, year = {2016} } @inproceedings{MaSr16, abstract = {Determinantal Point Processes (DPPs) are probabilistic models over all subsets a ground set of {\$}N{\$} items. They have recently gained prominence in several applications that rely on "diverse" subsets. However, their applicability to large problems is still limited due to the {\$}\backslashmathcal O(N{\^{}}3){\$} complexity of core tasks such as sampling and learning. We enable efficient sampling and learning for DPPs by introducing KronDPP, a DPP model whose kernel matrix decomposes as a tensor product of multiple smaller kernel matrices. This decomposition immediately enables fast exact sampling. But contrary to what one may expect, leveraging the Kronecker product structure for speeding up DPP learning turns out to be more difficult. 
We overcome this challenge, and derive batch and stochastic optimization algorithms for efficiently learning the parameters of a KronDPP.}, address = {Barcelona, Spain}, archivePrefix = {arXiv}, arxivId = {1605.08374}, author = {Mariet, Zelda and Sra, Suvrit}, booktitle = {Neural Information Processing Systems}, eprint = {1605.08374}, file = {::}, month = {may}, pages = {2694----2702}, title = {{Kronecker Determinantal Point Processes}}, url = {https://papers.nips.cc/paper/6296-kronecker-determinantal-point-processes http://arxiv.org/abs/1605.08374}, year = {2016} } @article{LiJeSr16d, abstract = {In this note we consider sampling from (non-homogeneous) strongly Rayleigh probability measures. As an important corollary, we obtain a fast mixing Markov Chain sampler for Determinantal Point Processes.}, annote = {NULL}, archivePrefix = {arXiv}, arxivId = {1607.03559}, author = {Li, Chengtao and Jegelka, Stefanie and Sra, Suvrit}, eprint = {1607.03559}, file = {::}, journal = {ArXiv e-prints}, title = {{Fast Sampling for Strongly Rayleigh Measures with Application to Determinantal Point Processes}}, url = {http://arxiv.org/abs/1607.03559}, year = {2016} } @article{EvLa10, archivePrefix = {arXiv}, arxivId = {0912.4389}, author = {Evans, T S and Lambiotte, R}, doi = {10.1140/epjb/e2010-00261-8}, eprint = {0912.4389}, file = {::}, keywords = {05,40,75,89,community detection,edge partition,fb random walks and,fb structures and organization,hc networks and genealogical,in complex,levy flights,line graphs,overlapping communities,pacs,systems,trees,vertex cover}, pages = {1--8}, title = {{Line Graphs of Weighted Networks for Overlapping Communities(EPJB正式版).pdf}}, year = {2010} } @article{DuBa16, abstract = {We propose a new class of determinantal point processes (DPPs) which can be manipulated for inference and parameter learning in potentially sublinear time in the number of items. 
This class, based on a specific low-rank factorization of the marginal kernel, is particularly suited to a subclass of continuous DPPs and DPPs defined on exponentially many items. We apply this new class to modelling text documents as sampling a DPP of sentences, and propose a conditional maximum likelihood formulation to model topic proportions, which is made possible with no approximation for our class of DPPs. We present an application to document summarization with a DPP on {\$}2{\^{}}{\{}500{\}}{\$} items.}, archivePrefix = {arXiv}, arxivId = {1610.05925}, author = {Dupuy, Christophe and Bach, Francis}, eprint = {1610.05925}, file = {::}, journal = {arXiv preprint arXiv:1610.05925}, month = {oct}, title = {{Learning Determinantal Point Processes in Sublinear Time}}, url = {http://arxiv.org/abs/1610.05925}, year = {2016} } @article{Bardenet2016, abstract = {We show that repulsive random variables can yield Monte Carlo methods with faster convergence rates than the typical {\$}N{\^{}}{\{}-1/2{\}}{\$}, where {\$}N{\$} is the number of integrand evaluations. More precisely, we propose stochastic numerical quadratures involving determinantal point processes associated with multivariate orthogonal polynomials, and we obtain root mean square errors that decrease as {\$}N{\^{}}{\{}-(1+1/d)/2{\}}{\$}, where {\$}d{\$} is the dimension of the ambient space. First, we prove a central limit theorem (CLT) for the linear statistics of a class of determinantal point processes, when the reference measure is a product measure supported on a hypercube, which satisfies the Nevai-class regularity condition, a result which may be of independent interest. Next, we introduce a Monte Carlo method based on these determinantal point processes, and prove a CLT with explicit limiting variance for the quadrature error, when the reference measure satisfies a stronger regularity condition. 
As a corollary, by taking a specific reference measure and using a construction similar to importance sampling, we obtain a general Monte Carlo method, which applies to any measure with continuously derivable density. Loosely speaking, our method can be interpreted as a stochastic counterpart to Gaussian quadrature, which, at the price of some convergence rate, is easily generalizable to any dimension and has a more explicit error term.}, archivePrefix = {arXiv}, arxivId = {1605.00361}, author = {Bardenet, R{\'{e}}mi and Hardy, Adrien}, eprint = {1605.00361}, file = {::}, month = {may}, pages = {48}, title = {{Monte Carlo with Determinantal Point Processes}}, url = {http://arxiv.org/abs/1605.00361}, year = {2016} } @inproceedings{DeWa17, abstract = {Given a full rank matrix {\$}X{\$} with more columns than rows consider the task of estimating the pseudo inverse {\$}X{\^{}}+{\$} based on the pseudo inverse of a sampled subset of columns (of size at least the number of rows). We show that this is possible if the subset of columns is chosen proportional to the squared volume spanned by the rows of the chosen submatrix (ie, volume sampling). The resulting estimator is unbiased and surprisingly the covariance of the estimator also has a closed form: It equals a specific factor times {\$}X{\^{}}+X{\^{}}{\{}+\backslashtop{\}}{\$}. Pseudo inverse plays an important part in solving the linear least squares problem, where we try to predict a label for each column of {\$}X{\$}. We assume labels are expensive and we are only given the labels for the small subset of columns we sample from {\$}X{\$}. Using our methods we show that the weight vector of the solution for the sub problem is an unbiased estimator of the optimal solution for the whole problem based on all column labels. We believe that these new formulas establish a fundamental connection between linear least squares and volume sampling. 
We use our methods to obtain an algorithm for volume sampling that is faster than state-of-the-art and for obtaining bounds for the total loss of the estimated least-squares solution on all labeled columns.}, address = {Long Beach, CA, USA}, archivePrefix = {arXiv}, arxivId = {1705.06908}, author = {Derezinski, Michal and Warmuth, Manfred K.}, booktitle = {Neural Information Processing Systems}, eprint = {1705.06908}, file = {::}, month = {may}, title = {{Unbiased estimates for linear regression via volume sampling}}, url = {http://arxiv.org/abs/1705.06908}, year = {2017} } @inproceedings{DeGaLa17, abstract = {Dans cet article, nous {\'{e}}tudions une adaptation des processus ponctuels d{\'{e}}terminantaux au cadre des pixels d'une image. Il s'agit d'un cadre 2D discret, stationnaire et p{\'{e}}riodique. Nous nous int{\'{e}}ressons en particulier aux propri{\'{e}}t{\'{e}}s de r{\'{e}}pulsion d'un tel processus. Nous {\'{e}}tudions d'abord la r{\'{e}}pulsion totale, puis en utilisant le mod{\`{e}}le shot noise, nous caract{\'{e}}risons les cas de r{\'{e}}pulsion maximale et minimale de ces processus pixelliques d{\'{e}}terminantaux.}, address = {Juan-les-Pins}, author = {Desolneux, Agn{\`{e}}s and Galerne, Bruno and Launay, Claire}, booktitle = {GRETSI}, file = {::}, title = {{Etude de la r{\'{e}}pulsion des processus pixelliques d{\'{e}}terminantaux}}, url = {https://hal.archives-ouvertes.fr/hal-01548767v2}, year = {2017} } @article{Agarwal2014, abstract = {In this paper, we compare three initialization schemes for the KMEANS clustering algorithm: 1) random initialization (KMEANSRAND), 2) KMEANS++, and 3) KMEANSD++. Both KMEANSRAND and KMEANS++ have a major that the value of k needs to be set by the user of the algorithms. (Kang 2013) recently proposed a novel use of determinantal point processes for sampling the initial centroids for the KMEANS algorithm (we call it KMEANSD++). 
They, however, do not provide any evaluation establishing that KMEANSD++ is better than other algorithms. In this paper, we show that the performance of KMEANSD++ is comparable to KMEANS++ (both of which are better than KMEANSRAND) with KMEANSD++ having an additional that it can automatically approximate the value of k.}, archivePrefix = {arXiv}, arxivId = {1410.6975}, author = {Agarwal, Apoorv and Choromanska, Anna and Choromanski, Krzysztof}, eprint = {1410.6975}, file = {::}, month = {oct}, title = {{Notes on using Determinantal Point Processes for Clustering with Applications to Text Clustering}}, url = {http://arxiv.org/abs/1410.6975}, year = {2014} } @article{HKPV06, abstract = {We give a probabilistic introduction to determinantal and permanental point processes. Determinantal processes arise in physics (fermions, eigenvalues of random matrices) and in combinatorics (nonintersecting paths, random spanning trees). They have the striking property that the number of points in a region {\$}D{\$} is a sum of independent Bernoulli random variables, with parameters which are eigenvalues of the relevant operator on {\$}L{\^{}}2(D){\$}. Moreover, any determinantal process can be represented as a mixture of determinantal projection processes. We give a simple explanation for these known facts, and establish analogous representations for permanental processes, with geometric variables replacing the Bernoulli variables. These representations lead to simple proofs of existence criteria and central limit theorems, and unify known results on the distribution of absolute values in certain processes with radially symmetric distributions.}, archivePrefix = {arXiv}, arxivId = {math/0503110}, author = {Hough, J. 
Ben and Krishnapur, Manjunath and Peres, Yuval and Vir{\'{a}}g, B{\'{a}}lint}, doi = {10.1214/154957806000000078}, eprint = {0503110}, file = {::}, issn = {1549-5787}, journal = {Probability Surveys}, pages = {206--229}, primaryClass = {math}, title = {{Determinantal Processes and Independence}}, url = {http://arxiv.org/abs/math/0503110{\%}5Cnhttp://www.arxiv.org/pdf/math/0503110.pdf}, volume = {3}, year = {2006} } @article{KaDeKo16, abstract = {Gaussian Process bandit optimization has emerged as a powerful tool for optimizing noisy black box functions. One example in machine learning is hyper-parameter optimization where each evaluation of the target function requires training a model which may involve days or even weeks of computation. Most methods for this so-called "Bayesian optimization" only allow sequential exploration of the parameter space. However, it is often desirable to propose batches or sets of parameter values to explore simultaneously, especially when there are large parallel processing facilities at our disposal. Batch methods require modeling the interaction between the different evaluations in the batch, which can be expensive in complex scenarios. In this paper, we propose a new approach for parallelizing Bayesian optimization by modeling the diversity of a batch via Determinantal point processes (DPPs) whose kernels are learned automatically. This allows us to generalize a previous result as well as prove better regret bounds based on DPP sampling. 
Our experiments on a variety of synthetic and real-world robotics and hyper-parameter optimization tasks indicate that our DPP-based methods, especially those based on DPP sampling, outperform state-of-the-art methods.}, archivePrefix = {arXiv}, arxivId = {1611.04088}, author = {Kathuria, Tarun and Deshpande, Amit and Kohli, Pushmeet}, eprint = {1611.04088}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Kathuria, Deshpande, Kohli - 2016 - Batched Gaussian Process Bandit Optimization via Determinantal Point Processes.pdf:pdf}, pages = {1--16}, title = {{Batched Gaussian Process Bandit Optimization via Determinantal Point Processes}}, url = {http://arxiv.org/abs/1611.04088}, year = {2016} } @inproceedings{LiJeSr16, abstract = {In this note we consider sampling from (non-homogeneous) strongly Rayleigh probability measures. As an important corollary, we obtain a fast mixing Markov Chain sampler for Determinantal Point Processes.}, archivePrefix = {arXiv}, arxivId = {1607.03559}, author = {Li, Chengtao and Jegelka, Stefanie and Sra, Suvrit}, booktitle = {Neural Information Processing Systems}, eprint = {1607.03559}, file = {::}, title = {{Fast Sampling for Strongly Rayleigh Measures with Application to Determinantal Point Processes}}, year = {2016} } @inproceedings{Kan13, abstract = {Determinantal Point Process (DPP) has gained much popularity for modeling sets of diverse items. The gist of DPP is that the probability of choosing a particular set of items is proportional to the determinant of a positive definite matrix that de-fines the similarity of those items. However, computing the determinant requires time cubic in the number of items, and is hence impractical for large sets. In this paper, we address this problem by constructing a rapidly mixing Markov chain, from which we can acquire a sample from the given DPP in sub-cubic time. 
In ad-dition, we show that this framework can be extended to sampling from cardinality-constrained DPPs. As an application, we show how our sampling algorithm can be used to provide a fast heuristic for determining the number of clusters, resulting in better clustering. There are some crucial errors in the proofs of the theorem which invalidate the theoretical claims of this paper. Please consult the appendix for more details.}, author = {Kang, Byungkon}, booktitle = {NIPS}, file = {::}, title = {{Fast Determinantal Point Process Sampling with Application to Clustering}}, year = {2013} } @inproceedings{AfKuFo13, abstract = {Determinantal point processes (DPPs) are appealing models for subset selection prob-lems where diversity is desired. They offer surprisingly efficient inference, including sam-pling in O(N 3) time and O(N 2) space, where N is the number of base items. However, in some applications, N may grow so large that sampling from a DPP becomes compu-tationally infeasible. This is especially true in settings where the DPP kernel matrix can-not be represented by a linear decomposition of low-dimensional feature vectors. In these cases, we propose applying the Nystr{\"{o}}m ap-proximation to project the kernel matrix into a low-dimensional space. While theoretical guarantees for the Nystr{\"{o}}m approximation in terms of standard matrix norms have been previously established, we are concerned with probabilistic measures, like total variation dis-tance between the DPP and its Nystr{\"{o}}m ap-proximation, that behave quite differently. In this paper we derive new error bounds for the Nystr{\"{o}}m-approximated DPP and present em-pirical results to corroborate them. 
We then demonstrate the Nystr{\"{o}}m-approximated DPP by applying it to a motion capture summa-rization task.}, address = {Scottsdale, AZ, USA}, author = {Affandi, Raja Hafiz and Kulesza, Alex and Fox, Emily B and Taskar, Ben}, booktitle = {International Conference on Artificial Intelligence and Statistics}, file = {::}, issn = {15337928}, keywords = {dblp}, pages = {85--98}, title = {{Nystrom Approximation for Large-Scale Determinantal Processes.}}, url = {http://jmlr.org/proceedings/papers/v31/affandi13a.html}, volume = {31}, year = {2013} } @article{Evans2009, abstract = {In this paper, we use a partition of the links of a network in order to uncover its community structure. This approach allows for communities to overlap at nodes, so that nodes may be in more than one community. We do this by making a node partition of the line graph of the original network. In this way we show that any algorithm which produces a partition of nodes can be used to produce a partition of links. We discuss the role of the degree heterogeneity and propose a weighted version of the line graph in order to account for this.}, archivePrefix = {arXiv}, arxivId = {0903.2181}, author = {Evans, T. S. and Lambiotte, R.}, doi = {10.1103/PhysRevE.80.016105}, eprint = {0903.2181}, file = {::}, issn = {1539-3755}, journal = {Physical Review E}, month = {jul}, number = {1}, pages = {016105}, title = {{Line graphs, link partitions, and overlapping communities}}, url = {http://arxiv.org/abs/0903.2181}, volume = {80}, year = {2009} } @inproceedings{ZeSr16, abstract = {We introduce Divnet, a flexible technique for learning networks with di-verse neurons. Divnet models neuronal diversity by placing a Determi-nantal Point Process (DPP) over neurons in a given layer. It uses this DPP to select a subset of diverse neurons and subsequently fuses the redundant neurons into the selected ones. 
Compared with previous approaches, Di-vnet offers a more principled, flexible technique for capturing neuronal diversity and thus implicitly enforcing regularization. This enables effec-tive auto-tuning of network architecture and leads to smaller network sizes without hurting performance. Moreover, through its focus on diversity and neuron fusing, Divnet remains compatible with other procedures that seek to reduce memory footprints of networks. We present experimental results to corroborate our claims: for pruning neural networks, Divnet is seen to be notably superior to competing approaches.}, address = {San Juan, Puerto Rico}, archivePrefix = {arXiv}, arxivId = {arXiv:1511.05077v6}, author = {Mariet, Zelda and Sra, Suvrit}, booktitle = {Conference on Learning Representations}, eprint = {arXiv:1511.05077v6}, file = {::}, pages = {1--13}, title = {{Diversity Networks}}, url = {https://arxiv.org/pdf/1511.05077.pdf}, year = {2016} } @article{GiKuFo14, abstract = {A determinantal point process (DPP) is a probabilistic model of set diversity compactly parameterized by a positive semi-definite kernel matrix. To fit a DPP to a given task, we would like to learn the entries of its kernel matrix by maximizing the log-likelihood of the available data. However, log-likelihood is non-convex in the entries of the kernel matrix, and this learning problem is conjectured to be NP-hard. Thus, previous work has instead focused on more restricted convex learning settings: learning only a single weight for each row of the kernel matrix, or learning weights for a linear combination of DPPs with fixed kernel matrices. In this work we propose a novel algorithm for learning the full kernel matrix. By changing the kernel parameterization from matrix entries to eigenvalues and eigenvectors, and then lower-bounding the likelihood in the manner of expectation-maximization algorithms, we obtain an effective optimization procedure. 
We test our method on a real-world product recommendation task, and achieve relative gains of up to 16.5{\%} in test log-likelihood compared to the naive approach of maximizing likelihood by projected gradient ascent on the entries of the kernel matrix.}, archivePrefix = {arXiv}, arxivId = {1411.1088}, author = {Gillenwater, Jennifer A and Kulesza, Alex and Fox, Emily and Taskar, Ben}, eprint = {1411.1088}, journal = {Neural Information Processing Systems}, pages = {3149--3157}, title = {{Expectation-Maximization for Learning Determinantal Point Processes}}, url = {http://papers.nips.cc/paper/5564-expectation-maximization-for-learning-determinantal-point-processes.pdf http://papers.nips.cc/paper/5564-expectation-maximization-for-learning-determinantal-point-processes}, year = {2014} } @article{KaDe16, abstract = {Subset selection problems ask for a small, diverse yet representative subset of the given data. When pairwise similarities are captured by a kernel, the determinants of submatrices provide a measure of diversity or independence of items within a subset. Matroid theory gives another notion of independence, thus giving rise to optimization and sampling questions about Determinantal Point Processes (DPPs) under matroid constraints. Partition constraints, as a special case, arise naturally when incorporating additional labeling or clustering information, besides the kernel, in DPPs. Finding the maximum determinant submatrix under matroid constraints on its row/column indices has been previously studied. However, the corresponding question of sampling from DPPs under matroid constraints has been unresolved, beyond the simple cardinality constrained k-DPPs. We give the first polynomial time algorithm to sample exactly from DPPs under partition constraints, for any constant number of partitions. We complement this by a complexity theoretic barrier that rules out such a result under general matroid constraints.
Our experiments indicate that partition-constrained DPPs offer more flexibility and more diversity than k-DPPs and their naive extensions, while being reasonably efficient in running time. We also show that a simple greedy initialization followed by local search gives improved approximation guarantees for the problem of MAP inference from k- DPPs on well-conditioned kernels. Our experiments show that this improvement is significant for larger values of k, supporting our theoretical result.}, archivePrefix = {arXiv}, arxivId = {1607.01551}, author = {Kathuria, Tarun and Deshpande, Amit}, eprint = {1607.01551}, file = {::}, number = {Nips}, title = {{On Sampling and Greedy MAP Inference of Constrained Determinantal Point Processes}}, url = {http://arxiv.org/abs/1607.01551}, year = {2016} } @inproceedings{GiKuTa13, abstract = {Determinantal point processes (DPPs) have recently been proposed as computa-tionally efficient probabilistic models of diverse sets for a variety of applications, including document summarization, image search, and pose estimation. Many DPP inference operations, including normalization and sampling, are tractable; however, finding the most likely configuration (MAP), which is often required in practice for decoding, is NP-hard, so we must resort to approximate inference. This optimization problem, which also arises in experimental design and sensor placement, involves finding the largest principal minor of a positive semidefinite matrix. Because the objective is log-submodular, greedy algorithms have been used in the past with some empirical success; however, these methods only give approximation guarantees in the special case of monotone objectives, which cor-respond to a restricted class of DPPs. In this paper we propose a new algorithm for approximating the MAP problem based on continuous techniques for submod-ular function maximization. 
Our method involves a novel continuous relaxation of the log-probability function, which, in contrast to the multilinear extension used for general submodular functions, can be evaluated and differentiated exactly and efficiently. We obtain a practical algorithm with a 1/4-approximation guarantee for a more general class of non-monotone DPPs; our algorithm also extends to MAP inference under complex polytope constraints, making it possible to com-bine DPPs with Markov random fields, weighted matchings, and other models. We demonstrate that our approach outperforms standard and recent methods on both synthetic and real-world data.}, author = {Gillenwater, Jennifer and Kulesza, Alex and Taskar, Ben}, booktitle = {Nips}, file = {::}, isbn = {9781627480031}, issn = {10495258}, pages = {1--9}, title = {{Near-Optimal MAP Inference for Determinantal Point Processes}}, url = {http://www.cis.upenn.edu/{~}taskar/pubs/dppmap{\_}nips12.pdf}, year = {2013} } @inproceedings{LiJeSr17, abstract = {We study dual volume sampling, a method for selecting k columns from an n*m short and wide matrix (n {\textless}= k {\textless}= m) such that the probability of selection is proportional to the volume of the parallelepiped spanned by the rows of the induced submatrix. This method was studied in [3], who motivated it as a promising method for column subset selection. However, the development of polynomial time sampling algorithms -- exact or approximate -- has been since open. We close this open problem by presenting (i) an exact (randomized) polynomial time sampling algorithm; (ii) its derandomization that samples subsets satisfying the desired properties deterministically; and (iii) an efficient approximate sampling procedure using Markov chains that are provably fast mixing. 
Our algorithms can thus benefit downstream applications of dual volume sampling, such as column subset selection and experimental design.}, address = {Long Beach, CA, USA}, annote = {NULL}, archivePrefix = {arXiv}, arxivId = {1703.02674}, author = {Li, Chengtao and Jegelka, Stefanie and Sra, Suvrit}, booktitle = {Neural Information Processing Systems}, eprint = {1703.02674}, file = {::}, month = {mar}, title = {{Column Subset Selection via Polynomial Time Dual Volume Sampling}}, url = {http://arxiv.org/abs/1703.02674}, year = {2017} } @inproceedings{TrBaAm17, abstract = {R{\'{e}}sum{\'{e}} – Nous consid{\'{e}}rons l echantillonnage de signaux sur graph{\`{e}} a bande limit{\'{e}}e k , i . e . , les combinaisons lin{\'{e}}aires des k premiers modes de Fourier du graphe . Il existe k noeuds du graphe qui permettent leur reconstruction parfaite , les trouver n{\'{e}}cessite cependant une diagonalisation partielle de la matrice laplacienne , trop co{\^{u}}teus{\`{e}} a grande dimension . Nous proposons une nouvelle m{\'{e}}thode rapide d echantillonnage bas{\'{e}}e sur des processus d{\'{e}}terminantaux qui permet la reconstructio a partir d ' un nombre d echantillons de l ' ordre de k . Abstract – We consider the problem of sampling k - bandlimited graph signals , i . e . , linear combinations of the first k graph Fourier modes . We know that a set of k nodes embedding all k - bandlimited signals always exists , thereby enabling their perfect reconstruction after sampling . Unfortunately , to exhibit such a set , one needs to partially diagonalize the graph Laplacian , which becomes prohibitive at large scale . 
We propose a novel strategy based on determinantal point processes that side - steps partial diagonalisation and enables reconstruction with only O (k) samples .}, archivePrefix = {arXiv}, arxivId = {1704.02239}, author = {Tremblay, Nicolas and Barthelm{\'{e}}, Simon and Amblard, Pierre-Olivier}, booktitle = {GRETSI}, eprint = {1704.02239}, file = {::}, title = {{{\'{E}}chantillonnage de signaux sur graphes via des processus d{\'{e}}terminantaux}}, url = {https://hal.archives-ouvertes.fr/hal-01503736 https://arxiv.org/abs/1704.02239}, year = {2017} } @article{AgChCh14, abstract = {In this paper, we compare three initialization schemes for the KMEANS clustering algorithm: 1) random initialization (KMEANSRAND), 2) KMEANS++, and 3) KMEANSD++. Both KMEANSRAND and KMEANS++ have a major drawback that the value of k needs to be set by the user of the algorithms. (Kang 2013) recently proposed a novel use of determinantal point processes for sampling the initial centroids for the KMEANS algorithm (we call it KMEANSD++). They, however, do not provide any evaluation establishing that KMEANSD++ is better than other algorithms. In this paper, we show that the performance of KMEANSD++ is comparable to KMEANS++ (both of which are better than KMEANSRAND) with KMEANSD++ having an additional advantage that it can automatically approximate the value of k.}, archivePrefix = {arXiv}, arxivId = {1410.6975}, author = {Agarwal, Apoorv and Choromanska, Anna and Choromanski, Krzysztof}, eprint = {1410.6975}, file = {::}, month = {oct}, title = {{Notes on using Determinantal Point Processes for Clustering with Applications to Text Clustering}}, url = {http://arxiv.org/abs/1410.6975}, year = {2014} } @article{LaMoRu15, abstract = {Statistical models and methods for determinantal point processes (DPPs) seem largely unexplored. We demonstrate that DPPs provide useful models for the description of spatial point pattern datasets where nearby points repel each other.
Such data are usually modelled by Gibbs point processes, where the likelihood and moment expressions are intractable and simulations are time consuming. We exploit the appealing probabilistic properties of DPPs to develop parametric models, where the likelihood and moment expressions can be easily evaluated and realizations can be quickly simulated. We discuss how statistical inference is conducted using the likelihood or moment properties of DPP models, and we provide freely available software for simulation and statistical inference.}, archivePrefix = {arXiv}, arxivId = {1205.4818}, author = {Lavancier, Fr{\'{e}}d{\'{e}}ric and M{\o}ller, Jesper and Rubak, Ege}, doi = {10.1111/rssb.12096}, eprint = {1205.4818}, file = {::}, issn = {14679868}, journal = {Journal of the Royal Statistical Society. Series B: Statistical Methodology}, keywords = {Maximum-likelihood-based inference,Point process density,Product densities,Repulsiveness,Simulation,Spectral approach}, number = {4}, pages = {853--877}, title = {{Determinantal point process models and statistical inference}}, volume = {77}, year = {2015} } @article{UrBrMoRi17, abstract = {Determinantal Point Processes (DPPs) are a family of probabilistic models that have a repulsive behavior, and lend themselves naturally to many tasks in machine learning where returning a diverse set of objects is important. While there are fast algorithms for sampling, marginalization and conditioning, much less is known about learning the parameters of a DPP. Our contribution is twofold: (i) we establish the optimal sample complexity achievable in this problem and show that it is governed by a natural parameter, which we call the cycle sparsity; (ii) we propose a provably fast combinatorial algorithm that implements the method of moments efficiently and achieves optimal sample complexity.
Finally, we give experimental results that confirm our theoretical findings.}, archivePrefix = {arXiv}, arxivId = {1703.00539}, author = {Urschel, John and Brunel, Victor-Emmanuel and Moitra, Ankur and Rigollet, Philippe}, eprint = {1703.00539}, file = {::}, journal = {International Conference on Machine Learning}, pages = {3511--3520}, title = {{Learning Determinantal Point Processes with Moments and Cycles}}, url = {http://proceedings.mlr.press/v70/urschel17a/urschel17a.pdf https://arxiv.org/abs/1703.00539}, volume = {70}, year = {2017} } @inproceedings{LiJeSr16a, abstract = {The Nystr$\backslash$"om method has long been popular for scaling up kernel methods. Its theoretical guarantees and empirical performance rely critically on the quality of the landmarks selected. We study landmark selection for Nystr$\backslash$"om using Determinantal Point Processes (DPPs), discrete probability models that allow tractable generation of diverse samples. We prove that landmarks selected via DPPs guarantee bounds on approximation errors; subsequently, we analyze implications for kernel ridge regression. Contrary to prior reservations due to cubic complexity of DPPsampling, we show that (under certain conditions) Markov chain DPP sampling requires only linear time in the size of the data. 
We present several empirical results that support our theoretical analysis, and demonstrate the superior performance of DPP-based landmark selection compared with existing approaches.}, address = {New York, USA}, archivePrefix = {arXiv}, arxivId = {1603.06052}, author = {Li, Chengtao and Jegelka, Stefanie and Sra, Suvrit}, booktitle = {International Conference on Machine Learning}, eprint = {1603.06052}, file = {::}, month = {mar}, title = {{Fast DPP Sampling for Nystr{\"{o}}m with Application to Kernel Methods}}, url = {http://proceedings.mlr.press/v48/lih16.html http://arxiv.org/abs/1603.06052}, volume = {48}, year = {2016} } @article{Affandi2014a, abstract = {Determinantal point processes (DPPs) are well- suited for modeling repulsion and have proven useful in applications where diversity is desired. While DPPs have many appealing properties, learning the parameters of a DPP is difficult, as the likelihood is non-convex and is infeasible to compute in many scenarios. Here we propose Bayesian methods for learning the DPP kernel parameters. These methods are applicable in large- scale discrete and continuous DPP settings, even when the likelihood can only be bounded. We demonstrate the utility of our DPP learning methods in studying the progression of diabetic neuropathy based on the spatial distribution of nerve fibers, and in studying human perception of diversity in images.}, archivePrefix = {arXiv}, arxivId = {1402.4862}, author = {Affandi, Raja Hafiz and Fox, Emily B. and Adams, Ryan P.
and Taskar, Ben}, eprint = {1402.4862}, isbn = {9781634393973}, journal = {International Conference on Machine Learning}, number = {1}, pages = {2967--2981}, title = {{Learning the parameters of determinantal point process kernels}}, url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84919798123{\&}partnerID=tZOtx3y1}, volume = {4}, year = {2014} } @article{TrAmBa17, abstract = {We present a new random sampling strategy for k-bandlimited signals defined on graphs, based on determinantal point processes (DPP). For small graphs, ie, in cases where the spectrum of the graph is accessible, we exhibit a DPP sampling scheme that enables perfect recovery of bandlimited signals. For large graphs, ie, in cases where the graph's spectrum is not accessible, we investigate, both theoretically and empirically, a sub-optimal but much faster DPP based on loop-erased random walks on the graph. Preliminary experiments show promising results especially in cases where the number of measurements should stay as small as possible and for graphs that have a strong community structure. Our sampling scheme is efficient and can be applied to graphs with up to {\$}10{\^{}}6{\$} nodes.}, archivePrefix = {arXiv}, arxivId = {1703.01594}, author = {Tremblay, Nicolas and Amblard, Pierre-Olivier and Barthelm{\'{e}}, Simon}, eprint = {1703.01594}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Tremblay, Amblard, Barthelm{\'{e}} - 2017 - Graph sampling with determinantal processes.pdf:pdf}, isbn = {9780992862671}, journal = {ArXiv e-prints}, title = {{Graph sampling with determinantal processes}}, url = {http://arxiv.org/abs/1703.01594}, year = {2017} } @inproceedings{LiJeSr16c, abstract = {We study probability measures induced by set functions with constraints. Such measures arise in a variety of real-world settings, where prior knowledge, resource limitations, or other pragmatic considerations impose constraints. 
We consider the task of rapidly sampling from such constrained measures, and develop fast Markov chain samplers for them. Our first main result is for MCMC sampling from Strongly Rayleigh (SR) measures, for which we present sharp polynomial bounds on the mixing time. As a corollary, this result yields a fast mixing sampler for Determinantal Point Processes (DPPs), yielding (to our knowledge) the first provably fast MCMC sampler for DPPs since their inception over four decades ago. Beyond SR measures, we develop MCMC samplers for probabilistic models with hard constraints and identify sufficient conditions under which their chains mix rapidly. We illustrate our claims by empirically verifying the dependence of mixing times on the key factors governing our theoretical bounds.}, address = {Barcelona, Spain}, annote = {NULL}, archivePrefix = {arXiv}, arxivId = {1608.01008}, author = {Li, Chengtao and Jegelka, Stefanie and Sra, Suvrit}, booktitle = {Neural Information Processing Systems}, eprint = {1608.01008}, file = {::}, issn = {10495258}, number = {2}, title = {{Fast Mixing Markov Chains for Strongly Rayleigh Measures, DPPs, and Constrained Sampling}}, url = {https://papers.nips.cc/paper/6182-fast-mixing-markov-chains-for-strongly-rayleigh-measures-dpps-and-constrained-sampling http://arxiv.org/abs/1608.01008}, year = {2016} } @inproceedings{ZhKjMa17, abstract = {We study a mini-batch diversification scheme for stochastic gradient descent (SGD). While classi-cal SGD relies on uniformly sampling data points to form a mini-batch, we propose a non-uniform sampling scheme based on the Determinantal Point Process (DPP). The DPP relies on a sim-ilarity measure between data points and gives low probabilities to mini-batches which contain redun-dant data, and higher probabilities to mini-batches with more diverse data. This simultaneously bal-ances the data and leads to stochastic gradients with lower variance. We term this approach Di-versified Mini-Batch SGD (DM-SGD). 
We show that regular SGD and a biased version of stratified sampling emerge as special cases. Furthermore, DM-SGD generalizes stratified sampling to cases where no discrete features exist to bin the data into groups. We show experimentally that our method results more interpretable and diverse features in unsupervised setups, and in better classification accuracies in supervised setups.}, address = {Sydney, Australia}, author = {Zhang, Cheng and Kjellstr{\"{o}}m, Hedvig and Mandt, Stephan}, booktitle = {Uncertainty in Artificial Intelligence}, file = {::}, title = {{Determinantal Point Processes for Mini-Batch Diversification}}, url = {http://auai.org/uai2017/proceedings/papers/69.pdf}, year = {2017} } @article{HoKoFa14, abstract = {In the period since 2004, many novel sophisticated approaches for generic multi-document summarization have been developed. Intuitive simple approaches have also been shown to perform unexpectedly well for the task. Yet it is practically impossible to compare the existing approaches directly, because systems have been evaluated on different datasets, with different evaluation measures, against different sets of comparison systems. Here we present a corpus of summaries produced by several state-of-the-art extractive summarization systems or by popular baseline systems. The inputs come from the 2004 DUC evaluation, the latest year in which generic summarization was addressed in a shared task. We use the same settings for ROUGE automatic evaluation to compare the systems directly and analyze the statistical significance of the differences in performance. We show that in terms of average scores the state-of-the-art systems appear similar but that in fact they produce very different summaries. 
Our corpus will facilitate future research on generic summarization and motivates the need for development of more sensitive evaluation measures and for approaches to system combination in summarization.}, author = {Hong, Kai and Conroy, John M and Favre, Benoit and Kulesza, Alex and Lin, Hui and Nenkova, Ani}, file = {::}, isbn = {978-2-9517408-8-4}, journal = {Lrec2014}, keywords = {duc 2004,evaluation,generic summarization}, number = {Classy 04}, pages = {1608--1616}, title = {{A Repository of State of the Art and Competitive Baseline Summaries for Generic News Summarization}}, volume = {65}, year = {2014} } @article{StVi16, abstract = {A determinantal point process (DPP) over a universe {\$}\backslash{\{}1,\backslashldots,m\backslash{\}}{\$} with respect to an {\$}m \backslashtimes m{\$} positive semidefinite matrix {\$}L{\$} is a probability distribution where the probability of a subset {\$}S \backslashsubseteq \backslash{\{}1,\backslashldots,m\backslash{\}}{\$} is proportional to the determinant of the principal minor of {\$}L{\$} corresponding to {\$}S.{\$} DPPs encapsulate a wide variety of known distributions and appear naturally (and surprisingly) in a wide variety of areas such as physics, mathematics and computer science. Several applications that use DPPs rely on the fact that they are computationally tractable -- i.e., there are algorithms for sampling from DPPs efficiently. Recently, there is growing interest in studying a generalization of DPPs in which the support of the distribution is a restricted family B of subsets of {\$}\backslash{\{}1,2,\backslashldots, m\backslash{\}}{\$}. Mathematically, these distributions, which we call generalized DPPs, include the well-studied hardcore distributions as special cases (when {\$}L{\$} is diagonal). In applications, they can be used to refine models based on DPPs by imposing combinatorial constraints on the support of the distribution. 
In this paper we take first steps in a systematic study of computational questions concerning generalized DPPs. We introduce a natural class of linear families: roughly, a family B is said to be linear if there is a collection of {\$}p{\$} linear forms that all elements of B satisfy. Important special cases of linear families are all sets of cardinality {\$}k{\$} -- giving rise to {\$}k{\$}-DPPs -- and, more generally, partition matroids. On the positive side, we prove that, when {\$}p{\$} is a constant, there is an efficient, exact sampling algorithm for linear DPPs. We complement these results by proving that, when {\$}p{\$} is large, the computational problem related to such DPPs becomes {\$}\backslash{\#}{\$}P-hard. Our proof techniques rely and build on the interplay between polynomials and probability distributions.}, archivePrefix = {arXiv}, arxivId = {1608.00554}, author = {Straszak, Damian and Vishnoi, Nisheeth K.}, eprint = {1608.00554}, file = {::}, month = {aug}, title = {{Generalized Determinantal Point Processes: The Linear Case}}, url = {http://arxiv.org/abs/1608.00554}, year = {2016} } @inproceedings{BrMoRi17, abstract = {Determinantal point processes (DPPs) have wide-ranging applications in machine learning, where they are used to enforce the notion of diversity in subset selection problems. Many estimators have been proposed, but surprisingly the basic properties of the maximum likelihood estimator (MLE) have received little attention. In this paper, we study the local geometry of the expected log-likelihood function to prove several rates of convergence for the MLE. We also give a complete characterization of the case where the MLE converges at a parametric rate. 
Even in the latter case, we also exhibit a potential curse of dimensionality where the asymptotic variance of the MLE is exponentially large in the dimension of the problem.}, address = {Amsterdam, Netherlands}, author = {Brunel, Victor-Emmanuel and Moitra, Ankur and Rigollet, Philippe and Urschel, John}, booktitle = {Conference on Learning Theory}, editor = {Kale, Satyen and Shamir, Ohad}, pages = {343--345}, publisher = {PMLR}, series = {Proceedings of Machine Learning Research}, title = {{Rates of estimation for determinantal point processes}}, url = {http://proceedings.mlr.press/v65/brunel17a.html}, volume = {65}, year = {2017} } @inproceedings{GaBaVa17, abstract = {Determinantal point processes (DPPs) are distributions over sets of items that model diversity using kernels. Their applications in machine learning include summary extraction and recommendation systems. Yet, the cost of sampling from a DPP is prohibitive in large-scale applications, which has triggered an effort towards efficient approximate samplers. We build a novel MCMC sampler that combines ideas from combinatorial geometry, linear programming, and Monte Carlo methods to sample from DPPs with a fixed sample cardinality, also called projection DPPs. Our sampler leverages the ability of the hit-and-run MCMC kernel to efficiently move across convex bodies. Previous theoretical results yield a fast mixing time of our chain when targeting a distribution that is close to a projection DPP, but not a DPP in general. 
Our empirical results demonstrate that this extends to sampling projection DPPs, i.e., our sampler is more sample-efficient than previous approaches which in turn translates to faster convergence when dealing with costly-to-evaluate functions, such as summary extraction in our experiments.}, address = {Sydney, Australia}, archivePrefix = {arXiv}, arxivId = {1705.10498}, author = {Gautier, Guillaume and Bardenet, R{\'{e}}mi and Valko, Michal}, booktitle = {International Conference on Machine Learning}, editor = {Precup, Doina and Teh, Yee Whye}, eprint = {1705.10498}, file = {::}, pages = {1223--1232}, publisher = {PMLR}, title = {{Zonotope hit-and-run for efficient sampling from projection DPPs}}, url = {http://proceedings.mlr.press/v70/gautier17a/gautier17a.pdf http://arxiv.org/abs/1705.10498}, year = {2017} } @article{MaSr15, abstract = {Determinantal point processes (DPPs) offer an elegant tool for encoding probabilities over subsets of a ground set. Discrete DPPs are parametrized by a positive semidefinite matrix (called the DPP kernel), and estimating this kernel is key to learning DPPs from observed data. We consider the task of learning the DPP kernel, and develop for it a surprisingly simple yet effective new algorithm. Our algorithm offers the following benefits over previous approaches: (a) it is much simpler; (b) it yields equally good and sometimes even better local maxima; and (c) it runs an order of magnitude faster on large problems. 
We present experimental results on both real and simulated data to illustrate the numerical performance of our technique.}, archivePrefix = {arXiv}, arxivId = {1508.00792}, author = {Mariet, Zelda and Sra, Suvrit}, eprint = {1508.00792}, file = {::}, month = {aug}, title = {{Fixed-point algorithms for learning determinantal point processes}}, url = {http://arxiv.org/abs/1508.00792}, year = {2015} } @inproceedings{ChZhZh17, abstract = {Recommender systems take the key responsibility to help users discover items that they might be interested in. Many recommenda-tion algorithms are built upon similarity measures, which usually result in low intra-list diversity. The deficiency in capturing the whole range of user interest often leads to poor satisfaction. To solve this problem, increasing attention has been paid on improving the diversity of recommendation results in recent years. In this paper, we propose a novel method to improve the diversity of top-N recommendation results based on the determinantal point process (DPP), which is an elegant model for characterizing the repulsion phenomenon. We propose an acceleration algorithm to greatly speed up the process of the result inference, making our algorithm practical for large-scale scenarios. We also incorporate a tunable parameter into the DPP model which allows the users to smoothly control the level of diversity. More diversity metrics are introduced to better evaluate diversification algorithms. We have evaluated our algorithm on several public datasets, and compared it thoroughly with other reference algorithms. 
Results show that our proposed algorithm provides a much better accuracy-diversity trade-off with comparable efficiency.}, address = {Como, Italy}, archivePrefix = {arXiv}, arxivId = {1709.05135}, author = {Chen, Laming and Zhang, Guoxin and Zhou, Hanning}, booktitle = {ACM Conference on Recommender Systems, Large Scale Recommendation System Workshop}, eprint = {1709.05135}, file = {::}, title = {{Improving the Diversity of Top-N Recommendation via Determinantal Point Process}}, url = {https://arxiv.org/pdf/1709.05135.pdf}, volume = {8}, year = {2017} } @inproceedings{gartrell2016low, abstract = {Determinantal point processes (DPPs) have garnered attention as an elegant probabilistic model of set diversity. They are useful for a number of subset selection tasks, including product recommendation. DPPs are parametrized by a positive semi-definite kernel matrix. In this work we present a new method for learning the DPP kernel from observed data using a low-rank factorization of this kernel. We show that this low-rank factorization enables a learning algorithm that is nearly an order of magnitude faster than previous approaches, while also providing for a method for computing product recommendation predictions that is far faster (up to 20x faster or more for large item catalogs) than previous techniques that involve a full-rank DPP kernel. Furthermore, we show that our method provides equivalent or sometimes better predictive performance than prior full-rank DPP approaches, and better performance than several other competing recommendation methods in many cases. 
We conduct an extensive experimental evaluation using several real-world datasets in the domain of product recommendation to demonstrate the utility of our method, along with its limitations.}, archivePrefix = {arXiv}, arxivId = {1602.05436}, author = {Gartrell, Mike and Paquet, Ulrich and Koenigstein, Noam}, booktitle = {AAAI Conference on Artificial Intelligence}, eprint = {1602.05436}, file = {::}, title = {{Low-Rank Factorization of Determinantal Point Processes for Recommendation}}, url = {http://arxiv.org/abs/1602.05436}, year = {2017} } @inproceedings{LiJeSr16b, abstract = {Determinantal Point Processes (DPPs) provide probabilistic models over discrete sets of items that help model repulsion and diversity. Applicability of DPPs to large sets of data is, however, hindered by the expensive matrix operations involved, especially when sampling. We therefore propose a new efficient approximate two-stage sampling algorithm for discrete k-DPPs. As opposed to previous approximations, our algorithm aims at minimizing the variational distance to the original distribution. Experiments indicate that the resulting sampling algorithm works well on large data and yields more accurate samples than previous approaches.}, address = {Cadiz, Spain}, archivePrefix = {arXiv}, arxivId = {1509.01618}, author = {Li, Chengtao and Jegelka, Stefanie and Sra, Suvrit}, booktitle = {International Conference on Artificial Intelligence and Statistics}, eprint = {1509.01618}, file = {::}, pages = {1--14}, title = {{Efficient Sampling for k-Determinantal Point Processes}}, url = {http://proceedings.mlr.press/v51/li16f.pdf http://arxiv.org/abs/1509.01618}, volume = {51}, year = {2016} } @article{BaTi15, abstract = {Determinantal point processes (DPPs) are point process models that naturally encode diversity between the points of a given realization, through a positive definite kernel {\$}K{\$}.
DPPs possess desirable properties, such as exact sampling or analyticity of the moments, but learning the parameters of kernel {\$}K{\$} through likelihood-based inference is not straightforward. First, the kernel that appears in the likelihood is not {\$}K{\$}, but another kernel {\$}L{\$} related to {\$}K{\$} through an often intractable spectral decomposition. This issue is typically bypassed in machine learning by directly parametrizing the kernel {\$}L{\$}, at the price of some interpretability of the model parameters. We follow this approach here. Second, the likelihood has an intractable normalizing constant, which takes the form of a large determinant in the case of a DPP over a finite set of objects, and the form of a Fredholm determinant in the case of a DPP over a continuous domain. Our main contribution is to derive bounds on the likelihood of a DPP, both for finite and continuous domains. Unlike previous work, our bounds are cheap to evaluate since they do not rely on approximating the spectrum of a large matrix or an operator. Through usual arguments, these bounds thus yield cheap variational inference and moderately expensive exact Markov chain Monte Carlo inference methods for DPPs.}, archivePrefix = {arXiv}, arxivId = {1507.01154}, author = {Bardenet, R{\'{e}}mi and Titsias, Michalis K.}, eprint = {1507.01154}, file = {::}, month = {jul}, title = {{Inference for determinantal point processes without spectral knowledge}}, url = {http://arxiv.org/abs/1507.01154}, year = {2015} } @article{LiBi12, abstract = {We introduce a method to learn a mixture of submodular “shells” in a large-margin setting. A submodular shell is an abstract submodular function that can be instantiated with a ground set and a set of parameters to produce a submodular function. A mixture of such shells can then also be so instantiated to produce a more complex submodular function. What our algorithm learns are the mixture weights over such shells. 
We provide a risk bound guarantee when learning in a large-margin structured-prediction setting using a projected subgradient method when only approximate submodular optimization is possible (such as with submodular function maximization). We apply this method to the problem of multi-document summarization and produce the best results reported so far on the widely used NIST DUC-05 through DUC-07 document summarization corpora.}, author = {Lin, Hui and Bilmes, Jeff A.}, isbn = {9780974903989}, journal = {Uncertainty in Artificial Intelligence}, title = {{Learning mixtures of submodular shells with application to document summarization}}, url = {http://arxiv.org/abs/1210.4871}, year = {2012} } @article{kulesza2011kdpps, abstract = {Determinantal point processes ( DPPs ) have recently been proposed$\backslash$nas models for set selection problems where diversity is pre- ferred.$\backslash$nFor example, they can be used to select diverse sets of sentences$\backslash$nto form doc- ument summaries, or to find multiple non- overlapping$\backslash$nhuman ...}, author = {Kulesza, Alex and Taskar, Ben}, journal = {International Conference on Machine Learning}, pages = {1193--1200}, title = {{k-DPPs: Fixed-Size Determinantal Point Processes}}, year = {2011} } @article{Dann2017, abstract = {Statistical performance bounds for reinforcement learning (RL) algorithms can be critical for high-stakes applications like healthcare. This paper introduces a new framework for theoretically measuring the performance of such algorithms called Uniform-PAC, which is a strengthening of the classical Probably Approximately Correct (PAC) framework. In contrast to the PAC framework, the uniform version may be used to derive high probability regret guarantees and so forms a bridge between the two setups that has been missing in the literature.
We demonstrate the benefits of the new framework for finite-state episodic MDPs with a new algorithm that is Uniform-PAC and simultaneously achieves optimal regret and PAC guarantees except for a factor of the horizon.}, archivePrefix = {arXiv}, arxivId = {1703.07710}, author = {Dann, Christoph and Lattimore, Tor and Brunskill, Emma}, eprint = {1703.07710}, file = {::}, issn = {10495258}, number = {c}, title = {{Unifying PAC and Regret: Uniform PAC Bounds for Episodic Reinforcement Learning}}, url = {http://arxiv.org/abs/1703.07710}, year = {2017} } @article{Kocsis2006, abstract = {For large state-space Markovian Decision Problems Monte- Carlo planning is one of the few viable approaches to find near-optimal solutions. In this paper we introduce a new algorithm, UCT, that ap- plies bandit ideas to guide Monte-Carlo planning. In finite-horizon or discounted MDPs the algorithm is shown to be consistent and finite sample bounds are derived on the estimation error due to sampling. Ex- perimental results show that in several domains, UCT is significantly more efficient than its alternatives.}, author = {Kocsis, Levente and Szepesv{\'{a}}ri, Csaba}, file = {::}, journal = {European conference on machine learning}, title = {{Bandit based Monte-Carlo Planning}}, year = {2006} } @article{Chowdhury2018, abstract = {We consider online learning for minimizing regret in unknown, episodic Markov decision processes (MDPs) with continuous states and actions. We develop variants of the UCRL and posterior sampling algorithms that employ nonparametric Gaussian process priors to generalize across the state and action spaces. When the transition and reward functions of the true MDP are either sampled from Gaussian process priors (fully Bayesian setting) or are members of the associated Reproducing Kernel Hilbert Spaces of functions induced by symmetric psd kernels (frequentist setting), we show that the algorithms enjoy sublinear regret bounds. 
The bounds are in terms of explicit structural parameters of the kernels, namely a novel generalization of the information gain metric from kernelized bandit, and highlight the influence of transition and reward function structure on the learning performance. Our results are applicable to multi-dimensional state and action spaces with composite kernel structures, and generalize results from the literature on kernelized bandits, and the adaptive control of parametric linear dynamical systems with quadratic costs.}, archivePrefix = {arXiv}, arxivId = {1805.08052}, author = {Chowdhury, Sayak Ray and Gopalan, Aditya}, eprint = {1805.08052}, file = {::}, title = {{Online Learning in Kernelized Markov Decision Processes}}, url = {http://arxiv.org/abs/1805.08052}, year = {2018} } @article{Feldman2012, abstract = {We consider online planning in Markov decision processes (MDPs). In online planning, the agent focuses on its current state only, deliberates about the set of possible policies from that state onwards and, when interrupted, uses the outcome of that exploratory deliberation to choose what action to perform next. The performance of algorithms for online planning is assessed in terms of simple regret, which is the agent's expected performance loss when the chosen action, rather than an optimal one, is followed. To date, state-of-the-art algorithms for online planning in general MDPs are either best effort, or guarantee only polynomial-rate reduction of simple regret over time. Here we introduce a new Monte-Carlo tree search algorithm, BRUE, that guarantees exponential-rate reduction of simple regret and error probability. This algorithm is based on a simple yet non-standard state-space sampling scheme, MCTS2e, in which different parts of each sample are dedicated to different exploratory objectives. 
Our empirical evaluation shows that BRUE not only provides superior performance guarantees, but is also very effective in practice and favorably compares to state-of-the-art. We then extend BRUE with a variant of "learning by forgetting." The resulting set of algorithms, BRUE(alpha), generalizes BRUE, improves the exponential factor in the upper bound on its reduction rate, and exhibits even more attractive empirical performance.}, archivePrefix = {arXiv}, arxivId = {1206.3382}, author = {Feldman, Zohar and Domshlak, Carmel}, eprint = {1206.3382}, file = {::}, pages = {1--34}, title = {{Simple Regret Optimization in Online Planning for Markov Decision Processes}}, url = {http://arxiv.org/abs/1206.3382}, year = {2012} } @article{Warlop2018, author = {Warlop, Romain and Lazaric, Alessandro and Mary, J{\'{e}}r{\'{e}}mie}, file = {::}, number = {October}, title = {{Fighting Boredom in Recommender Systems with Linear Reinforcement Learning}}, volume = {14}, year = {2018} } @article{Strehl2007, abstract = {Several algorithms for learning near-optimal policies in Markov Decision Processes have been analyzed and proven efficient. Empirical results have suggested that Model-based Interval Estimation (MBIE) learns efficiently in practice, effectively balancing exploration and exploitation. This paper presents a theoretical analysis of MBIE and a new variation called MBIE-EB, proving their efficiency even under worst-case conditions. The paper also introduces a new performance metric, average loss, and relates it to its less "online" cousins from the literature. {\textcopyright} 2008 Elsevier Inc. 
All rights reserved.}, author = {Strehl, Alexander L and Littman, Michael L}, doi = {10.1016/j.jcss.2007.08.009}, file = {::}, issn = {00220000}, journal = {Journal of Computer and System Sciences}, keywords = {Learning theory,Markov Decision Processes,Reinforcement learning}, number = {8}, pages = {1309--1331}, title = {{An analysis of model-based Interval Estimation for Markov Decision Processes}}, volume = {74}, year = {2007} } @article{Osband, abstract = {Most provably-efficient learning algorithms introduce optimism about poorly-understood states and actions to encourage exploration. We study an alternative approach for efficient exploration, posterior sampling for reinforcement learning (PSRL). This algorithm proceeds in repeated episodes of known duration. At the start of each episode, PSRL updates a prior distribution over Markov decision processes and takes one sample from this posterior. PSRL then follows the policy that is optimal for this sample during the episode. The algorithm is conceptually simple, computationally efficient and allows an agent to encode prior knowledge in a natural way. We establish an {\$}\backslashtilde{\{}O{\}}(\backslashtau S \backslashsqrt{\{}AT{\}}){\$} bound on the expected regret, where {\$}T{\$} is time, {\$}\backslashtau{\$} is the episode length and {\$}S{\$} and {\$}A{\$} are the cardinalities of the state and action spaces. This bound is one of the first for an algorithm not based on optimism, and close to the state of the art for any reinforcement learning algorithm. 
We show through simulation that PSRL significantly outperforms existing algorithms with similar regret bounds.}, archivePrefix = {arXiv}, arxivId = {1306.0940}, author = {Osband, Ian and Russo, Daniel and {Van Roy}, Benjamin}, eprint = {1306.0940}, file = {::}, pages = {1--10}, title = {{(More) Efficient Reinforcement Learning via Posterior Sampling}}, url = {http://arxiv.org/abs/1306.0940}, year = {2013} } @article{Chang2005, abstract = {Based on recent results for multiarmed bandit problems, we propose an adaptive sampling algorithm that approximates the optimal value of a finite-horizon Markov decision process (MDP) with finite state and action spaces. The algorithm adaptively chooses which action to sample as the sampling process proceeds and generates an asymptotically unbiased estimator, whose bias is bounded by a quantity that converges to zero at rate ln N , where N is the total number of samples that are used per state sampled in each stage. The worst-case running-time complexity of the algorithm is OO N H , independent of the size of the state space, where A is the size of the action space and H is the horizon length. The algorithm can be used to create an approximate receding horizon control to solve infinite-horizon MDPs. To illustrate the algorithm, computational results are reported on simple examples from inventory control.}, author = {Chang, Hyeong Soo and Fu, Michael C. 
and Hu, Jiaqiao and Marcus, Steven I.}, doi = {10.1287/opre.1040.0145}, file = {::}, issn = {0030-364X}, journal = {Operations Research}, number = {1}, pages = {126--139}, title = {{An Adaptive Sampling Algorithm for Solving Markov Decision Processes}}, volume = {53}, year = {2005} } @inproceedings{Coulom2006, author = {Coulom, R{\'{e}}mi}, booktitle = {5th International Conference on Computer and Games}, file = {::}, title = {{Efficient Selectivity and Backup Operators in Monte-Carlo Tree Search}}, year = {2006} } @article{Ortner, abstract = {We derive sublinear regret bounds for undiscounted reinforcement learning in continuous state space. The proposed algorithm combines state aggregation with the use of upper confidence bounds for implementing optimism in the face of uncertainty. Beside the existence of an optimal policy which satisfies the Poisson equation, the only assumptions made are Holder continuity of rewards and transition probabilities.}, archivePrefix = {arXiv}, arxivId = {1302.2550}, author = {Ortner, Ronald and Ryabko, Daniil}, eprint = {1302.2550}, file = {::}, title = {{Online Regret Bounds for Undiscounted Continuous Reinforcement Learning}}, url = {http://arxiv.org/abs/1302.2550}, year = {2013} } @inproceedings{bartlett2019scale-free, abstract = {We address the problem of planning in an environment with deterministic dynamics and stochastic discounted rewards under a limited numerical budget where the ranges of both rewards and noise are unknown. We introduce PlaTypOOS, an adaptive, robust, and efficient alternative to the OLOP (open-loop optimistic planning) algorithm. Whereas OLOP requires a priori knowledge of the ranges of both rewards and noise, PlaTypOOS dynamically adapts its behavior to both.
This allows PlaTypOOS to be immune to two vulnerabilities of OLOP: failure when given underestimated ranges of noise and rewards and inefficiency when these are overestimated. PlaTypOOS additionally adapts to the global smoothness of the value function. PlaTypOOS acts in a provably more efficient manner vs. OLOP when OLOP is given an overestimated reward and show that in the case of no noise, PlaTypOOS learns exponentially faster.}, author = {Bartlett, Peter L and Gabillon, Victor and Healey, Jennifer and Valko, Michal}, booktitle = {International Conference on Machine Learning}, title = {{Scale-free adaptive planning for deterministic dynamics and discounted rewards}}, year = {2019} } @article{S.Bubeck2010, abstract = {We consider the problem of planning in a stochastic and discounted environment with a limited numerical budget. More precisely, we investigate strategies exploring the set of possible sequences of actions, so that, once all available numerical resources (e.g. CPU time, number of calls to a generative model) have been used, one returns a recommendation on the best possible immediate action to follow based on this exploration. The performance of a strategy is assessed in terms of its simple regret, that is the loss in performance resulting from choosing the recommended action instead of an optimal one. We first provide a minimax lower bound for this problem, and show that a uniform planning strategy matches this minimax rate (up to a logarithmic factor). Then we propose a UCB (Upper Confidence Bounds)-based planning algorithm, called OLOP (Open-Loop Optimistic Planning), which is also minimax optimal, and prove that it enjoys much faster rates when there is a small proportion of near-optimal sequences of actions. 
Finally, we compare our results with the regret bounds one can derive for our setting with bandits algorithms designed for an infinite number of arms.}, author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi}, file = {::}, journal = {Conference On Learning Theory (COLT) 2010}, number = {1}, pages = {15}, title = {{Open Loop Optimistic Planning}}, year = {2010} } @article{HrenMunos2008, author = {Hren, Jean-Francois and Munos, R{\'{e}}mi}, file = {::}, journal = {European Workshop on Reinforcement Learning}, pages = {151--164}, title = {{Optimistic planning of deterministic systems}}, year = {2008} } @article{Azar2013, abstract = {In some reinforcement learning problems an agent may be provided with a set of input policies, perhaps learned from prior experience or provided by advisors. We present a reinforcement learning with policy advice (RLPA) algorithm which leverages this input set and learns to use the best policy in the set for the reinforcement learning task at hand. We prove that RLPA has a sub-linear regret of $\backslash$tilde O($\backslash$sqrt{\{}T{\}}) relative to the best input policy, and that both this regret and its computational complexity are independent of the size of the state and action space. Our empirical simulations support our theoretical analysis.
This suggests RLPA may offer significant advantages in large domains where some prior good policies are provided.}, archivePrefix = {arXiv}, arxivId = {1305.1027}, author = {Azar, Mohammad Gheshlaghi and Lazaric, Alessandro and Brunskill, Emma}, doi = {10.1007/978-3-642-40988-2_7}, eprint = {1305.1027}, file = {::}, isbn = {9783642409875}, issn = {03029743}, journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, number = {PART 1}, pages = {97--112}, title = {{Regret bounds for reinforcement learning with policy advice}}, volume = {8188 LNAI}, year = {2013} } @article{Dann2016, abstract = {Recently, there has been significant progress in understanding reinforcement learning in discounted infinite-horizon Markov decision processes (MDPs) by deriving tight sample complexity bounds. However, in many real-world applications, an interactive learning agent operates for a fixed or bounded period of time, for example tutoring students for exams or handling customer service requests. Such scenarios can often be better treated as episodic fixed-horizon MDPs, for which only looser bounds on the sample complexity exist. A natural notion of sample complexity in this setting is the number of episodes required to guarantee a certain performance with high probability (PAC guarantee). In this paper, we derive an upper PAC bound {\$}\backslashtilde O(\backslashfrac{\{}|\backslashmathcal S|{\^{}}2 |\backslashmathcal A| H{\^{}}2{\}}{\{}\backslashepsilon{\^{}}2{\}} \backslashln\backslashfrac 1 \backslashdelta){\$} and a lower PAC bound {\$}\backslashtilde \backslashOmega(\backslashfrac{\{}|\backslashmathcal S| |\backslashmathcal A| H{\^{}}2{\}}{\{}\backslashepsilon{\^{}}2{\}} \backslashln \backslashfrac 1 {\{}\backslashdelta + c{\}}){\$} that match up to log-terms and an additional linear dependency on the number of states {\$}|\backslashmathcal S|{\$}. 
The lower bound is the first of its kind for this setting. Our upper bound leverages Bernstein's inequality to improve on previous bounds for episodic finite-horizon MDPs which have a time-horizon dependency of at least {\$}H{\^{}}3{\$}.}, archivePrefix = {arXiv}, arxivId = {1510.08906}, author = {Dann, Christoph and Brunskill, Emma}, eprint = {1510.08906}, file = {::}, issn = {10495258}, pages = {1--28}, title = {{Sample Complexity of Episodic Fixed-Horizon Reinforcement Learning}}, url = {http://arxiv.org/abs/1510.08906}, year = {2016} } @article{Kocsis2006a, abstract = {Monte-Carlo search has been successful in many non-deter- ministic games, and recently in deterministic games with high branching factor. One of the drawbacks of the current approaches is that even if the iterative process would last for a very long time, the selected move does not necessarily converge to a game-theoretic optimal one. In this paper we introduce a new algorithm, UCT, which extends a bandit algo- rithm for Monte-Carlo search. It is proven that the probability that the algorithm selects the correct move converges to 1. Moreover it is shown empirically that the algorithm converges rather fast even in comparison with alpha-beta search. Experiments in Amazons and Clobber indicate that the UCT algorithm outperforms considerably a plain Monte-Carlo version, and it is competitive against alpha-beta based game programs.}, author = {Kocsis, Levente and Szepesv{\'{a}}ri, Csaba and Willemson, Jan}, file = {::}, journal = {Tech.
Rep.}, keywords = {monte-carlo}, number = {1}, pages = {1--21}, title = {{Improved Monte-Carlo Search}}, year = {2006} } @article{Kearns2002a, author = {Kearns, Michael and Singh, Satinder}, file = {::}, journal = {Machine Learning}, keywords = {exploration versus exploitation,markov decision processes,reinforcement learning}, pages = {209--232}, title = {{Near-Optimal Reinforcement Learning in Polynomial Time}}, year = {2002} } @inproceedings{Walsh2010, abstract = {Recent advancements in model-based reinforcement learn- ing have shown that the dynamics of many structured do- mains (e.g. DBNs) can be learned with tractable sample com- plexity, despite their exponentially large state spaces. Un- fortunately, these algorithms all require access to a planner that computes a near optimal policy, and while many tra- ditional MDP algorithms make this guarantee, their com- putation time grows with the number of states. We show how to replace these over-matched planners with a class of sample-based plannerswhose computation time is indepen- dent of the number of stateswithout sacrificing the sample- efficiency guarantees of the overall learning algorithms. To do so, we define sufficient criteria for a sample-based planner to be used in such a learning system and analyze two popu- lar sample-based approaches from the literature. We also in- troduce our own sample-based planner, which combines the strategies fromthese algorithms and stillmeets the criteria for integration into our learning system.
In doing so, we define the first complete RL solution for compactly represented (ex- ponentially sized) state spaces with efficiently learnable dy- namics that is both sample efficient and whose computation time does not grow rapidly with the number of states.}, author = {Walsh, Thomas J and Goschin, Sergiu and Littman, Michael L}, booktitle = {AAAI Conference on Artificial Intelligence}, file = {::}, isbn = {9781577354642}, keywords = {Technical Papers -- Machine Learning}, title = {{Integrating Sample-based Planning and Model-based Reinforcement Learning}}, url = {http://www.aaai.org/ocs/index.php/AAAI/AAAI10/paper/viewPDFInterstitial/1880/2049}, year = {2010} } @inproceedings{Szorenyi2014, abstract = {We consider the problem of online planning in a Markov decision process with discounted rewards for any given initial state. We consider the PAC sample com-plexity problem of computing, with probability 1−$\delta$, an �-optimal action using the smallest possible number of calls to the generative model (which provides reward and next-state samples). We design an algorithm, called StOP (for Stochastic-Optimistic Planning), based on the " optimism in the face of uncertainty " princi-ple. StOP can be used in the general setting, requires only a generative model, and enjoys a complexity bound that only depends on the local structure of the MDP.}, author = {Szorenyi, B. and Kedenburg, Gunnar and Munos, Remi}, booktitle = {Advances in Neural Information Processing Systems 27 (NIPS)}, file = {::}, pages = {1--9}, title = {{Optimistic planning in Markov decision processes using a generative model}}, year = {2014} } @article{Neu2017a, abstract = {We study the problem of online learning in a class of Markov decision processes known as linearly solvable MDPs. 
In the stationary version of this problem, a learner interacts with its environment by directly controlling the state transitions, attempting to balance a fixed state-dependent cost and a certain smooth cost penalizing extreme control inputs. In the current paper, we consider an online setting where the state costs may change arbitrarily between consecutive rounds, and the learner only observes the costs at the end of each respective round. We are interested in constructing algorithms for the learner that guarantee small regret against the best stationary control policy chosen in full knowledge of the cost sequence. Our main result is showing that the smoothness of the control cost enables the simple algorithm of following the leader to achieve a regret of order {\$}\backslashlog{\^{}}2 T{\$} after {\$}T{\$} rounds, vastly improving on the best known regret bound of order {\$}T{\^{}}{\{}3/4{\}}{\$} for this setting.}, annote = {- Formulates the problem of finding an optiminal policy in LMDP as a convex optimization problem. 
- State cost function changes with time - Use framework of online convex optimization - Measure regret}, archivePrefix = {arXiv}, arxivId = {1702.06341}, author = {Neu, Gergely and G{\'{o}}mez, Vicen{\c{c}}}, eprint = {1702.06341}, file = {::}, keywords = {fast rates,markov decision processes,online learning,optimal control}, pages = {1--22}, title = {{Fast rates for online learning in Linearly Solvable Markov Decision Processes}}, url = {http://arxiv.org/abs/1702.06341}, volume = {65}, year = {2017} } @article{Coquelin2007, author = {Coquelin, Pierre-arnaud and Munos, R{\'{e}}mi}, file = {::}, title = {{Bandit Algorithms for Tree Search}}, year = {2007} } @article{Kaufmann2017, abstract = {Recent advances in bandit tools and techniques for sequential learning are steadily enabling new applications and are promising the resolution of a range of challenging related problems. We study the game tree search problem, where the goal is to quickly identify the optimal move in a given game tree by sequentially sampling its stochastic payoffs. We develop new algorithms for trees of arbitrary depth, that operate by summarizing all deeper levels of the tree into confidence intervals at depth one, and applying a best arm identification procedure at the root. We prove new sample complexity guarantees with a refined dependence on the problem instance.
We show experimentally that our algorithms outperform existing elimination-based algorithms and match previous special-purpose methods for depth-two trees.}, archivePrefix = {arXiv}, arxivId = {1706.02986}, author = {Kaufmann, Emilie and Koolen, Wouter}, eprint = {1706.02986}, file = {::}, pages = {1--23}, title = {{Monte-Carlo Tree Search by Best Arm Identification}}, url = {http://arxiv.org/abs/1706.02986}, volume = {9189}, year = {2017} } @inproceedings{bartlett2009regal, address = {Arlington, Virginia, United States}, author = {Bartlett, Peter L and Tewari, Ambuj}, booktitle = {Proceedings of the 25th conference on Uncertainty in Artificial Intelligence}, file = {::}, isbn = {978-0-9749039-5-8}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {35--42}, publisher = {AUAI Press}, series = {UAI '09}, title = {{REGAL: a regularization based algorithm for reinforcement learning in weakly communicating MDPs}}, year = {2009} } @article{Kearns2002, author = {Kearns, Michael and Mansour, Yishay and Ng, Andrew}, doi = {10.1023/A:1017932429737}, file = {::}, issn = {08856125}, journal = {Machine Learning}, keywords = {markov decision processes,planning,reinforcement learning}, pages = {193--208}, title = {{A Sparse Sampling Algorithm for Near-Optimal Planning in Large Markov Decision Processes}}, url = {http://ukpmc.ac.uk/abstract/CIT/512725}, year = {2002} } @inproceedings{azar2017minimax, archivePrefix = {arXiv}, arxivId = {arXiv:1703.05449v2}, author = {Azar, Mohammad Gheshlaghi and Osband, Ian and Munos, Remi}, booktitle = {International Conference on Machine Learning}, eprint = {arXiv:1703.05449v2}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Azar, Osband, Munos - 2017 - Minimax regret bounds for reinforcement learning.pdf:pdf}, title = {{Minimax regret bounds for reinforcement learning}}, year = {2017} } @inproceedings{bartlett2019simple, abstract = {We study
the problem of optimizing a function under a budgeted number of evaluations. We only assume that the function is locally smooth around one of its global optima. The difficulty of optimization is measured in terms of 1) the amount of noise b of the function evaluation and 2) the local smoothness, d, of the function. A smaller d results in smaller optimization error. We come with a new, simple, and parameter-free approach. First, for all values of b and d, this approach recovers at least the state-of-the-art regret guarantees. Second, our approach additionally obtains these results while being agnostic to the values of both b and d. This leads to the first algorithm that naturally adapts to an unknown range of noise b and leads to significant improvements in a moderate and low-noise regime. Third, our approach also obtains a remarkable improvement over the state-of-the-art SOO algorithm when the noise is very low which includes the case of optimization under deterministic feedback (b=0). There, under our minimal local smoothness assumption, this improvement is of exponential magnitude and holds for a class of functions that covers the vast majority of functions that practitioners optimize (d=0). We show that our algorithmic improvement is borne out in experiments as we empirically show faster convergence on common benchmarks.}, author = {Bartlett, Peter L and Gabillon, Victor and Valko, Michal}, booktitle = {Algorithmic Learning Theory}, title = {{A simple parameter-free and adaptive approach to optimization under a minimal local smoothness assumption}}, year = {2019} } @inproceedings{leurent2019practical, abstract = {We consider the problem of online planning in a Markov Decision Process when given only access to a generative model, restricted to open-loop policies - i.e. sequences of actions - and under budget constraint. 
In this setting, the Open-Loop Optimistic Planning (OLOP) algorithm enjoys good theoretical guarantees but is overly conservative in practice, as we show in numerical experiments. We propose a modified version of the algorithm with tighter upper-confidence bounds, KLOLOP, that leads to better practical performances while retaining the sample complexity bound. Finally, we propose an efficient implementation that significantly improves the time complexity of both algorithms.}, author = {Leurent, Edouard and Maillard, Odalric-Ambrym}, booktitle = {European Conference on Machine Learning}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Leurent, Maillard - 2019 - Practical open-loop pptimistic planning.pdf:pdf}, keywords = {online learning,planning,tree search}, title = {{Practical open-loop optimistic planning}}, url = {https://arxiv.org/pdf/1904.04700.pdf}, year = {2019} } @inproceedings{Lattimore2012, abstract = {We study upper and lower bounds on the sample-complexity of learning near-optimal behaviour in finite-state discounted Markov Decision Processes (MDPs). For the upper bound we make the assumption that each action leads to at most two possible next-states and prove a new bound for a UCRL-style algorithm on the number of time-steps when it is not Probably Approximately Correct (PAC). The new lower bound strengthens previous work by being both more general (it applies to all policies) and tighter.
The upper and lower bounds match up to logarithmic factors.}, archivePrefix = {arXiv}, arxivId = {1202.3890v1}, author = {Lattimore, Tor and Hutter, Marcus}, booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, doi = {10.1007/978-3-642-34106-9_26}, eprint = {1202.3890v1}, file = {::}, isbn = {9783642341052}, issn = {03029743}, keywords = {Markov decision processes,PAC-MDP,Reinforcement learning,exploration exploitation,sample-complexity}, pages = {320--334}, title = {{PAC bounds for discounted MDPs}}, volume = {7568 LNAI}, year = {2012} } @article{huang2017structured, author = {Huang, Ruitong and Ajallooeian, Mohammad M and Szepesv{\'{a}}ri, Csaba and M{\"{u}}ller, Martin}, journal = {arXiv preprint arXiv:1706.05198}, title = {{Structured best arm identification with fixed confidence}}, year = {2017} } @article{Domshlak2013, author = {Domshlak, Carmel and Feldman, Zohar}, file = {::}, journal = {International Symposium on Combinatorial Search (SoCS)}, keywords = {Full Papers}, pages = {1--8}, title = {{To UCT, or not to UCT?}}, url = {http://www.aaai.org/ocs/index.php/SOCS/SOCS13/paper/view/7268}, year = {2013} } @inproceedings{Busoniu2012, author = {Busoniu, Lucian and Munos, R{\'{e}}mi}, booktitle = {15th International Conference on Artificial Intelligence and Statistics (AISTATS)}, file = {::}, title = {{Online optimistic planning for Markov decision processes}}, year = {2012} } @article{Munos2014, abstract = {This work covers several aspects of the optimism in the face of uncertainty principle applied to large scale optimization problems under finite numerical budget. The initial motivation for the research reported here originated from the empirical success of the so-called Monte-Carlo Tree Search method popularized in computer-go and further extended to many other games as well as optimization and planning problems. 
Our objective is to contribute to the development of theoretical foundations of the field by characterizing the complexity of the underlying optimization problems and designing efficient algorithms with performance guarantees.$\backslash$nThe main idea presented here is that it is possible to decompose a complex decision making problem (such as an optimization problem in a large search space) into a sequence of elementary decisions, where each decision of the sequence is solved using a (stochastic) multi-armed bandit (simple mathematical model for decision making in stochastic environments). This so-called hierarchical bandit approach (where the reward observed by a bandit in the hierarchy is itself the return of another bandit at a deeper level) possesses the nice feature of starting the exploration by a quasi-uniform sampling of the space and then focusing progressively on the most promising area, at different scales, according to the evaluations observed so far, and eventually performing a local search around the global optima of the function. The performance of the method is assessed in terms of the optimality of the returned solution as a function of the number of function evaluations.$\backslash$nOur main contribution to the field of function optimization is a class of hierarchical optimistic algorithms designed for general search spaces$\backslash$n(such as metric spaces, trees, graphs, Euclidean spaces, ...) with different algorithmic instantiations depending on whether the evaluations are noisy or noiseless and whether some measure of the “smoothness” of the function is known or unknown. The performance of the algorithms depend on the local behavior of the function around its global optima expressed in terms of the quantity of near-optimal states measured with some metric. 
If this local smoothness of the function is known then one can design very efficient optimization algorithms (with convergence rate independent of the space dimension), and when it is not known, we can build adaptive techniques that can, in some cases, perform almost as well as when it is known.$\backslash$nIn order to be self-contained, we start with a brief introduction to the stochastic multi-armed bandit problem in Chapter 1 and describe the UCB (Upper Confidence Bound) strategy and several extensions. In Chapter 2 we present the Monte-Carlo Tree Search method applied to computer-go and show the limitations of previous algorithms such as UCT (UCB applied to Trees). This provides motivation for designing theoretically well-founded optimistic optimization algorithms. The main contributions on hierarchical optimistic optimization are described in Chapters 3 and 4 where the general setting of a semi-metric space is introduced and algorithms designed for optimizing a function assumed to be locally smooth (around its maxima) with respect to a$\backslash$nsemi-metric are presented and analyzed. Chapter 3 considers the case when the semi-metric is known and can be used by the algorithm,$\backslash$nwhereas Chapter 4 considers the case when it is not known and describes an adaptive technique that does almost as well as when it is known. 
Finally in Chapter 5 we describe optimistic strategies for a specific structured problem, namely the planning problem in Markov decision processes with infinite horizon and discounted rewards setting.}, archivePrefix = {arXiv}, arxivId = {arXiv:1408.0952v2}, author = {Munos, Remi}, doi = {10.1561/2200000038}, eprint = {arXiv:1408.0952v2}, file = {::}, isbn = {9781601987662}, issn = {1935-8237}, journal = {Foundations and Trends{\textregistered} in Machine Learning}, title = {{From Bandits to Monte-Carlo Tree Search: The Optimistic Principle Applied to Optimization and Planning}}, year = {2014} } @article{Shah2019, archivePrefix = {arXiv}, arxivId = {1902.05213}, author = {Shah, Devavrat and Xie, Qiaomin and Xu, Zhi}, eprint = {1902.05213}, file = {::}, keywords = {monte carlo tree search,non-asymptotic analysis,reinforcement learning}, title = {{On Reinforcement Learning Using Monte Carlo Tree Search with Supervised Learning: Non-Asymptotic Analysis}}, url = {http://arxiv.org/abs/1902.05213}, year = {2019} } @article{Moerland2018, abstract = {We present an extension of Monte Carlo Tree Search (MCTS) that strongly increases its efficiency for trees with asymmetry and/or loops. Asymmetric termination of search trees introduces a type of uncertainty for which the standard upper confidence bound (UCB) formula does not account. Our first algorithm (MCTS-T), which assumes a non-stochastic environment, backs-up tree structure uncertainty and leverages it for exploration in a modified UCB formula. Results show vastly improved efficiency in a well-known asymmetric domain in which MCTS performs arbitrarily bad. Next, we connect the ideas about asymmetric termination to the presence of loops in the tree, where the same state appears multiple times in a single trace. An extension to our algorithm (MCTS-T+), which in addition to non-stochasticity assumes full state observability, further increases search efficiency for domains with loops as well. 
Benchmark testing on a set of OpenAI Gym and Atari 2600 games indicates that our algorithms always perform better than or at least equivalent to standard MCTS, and could be first-choice tree search algorithms for non-stochastic, fully-observable environments.}, archivePrefix = {arXiv}, arxivId = {1805.09218}, author = {Moerland, Thomas M. and Broekens, Joost and Plaat, Aske and Jonker, Catholijn M.}, eprint = {1805.09218}, file = {::}, pages = {1--9}, title = {{Monte Carlo Tree Search for Asymmetric Trees}}, url = {http://arxiv.org/abs/1805.09218}, year = {2018} } @article{Grill2016, abstract = {We study the sampling-based planning problem in Markov decision processes (MDPs) that we can access only through a generative model, usually referred to as Monte-Carlo planning. Our objective is to return a good estimate of the optimal value function at any state while minimizing the number of calls to the generative model, i.e. the sample complexity. We propose a new algorithm, TrailBlazer, able to handle MDPs with a finite or an infinite number of transitions from state-action to next states. TrailBlazer is an adaptive algorithm that exploits possible structures of the MDP by exploring only a subset of states reachable by following near-optimal policies. We provide bounds on its sample complexity that depend on a measure of the quantity of near-optimal states. The algorithm behavior can be considered as an extension of Monte-Carlo sampling (for estimating an expectation) to problems that alternate maximization (over actions) and expectation (over next states). 
Finally, another appealing feature of TrailBlazer is that it is simple to implement and computationally efficient.}, author = {Grill, Jean-Bastien and Valko, Michal and Munos, R{\'{e}}mi}, file = {::}, issn = {10495258}, journal = {Neural Information Processing Systems}, number = {Nips}, pages = {1--9}, title = {{Blazing the trails before beating the path: Sample-efficient Monte-Carlo planning}}, year = {2016} } @article{Szita2010, abstract = {One might believe that model-based algorithms of reinforcement learning$\backslash$ncan propagate the obtained experience more quickly, and are able$\backslash$nto direct exploration better. As a consequence, fewer exploratory$\backslash$nactions should be enough to learn a good policy. Strangely enough,$\backslash$ncurrent theoretical results for model-based algorithms do not support$\backslash$nthis claim: In a finite Markov decision process with N states,$\backslash$nthe best bounds on the number of exploratory steps necessary are$\backslash$nof order O(N{\^{}}2 $\backslash$log N), in contrast to the O(N $\backslash$log N) bound$\backslash$navailable for the model-free, delayed Q-learning algorithm. In this$\backslash$npaper we show that MoRmax, a modified version of the Rmax algorithm$\backslash$nneeds to make at most O(N $\backslash$log N) exploratory steps. 
This matches$\backslash$nthe lower bound up to logarithmic factors, as well as the upper bound$\backslash$nof the state-of-the-art model-free algorithm, while our new bound$\backslash$nimproves the dependence on other problem parameters.}, author = {Szita, Istv{\`{a}}n and Szepesv{\'{a}}ri, Csaba}, file = {::}, isbn = {9781605589077}, journal = {ICML}, pages = {1031--1038}, title = {{Model-based reinforcement learning with nearly tight exploration complexity bounds}}, url = {http://www.icml2010.org/papers/546.pdf}, year = {2010} } @article{Jaksch2010, abstract = {For undiscounted reinforcement learning in Markov decision processes (MDPs) we consider the total regret of a learning algorithm with respect to an optimal policy. In order to describe the transition structure of an MDP we propose a new parameter: An MDP has diameter D if for any pair of states s,s' there is a policy which moves from s to s' in at most D steps (on average). We present a reinforcement learning algorithm with total regret {\~{O}}(DS√AT) after T steps for any unknown MDP with S states, A actions per state, and diameter D. A corresponding lower bound of {\$}\Omega{\$}(√DSAT) on the total regret of any learning algorithm is given as well.{\$}\backslash{\$}r{\$}\backslash{\$}nThese results are complemented by a sample complexity bound on the number of suboptimal steps taken by our algorithm. This bound can be used to achieve a (gap-dependent) regret bound that is logarithmic in T.{\$}\backslash{\$}r{\$}\backslash{\$}nFinally, we also consider a setting where the MDP is allowed to change a fixed number of l times. We present a modification of our algorithm that is able to deal with this setting and show a regret bound of {\~{O}}(l1/3T2/3DS√A).}, archivePrefix = {arXiv}, arxivId = {1403.3741}, author = {Jaksch, Thomas and Ortner, Ronald and Auer, Peter}, eprint = {1403.3741}, file = {::}, isbn = {Technical Report No. 
CIT-2009-01}, issn = {15324435}, journal = {Journal of Machine Learning Research}, keywords = {Computational,Information-Theoretic Learning with,Learning/Statistics {\&} Optimisation,Theory {\&} Algorithms}, number = {1}, pages = {1563--1600}, title = {{Near-optimal Regret Bounds for Reinforcement Learning}}, url = {http://eprints.pascal-network.org/archive/00007081/}, volume = {11}, year = {2010} } @article{Arora2017, abstract = {Do GANS (Generative Adversarial Nets) actually learn the target distribution? The foundational paper of (Goodfellow et al 2014) suggested they do, if they were given sufficiently large deep nets, sample size, and computation time. A recent theoretical analysis in Arora et al (to appear at ICML 2017) raised doubts whether the same holds when discriminator has finite size. It showed that the training objective can approach its optimum value even if the generated distribution has very low support ---in other words, the training objective is unable to prevent mode collapse. The current note reports experiments suggesting that such problems are not merely theoretical. It presents empirical evidence that well-known GANs approaches do learn distributions of fairly low support, and thus presumably are not learning the target distribution. The main technical contribution is a new proposed test, based upon the famous birthday paradox, for estimating the support size of the generated distribution.}, archivePrefix = {arXiv}, arxivId = {1706.08224}, author = {Arora, Sanjeev and Zhang, Yi}, eprint = {1706.08224}, file = {::}, title = {{Do GANs actually learn the distribution? An empirical study}}, url = {http://arxiv.org/abs/1706.08224}, year = {2017} } @article{Theis2015, abstract = {Probabilistic generative models can be used for compression, denoising, inpainting, texture synthesis, semi-supervised learning, unsupervised feature learning, and other tasks. 
Given this wide range of applications, it is not surprising that a lot of heterogeneity exists in the way these models are formulated, trained, and evaluated. As a consequence, direct comparison between models is often difficult. This article reviews mostly known but often underappreciated properties relating to the evaluation and interpretation of generative models with a focus on image models. In particular, we show that three of the currently most commonly used criteria---average log-likelihood, Parzen window estimates, and visual fidelity of samples---are largely independent of each other when the data is high-dimensional. Good performance with respect to one criterion therefore need not imply good performance with respect to the other criteria. Our results show that extrapolation from one criterion to another is not warranted and generative models need to be evaluated directly with respect to the application(s) they were intended for. In addition, we provide examples demonstrating that Parzen window estimates should generally be avoided.}, archivePrefix = {arXiv}, arxivId = {1511.01844}, author = {Theis, Lucas and van den Oord, A{\"{a}}ron and Bethge, Matthias}, eprint = {1511.01844}, file = {::}, title = {{A note on the evaluation of generative models}}, url = {http://arxiv.org/abs/1511.01844}, year = {2015} } @book{Barber2017, abstract = {Machine learning methods extract value from vast data sets quickly and with modest resources. They are established tools in a wide range of industrial applications, including search engines, DNA sequencing, stock market analysis, and robot locomotion, and their use is spreading rapidly. People who know the methods have their choice of rewarding jobs. This hands-on text opens these opportunities to computer science students with modest mathematical backgrounds. It is designed for final-year undergraduates and master's students with limited background in linear algebra and calculus. 
Comprehensive and coherent, it develops everything from basic reasoning to advanced techniques within the framework of graphical models. Students learn more than a menu of techniques, they develop analytical and problem-solving skills that equip them for the real world. Numerous examples and exercises, both computer based and theoretical, are included in every chapter. Resources for students and instructors, including a MATLAB toolbox, are available online.}, author = {Barber, David}, booktitle = {Bayesian Reasoning and Machine Learning}, doi = {10.1017/cbo9780511804779}, file = {::}, publisher = {Cambridge University Press}, title = {{Bayesian Reasoning and Machine Learning}}, year = {2017} } @inproceedings{busoniu2012optimistic, abstract = {The reinforcement learning community has recently intensified its interest in online plan-ning methods, due to their relative inde-pendence on the state space size. However, tight near-optimality guarantees are not yet available for the general case of stochastic Markov decision processes and closed-loop, state-dependent planning policies. We there-fore consider an algorithm related to AO* that optimistically explores a tree represen-tation of the space of closed-loop policies, and we analyze the near-optimality of the action it returns after n tree node expan-sions. While this optimistic planning requires a finite number of actions and possible next states for each transition, its asymptotic per-formance does not depend directly on these numbers, but only on the subset of nodes that significantly impact near-optimal poli-cies. We characterize this set by introduc-ing a novel measure of problem complexity, called the near-optimality exponent. 
Special-izing the exponent and performance bound for some interesting classes of MDPs illus-trates the algorithm works better when there are fewer near-optimal policies and less uni-form transition probabilities.}, author = {Buşoniu, Lucian and Munos, R{\'{e}}mi}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Optimistic planning for Markov decision processes}}, url = {https://hal.archives-ouvertes.fr/hal-00756736/document}, year = {2012} } @book{bertsekas1996neuro-dynamic, address = {Belmont, MA}, author = {Bertsekas, Dimitri and Tsitsiklis, John}, publisher = {Athena Scientific}, title = {{Neuro-dynamic programming}}, url = {https://books.google.co.uk/books/about/Neuro{\_}dynamic{\_}Programming.html?id=WxCCQgAACAAJ{\&}source=kp{\_}book{\_}description{\&}redir{\_}esc=y}, year = {1996} } @inproceedings{mahalanobis1936generalized, author = {Mahalanobis, P}, booktitle = {Proc. National Inst. Sci. (India)}, pages = {49--55}, series = {12}, title = {{On the generalized distance in statistics}}, year = {1936} } @article{neill2010multivariate, abstract = {Abstract We present the multivariate Bayesian scan statistic (MBSS), a general framework for event detection and characterization in$\backslash$nmultivariate spatial time series data. MBSS integrates prior information and observations from multiple data streams in a$\backslash$nprincipled Bayesian framework, computing the posterior probability of each type of event in each space-time region. MBSS learns$\backslash$na multivariate Gamma-Poisson model from historical data, and models the effects of each event type on each stream using expert$\backslash$nknowledge or labeled training examples. We evaluate MBSS on various disease surveillance tasks, detecting and characterizing$\backslash$noutbreaks injected into three streams of Pennsylvania medication sales data. 
We demonstrate that MBSS can be used both as$\backslash$na “general” event detector, with high detection power across a variety of event types, and a “specific” detector that incorporates$\backslash$nprior knowledge of an event's effects to achieve much higher detection power. MBSS has many other advantages over previous$\backslash$nevent detection approaches, including faster computation and easy interpretation and visualization of results, and allows$\backslash$nfaster and more accurate event detection by integrating information from the multiple streams. Most importantly, MBSS can$\backslash$nmodel and differentiate between multiple event types, thus distinguishing between events requiring urgent responses and other,$\backslash$nless relevant patterns in the data.}, author = {Neill, Daniel B. and Cooper, Gregory F.}, doi = {10.1007/s10994-009-5144-4}, issn = {0885-6125}, journal = {Machine Learning}, pages = {261--282}, title = {{A multivariate Bayesian scan statistic for early event detection and characterization}}, volume = {79}, year = {2010} } @article{erdos1959on, abstract = {P. Erd{\H{o}}s, A. R{\'{e}}nyi. Publ. Math. Debrecen, Vol. 6 (1959), pp. 290-297. 
bibtex-import network.}, author = {Erd{\H{o}}s, Paul and R{\'{e}}nyi, Alfr{\'{e}}d}, journal = {Publicationes Mathematicae}, pages = {290--297}, title = {{On random graphs}}, volume = {6}, year = {1959} } @techreport{nene1996columbia, author = {Nene, S A and Nayar, S K and Murase, H}, booktitle = {Technical Report, Department of Computer Science, Columbia University CUCS-005-96}, institution = {Columbia University}, month = feb, title = {{{\{}C{\}}olumbia {\{}O{\}}bject {\{}I{\}}mage {\{}L{\}}ibrary ({\{}C{\}}{\{}O{\}}{\{}I{\}}{\{}L{\}}-100)}}, year = {1996} } @article{baxter2001experiments, author = {Baxter, Jonathan and Bartlett, Peter and Weaver, Lex}, journal = {Journal of Artificial Intelligence Research}, pages = {351--381}, title = {{Experiments with Infinite-Horizon, Policy-Gradient Estimation}}, volume = {15}, year = {2001} } @inproceedings{munos1999variable, author = {Munos, Remi and Moore, Andrew}, booktitle = {Proceedings of the 16th International Joint Conference on Artificial Intelligence}, pages = {1348--1355}, title = {{Variable Resolution Discretization for High-Accuracy Solutions of Optimal Control Problems}}, year = {1999} } @inproceedings{kawahara22submodularity, author = {Kawahara, Y and Nagano, K and Tsuda, K and Bilmes, J A}, booktitle = {Adv. NIPS 22}, title = {{Submodularity Cuts and Applications}}, year = {2009} } @inproceedings{locatelli2017adaptivity, abstract = {This work addresses various open questions in the theory of active learning for nonparametric classification. Our contributions are both statistical and algorithmic: -We establish new minimax-rates for active learning under common $\backslash$textit{\{}noise conditions{\}}. These rates display interesting transitions -- due to the interaction between noise $\backslash$textit{\{}smoothness and margin{\}} -- not present in the passive setting. Some such transitions were previously conjectured, but remained unconfirmed. 
-We present a generic algorithmic strategy for adaptivity to unknown noise smoothness and margin; our strategy achieves optimal rates in many general situations; furthermore, unlike in previous work, we avoid the need for $\backslash$textit{\{}adaptive confidence sets{\}}, resulting in strictly milder distributional requirements.}, author = {Locatelli, Andrea and Carpentier, Alexandra and Kpotufe, Samory}, booktitle = {Conference on Learning Theory}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Locatelli, Carpentier, Kpotufe - 2017 - Adaptivity to noise parameters in nonparametric active learning.pdf:pdf}, month = {mar}, title = {{Adaptivity to noise parameters in nonparametric active learning}}, url = {https://arxiv.org/pdf/1703.05841.pdf}, year = {2017} } @article{buntine1996guide, address = {Thinkbank, 1678 Shattuck Ave, Suite 320, Berkeley, Ca, 94709}, author = {Buntine, W}, journal = {IEEE Transactions on Knowledge and Data Engineering}, pages = {195--210}, title = {{A Guide to the Literature on Learning Probabilistic Networks from Data}}, url = {citeseer.nj.nec.com/buntine96guide.html}, volume = {8}, year = {1996} } @article{tran2014efficient, author = {Tran-Thanh, Long and Stein, Sebastian and Rogers, Alex and Jennings, Nicholas R}, journal = {Artificial Intelligence}, pages = {89--111}, publisher = {Elsevier}, title = {{Efficient crowdsourcing of unknown experts using bounded multi-armed bandits}}, url = {http://www.orchid.ac.uk/eprints/195/1/mab{\_}crowdsourcing{\_}AIJ.pdf}, volume = {214}, year = {2014} } @article{martino2011generalization, author = {Martino, Luca and M{\'{i}}guez, Joaqu{\'{i}}n}, journal = {Statistics and Computing}, number = {4}, pages = {633--647}, title = {{A generalization of the adaptive rejection sampling algorithm}}, volume = {21}, year = {2011} } @article{bolton2002statistical, abstract = {Summary: Fraud is increasing dramatically with the expansion of modern technology and the global 
superhighways of communication, resulting in the loss of billions of dollars worldwide each year. Although prevention technologies are the best way to reduce fraud, fraudsters are adaptive and, given time, will usually find ways to circumvent such measures. Methodologies for the detection of fraud are essential if we are to catch fraudsters once fraud prevention has failed. Statistics and machine learning provide effective technologies for fraud detection and have been applied successfully to detect activities such as money laundering, e-commerce credit card fraud, telecommunications fraud and computer intrusion, to name but a few. We describe the tools available for statistical fraud detection and the areas in which fraud detection technologies are most used.}, annote = {comps{\_}ano}, author = {Bolton, Richard J and Hand, David J}, doi = {10.1214/ss/1042727940}, journal = {Stat. Sci.}, keywords = {fraud detection,fraud prevention,machine learning}, number = {3}, pages = {235--255}, title = {{Statistical fraud detection: a review.}}, volume = {17}, year = {2002} } @article{Moreau1962, author = {Moreau, J J}, journal = {C. R. Acad. Sci. Paris S{\'{e}}r. A Math.}, pages = {2897--2899}, title = {{Fonctions convexes duales et points proximaux dans un espace {\{}H{\}}ilbertien}}, volume = {255}, year = {1962} } @inproceedings{velickovic2019deep, abstract = {We present Deep Graph Infomax (DGI), a general approach for learning node representations within graph-structured data in an unsupervised manner. DGI relies on maximizing mutual information between patch representations and corresponding high-level summaries of graphs-both derived using established graph convolutional network architectures. The learnt patch representations summarize subgraphs centered around nodes of interest, and can thus be reused for downstream node-wise learning tasks. 
In contrast to most prior approaches to unsupervised learning with GCNs, DGI does not rely on random walk objectives, and is readily applicable to both transductive and inductive learning setups. We demonstrate competitive performance on a variety of node classification benchmarks, which at times even exceeds the performance of supervised learning.}, author = {Veli{\v{c}}kovi{\'{c}}, Petar and Fedus, William and Hamilton, William L. and Bengio, Yoshua and Li{\`{o}}, Pietro and {Devon Hjelm}, R.}, booktitle = {7th International Conference on Learning Representations, ICLR 2019}, doi = {10.17863/CAM.40744}, title = {{Deep graph infomax}}, year = {2019} } @book{gelman2004bayesian, author = {Gelman, A}, publisher = {CRC press}, title = {{Bayesian data analysis}}, year = {2004} } @article{hunter2004tutorial, author = {Hunter, D R and Lange, K}, journal = {The American Statistician}, number = {1}, pages = {30--37}, publisher = {ASA}, title = {{A tutorial on {\{}MM{\}} algorithms}}, volume = {58}, year = {2004} } @article{tesauro1994td-gammon, author = {Tesauro, Gerald}, journal = {Neural Computation}, number = {2}, pages = {215--219}, title = {{{\{}TD-Gammon{\}}, a Self-Teaching Backgammon Program, Achieves Master-Level Play}}, volume = {6}, year = {1994} } @inproceedings{lei2015online, abstract = {Social networks are commonly used for marketing purposes. For example, free samples of a product can be given to a few influential social network users (or "seed nodes"), with the hope that they will convince their friends to buy it. One way to formalize marketers' objective is through influence maximization (or IM), whose goal is to find the best seed nodes to activate under a fixed budget, so that the number of people who get influenced in the end is maximized. Recent solutions to IM rely on the influence probability that a user influences another one. However, this probability information may be unavailable or incomplete. 
In this paper, we study IM in the absence of complete information on influence probability. We call this problem Online Influence Maximization (OIM) since we learn influence probabilities at the same time we run influence campaigns. To solve OIM, we propose a multiple-trial approach, where (1) some seed nodes are selected based on existing influence information; (2) an influence campaign is started with these seed nodes; and (3) users' feedback is used to update influence information. We adopt the Explore-Exploit strategy, which can select seed nodes using either the current influence probability estimation (exploit), or the confidence bound on the estimation (explore). Any existing IM algorithm can be used in this framework. We also develop an incremental algorithm that can significantly reduce the overhead of handling users' feedback information. Our experiments show that our solution is more effective than traditional IM methods on the partial information.}, author = {Lei, Siyu and Maniu, Silviu and Mo, Luyi and Cheng, Reynold and Senellart, Pierre}, booktitle = {Knowledge Discovery and Data mining}, title = {{Online influence maximization}}, year = {2015} } @techreport{kolmogorov2010minimizing, author = {Kolmogorov, V}, institution = {Arxiv}, number = {1006.1990}, title = {{Minimizing a sum of submodular functions}}, year = {2010} } @article{Jensen2001, author = {Jensen, Finn V and Kjaerulff, Uffe and Kristiansen, Brian and Langseth, Helge and Skaanning, Claus and Vomlel, Jiri and Vomlelova, Marta}, doi = {10.1017/S0890060401154065}, isbn = {0890060401}, issn = {08900604}, journal = {Artificial Intelligence for Engineering Design, Analysis and Manufacturing}, keywords = {Bayesian network,Decision theory,Troubleshooting}, number = {4}, pages = {321--333}, title = {{The SACSO methodology for troubleshooting complex systems}}, volume = {15}, year = {2001} } @inproceedings{azar2012on, author = {Azar, Mohammad Gheshlaghi and Munos, R{\'{e}}mi and Kappen, Bert}, booktitle = 
{International Conference on Machine Learning}, title = {{On the sample complexity of reinforcement learning with a generative model}}, url = {https://arxiv.org/pdf/1206.6461.pdf}, year = {2012} } @book{LT91, author = {Ledoux, M and Talagrand, M}, publisher = {Springer}, title = {{Probability in Banach Spaces}}, year = {1991} } @inproceedings{fiechter1994efficient, author = {Fiechter, Claude-Nicolas}, booktitle = {Conference on Learning Theory}, title = {{Efficient reinforcement learning}}, url = {http://citeseerx.ist.psu.edu/viewdoc/download;jsessionid=7F5F8FCD1AA7ED07356410DDD5B384FE?doi=10.1.1.49.8652{\&}rep=rep1{\&}type=pdf}, year = {1994} } @inproceedings{NgJo01, author = {Ng, Andrew Y and Jordan, Michael I}, booktitle = {Advances in Neural Information Processing Systems}, pages = {841--848}, title = {{On Discriminative vs. Generative Classifiers: A Comparison of Logistic Regression and Naive Bayes}}, year = {2001} } @misc{Gautier2018, abstract = {Copyright {\textcopyright} 2018, arXiv, All rights reserved. Determinantal point processes (DPPs) are specific probability distributions over clouds of points that are used as models and computational tools across physics, probability, statistics, and more recently machine learning. Sampling from DPPs is a challenge and therefore we present DPPy, a Python toolbox that gathers known exact and approximate sampling algorithms for both finite and continuous DPPs. The project is hosted on GitHub and equipped with an extensive documentation.}, author = {Gautier, G. and Polito, G. and Bardenet, R. 
and Valko, M.}, booktitle = {arXiv}, keywords = {Determinantal point processes,MCMC,Python,Random matrices,Sampling}, title = {{DPPy: Sampling DPPs with python}}, year = {2018} } @article{zhou2004learning, author = {Zhou, D and Bousquet, O and Lal, T N and Weston, J and Scholkopf, B}, journal = {Advances in Neural Information Processing Systems}, keywords = {manifold{\_}learning}, pages = {321--328}, title = {{Learning with local and global consistency}}, volume = {16}, year = {2004} } @inproceedings{yu09ArbitraryRewardsTransitions, address = {Piscataway, NJ, USA}, author = {Yu, J Y and Mannor, S}, booktitle = {GameNets'09: Proceedings of the First ICST International Conference on Game Theory for Networks}, isbn = {978-1-4244-4176-1}, pages = {314--322}, publisher = {IEEE Press}, title = {{Online learning in {\{}M{\}}arkov decision processes with arbitrarily changing rewards and transitions}}, year = {2009} } @article{Flajolet2015, abstract = {Optimal regret bounds for Multi-Armed Bandit problems are now well documented. They can be classified into two categories based on the growth rate with respect to the time horizon {\$}T{\$}: (i) small, distribution-dependent, bounds of order of magnitude {\$}\backslashbackslashln(T){\$} and (ii) robust, distribution-free, bounds of order of magnitude {\$}\backslashbackslashsqrt{\{}\backslash{\{}{\}}T{\{}\backslash{\}}{\}}{\$}. The Bandits with Knapsacks model, an extension to the framework allowing to model resource consumption, lacks this clear-cut distinction. While several algorithms have been shown to achieve asymptotically optimal distribution-free bounds on regret, there has been little progress toward the development of small distribution-dependent regret bounds. 
We partially bridge the gap by designing a general-purpose algorithm with distribution-dependent regret bounds that are logarithmic in the initial endowments of resources in several important cases that cover many practical applications, including dynamic pricing with limited supply, bid optimization in online advertisement auctions, and dynamic procurement.}, archivePrefix = {arXiv}, arxivId = {1510.01800}, author = {Flajolet, Arthur and Jaillet, Patrick}, eprint = {1510.01800}, journal = {arXiv preprint}, title = {{Logarithmic regret bounds for Bandits with Knapsacks}}, url = {http://arxiv.org/abs/1510.01800}, year = {2015} } @inproceedings{chawla2003smoteboost:, author = {Chawla, Nitesh V and Lazarevic, Aleksandar and Hall, Lawrence O and Bowyer, Kevin W}, booktitle = {PKDD}, pages = {107--119}, title = {{SMOTEBoost: Improving Prediction of the Minority Class in Boosting.}}, year = {2003} } @article{farias2003linear, author = {de Farias, Daniela Pucci and {Van Roy}, Benjamin}, journal = {Mathematics of Operations Research}, number = {3}, pages = {462--478}, title = {{On Constraint Sampling for the Linear Programming Approach to Approximate Dynamic Programming}}, volume = {29}, year = {2003} } @inproceedings{belkin2004regularization, author = {Belkin, Mikhail and Matveeva, Irina and Niyogi, Partha}, booktitle = {Conference on Learning Theory}, title = {{Regularization and semi-supervised learning on large graphs}}, url = {http://people.cs.uchicago.edu/{~}niyogi/papersps/reg{\_}colt.pdf}, year = {2004} } @article{calvin2017adaptive, author = {Calvin, James M. 
and Hefter, Mario and Herzwurm, Andr{\'{e}}}, journal = {Journal of Complexity}, month = {apr}, number = {C}, pages = {17--37}, publisher = {Academic Press, Inc.}, title = {{Adaptive approximation of the minimum of Brownian motion}}, url = {https://arxiv.org/pdf/1601.01276.pdf}, volume = {39}, year = {2017} } @article{Agr95, author = {Agrawal, R}, journal = {Advances in Applied Mathematics}, pages = {1054--1078}, title = {{Sample mean based index policies with O(log n) regret for the multi-armed bandit problem}}, volume = {27}, year = {1995} } @book{cormen89introduction, author = {Cormen, T H and Leiserson, C E and Rivest, R L}, publisher = {MIT Press}, title = {{Introduction to Algorithms}}, year = {1989} } @article{mobius, author = {Foldes, S and Hammer, P L}, journal = {Mathematics of Operations Research}, number = {2}, pages = {453--461}, title = {{Submodularity, Supermodularity, and Higher-Order Monotonicities of Pseudo-{\{}B{\}}oolean Functions}}, volume = {30}, year = {2005} } @book{chow1997probability, abstract = {3rd ed. 
Classes of sets, measures, and probability spaces -- Binomial random variables -- Independence -- Integration in a probability space -- Sums of independent random variables -- Measure extensions, Lebesgue-Stieltjes measure, Kolmogorov consistency theorem -- Conditional expectation, conditional independence, introduction to martingales -- Distribution functions and characteristic functions -- Central limit theorems -- Limit theorems for independent random variables -- Martingales -- Infinitely divisible laws.}, author = {Chow, Yuan Shih and Teicher, Henry.}, isbn = {0387406077}, pages = {488}, publisher = {Springer}, title = {{Probability theory: independence, interchangeability, martingales}}, year = {1997} } @book{nara, annote = {Second edition}, author = {Narayanan, H}, publisher = {North-Holland}, title = {{Submodular Functions and Electrical Networks}}, year = {2009} } @inproceedings{kyng2016approximate, abstract = {We show how to perform sparse approximate Gaussian elimination for Laplacian matrices. We present a simple, nearly linear time algorithm that approximates a Laplacian by a matrix with a sparse Cholesky factorization, the version of Gaussian elimination for symmetric matrices. This is the first nearly linear time solver for Laplacian systems that is based purely on random sampling, and does not use any graph theoretic constructions such as low-stretch trees, sparsifiers, or expanders. 
The crux of our analysis is a novel concentration bound for matrix martingales where the differences are sums of conditionally independent variables.}, author = {Kyng, Rasmus and Sachdeva, Sushant}, booktitle = {Foundation of Computer Science}, keywords = {Cholesky factorization,Gaussian elimination,Laplacian systems,Linear system solvers,Matrix martingales,Randomized numerical linear algebra}, title = {{Approximate Gaussian Elimination for Laplacians - Fast, Sparse, and Simple}}, year = {2016} } @techreport{submodlp, author = {Obozinski, G and Bach, F}, institution = {HAL}, title = {{Convex relaxation of Combinatorial penalties}}, year = {2011} } @article{schedlbauer2009what, abstract = {Alerts and prompts represent promising types of decision support in electronic prescribing to tackle inadequacies in prescribing. A systematic review was conducted to evaluate the efficacy of computerized drug alerts and prompts searching EMBASE, CINHAL, MEDLINE, and PsychINFO up to May 2007. Studies assessing the impact of electronic alerts and prompts on clinicians' prescribing behavior were selected and categorized by decision support type. Most alerts and prompts (23 out of 27) demonstrated benefit in improving prescribing behavior and/or reducing error rates. The impact appeared to vary based on the type of decision support. Some of these alerts (n = 5) reported a positive impact on clinical and health service management outcomes. For many categories of reminders, the number of studies was very small and few data were available from the outpatient setting. None of the studies evaluated features that might make alerts and prompts more effective. 
Details of an updated search run in Jan 2009 are included in the supplement section of this review.}, author = {Schedlbauer, Angela and Prasad, Vibhore and Mulvaney, Caroline and Phansalkar, Shobha and Stanton, Wendy and Bates, David W and Avery, Anthony J}, doi = {10.1197/jamia.M2910}, institution = {Division of Primary Care, School of Community Health Sciences, Research and Learning Resources Division, Information Services, University of Nottingham, Nottingham, UK. angela.schedlbauer@nottingham.ac.uk}, journal = {J Am Med Inform Assoc}, keywords = {Clinical Competence; Decision Support Systems,Clinical; Drug Therapy,Computer-Assisted; Electronic Prescribing; Humans,prevention /{\&}/ control; Medication Systems; Remin}, number = {4}, pages = {531--538}, pmid = {19390110}, title = {{What evidence supports the use of computerized alerts and prompts to improve clinicians' prescribing behavior?}}, url = {http://dx.doi.org/10.1197/jamia.M2910}, volume = {16}, year = {2009} } @inproceedings{xing2003generalized, annote = {comps{\_}models}, author = {Xing, Eric P and Jordan, Michael I and Russell, Stuart J}, booktitle = {Proceedings of UAI}, pages = {583--591}, title = {{A generalized mean field algorithm for variational inference in exponential families}}, url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.7.6058}, year = {2003} } @techreport{pachocki2016analysis, abstract = {We show that schemes for sparsifying matrices based on iteratively resampling rows yield guarantees matching classic 'offline' sparsifiers (see e.g. Spielman and Srivastava [STOC 2008]). 
In particular, this gives a formal analysis of a scheme very similar to the one proposed by Kelner and Levin [TCS 2013].}, archivePrefix = {arXiv}, arxivId = {1605.08194}, author = {Pachocki, Jakub}, eprint = {1605.08194}, title = {{Analysis of resparsification}}, url = {http://arxiv.org/abs/1605.08194}, year = {2016} } @article{hazan2012online, author = {Hazan, Elad and Kale, Satyen}, journal = {Journal of Machine Learning Research}, title = {{Online Submodular Minimization}}, volume = {13}, year = {2012} } @article{hoefling910path, author = {Hoefling, H}, journal = {Journal of Computational and Graphical Statistics}, number = {4}, pages = {984--1006}, title = {{A path algorithm for the fused {Lasso} signal approximator}}, volume = {19}, year = {2010} } @inproceedings{hanawal2015cheap, abstract = {We consider stochastic sequential learning problems where the learner can observe the average reward of several actions. Such a setting is interesting in many applications involving monitoring and surveillance, where the set of the actions to observe represent some (geographical) area. The importance of this setting is that in these applications, it is actually cheaper to observe average reward of a group of actions rather than the reward of a single action. We show that when the reward is smooth over a given graph representing the neighboring actions, we can maximize the cumulative reward of learning while minimizing the sensing cost. In this paper we propose CheapUCB, an algorithm that matches the regret guarantees of the known algorithms for this setting and at the same time guarantees a linear cost again over them. 
As a by-product of our analysis, we establish an $\Omega(\sqrt{dT})$ lower bound on the cumulative regret of spectral bandits for a class of graphs with effective dimension d.}, author = {Hanawal, Manjesh and Saligrama, Venkatesh and Valko, Michal and Munos, R{\'{e}}mi}, booktitle = {International Conference on Machine Learning}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Hanawal et al. - 2015 - Cheap bandits.pdf:pdf}, title = {{Cheap bandits}}, url = {http://proceedings.mlr.press/v37/hanawal15.pdf}, year = {2015} } @inproceedings{koller2000policy, author = {Koller, Daphne and Parr, Ronald}, booktitle = {Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence}, pages = {326--334}, title = {{Policy Iteration for Factored {MDPs}}}, year = {2000} } @inproceedings{flaxman2005online, abstract = {We consider the general online convex optimization framework introduced by Zinkevich. In this setting, there is a sequence of convex functions. Each period, we must choose a single point (from some feasible set) and pay a cost equal to the value of the next function on our chosen point. Zinkevich shows that, if each function is revealed after the choice is made, then one can achieve vanishingly small regret relative to the best single decision chosen in hindsight. We extend this to the bandit setting where we do not find out the entire functions but rather just their value at our chosen point. We show how to get vanishingly small regret in this setting. Our approach uses a simple approximation of the gradient that is computed from evaluating a function at a single (random) point. 
We show that this estimate is sufficient to mimic Zinkevich's gradient descent online analysis, without access to the gradient (only being able to evaluate the function at a single point).}, author = {Flaxman, Abraham D and Kalai, Adam Tauman and McMahan, H Brendan}, booktitle = {Proceedings of the 16th annual ACM-SIAM Symposium on Discrete Algorithms}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Flaxman, Kalai, McMahan - 2004 - Online convex optimization in the bandit setting gradient descent without a gradient.pdf:pdf}, keywords = {bandits}, mendeley-tags = {bandits}, organization = {Carnegie Mellon University}, pages = {385--394}, publisher = {SIAM}, series = {SODA '05}, title = {{Online convex optimization in the bandit setting: gradient descent without a gradient}}, url = {http://arxiv.org/abs/cs/0408007}, year = {2005} } @inproceedings{das2011submodular, author = {Das, A and Kempe, D}, booktitle = {Proc. ICML}, title = {{Submodular meets spectral: Greedy algorithms for subset selection, sparse approximation and dictionary selection}}, year = {2011} } @book{stone1976theory, author = {Stone, Lawrence D}, publisher = {Elsevier}, title = {{Theory of optimal search}}, url = {https://books.google.fr/books?id=DFLpiYM9cg8C}, year = {1976} } @techreport{zhu2008semi-supervised, author = {Zhu, Xiaojin}, institution = {University of Wisconsin-Madison}, number = {1530}, title = {{Semi-supervised learning literature survey}}, url = {http://pages.cs.wisc.edu/{~}jerryzhu/pub/ssl{\_}survey.pdf}, year = {2008} } @article{lo2006evaluation, abstract = {BACKGROUND: Heparin-induced thrombocytopenia (HIT) is a prothrombotic adverse drug reaction caused by heparin. As thrombocytopenia is common in hospitalized patients receiving heparin, it would be useful to have a clinical scoring system that could differentiate patients with HIT from those with other reasons for thrombocytopenia. 
AIM: To compare prospectively the diagnostic utility of a clinical score for HIT in two different clinical settings. METHODS: The pretest clinical scoring system, the '4 T's', was used to classify 100 consecutive patients referred for possible HIT in one hospital (Hamilton General Hospital, HGH) into high, intermediate, and low probability groups. This system was also used to classify likewise 236 patients by clinicians in Germany referring blood for diagnostic testing for HIT in Greifswald (GW). The clinical scores were correlated with the results of laboratory testing for HIT antibodies using the serologic criteria for HIT with high diagnostic specificity. RESULTS: In both centers, patients with low scores were unlikely to test positive for HIT antibodies [HGH: 1/64 (1.6{\%}), GW: 0/55 (0{\%})]. Patients with intermediate [HGH: 8/28 (28.6{\%}), GW: 11/139 (7.9{\%})] or high scores [HGH: 8/8 (100{\%}), GW: 9/42 (21.4{\%})] were more likely to test positive for clinically significant HIT antibodies. The positive predictive value of an intermediate or high clinical score for clinically significant HIT antibodies was higher at one center (HGH). CONCLUSIONS: A low pretest clinical score for HIT seems to be suitable for ruling out HIT in most situations (high-negative predictive value). 
The implications of an intermediate or high score vary in different clinical settings.}, author = {Lo, G K and Juhl, D and Warkentin, T E and Sigouin, C S and Eichler, P and Greinacher, A}, doi = {10.1111/j.1538-7836.2006.01787.x}, institution = {Department of Medicine, McMaster University, Hamilton, ON, Canada.}, journal = {J Thromb Haemost}, keywords = {Adult; Aged; Algorithms; Enzyme-Linked Immunosorbe}, month = {apr}, number = {4}, pages = {759--765}, pmid = {16634744}, title = {{Evaluation of pretest clinical score (4 T's) for the diagnosis of heparin-induced thrombocytopenia in two clinical settings.}}, url = {http://dx.doi.org/10.1111/j.1538-7836.2006.01787.x}, volume = {4}, year = {2006} } @inproceedings{schuurmans2002direct, author = {Schuurmans, Dale and Patrascu, Relu}, booktitle = {Advances in Neural Information Processing Systems 14}, pages = {1579--1586}, title = {{Direct Value-Approximation for Factored {\{}MDPs{\}}}}, year = {2002} } @inproceedings{kaufmann2013information, author = {Kaufmann, Emilie and Kalyanakrishnan, Shivaram}, booktitle = {Conference on Learning Theory}, title = {{Information complexity in bandit subset selection}}, year = {2013} } @inproceedings{cesa-bianchi2013online, author = {Cesa-Bianchi, Nicol{\`{o}} and Dekel, Ofer and Shamir, Ohad}, booktitle = {Advances in Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Cesa-Bianchi, Dekel, Shamir - 2013 - Online Learning with Switching Costs and Other Adaptive Adversaries.pdf:pdf}, pages = {1160--1168}, title = {{Online Learning with Switching Costs and Other Adaptive Adversaries}}, url = {http://papers.nips.cc/paper/5151-online-learning-with-switching-costs-and-other-adaptive-adversaries}, year = {2013} } @article{nemhauser1978analysis, author = {Nemhauser, G L and Wolsey, L A and Fisher, M L}, journal = {Mathematical Programming}, number = {1}, pages = {265--294}, publisher = {Springer}, title = {{An analysis of 
approximations for maximizing submodular set functions--I}}, volume = {14}, year = {1978} } @inproceedings{hauskrecht2004linear, author = {Hauskrecht, Milos and Kveton, Branislav}, booktitle = {Proceedings of the 14th International Conference on Automated Planning and Scheduling}, pages = {306--314}, title = {{Heuristic Refinements of Approximate Linear Programming for Factored Continuous-State {Markov} Decision Processes}}, year = {2004} } @inproceedings{hertz2006learning, address = {New York, NY, USA}, annote = {comps{\_}distance}, author = {Hertz, Tomer and Hillel, Aharon Bar and Weinshall, Daphna}, booktitle = {ICML '06: Proceedings of the 23rd international conference on Machine learning}, doi = {10.1145/1143844.1143895}, isbn = {1-59593-383-2}, pages = {401--408}, publisher = {ACM}, title = {{Learning a kernel function for classification with small training samples}}, year = {2006} } @article{best1990active, author = {Best, M J and Chakravarti, N}, journal = {Mathematical Programming}, number = {1}, pages = {425--439}, publisher = {Springer}, title = {{Active set algorithms for isotonic regression; a unifying framework}}, volume = {47}, year = {1990} } @article{fot, annote = {To appear}, author = {Bach, F and Jenatton, R and Mairal, J and Obozinski, G}, journal = {Foundations and Trends{\textregistered} in Machine Learning}, publisher = {Now Publishers Inc.}, title = {{Optimization with sparsity-inducing penalties}}, year = {2011} } @article{samukhin2008laplacian, abstract = {We study the Laplacian operator of an uncorrelated random network and, as an application, consider hopping processes (diffusion, random walks, signal propagation, etc.) on networks. We develop a strict approach to these problems. We derive an exact closed set of integral equations, which provide the averages of the Laplacian operator's resolvent. This enables us to describe the propagation of a signal and random walks on the network. 
We show that the determining parameter in this problem is the minimum degree {\$}q{\_}m{\$} of vertices in the network and that the high-degree part of the degree distribution is not that essential. The position of the lower edge of the Laplacian spectrum {\$}\backslashlambda{\_}c{\$} appears to be the same as in the regular Bethe lattice with the coordination number {\$}q{\_}m{\$}. Namely, {\$}\backslashlambda{\_}c{\textgreater}0{\$} if {\$}q{\_}m{\textgreater}2{\$}, and {\$}\backslashlambda{\_}c=0{\$} if {\$}q{\_}m\backslashleq2{\$}. In both these cases the density of eigenvalues {\$}\backslashrho(\backslashlambda)\backslashto0{\$} as {\$}\backslashlambda\backslashto\backslashlambda{\_}c+0{\$}, but the limiting behaviors near {\$}\backslashlambda{\_}c{\$} are very different. In terms of a distance from a starting vertex, the hopping propagator is a steady moving Gaussian, broadening with time. This picture qualitatively coincides with that for a regular Bethe lattice. Our analytical results include the spectral density {\$}\backslashrho(\backslashlambda){\$} near {\$}\backslashlambda{\_}c{\$} and the long-time asymptotics of the autocorrelator and the propagator.}, archivePrefix = {arXiv}, arxivId = {0706.1176}, author = {Samukhin, A. N. and Dorogovtsev, S. N. and Mendes, J. F. 
F.}, doi = {10.1103/PhysRevE.77.036115}, eprint = {0706.1176}, issn = {1539-3755}, journal = {Physical Review E}, month = {mar}, number = {3}, pages = {036115}, title = {{Laplacian spectra of, and random walks on, complex networks: Are scale-free architectures really important?}}, url = {http://arxiv.org/abs/0706.1176 http://dx.doi.org/10.1103/PhysRevE.77.036115 https://link.aps.org/doi/10.1103/PhysRevE.77.036115}, volume = {77}, year = {2008} } @article{narayanan1995rounding, author = {Narayanan, H}, journal = {Linear Algebra and its Applications}, pages = {41--57}, publisher = {Elsevier}, title = {{A rounding technique for the polymatroid membership problem}}, volume = {221}, year = {1995} } @article{horst1999dc, author = {Horst, R and Thoai, N V}, journal = {Journal of Optimization Theory and Applications}, number = {1}, pages = {1--43}, publisher = {Springer}, title = {{DC programming: overview}}, volume = {103}, year = {1999} } @inproceedings{tran-thang2012knapsack, author = {Tran-Thanh, Long and Chapman, Archie C. and Rogers, Alex and Jennings, Nicholas R.}, booktitle = {AAAI}, title = {{Knapsack Based Optimal Policies for Budget-Limited Multi-Armed Bandits}}, urldate = {2014-10-16}, year = {2012} } @inproceedings{saluja2014graph, author = {Saluja, Avneesh and Hassan, Hany and Toutanova, Kristina and Quirk, Chris}, booktitle = {Proceedings of ACL'14}, title = {{Graph-based semi-supervised learning of translation models from monolingual data}}, year = {2014} } @article{chow1991optimal, author = {Chow, Chee-Seng and Tsitsiklis, John}, journal = {IEEE Transactions on Automatic Control}, number = {8}, pages = {898--914}, title = {{An Optimal One-Way Multigrid Algorithm for Discrete-Time Stochastic Control}}, volume = {36}, year = {1991} } @inproceedings{charpiatexhaustive, author = {Charpiat, G}, booktitle = {Proc. 
CVPR}, title = {{Exhaustive Family of Energies Minimizable Exactly by a Graph Cut}}, year = {2011} } @article{fisher1936use, author = {Fisher, R A}, journal = {Annals of Eugenics}, keywords = {classic classification linear-classification linea}, number = {7}, pages = {179--188}, title = {{The Use of Multiple Measurements in Taxonomic Problems}}, volume = {7}, year = {1936} } @article{dunn1978conditional, author = {Dunn, J C and Harshbarger, S}, journal = {Journal of Mathematical Analysis and Applications}, number = {2}, pages = {432--444}, publisher = {Elsevier}, title = {{Conditional gradient algorithms with open loop step size rules}}, volume = {62}, year = {1978} } @inproceedings{zhu2020deep, abstract = {Graph representation learning nowadays becomes fundamental in analyzing graph-structured data. Inspired by recent success of contrastive methods, in this paper, we propose a novel framework for unsupervised graph representation learning by leveraging a contrastive objective at the node level. Specifically, we generate two graph views by corruption and learn node representations by maximizing the agreement of node representations in these two views. To provide diverse node contexts for the contrastive objective, we propose a hybrid scheme for generating graph views on both structure and attribute levels. Besides, we provide theoretical justification behind our motivation from two perspectives, mutual information and the classical triplet loss. We perform empirical experiments on both transductive and inductive learning tasks using a variety of real-world datasets. Experimental experiments demonstrate that despite its simplicity, our proposed method consistently outperforms existing state-of-the-art methods by large margins. 
Moreover, our unsupervised method even surpasses its supervised counterparts on transductive tasks, demonstrating its great potential in real-world applications.}, archivePrefix = {arXiv}, arxivId = {2006.04131}, author = {Zhu, Yanqiao and Xu, Yichen and Yu, Feng and Liu, Qiang and Wu, Shu and Wang, Liang}, booktitle = {Deep Graph Contrastive Representation Learning}, eprint = {2006.04131}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Zhu et al. - 2020 - Deep Graph Contrastive Representation Learning.pdf:pdf}, title = {{Deep graph contrastive representation learning}}, url = {http://arxiv.org/abs/2006.04131}, year = {2020} } @inproceedings{yang2012simple, abstract = {In this work, we develop a simple algorithm for semi-supervised regression. The key idea is to use the top eigenfunctions of integral operator derived from both labeled and unlabeled examples as the basis functions and learn the prediction function by a simple linear regression. We show that under appropriate assumptions about the integral operator, this approach is able to achieve an improved regression error bound better than existing bounds of supervised learning. 
We also verify the effectiveness of the proposed algorithm by an empirical study.}, archivePrefix = {arXiv}, arxivId = {1206.6412}, author = {Ji, Ming and Yang, Tianbao and Lin, Binbin and Jin, Rong and Han, Jiawei}, booktitle = {International Conference on Machine Learning}, eprint = {1206.6412}, month = {jun}, title = {{A Simple Algorithm for Semi-supervised Learning with Improved Generalization Error Bound}}, url = {http://arxiv.org/abs/1206.6412}, year = {2012} } @inproceedings{chambolle2005total, author = {Chambolle, A}, booktitle = {Energy Minimization Methods in Computer Vision and Pattern Recognition}, organization = {Springer}, pages = {136--152}, title = {{Total variation minimization and a class of binary {MRF} models}}, year = {2005} } @inproceedings{auer2009near, author = {Auer, Peter and Jaksch, Thomas and Ortner, Ronald}, booktitle = {Neural Information Processing Systems}, title = {{Near-optimal regret bounds for reinforcement learning}}, url = {https://papers.nips.cc/paper/3401-near-optimal-regret-bounds-for-reinforcement-learning.pdf}, year = {2009} } @article{menard2020fast, abstract = {Realistic environments often provide agents with very limited feedback. When the environment is initially unknown, the feedback, in the beginning, can be completely absent, and the agents may first choose to devote all their effort on exploring efficiently. The exploration remains a challenge while it has been addressed with many hand-tuned heuristics with different levels of generality on one side, and a few theoretically backed exploration strategies on the other. Many of them are incarnated by intrinsic motivation and in particular explorations bonuses. A common rule of thumb for exploration bonuses is to use a $1/\sqrt{n}$ bonus that is added to the empirical estimates of the reward, where n is a number of times this particular state (or a state-action pair) was visited. 
We show that, surprisingly, for a pure-exploration objective of reward-free exploration, bonuses that scale with 1/n bring faster learning rates, improving the known upper bounds with respect to the dependence on the horizon H. Furthermore, we show that with an improved analysis of the stopping time, we can improve by a factor H the sample complexity in the best-policy identification setting, which is another pure-exploration objective, where the environment provides rewards but the agent is not penalized for its behavior during the exploration phase.}, archivePrefix = {arXiv}, arxivId = {2007.13442}, author = {M{\'{e}}nard, Pierre and Domingues, Omar Darwiche and Jonsson, Anders and Kaufmann, Emilie and Leurent, Edouard and Valko, Michal}, eprint = {2007.13442}, journal = {arXiv preprint}, title = {{Fast active learning for pure exploration in reinforcement learning}}, year = {2020} } @article{hall1984best, author = {Hall, Peter and Welsh, Alan H.}, doi = {10.1214/aos/1176346723}, journal = {The Annals of Statistics}, number = {3}, pages = {1079--1084}, publisher = {The Institute of Mathematical Statistics}, title = {{Best Attainable Rates of Convergence for Estimates of Parameters of Regular Variation}}, volume = {12}, year = {1984} } @techreport{BN99, author = {Ben-Tal, A and Nemirovski, A}, institution = {MINERVA Optimization Center Report, Faculty of Industrial Engineering and Management, Technion—Israel Institute of Technology, Haifa}, title = {{The conjugate barrier mirror descent method for non-smooth convex optimization}}, year = {1999} } @book{alpern2013search, author = {Alpern, Steve and Fokkink, Robbert and Gasieniec, L and Lindelauf, Roy and Subrahmanian, V S}, publisher = {Springer}, title = {{Search theory}}, url = {https://www.springer.com/fr/book/9781461468240}, year = {2013} } @article{duane1987hybrid, author = {Duane, Simon and Kennedy, A D and Pendleton, Brian and Roweth, Duncan}, journal = {Physics Letters B}, number = {2}, pages = {216--222}, title = {{Hybrid {Monte Carlo}}}, volume = {195}, 
year = {1987} } @article{chomsky56, author = {Chomsky, N}, journal = {IEEE Transactions on Information Theory}, keywords = {3mdl,cfg,chomsky,fsm,language}, number = {3}, pages = {113--124}, title = {{Three models for the description of language}}, url = {http://ieeexplore.ieee.org/xpls/abs{\_}all.jsp?arnumber=1056813}, volume = {2}, year = {1956} } @inproceedings{erraqabi2016rewards, abstract = {In multi-armed bandits, the most common objective is the maximization of the cumulative reward. Alternative settings include active exploration, where a learner tries to gain accurate estimates of the rewards of all arms. While these objectives are contrasting, in many scenarios it is desirable to trade off rewards and errors. For instance, in educational games the designer wants to gather generalizable knowledge about the behavior of the students and teaching strategies (small estimation errors) but, at the same time, the system needs to avoid giving a bad experience to the players, who may leave the system permanently (large reward). In this paper, we formalize this tradeoff and introduce the ForcingBalance algorithm whose performance is provably close to the best possible tradeoff strategy. Finally, we demonstrate on real-world educational data that ForcingBalance returns useful information about the arms without compromising the overall reward.}, author = {Erraqabi, Akram and Lazaric, Alessandro and Valko, Michal and Brunskill, Emma and Liu, Yun-en}, booktitle = {Challenges in Machine Learning: Gaming and Education workshop at Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Erraqabi et al. 
- 2016 - Rewards and errors in multi-arm bandits for interactive education.pdf:pdf}, title = {{Rewards and errors in multi-arm bandits for interactive education}}, year = {2016} } @inproceedings{wang2018batched, abstract = {Bayesian optimization (BO) has become an effective approach for black-box function optimization problems when function evaluations are expensive and the optimum can be achieved within a relatively small number of queries. However, many cases, such as the ones with high-dimensional inputs, may require a much larger number of observations for optimization. Despite an abundance of observations thanks to parallel experiments, current BO techniques have been limited to merely a few thousand observations. In this paper, we propose ensemble Bayesian optimization (EBO) to address three current challenges in BO simultaneously: (1) large-scale observations; (2) high dimensional input spaces; and (3) selections of batch queries that balance quality and diversity. The key idea of EBO is to operate on an ensemble of additive Gaussian process models, each of which possesses a randomized strategy to divide and conquer. 
We show unprecedented, previously impossible results of scaling up BO to tens of thousands of observations within minutes of computation.}, archivePrefix = {arXiv}, arxivId = {1706.01445}, author = {Wang, Zi and Gehring, Clement and Kohli, Pushmeet and Jegelka, Stefanie}, booktitle = {International Conference on Artificial Intelligence and Statistics}, eprint = {1706.01445}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/485315a947c28f9dfd29aa1ccf2aa8b5b38e8b8f.pdf:pdf}, title = {{Batched Large-scale Bayesian Optimization in High-dimensional Spaces}}, url = {http://arxiv.org/abs/1706.01445}, year = {2018} } @inproceedings{carvalho06voting, address = {New York, NY, USA}, author = {Carvalho, Vitor R and Cohen, William W}, booktitle = {KDD '06}, pages = {548--553}, publisher = {ACM}, title = {{Single-pass online learning: performance, voting schemes and online feature selection}}, year = {2006} } @article{HW98, author = {Herbster, M and Warmuth, M}, journal = {Machine Learning}, pages = {151--178}, title = {{Tracking the Best Expert}}, volume = {32}, year = {1998} } @inproceedings{williams2001using, abstract = {A major problem for kernel-based predictors (such as Support Vector Machines and Gaussian processes) is that the amount of computation required to find the solution scales as O(n ), where n is the number of training examples. We show that an approximation to the eigendecomposition of the Gram matrix can be computed by the Nystr{\"{o}}m method (which is used for the numerical solution of eigenproblems). This is achieved by carrying out an eigendecomposition on a smaller system of size m {\textless} n, and then expanding the results back up to n dimensions. The computational complexity of a predictor using this approximation is O(m n). 
We report experiments on the USPS and abalone data sets and show that we can set m n without any significant decrease in the accuracy of the solution.}, author = {Williams, Christopher and Seeger, Matthias}, booktitle = {Neural Information Processing Systems}, title = {{Using the Nystrom method to speed up kernel machines}}, year = {2001} } @misc{baxter99direct, author = {Baxter, J and Bartlett, P}, title = {{Direct Gradient-Based Reinforcement Learning}}, url = {citeseer.ist.psu.edu/baxter99direct.html}, year = {1999} } @inproceedings{gopalan2014thompson, author = {Gopalan, Aditya and Mannor, Shie and Mansour, Yishay}, booktitle = {International Conference on Machine Learning}, title = {{Thompson sampling for complex online problems}}, url = {http://proceedings.mlr.press/v32/gopalan14.pdf}, year = {2014} } @article{cornuejols1977location, author = {Cornuejols, G and Fisher, M L and Nemhauser, G L}, journal = {Management Science}, number = {8}, pages = {789--810}, publisher = {JSTOR}, title = {{Location of bank accounts to optimize float: An analytic study of exact and approximate algorithms}}, volume = {23}, year = {1977} } @article{Spa97, author = {Spall, J}, journal = {Automatica}, number = {1}, pages = {109--112}, title = {{A one-measurement form of simultaneous perturbation stochastic approximation}}, volume = {33}, year = {1997} } @article{bull2011convergence, author = {Bull, Adam}, journal = {The Journal of Machine Learning Research}, pages = {2879--2904}, publisher = {JMLR. 
org}, title = {{Convergence rates of efficient global optimization algorithms}}, volume = {12}, year = {2011} } @article{KV05, author = {Kalai, A and Vempala, S}, journal = {Journal of Computer and System Sciences}, pages = {291--307}, title = {{Efficient algorithms for online decision problems}}, volume = {71}, year = {2005} } @inproceedings{ng2000pegasus:, author = {Ng, Andrew and Jordan, Michael}, booktitle = {Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence}, pages = {406--415}, title = {{{\{}PEGASUS{\}}: A Policy Search Method for Large {\{}MDPs{\}} and {\{}POMDPs{\}}}}, year = {2000} } @article{ching2015one, author = {Ching, Avery and Edunov, Sergey and Kabiljo, Maja and Logothetis, Dionysios and Muthukrishnan, Sambavi}, journal = {Proceedings of the VLDB Endowment}, number = {12}, pages = {1804--1815}, publisher = {VLDB Endowment}, title = {{One trillion edges: graph processing at Facebook-scale}}, volume = {8}, year = {2015} } @book{hansen2004global, author = {Hansen, Eldon and Walster, William}, isbn = {9780824740597}, publisher = {Marcel Dekker}, series = {Pure and Applied Mathematics Series}, title = {{Global Optimization Using Interval Analysis: Revised and Expanded}}, url = {http://books.google.fr/books?id=tY2wAkb-zLcC}, year = {2004} } @inproceedings{finkel08crfpcfg, author = {Finkel, Jenny R and Kleeman, Alex and Manning, Christopher D}, booktitle = {ACL '08: Proceedings of the 46th Annual Meeting of the Association for Computational Linguistics}, pages = {959--967}, publisher = {Association for Computational Linguistics}, title = {{Efficient, Feature-based, Conditional Random Field Parsing}}, year = {2008} } @article{Langford2007, abstract = {We present Epoch-Greedy, an algorithm for contextual multi-armed bandits (also known as bandits with side information). Epoch-Greedy has the following properties: 1. No knowledge of a time horizon T is necessary. 2. 
The regret incurred by Epoch-Greedy is controlled by a sample complexity bound for a hypothesis class. 3. The regret scales as O(T 2/3 S 1/3) or better (sometimes, much better). Here S is the complexity term in a sample complexity bound for standard supervised learning. 1}, author = {Langford, John}, editor = {Platt, J C and Koller, D and Singer, Y and Roweis, S}, journal = {Statistics}, pages = {1--8}, publisher = {Citeseer}, title = {{The Epoch-Greedy Algorithm for Contextual Multi-armed Bandits}}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.143.8000{\&}rep=rep1{\&}type=pdf}, volume = {20}, year = {2007} } @inproceedings{cortes2008stability, author = {Cortes, Corinna and Mohri, Mehryar and Pechyony, Dmitry and Rastogi, Ashish}, booktitle = {Proceedings of the 25th International Conference on Machine Learning}, pages = {176--183}, title = {{Stability of Transductive Regression Algorithms}}, year = {2008} } @inproceedings{joachims1999transductive, address = {San Francisco, CA, USA}, author = {Joachims, Thorsten}, booktitle = {ICML '99: Proceedings of the Sixteenth International Conference on Machine Learning}, isbn = {1-55860-612-2}, pages = {200--209}, title = {{Transductive Inference for Text Classification using Support Vector Machines}}, year = {1999} } @article{GyLiLu08, author = {Gy{\"{o}}rgy, A and Linder, T and Lugosi, G}, journal = {IEEE Transactions on Information Theory}, pages = {1604--1625}, title = {{Tracking the Best Quantizer}}, volume = {54}, year = {2008} } @book{dehaan2006extreme, author = {de Haan, Laurens and Ferreira, Ana}, publisher = {Springer}, series = {Springer Series in Operations Research and Financial Engineering}, title = {{Extreme Value Theory: An Introduction}}, year = {2006} } @inproceedings{taskar05learning, author = {Taskar, Ben and Chatalbashev, Vassil and Koller, Daphne and Guestrin, Carlos}, pages = {896--903}, title = {{Learning structured prediction models: a large margin approach}} } 
@article{karlin1994competitive, author = {Karlin, Anna and Manasse, Mark and McGeoch, Lyle and Owicki, Susan}, journal = {Algorithmica}, number = {6}, pages = {542--571}, title = {{Competitive Randomized Algorithms for Nonuniform Problems}}, volume = {11}, year = {1994} } @article{johnson74approximation, author = {Johnson, David}, journal = {Journal of Computer and System Sciences}, number = {3}, pages = {256--278}, title = {{Approximation Algorithms for Combinatorial Problems}}, volume = {9}, year = {1974} } @techreport{chen2020simple, archivePrefix = {arXiv}, arxivId = {2002.05709}, author = {Chen, Ting and Kornblith, Simon and Norouzi, Mohammad and Hinton, Geoffrey}, eprint = {2002.05709}, month = {feb}, title = {{A simple framework for contrastive learning of visual representations}}, url = {https://arxiv.org/abs/2002.05709}, year = {2020} } @inproceedings{Kveton2015, abstract = {A stochastic combinatorial semi-bandit is an online learning problem where at each step a learning agent chooses a subset of ground items subject to constraints, and then observes stochastic weights of these items and receives their sum as a payoff. In this paper, we close the problem of computationally and sample efficient learning in stochastic combinatorial semi-bandits. In particular, we analyze a UCB-like algorithm for solving the problem, which is known to be computationally efficient; and prove {\$}O(K L (1 / \backslashDelta) \backslashlog n){\$} and {\$}O(\backslashsqrt{\{}K L n \backslashlog n{\}}){\$} upper bounds on its {\$}n{\$}-step regret, where {\$}L{\$} is the number of ground items, {\$}K{\$} is the maximum number of chosen items, and {\$}\backslashDelta{\$} is the gap between the expected returns of the optimal and best suboptimal solutions. 
The gap-dependent bound is tight up to a constant factor and the gap-free bound is tight up to a polylogarithmic factor.}, archivePrefix = {arXiv}, arxivId = {1410.0949}, author = {Kveton, Branislav and Wen, Zheng and Ashkan, Azin and Szepesvari, Csaba}, booktitle = {Proceedings of AISTATS}, eprint = {1410.0949}, issn = {15337928}, title = {{Tight Regret Bounds for Stochastic Combinatorial Semi-Bandits}}, url = {http://arxiv.org/abs/1410.0949}, volume = {38}, year = {2015} } @inproceedings{bartletthigh, author = {Bartlett, Peter L and Dani, Varsha and Hayes, Thomas P and Kakade, Sham M and Rakhlin, Alexander and Tewari, Ambuj}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {335--342}, title = {{High-probability Regret Bounds for Bandit Online Linear Optimization}} } @book{Dud99, author = {Dudley, R}, publisher = {Cambridge University Press}, title = {{Uniform Central Limit Theorems}}, year = {1999} } @article{shiraev03, author = {Shiraev, Dmitry Eric}, institution = {VT Electronic Thesis and Dissertation Archive [http://scholar.lib.vt.edu/theses/OAI2/] (United States)}, keywords = {Computer Science}, publisher = {VT}, title = {{Inverse Reinforcement Learning and Routing Metric Discovery}}, url = {http://scholar.lib.vt.edu/theses/available/etd-08242003-224906/}, year = {2003} } @inproceedings{kivinen2002online, address = {Cambridge, MA}, author = {Kivinen, J and Smola, A J and Williamson, R C}, booktitle = {Advances in Neural Information Processing Systems 14}, editor = {Dietterich, T G and Becker, S and Ghahramani, Z}, publisher = {MIT Press}, title = {{Online Learning with Kernels}}, year = {2002} } @article{golovin2011adaptive, author = {Golovin, Daniel and Krause, Andreas}, journal = {Journal of Artificial Intelligence Research (JAIR)}, pages = {427--486}, title = {{Adaptive Submodularity: Theory and Applications in Active Learning and Stochastic Optimization}}, volume = {42}, year = {2011} } @inproceedings{narasimhan2004pac, author = {Narasimhan, M 
and Bilmes, J}, booktitle = {Proc. UAI}, title = {{PAC-learning bounded tree-width graphical models}}, year = {2004} } @article{ziebart2012probabilistic, abstract = {Numerous interaction techniques have been developed that make "virtual" pointing at targets in graphical user interfaces easier than analogous physical pointing tasks by invoking target-based interface modifications. These pointing facilitation techniques crucially depend on methods for estimating the relevance of potential targets. Unfortunately, many of the simple methods employed to date are inaccurate in common settings with many selectable targets in close proximity. In this paper, we bring recent advances in statistical machine learning to bear on this underlying target relevance estimation problem. By framing past target-driven pointing trajectories as approximate solutions to well-studied control problems, we learn the probabilistic dynamics of pointing trajectories that enable more accurate predictions of intended targets.}, author = {Ziebart, Brian D and Dey, Anind K and Bagnell, J Andrew}, doi = {10.1145/2166966.2166968}, isbn = {9781450310482}, journal = {Proceedings of the 2012 ACM international conference on Intelligent User Interfaces IUI 12}, pages = {1}, publisher = {ACM Press}, series = {IUI '12}, title = {{Probabilistic Pointing Target Prediction via Inverse Optimal Control}}, url = {http://dl.acm.org/citation.cfm?doid=2166966.2166968}, year = {2012} } @inproceedings{szummer2001partially, abstract = {To classify a large number of unlabeled examples we combine a limited number of labeled examples with a Markov random walk representation over the unlabeled examples. The random walk representation exploits any low dimensional structure in the data in a robust, probabilistic manner. We develop and compare several estimation criteria/algorithms suited to this representation. 
This includes in particular multi-way classification with an average margin criterion which permits a closed form...}, author = {Szummer, Martin and Jaakkola, Tommi}, booktitle = {Advances in Neural Information Processing Systems}, keywords = {classification,graph,machine-learning}, title = {{Partially labeled classification with Markov random walks}}, volume = {14}, year = {2001} } @inproceedings{nodelman2002continuous, author = {Nodelman, Uri and Shelton, Christian and Koller, Daphne}, booktitle = {Proceedings of the 18th Conference on Uncertainty in Artificial Intelligence}, pages = {378--387}, title = {{Continuous Time Bayesian Networks}}, year = {2002} } @article{mairal2010online, author = {Mairal, J and Bach, F and Ponce, J and Sapiro, G}, issn = {1532-4435}, journal = {Journal of Machine Learning Research}, pages = {19--60}, publisher = {MIT Press}, title = {{Online learning for matrix factorization and sparse coding}}, volume = {11}, year = {2010} } @article{Ambuhl2011, abstract = {We consider the single-machine scheduling problem to minimize the weighted sum of completion times under precedence constraints. In a series of recent papers, it was established that this scheduling problem is a special case of minimum weighted vertex cover. In this paper, we show that the vertex cover graph associated with the scheduling problem is exactly the graph of incomparable pairs defined in the dimension theory of partial orders. Exploiting this relationship allows us to present a framework for obtaining (2 — 2/f)-approximation algorithms, provided that the set of precedence constraints has fractional dimension of at most f. Our approach yields the best-known approximation ratios for all previously considered special classes of precedence constraints, and it provides the first results for bounded degree and orders of interval dimension 2. 
On the negative side, we show that the addressed problem remains NP-hard even when restricted to the special case of interval orders. Furthermore, we prove that the general problem, if a fixed cost present in all feasible schedules is ignored, becomes as hard to approximate as vertex cover. We conclude by giving the first inapproximability result for this problem, showing under a widely believed assumption that it does not admit a polynomial-time approximation scheme.}, author = {Amb{\"{u}}hl, Christoph and Mastrolilli, Monaldo and Mutsanas, Nikolaus and Svensson, Ola}, doi = {10.2307/41412330}, isbn = {0364765X}, issn = {0364765X}, journal = {Mathematics of Operations Research}, number = {4}, pages = {653--669}, title = {{On the approximability of single-machine scheduling with precedence constraints}}, url = {http://www.jstor.org/stable/41412330}, volume = {36}, year = {2011} } @misc{Tang2020, abstract = {Copyright {\textcopyright} 2020, arXiv, All rights reserved. In this work, we investigate the application of Taylor expansions in reinforcement learning. In particular, we propose Taylor expansion policy optimization, a policy optimization formalism that generalizes prior work (e.g., TRPO) as a first-order special case. We also show that Taylor expansions intimately relate to off-policy evaluation. Finally, we show that this new formulation entails modifications which improve the performance of several state-of-the-art distributed algorithms.}, author = {Tang, Y. and Valko, M. 
and Munos, R.}, booktitle = {International Conference on Machine Learning}, title = {{Taylor Expansion Policy Optimization}}, year = {2020} } @inproceedings{subramanya2009large, author = {Subramanya, Amarnag and Bilmes, Jeff}, booktitle = {Workshop on Large-Scale Machine Learning: Parallelism and Massive Datasets at Neural Information Processing Systems}, title = {{Large-Scale Graph-based Transductive Inference}}, year = {2009} } @inproceedings{kocak2014wspectral, abstract = {Smooth functions on graphs have wide applications in manifold and semi-supervised learning. In this paper, we study a bandit problem where the payoffs of arms are smooth on a graph. This framework is suitable for solving online learning problems that involve graphs, such as content-based recommendation. In this problem, each recommended item is a node and its expected rating is similar to its neighbors. The goal is to recommend items that have high expected ratings. We aim for the algorithms where the cumulative regret would not scale poorly with the number of nodes. In particular, we introduce the notion of an effective dimension, which is small in real-world graphs, and propose two algorithms for solving our problem that scale linearly in this dimension. Our experiments on real-world content recommendation problem show that a good estimator of user preferences for thousands of items can be learned from just tens nodes evaluations.}, author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Valko, Michal and Munos, R{\'{e}}mi and Kveton, Branislav and Agrawal, Shipra}, booktitle = {AAAI Workshop on Sequential Decision-Making with Big Data}, title = {{Spectral bandits for smooth graph functions with applications in recommender systems}}, year = {2014} } @article{Yun2014, abstract = {In this paper, we consider networks consisting of a finite number of non-overlapping communities. 
To extract these communities, the interaction between pairs of nodes may be sampled from a large available data set, which allows a given node pair to be sampled several times. When a node pair is sampled, the observed outcome is a binary random variable, equal to 1 if nodes interact and to 0 otherwise. The outcome is more likely to be positive if nodes belong to the same communities. For a given budget of node pair samples or observations, we wish to jointly design a sampling strategy (the sequence of sampled node pairs) and a clustering algorithm that recover the hidden communities with the highest possible accuracy. We consider both non-adaptive and adaptive sampling strategies, and for both classes of strategies, we derive fundamental performance limits satisfied by any sampling and clustering algorithm. In particular, we provide necessary conditions for the existence of algorithms recovering the communities accurately as the network size grows large. We also devise simple algorithms that accurately reconstruct the communities when this is at all possible, hence proving that the proposed necessary conditions for accurate community detection are also sufficient. The classical problem of community detection in the stochastic block model can be seen as a particular instance of the problems consider here. But our framework covers more general scenarios where the sequence of sampled node pairs can be designed in an adaptive manner. 
The paper provides new results for the stochastic block model, and extends the analysis to the case of adaptive sampling.}, archivePrefix = {arXiv}, arxivId = {1402.3072}, author = {Yun, Se-Young and Proutiere, Alexandre}, eprint = {1402.3072}, month = {feb}, title = {{Community Detection via Random and Adaptive Sampling}}, url = {http://arxiv.org/abs/1402.3072}, year = {2014} } @inproceedings{perrault2019finding, abstract = {We consider the problem where an agent wants to find a hidden object that is randomly located in some vertex of a directed acyclic graph (DAG) according to a fixed but possibly unknown distribution. The agent can only examine vertices whose in-neighbors have already been examined. In scheduling theory, this problem is denoted by 1|prec|∑wjCj [Graham1979]. However, in this paper we address a learning setting where we allow the agent to stop before having found the object and restart searching on a new independent instance of the same problem. The goal is to maximize the total number of hidden objects found under a time constraint. The agent can thus skip an instance after realizing that it would spend too much time on it. Our contributions are both to the search theory and multi-armed bandits. If the distribution is known, we provide a quasi-optimal greedy strategy with the help of known computationally efficient algorithms for solving 1|prec|∑wjCj under some assumption on the DAG. If the distribution is unknown, we show how to sequentially learn it and, at the same time, act near-optimally in order to collect as many hidden objects as possible. 
We provide an algorithm, prove theoretical guarantees, and empirically show that it outperforms the naive baseline.}, author = {Perrault, Pierre and Perchet, Vianney and Valko, Michal}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Finding the bandit in a graph: Sequential search-and-stop}}, year = {2019} } @inproceedings{DBLP:conf/icml/2010, booktitle = {ICML}, editor = {F{\"{u}}rnkranz, Johannes and Joachims, Thorsten}, publisher = {Omnipress}, title = {{Proceedings of the 27th International Conference on Machine Learning (ICML-10), June 21-24, 2010, Haifa, Israel}}, year = {2010} } @inproceedings{hauskrecht2007evidence-based, abstract = {Anomaly detection methods can be very useful in identifying interesting or concerning events. In this work, we develop and examine new probabilistic anomaly detection methods that let us evaluate management decisions for a specific patient and identify those decisions that are highly unusual with respect to patients with the same or similar condition. The statistics used in this detection are derived from probabilistic models such as Bayesian networks that are learned from a database of past patient cases. We evaluate our methods on the problem of detection of unusual hospitalization patterns for patients with community acquired pneumonia. 
The results show very encouraging detection performance with 0.5 precision at 0.53 recall and give us hope that these techniques may provide the basis of intelligent monitoring systems that alert clinicians to the occurrence of unusual events or decisions.}, author = {Hauskrecht, Milos and Valko, Michal and Kveton, Branislav and Visweswaran, Shyam and Cooper, Gregory F}, booktitle = {Annual American Medical Informatics Association Symposium}, keywords = {misovalko}, mendeley-tags = {misovalko}, month = {nov}, pages = {319--324}, title = {{Evidence-based anomaly detection}}, year = {2007} } @inproceedings{chapelle2014modeling, abstract = {Download data set at http://labs.criteo.com/downloads/2014-conversion-logs-dataset/}, author = {Chapelle, Olivier}, booktitle = {Knowledge Discovery and Data Mining}, doi = {10.1145/2623330.2623634}, title = {{Modeling delayed feedback in display advertising}}, year = {2014} } @book{Nocedal:1999:NO1, author = {Nocedal, J and Wright, S J}, edition = {2nd}, publisher = {Springer}, title = {{Numerical Optimization}}, year = {2006} } @inproceedings{elliott84gibbs, author = {Elliott, H and Derin, H and Cristi, R and Geman, D}, booktitle = {Proceeding of the 1984 Int. Conf. Acoust., Speech, Signal Processing, ICASSP'84}, pages = {32.5.1--32.5.4}, title = {{Application of the Gibbs distribution to image segmentation}}, year = {1984} } @unpublished{BaPaSzSz11-online, annote = {From Duplicate 1 (Online learning - Bartok, G; Pal, D; Szepesvari, C; Szita, I) Lecture Notes From Duplicate 2 (Online Learning - Bart{\'{o}}k, G; P{\'{a}}l, D; Szepesv{\'{a}}ri, Cs.; Szita, I) https://moodle.cs.ualberta.ca/file.php/354/notes.pdf}, author = {Bart{\'{o}}k, G and P{\'{a}}l, D and Szepesv{\'{a}}ri, Cs. 
and Szita, I and Bartok, G and Pal, D and Szepesvari, C and Szita, I}, howpublished = {Lecture notes, University of Alberta}, title = {{Online learning}}, year = {2011} } @techreport{gerchinovitz2017fano, abstract = {We extend Fano's inequality, which controls the average probability of (disjoint) events in terms of the average of some Kullback-Leibler divergences, to work with arbitrary [0,1]-valued random variables. Our simple two-step methodology is general enough to cover the case of an arbitrary (possibly continuously infinite) family of distributions as well as [0,1]-valued random variables not necessarily summing up to 1. Several novel applications are provided, in which the consideration of random variables is particularly handy. The most important applications deal with the problem of Bayesian posterior concentration (minimax or distribution-dependent) rates and with a lower bound on the regret in non-stochastic sequential learning. We also improve in passing some earlier fundamental results: in particular, we provide a simple and enlightening proof of the refined Pinsker's inequality of Ordentlich and Weinberger and derive a sharper Bretagnolle-Huber inequality.}, archivePrefix = {arXiv}, arxivId = {1702.05985}, author = {Gerchinovitz, Sebastien and M{\'{e}}nard, Pierre and Stoltz, Gilles}, eprint = {1702.05985}, month = {feb}, title = {{Fano's inequality for random variables}}, url = {http://arxiv.org/abs/1702.05985}, year = {2017} } @book{cover2006elements, author = {Cover, Thomas M. and Thomas, Joy A.}, publisher = {John Wiley {\&} Sons}, title = {{Elements of information theory}}, url = {https://www.amazon.com/Elements-Information-Theory-Telecommunications-Processing/dp/0471241954}, year = {2006} } @article{Kveton2010, abstract = {This paper proposes a novel algorithm for semi-supervised learning. This algorithm learns graph cuts that maximize the margin with respect to the labels induced by the harmonic function solution. 
We motivate the approach, compare it to existing work, and prove a bound on its generalization error. The quality of our solutions is evaluated on a synthetic problem and three UCI ML repository datasets. In most cases, we outperform manifold regularization of support vector machines, which is a state-of-the-art approach to semi-supervised max-margin learning.}, author = {Kveton, B. and Valko, M. and Rahimi, A. and Huang, L.}, issn = {15324435}, journal = {Journal of Machine Learning Research}, title = {{Semi-supervised learning with max-margin graph cuts}}, volume = {9}, year = {2010} } @article{devroye13rwalk_it, author = {Devroye, L and Lugosi, G and Neu, G}, journal = {Submitted to the IEEE Transactions on Information Theory}, title = {{Prediction by random-walk perturbation}}, year = {2013} } @inproceedings{Popineau2019, author = {Popineau, F. and Valko, M. and Vie, J.-J.}, booktitle = {CEUR Workshop Proceedings}, issn = {16130073}, title = {{Optimizing human learning workshop eliciting adaptive sequences for learning (WeASeL)}}, volume = {2354}, year = {2019} } @misc{gyorfi08survey, address = {Tuebingen, Germany}, author = {Gyorfi, L and Ottucs{\'{a}}k, Gy. and Urb{\'{a}}n, A}, howpublished = {Machine Learning Summer School 2007, MLSS 2007 (invited lecture)}, title = {{Empirical log-optimal portfolio selections: a survey}}, year = {2008} } @inproceedings{streeter2008online, author = {Streeter, Matthew J. and Golovin, Daniel}, booktitle = {NIPS}, pages = {1577--1584}, title = {{An Online Algorithm for Maximizing Submodular Functions}}, year = {2008} } @inproceedings{hauskrecht2010conditional, abstract = {We develop and evaluate a data-driven approach for detecting unusual (anomalous) patient-management actions using past patient cases stored in an electronic health record (EHR) system. 
Our hypothesis is that patient-management actions that are unusual with respect to past patients may be due to a potential error and that it is worthwhile to raise an alert if such a condition is encountered. We evaluate this hypothesis using data obtained from the electronic health records of 4,486 post-cardiac surgical patients. We base the evaluation on the opinions of a panel of experts. The results support that anomaly-based alerting can have reasonably low false alert rates and that stronger anomalies are correlated with higher alert rates.}, author = {Hauskrecht, Milos and Valko, Michal and Batal, Iyad and Clermont, Gilles and Visweswaran, Shyam and Cooper, Gregory F}, booktitle = {Annual American Medical Informatics Association Symposium}, keywords = {misovalko}, mendeley-tags = {misovalko}, title = {{Conditional outlier detection for clinical alerting}}, year = {2010} } @inproceedings{guestrin2001max-norm, author = {Guestrin, Carlos and Koller, Daphne and Parr, Ronald}, booktitle = {Proceedings of the 17th International Joint Conference on Artificial Intelligence}, pages = {673--682}, title = {{Max-Norm Projections for Factored MDPs}}, year = {2001} } @inproceedings{charikar1997incremental, author = {Charikar, Moses and Chekuri, Chandra and Feder, Tomas and Motwani, Rajeev}, booktitle = {Proceedings of the 29th Annual ACM Symposium on Theory of Computing}, pages = {626--635}, title = {{Incremental Clustering and Dynamic Information Retrieval}}, year = {1997} } @article{gyorfi12empirical, author = {Gy{\"{o}}rfi, L{\'{a}}szl{\'{o}} and Walk, Harro}, journal = {IEEE Transactions on Information Theory}, number = {10}, pages = {6320--6331}, title = {{Empirical Portfolio Selection Strategies With Proportional Transaction Costs}}, volume = {58}, year = {2012} } @book{sutton1998reinforcement, address = {Cambridge, MA}, author = {Sutton, Richard and Barto, Andrew}, publisher = {MIT Press}, title = {{Reinforcement Learning: An Introduction}}, url = 
{https://pdfs.semanticscholar.org/aa32/c33e7c832e76040edc85e8922423b1a1db77.pdf}, year = {1998} } @inproceedings{jebara2009graph, author = {Jebara, Tony and Wang, Jun and Chang, Shih-Fu}, booktitle = {Proceedings of ICML}, pages = {441--448}, title = {{Graph construction and b-matching for semi-supervised learning}}, year = {2009} } @misc{movielens, author = {Lam, Shyong and Herlocker, Jon}, howpublished = {http://www.grouplens.org/node/12}, title = {{MovieLens 1M Dataset}}, year = {2012} } @inproceedings{sammut92learning, author = {Sammut, Claude and Hurst, Scott and Kedzier, Dana and Michie, Donald}, pages = {385--393}, title = {{Learning to Fly}} } @inproceedings{chen2010scalable, author = {Chen, Wei and Wang, Chi and Wang, Yajun}, booktitle = {Knowledge Discovery and Data Mining}, title = {{Scalable influence maximization for prevalent viral marketing in large-scale social networks}}, year = {2010} } @inproceedings{Tarbouriech2019, abstract = {We introduce the active exploration problem in Markov decision processes (MDPs). Each state of the MDP is characterized by a random value and the learner should gather samples to estimate the mean value of each state as accurately as possible. Similarly to active exploration in multi-armed bandit (MAB), states may have different levels of noise, so that the higher the noise, the more samples are needed. As the noise level is initially unknown, we need to trade off the exploration of the environment to estimate the noise and the exploitation of these estimates to compute a policy maximizing the accuracy of the mean predictions. We introduce a novel learning algorithm to solve this problem showing that active exploration in MDPs may be significantly more difficult than in MAB. We also derive a heuristic procedure to mitigate the negative effect of slowly mixing policies. 
Finally, we validate our findings on simple numerical simulations.}, archivePrefix = {arXiv}, arxivId = {1902.11199}, author = {Tarbouriech, Jean and Lazaric, Alessandro}, booktitle = {International Conference on Artificial Intelligence and Statistics}, eprint = {1902.11199}, month = {feb}, title = {{Active Exploration in Markov Decision Processes}}, url = {http://arxiv.org/abs/1902.11199}, year = {2019} } @book{hastie2001elements, abstract = {During the past decade there has been an explosion in computation and information technology. With it has come vast amounts of data in a variety of fields such as medicine, biology, finance, and marketing. The challenge of understanding these data has led to the development of new tools in the field of statistics, and spawned new areas such as data mining, machine learning, and bioinformatics. Many of these tools have common underpinnings but are often expressed with different terminology. This book descibes theimprtant ideas in these areas ina common conceptual framework. While the approach is statistical, the emphasis is on concepts rather than mathematics. Many examples are given, with a liberal use of color graphics. It should be a vluable resource for statisticians and anyone interested in data mining in science or industry. The book's coverage is broad, from supervised learing (prediction) to unsupervised learning. The many topics include neural networks, support vector machines, classification trees and boosting-the first comprehensive treatment of this topic in any book. Trevor Hastie, Robert Tibshirani, and Jerome Friedman are professors of statistics at Stanford University. They are prominent researchers in this area: Hastie and Tibshirani developed generalized additive models and wrote a popular book of that title. Hastie wrote much of the statistical modeling software in S-PLUS and invented principal curves and surfaces. 
Tibshirani proposed the Lasso and is co-author of the very successful An Introduction to the Bootstrap. Friedman is the co-inventor of many data-mining tools including CART, MARS, and projection pursuit.}, author = {Hastie, T and Tibshirani, R and Friedman, J H}, howpublished = {Hardcover}, isbn = {0387952845}, keywords = {machine-learning,statistic}, month = {aug}, publisher = {Springer}, title = {{The Elements of Statistical Learning}}, year = {2001} } @inproceedings{2004advances, booktitle = {NIPS}, title = {{Advances in Neural Information Processing Systems 17 [Neural Information Processing Systems, NIPS 2004, December 13-18, 2004, Vancouver, British Columbia, Canada]}}, year = {2004} } @inproceedings{neu12ssp-trans, author = {Neu, Gergely and Gy{\"{o}}rgy, Andr{\'{a}}s and Szepesv{\'{a}}ri, Csaba}, pages = {805--813}, title = {{The adversarial stochastic shortest path problem with unknown transition probabilities}} } @article{isaac2009overrides, abstract = {BACKGROUND: Electronic prescribing systems with decision support may improve patient safety in ambulatory care by offering drug allergy and drug interaction alerts. However, preliminary studies show that clinicians override most of these alerts. METHODS: We performed a retrospective analysis of 233 537 medication safety alerts generated by 2872 clinicians in Massachusetts, New Jersey, and Pennsylvania who used a common electronic prescribing system from January 1, 2006, through September 30, 2006. We used multivariate techniques to examine factors associated with alert acceptance. RESULTS: A total of 6.6{\%} of electronic prescription attempts generated alerts. Clinicians accepted 9.2{\%} of drug interaction alerts and 23.0{\%} of allergy alerts. High-severity interactions accounted for most alerts (61.6{\%}); clinicians accepted high-severity alerts slightly more often than moderate- or low-severity interaction alerts (10.4{\%}, 7.3{\%}, and 7.1{\%}, respectively; P {\textless} .001). 
Clinicians accepted 2.2{\%} to 43.1{\%} of high-severity interaction alerts, depending on the classes of interacting medications. In multivariable analyses, we found no difference in alert acceptance among clinicians of different specialties (P = .16). Clinicians were less likely to accept a drug interaction alert if the patient had previously received the alerted medication (odds ratio, 0.03; 95{\%} confidence interval, 0.03-0.03). CONCLUSION: Clinicians override most medication alerts, suggesting that current medication safety alerts may be inadequate to protect patient safety.}, author = {Isaac, Thomas and Weissman, Joel S and Davis, Roger B and Massagli, Michael and Cyrulik, Adrienne and Sands, Daniel Z and Weingart, Saul N}, doi = {10.1001/archinternmed.2008.551}, institution = {Division of General Medicine and Primary Care, Beth Israel Deaconess Medical Center, 330 Brookline Ave., Boston, MA 02215, USA. tisaac@bidmc.harvard.edu}, journal = {Arch Intern Med}, keywords = {80 and over; Ambulatory Care; Drug Hypersensitivi,Adolescent; Adult; Adverse Drug Reaction Reporting,Computer-Assisted; Female; Humans; Male; Medical,prevention /{\&}/ control; Drug Interactions; Drug T,prevention /{\&}/ control; Medicine,statistics /{\&}/ numerical data; Middle Aged; Physi,statistics /{\&}/ numerical data; Retrospective Stud}, month = {feb}, number = {3}, pages = {305--311}, pmid = {19204222}, title = {{Overrides of medication alerts in ambulatory care.}}, url = {http://dx.doi.org/10.1001/archinternmed.2008.551}, volume = {169}, year = {2009} } @book{borwein2006caa, author = {Borwein, J M and Lewis, A S}, publisher = {Springer}, title = {{Convex Analysis and Nonlinear Optimization: Theory and Examples}}, year = {2006} } @inproceedings{crites1996improving, author = {Crites, Robert and Barto, Andrew}, booktitle = {Advances in Neural Information Processing Systems 8}, pages = {1017--1023}, title = {{Improving Elevator Performance Using Reinforcement Learning}}, year = {1996} } 
abstract = {We study Nystr{\"o}m type subsampling approaches to large scale kernel methods, and prove learning bounds in the statistical learning setting, where random sampling and high probability estimates are considered. In particular, we prove that these approaches can achieve optimal learning bounds, provided the subsampling level is suitably chosen. These results suggest a simple incremental variant of Nystr{\"o}m Kernel Regularized Least Squares,
Many problems that arise in specific instances---including the key problems of computing marginals and modes of probability distributions---are best studied in the general setting. Working with exponential family representations, and exploiting the conjugate duality between the cumulant generating function and the entropy for exponential families, we develop general variational representations of the problems of computing marginal probabilities and modes. We describe how a wide variety of known computational algorithms---including mean field methods and cluster variational techniques---can be understood in terms of approximations of these variational representations.
title = {{Efficient {SVM} training using low-rank kernel representations}},
We show that any such no regret algorithm, combined with additional reduction assumptions, must find a policy with good performance under the distribution of observations it induces in such sequential settings. We demonstrate that this new approach outperforms previous approaches on two challenging imitation learning problems and a benchmark sequence labeling problem.}, author = {Ross, Stephane and Gordon, Geoffrey J and Bagnell, J Andrew}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Ross, Gordon, Bagnell - 2010 - A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning.pdf:pdf}, journal = {AISTATS}, pages = {627--635}, title = {{A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning}}, url = {http://arxiv.org/abs/1011.0686}, volume = {15}, year = {2010} } @inproceedings{klimt2004introducing, abstract = {A large set of email messages, the Enron corpus, was made public during the legal investigation concerning the Enron corporation. This dataset, along with a thorough explanation of its origin, is available at http://www-2.cs.cmu.edu/{\~{}}enron/. This paper provides a brief introduction and analysis of the dataset. The raw Enron corpus contains 619,446 messages belonging to 158 users. We cleaned the corpus before this analysis by removing certain folders from each user, such as discussionthreads. These folders were present for most users, and did not appear to be used directly by the users, but rather were computer generated. Many, such as alldocuments, also contained large numbers of duplicate emails, which were already present in the users other folders. 
Our goal in this paper is to analyze the suitability of this corpus for exploring how to classify messages as organized by a human, so these folders would have likely been misleading.}, author = {Klimt, Bryan and Yang, Yiming}, booktitle = {Collaboration, Electronic messaging, Anti-Abuse and Spam Conference}, title = {{Introducing the Enron corpus}}, year = {2004} } @inproceedings{gramacy2003adaptive, author = {Gramacy, Robert and Warmuth, Manfred and Brandt, Scott and Ari, Ismail}, booktitle = {Advances in Neural Information Processing Systems 15}, pages = {1465--1472}, title = {{Adaptive Caching by Refetching}}, year = {2003} } @inproceedings{jegelka2011-fast-approx-sfm, author = {Jegelka, S and Lin, H and Bilmes, J A}, booktitle = {Adv. NIPS}, title = {{Fast Approximate Submodular Minimization}}, year = {2011} } @misc{doyle2000random, abstract = {A popular account of the connection between random walks and electric networks.}, annote = {* Fundamental matrix of the absorbing chain * Probabilistic interpretation of current and voltage}, author = {Doyle, Peter G and Snell, Laurie J}, keywords = {resistance}, month = {jan}, title = {{Random Walks and Electric Networks}}, url = {http://arxiv.org/abs/math/0001057}, year = {2000} } @article{Graham1979, abstract = {The theory of deterministic sequencing and scheduling has expanded rapidly during the past years. In this paper we survey the state of the art with respect to optimization and approximation algorithms and interpret these in terms of computational complexity theory. Special cases considered are single machine scheduling, identical, uniform and unrelated parallel machine scheduling, and open shop, flow shop and job shop scheduling. We indicate some problems for future research and include a selective bibliography. 
author = {Graham, R. L. and Lawler, E. L. and Lenstra, J. K. and Rinnooy Kan, A. H. G.},
This is obtained via a variant of length-squared sampling that we adapt to the kernel setting in a way that is of independent interest. Lastly, we provide empirical results illustrating our theory, and we discuss how this new notion of the statistical leverage of a data point captures in a fine way the difficulty of the original statistical learning problem.}, author = {Alaoui, Ahmed El and Mahoney, Michael W.}, booktitle = {Neural Information Processing Systems}, title = {{Fast randomized kernel methods with statistical guarantees}}, url = {https://papers.nips.cc/paper/5716-fast-randomized-kernel-ridge-regression-with-statistical-guarantees.pdf}, year = {2015} } @book{bertsekas1999nonlinear, address = {Belmont, MA}, author = {Bertsekas, Dimitri}, publisher = {Athena Scientific}, title = {{Nonlinear Programming}}, year = {1999} } @article{mannor2004sample, author = {Mannor, S and Tsitsiklis, J N}, journal = {Journal of Machine Learning Research}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {623--648}, title = {{The Sample Complexity of Exploration in the Multi-Armed Bandit Problem}}, volume = {5}, year = {2004} } @inproceedings{Taskar+al:EMNLP04, author = {Taskar, B and Klein, D and Collins, M and Koller, D and Manning, C}, booktitle = {Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP)}, pages = {1--8}, title = {{Max-Margin Parsing}}, year = {2004} } @article{kelner_spectral_2013, author = {Kelner, Jonathan A and Levin, Alex}, journal = {Theory of Computing Systems}, number = {2}, pages = {243--262}, title = {{Spectral sparsification in the semi-streaming setting}}, volume = {53}, year = {2013} } @article{yang2015defining, abstract = {Nodes in real-world networks organize into densely linked communities where edges appear with high concentration among the members of the community. 
Identifying such communities of nodes has proven to be a challenging task mainly due to a plethora of definitions of a community, intractability of algorithms, issues with evaluation and the lack of a reliable gold-standard ground-truth. In this paper we study a set of 230 large real-world social, collaboration and information networks where nodes explicitly state their group memberships. For example, in social networks nodes explicitly join various interest based social groups. We use such groups to define a reliable and robust notion of ground-truth communities. We then propose a methodology which allows us to compare and quantitatively evaluate how different structural definitions of network communities correspond to ground-truth communities. We choose 13 commonly used structural definitions of network communities and examine their sensitivity, robustness and performance in identifying the ground-truth. We show that the 13 structural definitions are heavily correlated and naturally group into four classes. We find that two of these definitions, Conductance and Triad-participation-ratio, consistently give the best performance in identifying ground-truth communities. We also investigate a task of detecting communities given a single seed node. We extend the local spectral clustering algorithm into a heuristic parameter-free community detection method that easily scales to networks with more than hundred million nodes. 
The canonical barrier can be characterized as the convex solution of the partial differential equation log det F'' = 2F that tends to infinity as the argument tends to the boundary of K.
eprint = {1703.05449v2},
In this paper, we introduce a novel algorithm, Rotting Adaptive Window UCB (RAW-UCB), that achieves near-optimal regret in both rotting rested and restless bandit, without any prior knowledge of the setting (rested or restless) and the type of non-stationarity (e.g., piece-wise constant, bounded variation). This is in striking contrast with previous negative results showing that no algorithm can achieve similar results as soon as rewards are allowed to increase. We confirm our theoretical findings on a number of synthetic and dataset-based experiments.}, author = {Seznec, Julien and M{\'{e}}nard, Pierre and Lazaric, Alessandro and Valko, Michal}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{A single algorithm for both restless and rested rotting bandits}}, year = {2020} } @phdthesis{zhu2005semi-supervised, address = {Pittsburgh, PA, USA}, annote = {AAI3179046}, author = {Zhu, Xiaojin}, isbn = {0-542-19059-1}, publisher = {Carnegie Mellon University}, school = {Carnegie Mellon University}, title = {{Semi-supervised learning with graphs}}, year = {2005} } @article{rohde2011estimation, abstract = {Suppose that we observe entries or, more generally, linear combinations of entries of an unknown {\$}m\backslashtimes T{\$}-matrix {\$}A{\$} corrupted by noise. We are particularly interested in the high-dimensional setting where the number {\$}mT{\$} of unknown entries can be much larger than the sample size {\$}N{\$}. Motivated by several applications, we consider estimation of matrix {\$}A{\$} under the assumption that it has small rank. This can be viewed as dimension reduction or sparsity assumption. In order to shrink toward a low-rank representation, we investigate penalized least squares estimators with a Schatten-{\$}p{\$} quasi-norm penalty term, {\$}p\backslashleq1{\$}. 
title = {{{PCFG} Models of Linguistic Tree Representations}}, url = {http://citeseer.ist.psu.edu/johnson98pcfg.html},
We propose D-TTTS, a new adaptive algorithm inspired by Thompson sampling, which dynamically balances between refining the estimate of the quality of hyper-parameter configurations previously explored and adding new hyper-parameter configurations to the pool of candidates. The algorithm is easy to implement and shows competitive performance compared to state-of-the-art algorithms for hyper-parameter tuning.}, author = {Shang, Xuedong and Kaufmann, Emilie and Valko, Michal}, booktitle = {Workshop on Automated Machine Learning at International Conference on Machine Learning}, title = {{A simple dynamic bandit algorithm for hyper-parameter tuning}}, year = {2019} } @book{durrett2010probability, author = {Durrett, Rick}, edition = {4}, isbn = {978-0-521-76539-8}, publisher = {Cambridge University Press}, series = {Cambridge Series in Statistical and Probabilistic Mathematics}, title = {{Probability: Theory and Examples}}, url = {https://services.math.duke.edu/{~}rtd/PTE/PTE5{\_}011119.pdf}, year = {2010} } @book{bertsimas1997introduction, author = {Bertsimas, Dimitris and Tsitsiklis, John}, publisher = {Athena Scientific}, title = {{Introduction to linear optimization}}, year = {1997} } @book{zhang2005schur, author = {Zhang, Fuzhen}, publisher = {Springer}, title = {{The Schur complement and its applications}}, volume = {4}, year = {2005} } @inproceedings{dann2019policy, author = {Dann, Christoph and Li, Lihong and Wei, Wei and Brunskill, Emma}, booktitle = {International Conference on Machine Learning}, title = {{Policy certificates: Towards accountable reinforcement learning}}, url = {https://arxiv.org/pdf/1811.03056.pdf}, year = {2019} } @article{tesauro1992practical, author = {Tesauro, Gerald}, journal = {Machine Learning}, number = {3-4}, pages = {257--277}, title = {{Practical Issues in Temporal Difference Learning}}, volume = {8}, year = {1992} } @inproceedings{ST10, author = {Sridharan, K and Tewari, A}, booktitle = {Proceedings of the 23rd Annual Conference on 
booktitle = {Advances in Neural Information Processing Systems},
author = {Wedel, Michel
doi = {10.1145/1080173.1080189},
@inproceedings{derezinski2019exact, abstract = {We study the complexity of sampling from a distribution over all index subsets of the set (1,...,n) with the probability of a subset S proportional to the determinant of the submatrix LS of some n x n p.s.d. matrix L, where LS corresponds to the entries of L indexed by S. Known as a determinantal point process, this distribution is used in machine learning to induce diversity in subset selection. In practice, we often wish to sample multiple subsets S with small expected size k = E[card(S)] that is much smaller than n from a very large matrix L, so it is important to minimize the preprocessing cost of the procedure (performed once) as well as the sampling cost (performed repeatedly). For this purpose, we propose a new algorithm which, given access to L, samples exactly from a determinantal point process while satisfying the following two properties: (1) its preprocessing cost is n x poly(k) (sublinear in the size of L) and (2) its sampling cost is poly(k) (independent of the size of L). Prior to our results, state-of-the-art exact samplers required O(n^3) preprocessing time and sampling time linear in n or dependent on the spectral properties of L. 
We also give a reduction which allows using our algorithm for sampling from cardinality constrained determinantal point processes with n x poly(k) time preprocessing.}, author = {Derezi{\'{n}}ski, Micha{\l} and Calandriello, Daniele and Valko, Michal}, booktitle = {Neural Information Processing Systems}, title = {{Exact sampling of determinantal point processes with sublinear time preprocessing}}, year = {2019} } @inproceedings{dekel13det, author = {Dekel, Ofer and Hazan, Elad}, booktitle = {Proceedings of the 30th International Conference on Machine Learning (ICML-13)}, editor = {Dasgupta, Sanjoy and McAllester, David}, number = {3}, pages = {675--683}, publisher = {JMLR Workshop and Conference Proceedings}, title = {{Better Rates for Any Adversarial Deterministic MDP}}, volume = {28}, year = {2013} } @inproceedings{klein03accurate, author = {Klein, D and Manning, C D}, booktitle = {ACL '03: Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics}, pages = {423--430}, title = {{Accurate Unlexicalized Parsing}}, year = {2003} } @inproceedings{calandriello2019gaussian, abstract = {Gaussian processes (GP) are a stochastic processes, used as Bayesian approach for the optimization of black-box functions. Despite their effectiveness in simple problems, GP-based algorithms hardly scale to high-dimensional functions, as their per-iteration time and space cost is at least quadratic in the number of dimensions d and iterations t. Given a set of A alternatives to choose from, the overall runtime O(t 3 A) is prohibitive. In this paper, we introduce BKB (budgeted kernelized bandit), a new approximate GP algorithm for optimization under bandit feedback that achieves near-optimal regret (and hence near-optimal convergence rate) with near-constant per-iteration complexity and remarkably no assumption on the input space or covariance of the GP. 
We combine a kernelized linear bandit algorithm (GP-UCB) with leverage score sampling as a randomized matrix sketching and prove that selecting inducing points based on their posterior variance gives an accurate low-rank approximation of the GP, preserving variance estimates and confidence intervals. As a consequence, BKB does not suffer from variance starvation, an important problem faced by many previous sparse GP approximations. Moreover, we show that our procedure selects at most O(d_eff) points, where d_eff is the effective dimension of the explored space, which is typically much smaller than both d and t. This greatly reduces the dimensionality of the problem, thus leading to an O(T A d_eff^2) runtime and O(A d_eff) space complexity.}, archivePrefix = {arXiv}, arxivId = {1903.05594}, author = {Calandriello, Daniele and Carratino, Luigi and Lazaric, Alessandro and Valko, Michal and Rosasco, Lorenzo}, booktitle = {Conference on Learning Theory}, eprint = {1903.05594}, month = {mar}, title = {{Gaussian process optimization with adaptive sketching: Scalable and no regret}}, url = {https://arxiv.org/abs/1903.05594}, year = {2019} } @inproceedings{calandriello2016analysis, abstract = {Large-scale kernel ridge regression (KRR) is limited by the need to store a large kernel matrix Kt. To avoid storing the entire matrix Kt, Nyström methods subsample a subset of columns of the kernel matrix, and efficiently find an approximate KRR solution on the reconstructed Kt. The chosen subsampling distribution in turn affects the statistical and computational tradeoffs. For KRR problems, [15, 1] show that a sampling distribution proportional to the ridge leverage scores (RLSs) provides strong reconstruction guarantees for Kt. While exact RLSs are as difficult to compute as a KRR solution, we may be able to approximate them well enough. In this paper, we study KRR problems in a sequential setting and introduce the INK-ESTIMATE algorithm, that incrementally computes the RLSs estimates. 
INK-ESTIMATE maintains a small sketch of Kt, that at each step is used to compute an intermediate estimate of the RLSs. First, our sketch update does not require access to previously seen columns, and therefore a single pass over the kernel matrix is sufficient. Second, the algorithm requires a fixed, small space budget to run dependent only on the effective dimension of the kernel matrix. Finally, our sketch provides strong approximation guarantees on the distance ∥Kt − K̃t∥2, and on the statistical risk of the approximate KRR solution at any time, because all our guarantees hold at any intermediate step.}, author = {Calandriello, Daniele and Lazaric, Alessandro and Valko, Michal}, booktitle = {Uncertainty in Artificial Intelligence}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Calandriello, Lazaric, Valko - 2016 - Analysis of Nystr{\"{o}}m method with sequential ridge leverage scores.pdf:pdf}, title = {{Analysis of Nystr{\"{o}}m method with sequential ridge leverage scores}}, year = {2016} } @inproceedings{farias2005exploration-exploitation, author = {de Farias, Daniela Pucci and Megiddo, Nimrod}, booktitle = {Advances in Neural Information Processing Systems 17}, pages = {409--416}, title = {{Exploration-Exploitation Tradeoffs for Experts Algorithms in Reactive Environments}}, year = {2005} } @inproceedings{feng2004dynamic, author = {Feng, Zhengzhu and Dearden, Richard and Meuleau, Nicolas and Washington, Richard}, booktitle = {Proceedings of the 20th Conference on Uncertainty in Artificial Intelligence}, pages = {154--161}, title = {{Dynamic Programming for Structured Continuous {Markov} Decision Problems}}, year = {2004} } @article{tropp2011freedman, author = {Tropp, Joel Aaron}, journal = {Electronic Communications in Probability}, pages = {262--270}, title = {{Freedman's inequality for matrix martingales}}, volume = {16}, year = {2011} } @article{hochbaum1995strongly, author = {Hochbaum, D S and Hong, S P}, journal = 
{Mathematical Programming}, number = {1}, pages = {269--309}, publisher = {Springer}, title = {{About strongly polynomial time algorithms for quadratic optimization over submodular constraints}}, volume = {69}, year = {1995} } @article{watts1998collective, abstract = {Networks of coupled dynamical systems have been used to model biological oscillators1–4 , Josephson junction arrays5,6 , excitable media7 , neural networks8–10 , spatial games11 , genetic control networks12 and many other self-organizing systems. Ordinarily, the connection topology is assumed to be either completely regular or completely random. Butmany biological, technological and social networks lie somewhere between these two extremes. Here we explore simple models of networks that can be tuned through this middle ground: regular networks ‘rewired' to intro- duce increasing amounts of disorder. We find that these systems can be highly clustered, like regular lattices, yet have small characteristic path lengths, like random graphs. We call them ‘small-world' networks, by analogy with the small-world phenomenon13,14 (popularly known as six degrees of separation15 ). The neural network of the worm Caenorhabditis elegans, the power grid of the western United States, and the collaboration graph of film actors are shown to be small-world networks. Models of dynamical systems with small-world coupling display enhanced signal-propagation speed, computational power, and synchronizability. In particular, infectious diseases spread more easily in small-world networks than in regular lattices.}, author = {Watts, Duncan J. and Strogatz, Steven H.}, journal = {Nature}, pages = {440--442}, title = {{Collective dynamics of small-world networks}}, volume = {393}, year = {1998} } @inproceedings{toby, author = {Hocking, T and Joulin, A and Bach, F and Vert, J.-P.}, booktitle = {Proc. 
ICML}, title = {{Clusterpath: an Algorithm for Clustering using Convex Fusion Penalties}}, year = {2011} } @inproceedings{gittens2013revisiting, abstract = {We reconsider randomized algorithms for the low-rank approximation of symmetric positive semi-definite (SPSD) matrices such as Laplacian and kernel matrices that arise in data analysis and machine learning applications. Our main results consist of an empirical evaluation of the performance quality and running time of sampling and projection methods on a diverse suite of SPSD matrices. Our results highlight complementary aspects of sampling versus projection methods based on leverage scores. We complement our empirical results with a suite of worst-case theoretical bounds for both random sampling and random projections methods. These bounds are qualitatively superior to existing bounds---e.g., improved additive-error bounds for the spectral and Frobenius norm errors and relative-error bounds for the trace norm error.}, archivePrefix = {arXiv}, arxivId = {1303.1849}, author = {Gittens, Alex and Mahoney, Michael W}, booktitle = {International Conference on Machine Learning}, eprint = {1303.1849}, keywords = {ized algorithms,kernel methods,low-rank approximation,numerical linear algebra,nystr,om approximation,random-}, title = {{Revisiting the Nystr{\"{o}}m method for improved large-scale machine learning}}, year = {2013} } @inproceedings{bubeck2011lipschitz, author = {Bubeck, S{\'{e}}bastien and Stoltz, Gilles and Yu, Jia Yuan}, booktitle = {Algorithmic Learning Theory}, title = {{Lipschitz Bandits without the Lipschitz Constant}}, url = {https://arxiv.org/pdf/1105.5041.pdf}, year = {2011} } @inproceedings{lee2003video-based, author = {Lee, Kuang-Chih and Ho, Jeffrey and Yang, Ming-Hsuan and Kriegman, David}, booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition}, pages = {313--320}, title = {{Video-Based Face Recognition Using Probabilistic Appearance 
Manifolds}}, year = {2003} } @article{Becker2009, author = {Becker, S and Bobin, J and Candes, E}, journal = {SIAM J. on Imaging Sciences}, number = {1}, pages = {1--39}, title = {{NESTA: A Fast and Accurate First-order Method for Sparse Recovery}}, volume = {4}, year = {2011} } @article{HW09, author = {Helmbold, D P and Warmuth, M}, journal = {Journal of Machine Learning Research}, pages = {1705--1736}, title = {{Learning Permutations with Exponential Weights}}, volume = {10}, year = {2009} } @inproceedings{globerson2006metric, address = {Cambridge, MA}, annote = {comps{\_}distance}, author = {Globerson, Amir and Roweis, Sam}, booktitle = {Advances in Neural Information Processing Systems 18}, editor = {Weiss, Y and Sch{\"{o}}lkopf, B and Platt, J}, pages = {451--458}, publisher = {MIT Press}, title = {{Metric Learning by Collapsing Classes}}, url = {http://books.nips.cc/papers/files/nips18/NIPS2005{\_}0388.pdf}, year = {2006} } @unpublished{Rak09, author = {Rakhlin, A}, title = {{Lecture Notes on Online Learning}}, year = {2009} } @inproceedings{carpentier2015simple, abstract = {We consider a stochastic bandit problem with infinitely many arms. In this setting, the learner has no chance of trying all the arms even once and has to dedicate its limited number of samples only to a certain number of arms. All previous algorithms for this setting were designed for minimizing the cumulative regret of the learner. In this paper, we propose an algorithm aiming at minimizing the simple regret. As in the cumulative regret setting of infinitely many armed bandits, the rate of the simple regret will depend on a parameter {\$}\backslashbeta{\$} characterizing the distribution of the near-optimal arms. We prove that depending on {\$}\backslashbeta{\$}, our algorithm is minimax optimal either up to a multiplicative constant or up to a {\$}\backslashlog(n){\$} factor. 
We also provide extensions to several important cases: when {\$}\backslashbeta{\$} is unknown, in a natural setting where the near-optimal arms have a small variance, and in the case of unknown time horizon.}, author = {Carpentier, Alexandra and Valko, Michal}, booktitle = {International Conference on Machine Learning}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Carpentier, Valko - 2015 - Simple regret for infinitely many armed bandits.pdf:pdf}, title = {{Simple regret for infinitely many armed bandits}}, year = {2015} } @inproceedings{WKH11, author = {Warmuth, M and Koolen, W and Helmbold, D}, booktitle = {In Proceedings of the 22nd International Conference on Algorithmic Learning Theory (ALT)}, title = {{Combining initial segments of lists}}, year = {2011} } @article{chandola2009anomaly, address = {New York, NY, USA}, author = {Chandola, Varun and Banerjee, Arindam and Kumar, Vipin}, doi = {http://doi.acm.org/10.1145/1541880.1541882}, issn = {0360-0300}, journal = {ACM Comput. Surv.}, keywords = {Anomaly detection,outlier detection}, month = {jul}, number = {3}, pages = {15:1--15:58}, publisher = {ACM}, title = {{Anomaly detection: A survey}}, volume = {41}, year = {2009} } @book{mccullagh1989generalized, address = {London}, author = {Mccullagh, P and Nelder, J A}, edition = {2nd}, keywords = {asymptotics,glm,logit,probit,social{\_}science{\_}statistics}, publisher = {Chapman and Hall}, title = {{Generalized Linear Models}}, year = {1989} } @article{bach2008cgl, author = {Bach, F}, journal = {Journal of Machine Learning Research}, pages = {1179--1225}, publisher = {MIT Press Cambridge, MA, USA}, title = {{Consistency of the group {\{}L{\}}asso and multiple kernel learning}}, volume = {9}, year = {2008} } @inproceedings{mary2015bandits, abstract = {This paper addresses the on-line recommendation problem facing new users and new items; we assume that no information is available neither about users, nor about the items. 
The only source of information is a set of ratings given by users to some items. By on-line, we mean that the set of users, and the set of items, and the set of ratings is evolving along time and that at any moment, the recommendation system has to select items to recommend based on the currently available information, that is basically the sequence of past events. We also mean that each user comes with her preferences which may evolve along short and longer scales of time; so we have to continuously update their preferences. When the set of ratings is the only available source of information , the traditional approach is matrix factorization. In a decision making under uncertainty setting, actions should be selected to balance exploration with exploitation; this is best modeled as a bandit problem. Matrix factors provide a latent representation of users and items. These representations may then be used as contextual information by the bandit algorithm to select items. This last point is exactly the originality of this paper: the combination of matrix factorization and bandit algorithms to solve the on-line recommendation problem. Our work is driven by considering the recommendation problem as a feedback controlled loop. This leads to interactions between the representation learning, and the recommendation policy.}, author = {Mary, J{\'{e}}r{\'{e}}mie and Gaudel, Romaric and Preux, Philippe}, booktitle = {First International Workshop on Machine Learning, Optimization, and Big Data}, keywords = {Collaborative Filtering,Matrix Factorization,Multi-Armed Bandtis,Recommender Systems,contextual Bandits,sequential Recommender Systems}, title = {{Bandits and recommender systems}}, year = {2015} } @incollection{oki2012glpk, abstract = {The GLPK (GNU Linear Programming Kit) package is intended for solving large-scale linear programming (LP), mixed integer programming (MIP), and other related problems. 
It is a set of routines written in ANSI C and organized in the form of a callable library.}, author = {Oki, Eiji}, booktitle = {Linear Programming and Algorithms for Communication Networks - A Practical Guide to Network Design, Control, and Management}, title = {{GNU Linear Programming Kit, Version 4.61}}, url = {http://www.gnu.org/software/glpk/}, year = {2012} } @article{brodley1999identifying, author = {Brodley, Carla E and Friedl, Mark A}, journal = {J. Artif. Intell. Res. (JAIR)}, pages = {131--167}, title = {{Identifying Mislabeled Training Data}}, volume = {11}, year = {1999} } @techreport{cohen2015ridge, abstract = {Often used as importance sampling probabilities, leverage scores have become indispensable in randomized algorithms for linear algebra, optimization, graph theory, and machine learning. A major body of work seeks to adapt these scores to low-rank approximation problems. However, existing "low-rank leverage scores" can be difficult to compute, often work for just a single application, and are sensitive to matrix perturbations. We show how to avoid these issues by exploiting connections between low-rank approximation and regularization. Specifically, we employ ridge leverage scores, which are simply standard leverage scores computed with respect to an {\$}\backslashell{\_}2{\$} regularized input. Importance sampling by these scores gives the first unified solution to two of the most important low-rank sampling problems: {\$}(1+\backslashepsilon){\$} error column subset selection and {\$}(1+\backslashepsilon){\$} error projection-cost preservation. Moreover, ridge leverage scores satisfy a key monotonicity property that does not hold for any prior low-rank leverage scores. Their resulting robustness leads to two sought-after results in randomized linear algebra. 1) We give the first input-sparsity time low-rank approximation algorithm based on iterative column sampling, resolving an open question posed in [LMP13], [CLM+15], and [AM15]. 
2) We give the first single-pass streaming column subset selection algorithm whose real-number space complexity has no dependence on stream length.}, archivePrefix = {arXiv}, arxivId = {1511.07263}, author = {Cohen, Michael B. and Musco, Cameron and Musco, Christopher}, eprint = {1511.07263}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Cohen, Musco, Musco - 2015 - Ridge leverage scores for low-rank approximation.pdf:pdf}, month = {nov}, title = {{Ridge leverage scores for low-rank approximation}}, url = {http://arxiv.org/abs/1511.07263}, year = {2015} } @inproceedings{jonsson2020planning, abstract = {We propose MDP-GapE, a new trajectory-based Monte-Carlo Tree Search algorithm for planning in a Markov Decision Process in which transitions have a finite support. We prove an upper bound on the number of calls to the generative models needed for MDP-GapE to identify a near-optimal action with high probability. This problem-dependent sample complexity result is expressed in terms of the sub-optimality gaps of the state-action pairs that are visited during exploration. Our experiments reveal that MDP-GapE is also effective in practice, in contrast with other algorithms with sample complexity guarantees in the fixed-confidence setting, that are mostly theoretical.}, author = {Jonsson, Anders and Kaufmann, Emilie and M{\'{e}}nard, Pierre and Domingues, Omar Darwiche and Leurent, Edouard and Valko, Michal}, booktitle = {Neural Information Processing Systems}, title = {{Planning in markov decision processes with gap-dependent sample complexity}}, year = {2020} } @inproceedings{LaurentGuillaumeGroupLasso, author = {Jacob, L and Obozinski, G and Vert, J.-P.}, booktitle = {Proc. 
ICML}, title = {{Group {L}asso with overlaps and graph {L}asso}}, year = {2009} } @inproceedings{RaSriTe11, author = {Rakhlin, A and Sridharan, K and Tewari, A}, booktitle = {Neural Information Processing Systems}, title = {{Online Learning: Stochastic and Constrained Adversaries}}, year = {2011} } @techreport{wen2016influence, abstract = {We study a stochastic online problem of learning to influence in a social network with semi-bandit feedback, individual observations of how influenced users influence others. Our problem combines challenges of partial monitoring, because the learning agent only observes the influenced portion of the network, and combinatorial bandits, because the cardinality of the feasible set is exponential in the maximum number of influencers. We propose a computationally efficient UCB-like algorithm for solving our problem, IMLinUCB, and analyze it on forests. Our regret bounds are polynomial in all quantities of interest; reflect the structure of the network; and do not depend on inherently large quantities, such as the reciprocal of the minimum probability of being influenced and the cardinality of the action set. To the best of our knowledge, these are the first such results. IMLinUCB permits linear generalization and therefore is suitable for large-scale problems. We evaluate IMLinUCB on several synthetic problems and observe that the regret of IMLinUCB scales as suggested by our upper bounds. 
A special form of our problem can be viewed as a linear bandit and we match the regret bounds of LinUCB in this case.}, archivePrefix = {arXiv}, arxivId = {1605.06593}, author = {Wen, Zheng and Kveton, Branislav and Valko, Michal}, eprint = {1605.06593}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Wen, Kveton, Valko - 2016 - Influence maximization with semi-bandit feedback.pdf:pdf}, title = {{Influence maximization with semi-bandit feedback}}, year = {2016} } @article{evans2013static, author = {Evans, Thomas P Ol{\'{e}}ron and Bishop, Steven R}, journal = {European Journal of Operational Research}, number = {3}, pages = {667--689}, title = {{Static search games played over graphs and general metric spaces}}, url = {http://discovery.ucl.ac.uk/1414598/1/Evans{\_}European Journal of Operational Research.pdf}, volume = {231}, year = {2013} } @book{berry1985bandit, author = {Berry, Donald A and Fristedt, Bert}, booktitle = {Journal of the Operational Research Society}, number = {8}, pages = {viii, 275}, publisher = {Chapman and Hall}, series = {Monographs on statistics and applied probability}, title = {{Bandit Problems: Sequential Allocation of Experiments}}, volume = {38}, year = {1985} } @inproceedings{Petrov-Klein-2007:AAAI, annote = {(Nectar Track)}, author = {Petrov, Slav and Klein, Dan}, pages = {1663--1666}, title = {{Learning and Inference for Hierarchically Split {\{}PCFG{\}}s}}, url = {www.eecs.berkeley.edu/{~}petrov/data/aaai2007.pdf} } @article{helmbold2000adaptive, author = {Helmbold, David and Long, Darrell and Sconyers, Tracey and Sherrod, Bruce}, journal = {Mobile Networks and Applications}, number = {4}, pages = {285--297}, title = {{Adaptive Disk Spin-Down for Mobile Computers}}, volume = {5}, year = {2000} } @article{joachims1999making, abstract = {Training a support vector machine (SVM) leads to a quadratic optimization problem with bound constraints and one linear equality constraint. 
Despite the fact that this type of problem is well understood, there are many issues to be considered in designing an SVM learner. In particular, for large learning tasks with many training examples, o -the-shelf optimization techniques for general quadratic programs quickly become intractable in their memory and time requirements. S V Mlight1 is an implementation of an SVM learner which addresses the problem of large tasks. This chapter presents algorithmic and computational results developed for S V MlightV2.0, which make large-scale SVM training more practical. The results give guidelines for the application of SVMs to large domains}, author = {Joachims, Thorsten}, chapter = {11}, doi = {10.1109/ICEMI.2009.5274151}, editor = {Sch{\"{o}}lkopf, B and Burges, C and Smola, A}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Joachims - 1999 - Making large-Scale SVM Learning Practical.pdf:pdf}, institution = {University of Dortmund}, isbn = {9781424438631}, journal = {Advances in Kernel Methods Support Vector Learning}, pages = {169--184}, publisher = {MIT Press}, series = {Advances in Kernel Methods - Support Vector Learning}, title = {{Making large-Scale SVM Learning Practical}}, url = {http://www-ai.cs.uni-dortmund.de/DOKUMENTE/joachims{\_}99a.ps.gz}, year = {1999} } @inproceedings{littlestone1991on-line, author = {Littlestone, Nick and Long, Philip and Warmuth, Manfred}, booktitle = {Proceedings of the 23rd Annual ACM Symposium on Theory of Computing}, pages = {465--475}, title = {{On-Line Learning of Linear Functions}}, year = {1991} } @book{alpern2006theory, author = {Alpern, Steve and Gal, Shmuel}, publisher = {Springer}, title = {{The theory of search games and rendezvous}}, url = {https://www.springer.com/fr/book/9780792374688}, year = {2006} } @inproceedings{lim2012autonomous, abstract = {While intrinsically motivated learning agents hold considerable promise to overcome limitations of more supervised learning systems, 
quantitative evaluation and theoretical analysis of such agents are difficult. We propose to consider a restricted setting for autonomous learning where systematic evaluation of learning performance is possible. In this setting the agent needs to learn to navigate in a Markov Decision Process where extrinsic rewards are not present or are ignored. We present a learning algorithm for this scenario and evaluate it by the amount of exploration it uses to learn the environment. {\textcopyright} 2012 S.H. Lim {\&} P. Auer.}, author = {Lim, Shiau Hong and Auer, Peter}, booktitle = {Conference on Learning Theory}, issn = {15337928}, keywords = {Autonomous exploration,Computational learning theory,Optimism in the face of uncertainty,Reinforcement learning}, title = {{Autonomous exploration for navigating in MDPs}}, year = {2012} } @article{tarbouriech2020provably, abstract = {A common assumption in reinforcement learning (RL) is to have access to a generative model (i.e., a simulator of the environment), which allows to generate samples from any desired state-action pair. Nonetheless, in many settings a generative model may not be available and an adaptive exploration strategy is needed to efficiently collect samples from an unknown environment by direct interaction. In this paper, we study the scenario where an algorithm based on the generative model assumption defines the (possibly time-varying) amount of samples b(s,a) required at each state-action pair (s,a) and an exploration strategy has to learn how to generate b(s,a) samples as fast as possible. Building on recent results for regret minimization in the stochastic shortest path (SSP) setting (Cohen et al., 2020; Tarbouriech et al., 2020), we derive an algorithm that requires O(BD+D3/2S2A) time steps to collect the B=∑s,ab(s,a) desired samples, in any unknown and communicating MDP with S states, A actions and diameter D. 
Leveraging the generality of our strategy, we readily apply it to a variety of existing settings (e.g., model estimation, pure exploration in MDPs) for which we obtain improved sample-complexity guarantees, and to a set of new problems such as best-state identification and sparse reward discovery.}, archivePrefix = {arXiv}, arxivId = {2007.06437}, author = {Tarbouriech, Jean and Pirotta, Matteo and Valko, Michal and Lazaric, Alessandro}, eprint = {2007.06437}, journal = {Arxiv preprint arXiv:2007.06437}, month = {jul}, title = {{A provably efficient sample collection strategy for reinforcement learning}}, url = {https://arxiv.org/abs/2007.06437}, year = {2020} } @inproceedings{koutis2010approaching, author = {Koutis, Ioannis and Miller, Gary L. and Peng, Richard}, booktitle = {2010 IEEE 51st Annual Symposium on Foundations of Computer Science}, doi = {10.1109/FOCS.2010.29}, isbn = {978-1-4244-8525-3}, issn = {0272-5428}, language = {English}, month = {oct}, pages = {235--244}, publisher = {IEEE}, title = {{Approaching Optimality for Solving SDD Linear Systems}}, url = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=5671167}, year = {2010} } @inproceedings{valko2008conditional, abstract = {Anomaly detection methods can be very useful in identifying unusual or interesting patterns in data. A recently proposed conditional anomaly detection framework extends anomaly detection to the problem of identifying anomalous patterns on a subset of attributes in the data. The anomaly always depends (is conditioned) on the value of remaining attributes. The work presented in this paper focuses on instance-based methods for detecting conditional anomalies. The methods rely on the distance metric to identify examples in the dataset that are most critical for detecting the anomaly. We investigate various metrics and metric learning methods to optimize the performance of the instance-based anomaly detection methods. 
We show the benefits of the instance-based methods on two real-world detection problems: detection of unusual admission decisions for patients with the community-acquired pneumonia and detection of unusual orders of an HPF4 test that is used to confirm Heparin induced thrombocytopenia - a life-threatening condition caused by the Heparin therapy.}, author = {Valko, Michal and Cooper, Gregory F and Seybert, Amy and Visweswaran, Shyam and Saul, Melissa and Hauskrecht, Milos}, booktitle = {Workshop on Machine Learning in Health Care Applications in The 25th International Conference on Machine Learning}, keywords = {misovalko}, mendeley-tags = {misovalko}, title = {{Conditional anomaly detection methods for patient-management alert systems}}, year = {2008} } @inproceedings{konda2000actor-critic, author = {Konda, Vijay and Tsitsiklis, John}, booktitle = {Advances in Neural Information Processing Systems 12}, pages = {1008--1014}, title = {{Actor-Critic Algorithms}}, year = {2000} } @inproceedings{bach2013sharp, abstract = {We consider supervised learning problems within the positive-definite kernel framework, such as kernel ridge regression, kernel logistic regression or the support vector machine. With kernels leading to infinite-dimensional feature spaces, a common practical limiting difficulty is the necessity of computing the kernel matrix, which most frequently leads to algorithms with running time at least quadratic in the number of observations n, i.e., O(n{\^{}}2). Low-rank approximations of the kernel matrix are often considered as they allow the reduction of running time complexities to O(p{\^{}}2 n), where p is the rank of the approximation. The practicality of such methods thus depends on the required rank p. 
In this paper, we show that in the context of kernel ridge regression, for approximations based on a random subset of columns of the original kernel matrix, the rank p may be chosen to be linear in the degrees of freedom associated with the problem, a quantity which is classically used in the statistical analysis of such methods, and is often seen as the implicit number of parameters of non-parametric estimators. This result enables simple algorithms that have sub-quadratic running time complexity, but provably exhibit the same predictive performance than existing algorithms, for any given problem instance, and not only for worst-case situations.}, author = {Bach, Francis}, booktitle = {Conference on Learning Theory}, title = {{Sharp analysis of low-rank kernel matrix approximations}}, year = {2013} } @book{wahba1990spline, author = {Wahba, Grace}, doi = {10.1137/1.9781611970128}, isbn = {978-0-89871-244-5}, month = {jan}, publisher = {Society for Industrial and Applied Mathematics}, title = {{Spline models for observational data}}, url = {http://epubs.siam.org/doi/book/10.1137/1.9781611970128}, year = {1990} } @incollection{howard1984influence, address = {Menlo Park, CA}, author = {Howard, Ronald and Matheson, James}, booktitle = {Readings on the Principles and Applications of Decision Analysis}, pages = {719--762}, publisher = {Strategic Decisions Group}, title = {{Influence Diagrams}}, volume = {2}, year = {1984} } @inproceedings{bubeck2015entropic, abstract = {We prove that the Cram$\backslash$'er transform of the uniform measure on a convex body in {\$}\backslashmathbb{\{}R{\}}{\^{}}n{\$} is a {\$}(1+o(1)) n{\$}-self-concordant barrier, improving a seminal result of Nesterov and Nemirovski. This gives the first explicit construction of a universal barrier for convex bodies with optimal self-concordance parameter. 
The proof is based on basic geometry of log-concave distributions, and elementary duality in exponential families.}, author = {Bubeck, S{\'{e}}bastien and Eldan, Ronen}, booktitle = {Conference on Learning Theory}, title = {{The entropic barrier: A simple and optimal universal self-concordant barrier}}, year = {2015} } @article{smola2004tutorial, author = {Smola, Alex and Sch{\"{o}}lkopf, Bernhard}, journal = {Statistics and Computing}, number = {3}, pages = {199--222}, title = {{A Tutorial on Support Vector Regression}}, volume = {14}, year = {2004} } @inproceedings{even-dar02pacbounds, author = {Even-dar, Eyal and Mannor, Shie and Mansour, Yishay}, booktitle = {In Fifteenth Annual Conference on Computational Learning Theory (COLT)}, pages = {255--270}, title = {{{\{}PAC{\}} bounds for multi-armed bandit and {\{}M{\}}arkov decision processes}}, year = {2002} } @inproceedings{rousseau2013graph, author = {Rousseau, Fran{\c{c}}ois and Vazirgiannis, Michalis}, booktitle = {Proceedings of the 22nd ACM international conference on Conference on information {\&} knowledge management}, organization = {ACM}, pages = {59--68}, title = {{Graph-of-word and TW-IDF: new approach to ad hoc IR}}, year = {2013} } @inproceedings{mohri2014optimal, author = {Mohri, Mehryar and Munoz, Andres}, booktitle = {Neural Information Processing Systems}, title = {{Optimal regret minimization in posted-price auctions with strategic buyers}}, url = {https://papers.nips.cc/paper/5438-optimal-regret-minimization-in-posted-price-auctions-with-strategic-buyers.pdf}, year = {2014} } @techreport{prisadnikov2014exploration, author = {Prisadnikov, Nedyalko}, doi = {10.3929/ethz-a-010211630}, institution = {Master Thesis, ETH-Z{\"{u}}rich, Department of Computer Science}, title = {{Exploration-exploitation trade-offs via probabilistic matrix factorization}}, year = {2014} } @inproceedings{chowdhury2019online, abstract = {We consider online learning for minimizing regret in unknown, episodic Markov 
decision processes (MDPs) with continuous states and actions. We develop variants of the UCRL and posterior sampling algorithms that employ nonparametric Gaussian process priors to generalize across the state and action spaces. When the transition and reward functions of the true MDP are members of the associated Reproducing Kernel Hilbert Spaces of functions induced by symmetric psd kernels (frequentist setting), we show that the algorithms enjoy sublinear regret bounds. The bounds are in terms of explicit structural parameters of the kernels, namely a novel generalization of the information gain metric from kernelized bandit, and highlight the influence of transition and reward function structure on the learning performance. Our results are applicable to multidimensional state and action spaces with composite kernel structures, and generalize results from the literature on kernelized bandits, and the adaptive control of parametric linear dynamical systems with quadratic costs.}, archivePrefix = {arXiv}, arxivId = {1805.08052}, author = {Chowdhury, Sayak Ray and Gopalan, Aditya}, booktitle = {International Conference on Artificial Intelligence and Statistics}, eprint = {1805.08052}, month = {may}, title = {{Online learning in kernelized Markov decision processes}}, url = {http://arxiv.org/abs/1805.08052}, year = {2019} } @misc{graclus, author = {Graclus}, publisher = {University of Texas}, title = {{Graclus}}, url = {http://www.cs.utexas.edu/users/dml/Software/graclus.html}, year = {2013} } @article{zheng2000lazy, address = {Hingham, MA, USA}, annote = {comps{\_}models}, author = {Zheng, Zijian and Webb, Geoffrey I}, doi = {http://dx.doi.org/10.1023/A:1007613203719}, issn = {0885-6125}, journal = {Mach. 
Learn.}, number = {1}, pages = {53--84}, publisher = {Kluwer Academic Publishers}, title = {{Lazy Learning of Bayesian Rules}}, volume = {41}, year = {2000} } @techreport{asadi2016new, abstract = {A softmax operator applied to a set of values acts somewhat like the maximization function and somewhat like an average. In sequential decision making, softmax is often used in settings where it is necessary to maximize utility but also to hedge against problems that arise from putting all of one's weight behind a single maximum utility decision. The Boltzmann softmax operator is the most commonly used softmax operator in this setting, but we show that this operator is prone to misbehavior. In this work, we study an alternative softmax operator that, among other properties, is both a non-expansion (ensuring convergent behavior in learning and planning) and differentiable (making it possible to improve decisions via gradient descent methods). We provide proofs of these properties and present empirical comparisons between various softmax operators.}, archivePrefix = {arXiv}, arxivId = {1612.05628}, author = {Asadi, Kavosh and Littman, Michael L.}, eprint = {1612.05628}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Asadi, Littman - 2016 - A new softmax operator for reinforcement learning.pdf:pdf}, title = {{A new softmax operator for reinforcement learning}}, url = {http://arxiv.org/abs/1612.05628}, year = {2016} } @article{arandjelovic2009methodology, author = {Arandjelovic, Ognjen and Cipolla, Roberto}, journal = {Computer Vision and Image Understanding}, number = {2}, pages = {159--171}, title = {{A Methodology for Rapid Illumination-Invariant Face Recognition using Image Processing Filters}}, volume = {113}, year = {2009} } @inproceedings{herbster1995tracking, author = {Herbster, Mark and Warmuth, Manfred}, booktitle = {Proceedings of the 12th International Conference on Machine Learning}, pages = {286--294}, title = {{Tracking the Best 
Expert}}, year = {1995} } @inproceedings{boularias2011model, abstract = {We consider the problem of imitation learning where the examples, demonstrated by an expert, cover only a small part of a large state space. Inverse Reinforcement Learning (IRL) provides an efficient tool for generalizing the demonstration, based on the assumption that the expert is optimally acting in a Markov Decision Process (MDP). Past work on IRL requires that an accurate model of the underlying MDP is known. However, this requirement can hardly be satisfied in practice, as learning a model of a dynamical system with a large, or continuous, state space is a challenging task. In this paper, we propose a model-free IRL algorithm, where the relative entropy between the empirical distribution of the trajectories under a uniform policy and their distribution under the learned policy is minimized by stochastic gradient descent. We compare this new approach to well-known IRL algorithms using approximate MDP models. Empirical results on simulated car racing, gridworld and ball-in-a-cup problems show that our approach is able to learn good policies from a small number of demonstrations.}, author = {Boularias, Abdeslam and Kober, Jens and Peters, Jan}, booktitle = {Proceedings of Fourteenth International Conference on Artificial Intelligence and Statistics}, keywords = {learning,statistics {\&} optimisation}, pages = {182--189}, title = {{Model-free inverse reinforcement learning}}, url = {http://eprints.pascal-network.org/archive/00008041/}, year = {2011} } @article{ShMe99, author = {Shamir, G I and Merhav, N}, journal = {IEEE Transactions on Information Theory}, pages = {1498--1519}, title = {{Low-complexity sequential lossless coding for piecewise-stationary memoryless sources}}, volume = {IT-45}, year = {1999} } @book{soille, author = {Soille, P}, publisher = {Springer}, title = {{Morphological Image Analysis: Principles and Applications}}, year = {2003} } @techreport{Nes11, author = {Nesterov, 
Y}, institution = {Universit{\'{e}} catholique de Louvain, Center for Operations Research and Econometrics (CORE)}, title = {{Random gradient-free minimization of convex functions}}, type = {CORE Discussion Papers}, year = {2011} } @inproceedings{ding2013multi-armed, abstract = {We study the multi-armed bandit problems with budget constraint and variable costs (MAB-BV). In this setting, pulling an arm will receive a random reward together with a random cost, and the objective of an algorithm is to pull a sequence of arms in order to maximize the expected total reward with the costs of pulling those arms complying with a budget constraint. This new setting models many Internet applications (e.g., ad exchange, sponsored search, and cloud computing) in a more accurate manner than previous settings where the pulling of arms is either costless or with a fixed cost. We propose two UCB based algorithms for the new setting. The first algorithm needs prior knowledge about the lower bound of the expected costs when computing the exploration term. The second algorithm eliminates this need by estimating the minimal expected costs from empirical observations, and therefore can be applied to more real-world applications where prior knowledge is not available. We prove that both algorithms have nice learning abilities, with regret bounds of O(lnB). Furthermore, we show that when applying our proposed algorithms to a previous setting with fixed costs (which can be regarded as our special case), one can improve the previously obtained regret bound. 
Our simulation results on real-time bidding in ad exchange verify the effectiveness of the algorithms and are consistent with our theoretical analysis}, author = {Ding, Wenkui and Qin, Tao and Zhang, Xu-dong and Liu, Tie-yan}, booktitle = {Proceedings of the Twenty-Seventh AAAI Conference on Artificial Intelligence}, isbn = {9781577356158}, title = {{Multi-Armed Bandit with Budget Constraint and Variable Costs}}, url = {http://dblp.uni-trier.de/db/conf/aaai/aaai2013.html{\#}DingQZL13}, year = {2013} } @book{howard1960dynamic, address = {Cambridge, MA}, author = {Howard, Ronald}, publisher = {MIT Press}, title = {{Dynamic Programming and {\{}Markov{\}} Processes}}, year = {1960} } @inproceedings{kocak2014efficient, abstract = {We consider online learning problems under a partial observability model capturing situations where the information conveyed to the learner is between full information and bandit feedback. In the simplest variant, we assume that in addition to its own loss, the learner also gets to observe losses of some other actions. The revealed losses depend on the learner's action and a directed observation system chosen by the environment. For this setting, we propose the first algorithm that enjoys near-optimal regret guarantees without having to know the observation system before selecting its actions. Along similar lines, we also define a new partial information setting that models online combinatorial optimization problems where the feedback received by the learner is between semi-bandit and full feedback. As the predictions of our first algorithm cannot be always computed efficiently in this setting, we propose another algorithm with similar properties and with the benefit of always being computationally efficient, at the price of a slightly more complicated tuning mechanism. 
Both algorithms rely on a novel exploration strategy called implicit exploration, which is shown to be more efficient both computationally and information-theoretically than previously studied exploration strategies for the problem.}, author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Neu, Gergely and Valko, Michal and Munos, R{\'{e}}mi}, booktitle = {Neural Information Processing Systems}, title = {{Efficient learning by implicit exploration in bandit problems with side observations}}, url = {https://papers.nips.cc/paper/5462-efficient-learning-by-implicit-exploration-in-bandit-problems-with-side-observations.pdf}, year = {2014} } @inproceedings{talebi2018variance, author = {Talebi, Mohammad Sadegh and Maillard, Odalric-Ambrym}, booktitle = {Algorithmic Learning Theory}, title = {{Variance-aware regret bounds for undiscounted reinforcement learning in MDPs}}, url = {https://arxiv.org/pdf/1803.01626.pdf}, year = {2018} } @article{Wright2009, author = {Wright, S J and Nowak, R D and Figueiredo, M A T}, journal = {IEEE Transactions on Signal Processing}, number = {7}, pages = {2479--2493}, title = {{Sparse reconstruction by separable approximation}}, volume = {57}, year = {2009} } @inproceedings{bach2010structured, author = {Bach, F}, booktitle = {Adv. 
NIPS}, title = {{Structured sparsity-inducing norms through submodular functions}}, year = {2010} } @inproceedings{devroye2013random, author = {Devroye, Luc and Lugosi, G{\'{a}}bor and Neu, Gergely}, booktitle = {Conference on Learning Theory}, title = {{Prediction by random-walk perturbation}}, year = {2013} } @article{audibert2014regret, author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien and Lugosi, G{\'{a}}bor}, journal = {Mathematics of Operations Research}, pages = {31--45}, title = {{Regret in online combinatorial optimization}}, volume = {39}, year = {2014} } @inproceedings{weinberger2008fast, address = {New York, NY, USA}, annote = {comps{\_}distance}, author = {Weinberger, Kilian Q and Saul, Lawrence K}, booktitle = {ICML '08: Proceedings of the 25th international conference on Machine learning}, doi = {http://doi.acm.org/10.1145/1390156.1390302}, isbn = {978-1-60558-205-4}, pages = {1160--1167}, publisher = {ACM}, title = {{Fast solvers and efficient implementations for distance metric learning}}, year = {2008} } @article{gautier2019dppy, abstract = {Determinantal point processes (DPPs) are specific probability distributions over clouds of points that are used as models and computational tools across physics, probability, statistics, and more recently machine learning. Sampling from DPPs is a challenge and therefore we present DPPy, a Python toolbox that gathers known exact and approximate sampling algorithms. The project is hosted on GitHub and equipped with an extensive documentation. 
This documentation takes the form of a short survey of DPPs and relates each mathematical property with DPPy objects.}, author = {Gautier, Guillaume and Bardenet, R{\'{e}}mi and Valko, Michal}, journal = {Journal of Machine Learning Research}, title = {{DPPy: Sampling determinantal point processes with Python}}, year = {2019} } @book{bertsekas1995dynamic, address = {Belmont, MA}, author = {Bertsekas, Dimitri}, publisher = {Athena Scientific}, title = {{Dynamic Programming and Optimal Control}}, year = {1995} } @inproceedings{matsuzaki05latent, address = {Morristown, NJ, USA}, author = {Matsuzaki, Takuya and Miyao, Yusuke and Tsujii, Jun'ichi}, booktitle = {ACL '05: Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics}, doi = {10.3115/1219840.1219850}, pages = {75--82}, publisher = {Association for Computational Linguistics}, title = {{Probabilistic {\{}CFG{\}} with latent annotations}}, year = {2005} } @phdthesis{sondik1971optimal, author = {Sondik, Edward}, school = {Stanford University}, title = {{The Optimal Control of Partially Observable {\{}Markov{\}} Decision Processes}}, year = {1971} } @inproceedings{krause2005near, author = {Krause, A and Guestrin, C}, booktitle = {Proc. UAI}, title = {{Near-optimal nonmyopic value of information in graphical models}}, year = {2005} } @article{bickel_lasso_dantzig, author = {Bickel, P and Ritov, Y and Tsybakov, A}, journal = {Annals of Statistics}, number = {4}, pages = {1705--1732}, title = {{Simultaneous analysis of {\{}L{\}}asso and {\{}D{\}}antzig selector}}, volume = {37}, year = {2009} } @article{Rapaport2008, author = {Rapaport, F and Barillot, E and Vert, J.-P.}, journal = {Bioinformatics}, month = jul, number = {13}, pages = {i375--i382}, title = {{Classification of array{\{}CGH{\}} data using fused {\{}SVM{\}}}}, volume = {24}, year = {2008} } @inproceedings{seznec2019rotting, abstract = {In bandits, arms' distributions are stationary. 
This is often violated in practice, where rewards change over time. In applications as recommendation systems, online advertising, and crowdsourcing, the changes may be triggered by the pulls, so that the arms' rewards change as a function of the number of pulls. In this paper, we consider the specific case of non-parametric rotting bandits, where the expected reward of an arm may decrease every time it is pulled. We introduce the filtering on expanding window average (FEWA) algorithm that at each round constructs moving averages of increasing windows to identify arms that are more likely to return high rewards when pulled once more. We prove that, without any knowledge on the decreasing behavior of the arms, FEWA achieves similar anytime problem-dependent, O(log(KT)), and problem-independent, O(sqrtKT), regret bounds of near-optimal stochastic algorithms as UCB1 of Auer et al. (2002a). This result substantially improves the prior result of Levine et al. (2017) which needed knowledge of the horizon and decaying parameters to achieve problem-independent bound of only O(K1/3T2/3). 
Finally, we report simulations confirming the theoretical improvements of FEWA.}, author = {Seznec, Julien and Locatelli, Andrea and Carpentier, Alexandra and Lazaric, Alessandro and Valko, Michal}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Rotting bandits are not harder than stochastic ones}}, year = {2019} } @article{wang2020on, archivePrefix = {arXiv}, arxivId = {cs.LG/2006.11274}, author = {Wang, Ruosong and Du, Simon S and Yang, Lin F and Salakhutdinov, Ruslan}, eprint = {2006.11274}, journal = {arXiv preprint arXiv:2006.11274}, primaryClass = {cs.LG}, title = {{On reward-free reinforcement learning with linear function approximation}}, url = {https://arxiv.org/pdf/2006.11274.pdf}, year = {2020} } @book{grunbaum2003convex, author = {Gr{\"{u}}nbaum, B}, publisher = {Springer Verlag}, title = {{Convex polytopes}}, volume = {221}, year = {2003} } @inproceedings{engel2002sparse, abstract = {We present a novel algorithm for sparse online greedy kernel- based nonlinear regression. This algorithm improves current approaches to kernel-based regression in two aspects. First, it operates online - at each time step it observes a single new input sample, performs an update and discards it. Second, the solution maintained is extremely sparse. This is achieved by an explicit greedy sparsification process that admits into the kernel representation a new input sample only if its feature space image is linearly independent of the images of previously admitted samples. 
We show that the algorithm implements a form of gradient ascent and demonstrate its scaling and noise tolerance properties on three benchmark regression problems.}, author = {Engel, Yaakov and Mannor, Shie and Meir, Ron}, booktitle = {European Conference on Machine Learning}, title = {{Sparse online greedy support vector regression}}, year = {2002} } @article{agrawal2012thompsonarxiv, abstract = {Thompson Sampling is one of the oldest heuristics for multi-armed bandit problems. It is a randomized algorithm based on Bayesian ideas, and has recently generated significant interest after several studies demonstrated it to have better empirical performance compared to the state-of-the-art methods. However, many questions regarding its theoretical performance remained open. In this paper, we design and analyze a generalization of Thompson Sampling algorithm for the stochastic contextual multi-armed bandit problem with linear payoff functions, when the contexts are provided by an adaptive adversary. This is among the most important and widely studied versions of the contextual bandits problem. We provide the first theoretical guarantees for the contextual version of Thompson Sampling. 
We prove a high probability regret bound of {\$}\backslashtilde{\{}O{\}}(d{\^{}}{\{}3/2{\}}\backslashsqrt{\{}T{\}}){\$} (or {\$}\backslashtilde{\{}O{\}}(d\backslashsqrt{\{}T \backslashlog(N){\}}){\$}), which is the best regret bound achieved by any computationally efficient algorithm available for this problem in the current literature, and is within a factor of {\$}\backslashsqrt{\{}d{\}}{\$} (or {\$}\backslashsqrt{\{}\backslashlog(N){\}}{\$}) of the information-theoretic lower bound for this problem.}, archivePrefix = {arXiv}, arxivId = {1209.3352}, author = {Agrawal, Shipra and Goyal, Navin}, eprint = {1209.3352}, journal = {CoRR, abs/1209.3352, http://arxiv.org/abs/1209.3352}, month = {sep}, title = {{Thompson Sampling for Contextual Bandits with Linear Payoffs}}, url = {http://arxiv.org/abs/1209.3352}, year = {2012} } @article{KW52, author = {Kiefer, J and Wolfowitz, J}, journal = {Annals of Mathematical Statistics}, pages = {462--466}, title = {{Stochastic estimation of the maximum of a regression function}}, volume = {23}, year = {1952} } @article{Graham1979, abstract = {The theory of deterministic sequencing and scheduling has expanded rapidly during the past years. In this paper we survey the state of the art with respect to optimization and approximation algorithms and interpret these in terms of computational complexity theory. Special cases considered are single machine scheduling, identical, uniform and unrelated parallel machine scheduling, and open shop, flow shop and job shop scheduling. We indicate some problems for future research and include a selective bibliography. 
{\textcopyright}1979, North-Holland Publishing Company.}, author = {Graham, R L and Lawler, E L and Lenstra, J K and Rinnooy Kan, A. H. G.}, doi = {10.1016/S0167-5060(08)70356-X}, isbn = {9780080867670}, issn = {01675060}, journal = {Annals of Discrete Mathematics}, number = {C}, pages = {287--326}, pmid = {384}, title = {{Optimization and approximation in deterministic sequencing and scheduling: A survey}}, volume = {5}, year = {1979} } @article{GyLiLu11Corr, annote = {From Duplicate 3 (Efficient Tracking of Large Classes of Experts - Gy{\"{o}}rgy, A; Linder, T; Lugosi, G) Accepted with minor revisions}, author = {Gy{\"{o}}rgy, Andr{\'{a}}s and Linder, Tam{\'{a}}s and Lugosi, G{\'{a}}bor}, journal = {IEEE Transactions on Information Theory}, number = {11}, pages = {6709--6725}, title = {{Efficient Tracking of Large Classes of Experts}}, volume = {58}, year = {2012} } @inproceedings{geman02dynamic, address = {Morristown, NJ, USA}, author = {Geman, Stuart and Johnson, Mark}, booktitle = {ACL '02: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics}, doi = {10.3115/1073083.1073130}, pages = {279--286}, publisher = {Association for Computational Linguistics}, title = {{Dynamic programming for parsing and estimation of stochastic unification-based grammars}}, year = {2002} } @book{Brent1973, abstract = {Outstanding text for graduate students and research workers proposes improvements to existing algorithms, extends their related mathematical theories, and offers details on new algorithms for approximating local and global minima. 
Many numerical examples, along with complete analysis of rate of convergence for most of the algorithms and error bounds that allow for the effect of rounding errors.}, author = {Brent, R P}, booktitle = {Book}, chapter = {4}, editor = {Englewood, N and Cliffs, J}, isbn = {0486419983}, pages = {195}, publisher = {Prentice-Hall}, series = {Prentice-Hall series in automatic computation}, title = {{Algorithms for minimization without derivatives}}, url = {http://www.cs.ox.ac.uk/people/richard.brent/pd/rpb011a.pdf}, year = {1973} } @article{bradski2000opencv, author = {Bradski, G}, journal = {Dr. Dobb's Journal of Software Tools}, keywords = {bibtex-import}, title = {{The OpenCV Library}}, year = {2000} } @inproceedings{sidford2018near, author = {Sidford, Aaron and Wang, Mengdi and Wu, Xian and Yang, Lin F. and Ye, Yinyu}, booktitle = {Neural Information Processing Systems}, title = {{Near-optimal time and sample complexities for solving discounted Markov decision process with a generative model}}, url = {https://arxiv.org/pdf/1806.01492.pdf}, year = {2018} } @inproceedings{Gupta2013, abstract = {We study a general stochastic probing problem defined on a universe V, where each element e in V is "active" independently with probability p{\_}e. Elements have weights {\{}w{\_}e{\}} and the goal is to maximize the weight of a chosen subset S of active elements. However, we are given only the p{\_}e values-- to determine whether or not an element e is active, our algorithm must probe e. If element e is probed and happens to be active, then e must irrevocably be added to the chosen set S; if e is not active then it is not included in S. Moreover, the following conditions must hold in every random instantiation: (1) the set Q of probed elements satisfy an "outer" packing constraint, and (2) the set S of chosen elements satisfy an "inner" packing constraint. The kinds of packing constraints we consider are intersections of matroids and knapsacks. 
Our results provide a simple and unified view of results in stochastic matching and Bayesian mechanism design, and can also handle more general constraints. As an application, we obtain the first polynomial-time {\$}\backslashbackslashOmega(1/k){\$}-approximate "Sequential Posted Price Mechanism" under k-matroid intersection feasibility constraints.}, archivePrefix = {arXiv}, arxivId = {arXiv:1302.5913v1}, author = {Gupta, Anupam and Nagarajan, Viswanath}, booktitle = {Integer Programming and Combinatorial Optimization}, doi = {10.1007/978-3-642-36694-9_18}, eprint = {arXiv:1302.5913v1}, isbn = {9783642366932}, issn = {03029743}, pages = {205--216}, title = {{A stochastic probing problem with applications}}, volume = {7801}, year = {2013} } @inproceedings{ferns2005metrics, author = {Ferns, Norm and Panangaden, Prakash and Precup, Doina}, booktitle = {Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence}, title = {{Metrics for {\{}Markov{\}} Decision Processes with Infinite State Spaces}}, year = {2005} } @inproceedings{klein2013cascaded, abstract = {This paper considers the Inverse Reinforcement Learning (IRL) problem, that is inferring a reward function for which a demonstrated expert policy is optimal. We propose to break the IRL problem down into two generic Supervised Learning steps: this is the Cascaded Supervised IRL (CSI) approach. A classification step that defines a score function is followed by a regression step providing a reward function. A theoretical analysis shows that the demonstrated expert policy is nearoptimal for the computed reward function. Not needing to repeatedly solve a Markov Decision Process (MDP) and the ability to leverage existing techniques for classification and regression are two important advantages of the CSI approach. 
It is furthermore empirically demonstrated to compare positively to state-of-the-art approaches when using only transitions sampled according to the expert policy, up to the use of some heuristics. This is exemplified on two classical benchmarks (the mountain car problem and a highway driving simulator).}, address = {Prague (Czech Republic)}, author = {Klein, Edouard and PIOT, Bilal and Geist, Matthieu and Pietquin, Olivier}, booktitle = {Proceedings of the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML/PKDD 2013)}, doi = {10.1007/978-3-642-40988-2_1}, editor = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and Zelezny, Filip}, isbn = {978-3-642-40987-5}, month = {sep}, pages = {1--16}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {{A cascaded supervised learning approach to inverse reinforcement learning}}, url = {http://www.ecmlpkdd2013.org/wp-content/uploads/2013/07/327.pdf}, volume = {8188}, year = {2013} } @inproceedings{HKW10, author = {Hazan, E and Kale, S and Warmuth, M}, booktitle = {Proceedings of the 23rd Annual Conference on Learning Theory (COLT)}, pages = {144--154}, title = {{Learning rotations with little regret}}, year = {2010} } @inproceedings{leskovec2005graphs, author = {Leskovec, Jure and Kleinberg, Jon and Faloutsos, Christos}, booktitle = {Proceedings of KDD'05}, organization = {ACM}, pages = {177--187}, title = {{Graphs over time: densification laws, shrinking diameters and possible explanations}}, year = {2005} } @inproceedings{snoek2012practical, author = {Snoek, Jasper and Larochelle, Hugo and Adams, Ryan P.}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Snoek, Larochelle, Adams - 2012 - Practical bayesian optimization of machine learning algorithms.pdf:pdf}, title = {{Practical bayesian optimization of machine learning algorithms.}}, url = 
{https://papers.nips.cc/paper/4522-practical-bayesian-optimization-of-machine-learning-algorithms.pdf}, year = {2012} } @inproceedings{AYSze11, author = {Abbasi-Yadkori, Yasin and Szepesv{\'{a}}ri, Csaba}, booktitle = {Conference on Learning Theory}, title = {{Regret Bounds for the Adaptive Control of Linear Quadratic Systems}}, url = {http://webdocs.cs.ualberta.ca/{~}abbasiya/LQR.pdf}, year = {2011} } @article{zhan2010distributions, abstract = {In this paper, the important issue of Laplacian eigenvalue distributions is investigated through theory-guided extensive numerical simulations, for four typical complex network models, namely, the ER random-graph networks, WS and NW small-world networks, and BA scale-free networks. It is found that these four types of complex networks share some common features, particularly similarities between the Laplacian eigenvalue distributions and the node degree distributions. {\textcopyright} 2009 Elsevier B.V. All rights reserved.}, author = {Zhan, Choujun and Chen, Guanrong and Yeung, Lam F.}, journal = {Physica A: Statistical Mechanics and its Applications}, keywords = {Complex network,Eigenvalue,Graph theory,Laplacian matrix,Node-degree,Random-graph network,Scale-free network,Small-world network}, number = {8}, pages = {1779--1788}, title = {{On the distributions of Laplacian eigenvalues versus node degrees in complex networks}}, volume = {389}, year = {2010} } @inproceedings{sutton2008dyna-style, author = {Sutton, Richard and Szepesvari, Csaba and Geramifard, Alborz and Bowling, Michael}, booktitle = {Proceedings of the 24th Conference on Uncertainty in Artificial Intelligence}, pages = {528--536}, title = {{Dyna-Style Planning with Linear Function Approximation and Prioritized Sweeping}}, year = {2008} } @inproceedings{lin2011-class-submod-sum, address = {Portland, OR}, annote = {(long paper)}, author = {Lin, H and Bilmes, J}, booktitle = {North American chapter of the Association for Computational Linguistics/Human Language Technology Conference (NAACL/HLT-2011)}, 
month = {jun}, title = {{A Class of Submodular Functions for Document Summarization}}, year = {2011} } @inproceedings{park2001approximating, author = {Park, James and Darwiche, Adnan}, booktitle = {Proceedings of the 17th Conference on Uncertainty in Artificial Intelligence}, pages = {403--410}, title = {{Approximating {\{}MAP{\}} Using Local Search}}, year = {2001} } @article{hill1975simple, author = {Hill, Bruce M.}, journal = {The Annals of Statistics}, keywords = {Bayesian inference,Tail of distribution,order statistics}, language = {EN}, number = {5}, pages = {1163--1174}, publisher = {Institute of Mathematical Statistics}, title = {{A Simple General Approach to Inference About the Tail of a Distribution}}, volume = {3}, year = {1975} } @inproceedings{kolla2016collaborative, abstract = {We consider a collaborative online learning paradigm, wherein a group of agents connected through a social network are engaged in playing a stochastic multi-armed bandit game. Each time an agent takes an action, the corresponding reward is instantaneously observed by the agent, as well as its neighbours in the social network. We perform a regret analysis of various policies in this collaborative learning setting. A key finding of this paper is that natural extensions of widely-studied single agent learning policies to the network setting need not perform well in terms of regret. In particular, we identify a class of non-altruistic and individually consistent policies, and argue by deriving regret lower bounds that they are liable to suffer a large regret in the networked setting. We also show that the learning performance can be substantially improved if the agents exploit the structure of the network, and develop a simple learning algorithm based on dominating sets of the network. 
Specifically, we first consider a star network, which is a common motif in hierarchical social networks, and show analytically that the hub agent can be used as an information sink to expedite learning and improve the overall regret. We also derive networkwide regret bounds for the algorithm applied to general networks. We conduct numerical experiments on a variety of networks to corroborate our analytical results.}, author = {Kolla, Ravi Kumar and Jagannathan, Krishna and Gopalan, Aditya}, booktitle = {Annual Allerton Conference on Communication, Control, and Computing}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Kolla, Jagannathan, Gopalan - 2016 - Collaborative learning of stochastic bandits over a social network.pdf:pdf}, title = {{Collaborative learning of stochastic bandits over a social network}}, year = {2016} } @inproceedings{cai2014comparison, author = {Cai, Zhuhua and Gao, Zekai J and Luo, Shangyu and Perez, Luis L and Vagena, Zografoula and Jermaine, Christopher}, booktitle = {SIGMOD}, title = {{A comparison of platforms for implementing and running very large scale machine learning algorithms}}, year = {2014} } @inproceedings{samothrakis2013training, author = {Samothrakis, Spyridon and Perez, Diego and Lucas, Simon}, booktitle = {NIPS Workshop on Causality}, month = {jan}, title = {{Training gradient boosting machines using curve-fitting and information-theoretic features for causal direction detection}}, url = {http://ssamot.me/papers/Samothrakis-NIPS2013-causality.pdf}, year = {2013} } @article{Abernethy2008, author = {Abernethy, Jacob and Berkeley, U C and Rakhlin, Alexander}, journal = {Online}, number = {3}, publisher = {Citeseer}, title = {{Competing in the Dark : An Efficient Algorithm for Bandit Linear Optimization}}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.152.2096{\&}rep=rep1{\&}type=pdf}, volume = {3}, year = {2008} } @inproceedings{domingues2020episodic, abstract = {In this paper, 
we propose new problem-independent lower bounds on the sample complexity and regret in episodic MDPs, with a particular focus on the non-stationary case in which the transition kernel is allowed to change in each stage of the episode. Our main contribution is a novel lower bound of Omega((H3SA/ϵ2)log(1/$\delta$)) on the sample complexity of an ($\epsilon$,$\delta$)-PAC algorithm for best policy identification in a non-stationary MDP. This lower bound relies on a construction of "hard MDPs" which is different from the ones previously used in the literature. Using this same class of MDPs, we also provide a rigorous proof of the Omega(sqrtH3SAT) regret bound for non-stationary MDPs. Finally, we discuss connections to PAC-MDP lower bounds.}, author = {Domingues, Omar Darwiche and M{\'{e}}nard, Pierre and Kaufmann, Emilie and Valko, Michal}, booktitle = {Algorithmic Learning Theory}, title = {{Episodic reinforcement learning in finite MDPs: Minimax lower bounds revisited}}, url = {https://arxiv.org/pdf/2010.03531.pdf}, year = {2021} } @inproceedings{AweKlein04, author = {Awerbuch, Baruch and Kleinberg, Robert D}, pages = {45--53}, title = {{Adaptive routing with end-to-end feedback: distributed learning and geometric approaches}} } @inproceedings{collins00discriminative, author = {Collins, Michael}, pages = {175--182}, title = {{Discriminative Reranking for Natural Language Parsing}} } @article{lagoudakis2003least-squares, author = {Lagoudakis, Michail and Parr, Ronald}, journal = {Journal of Machine Learning Research}, pages = {1107--1149}, title = {{Least-Squares Policy Iteration}}, volume = {4}, year = {2003} } @inproceedings{wagstaff2000clustering, author = {Wagstaff, K and Cardie, C}, booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning}, pages = {1103--1110}, title = {{Clustering with instance-level constraints}}, url = {citeseer.ist.psu.edu/wagstaff00clustering.html}, year = {2000} } @inproceedings{hauskrecht2004linear, author 
= {Hauskrecht, Milos and Kveton, Branislav}, booktitle = {Advances in Neural Information Processing Systems 16}, pages = {895--902}, title = {{Linear Program Approximations for Factored Continuous-State {\{}Markov{\}} Decision Processes}}, year = {2004} } @inproceedings{florez-larrahondo2005efficient, abstract = {Anomaly detection systems are developed by learning a baseline-model from a set of events captured from a computer system operating under normal conditions. The model is then used to recognize unusual activities as deviations from normality. Hidden Markov models (HMMs) are powerful probabilistic finite state machines that have been used to acquire these baseline-models. Although previous research has indicated that HMMs can effectively represent complex sequences, the traditional learning algorithm for HMMs is too computationally expensive for use with real-world anomaly detection systems. This paper describes the use of a novel incremental learning algorithm for HMMs that allows the efficient acquisition of anomaly detection models. 
The new learning algorithm requires less memory and training time than previous approaches for learning discrete HMMs and can be used to perform online learning of accurate baseline-models from complex computer applications to support anomaly detection.}, annote = {comps{\_}anX}, author = {Florez-Larrahondo, German and Bridges, Susan M and Vaughn, Rayford}, booktitle = {Information Security}, doi = {10.1007/11556992_38}, isbn = {978-3-540-29001-8}, issn = {0302-9743 (Print) 1611-3349 (Online)}, pages = {506--514}, publisher = {Springer Berlin / Heidelberg}, series = {Lecture Notes in Computer Science}, title = {{Efficient Modeling of Discrete Events for Anomaly Detection Using Hidden Markov Models}}, url = {http://www.springerlink.com/content/eqdqtr9hwfyxwg3k/}, volume = {3650/2005}, year = {2005} } @article{RM51, author = {Robbins, H and Monro, S}, journal = {Annals of Mathematical Statistics}, pages = {400--407}, title = {{A stochastic approximation method}}, volume = {22}, year = {1951} } @article{chambolle2009total, author = {Chambolle, A and Darbon, J}, journal = {International Journal of Computer Vision}, number = {3}, pages = {288--307}, publisher = {Springer}, title = {{On total variation minimization and surface evolution using parametric maximum flows}}, volume = {84}, year = {2009} } @article{Hofmann2011, abstract = {In this paper we give an overview of and outlook on research at the intersection of information retrieval (IR) and contextual bandit problems. A critical problem in information retrieval is online learning to rank, where a search engine strives to improve the quality of the ranked result lists it presents to users on the basis of those users' interactions with those result lists. Recently, researchers have started to model interactions between users and search engines as contextual bandit problems, and initial methods for learning in this setting have been devised. 
Our research focuses on two aspects: balancing exploration and exploitation and inferring preferences from implicit user interactions. This paper summarizes our recent work on online learning to rank for information retrieval and points out challenges that are characteristic of this application area.}, author = {Hofmann, Katja}, journal = {NIPS 2011 Proceedings of the Conference on Neural Information Processing Systems Workshop on Bayesian Optimization Experimental Design and Bandits Theory and Applications}, pages = {1--5}, title = {{Contextual Bandits for Information Retrieval}}, url = {http://www.cs.ubc.ca/{~}hutter/nips2011workshop/papers{\_}and{\_}posters/nips-2012-rl4ir.pdf}, year = {2011} } @article{korostelev1999asymptotic, author = {Korostelev, Alexander and Nussbaum, Michael}, journal = {Bernoulli}, number = {6}, pages = {1099--1118}, title = {{The asymptotic minimax constant for sup-norm loss in nonparametric density estimation}}, volume = {5}, year = {1999} } @article{crammer2002algorithmic, address = {Cambridge, MA, USA}, author = {Crammer, Koby and Singer, Yoram}, issn = {1533-7928}, journal = {J. Mach. Learn. Res.}, pages = {265--292}, publisher = {MIT Press}, title = {{On the algorithmic implementation of multiclass kernel-based vector machines}}, volume = {2}, year = {2002} } @article{cunningham1984testing, author = {Cunningham, W H}, journal = {Journal of Combinatorial Theory, Series B}, number = {2}, pages = {161--188}, publisher = {Elsevier}, title = {{Testing membership in matroid polyhedra}}, volume = {36}, year = {1984} } @inproceedings{narasimhan2007local, author = {Narasimhan, M and Bilmes, J}, booktitle = {Proc. 
IJCAI}, title = {{Local search for balanced submodular clusterings}}, year = {2007} } @book{boyd, author = {Boyd, S P and Vandenberghe, L}, publisher = {Cambridge University Press}, title = {{Convex Optimization}}, year = {2004} } @techreport{vershynin_note_2009, author = {Vershynin, Roman}, title = {{A note on sums of independent random matrices after Ahlswede-Winter}}, url = {http://www.umich.edu/{~}romanv/teaching/reading-group/ahlswede-winter.pdf}, year = {2009} } @book{Mas06, author = {Massart, P}, publisher = {Springer}, title = {{Ecole d'Ete de Probabilites de Saint-Flour XXXIII - 2003}}, year = {2006} } @article{durrett1977functionals, author = {Durrett, Richard T. and Iglehart, Donald L.}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Durrett, Iglehart - 1977 - Functionals of Brownian meander and Brownian excursion.pdf:pdf}, journal = {The Annals of Probability}, number = {1}, pages = {130--135}, title = {{Functionals of Brownian meander and Brownian excursion}}, volume = {5}, year = {1977} } @techreport{poulson2019high-performance, abstract = {Determinantal Point Processes (DPPs) were introduced by Mac-chi [1] as a model for repulsive (fermionic) particle distributions. But their recent popularization is largely due to their usefulness for encouraging diversity in the final stage of a recommender system [2]. The standard sampling scheme for finite DPPs is a spectral decomposition followed by an equivalent of a randomly diagonally-pivoted Cholesky factorization of an orthogonal projection, which is only applicable to Hermitian kernels and has an expensive setup cost. Researchers have begun to connect DPP sampling to LDL H factoriza-tions as a means of avoiding the initial spectral decomposition, but existing approaches have only outperformed the spectral decomposition approach in special circumstances, where the number of kept modes is a small percentage of the ground set size. 
This article proves that trivial modifications of LU and LDL H fac-torizations yield efficient direct sampling schemes for non-Hermitian and Hermitian DPP kernels, respectively. Further, it is experimentally shown that even dynamically-scheduled, shared-memory paralleliza-tions of high-performance dense and sparse-direct factorizations can be trivially modified to yield DPP sampling schemes with essentially identical performance. The software developed as part of this research, Catamari [hodges-tar.com/catamari] is released under the Mozilla Public License v2.0. It contains header-only, C++14 plus OpenMP 4.0 implementations of dense and sparse-direct, Hermitian and non-Hermitian DPP samplers. * jack@hodgestar.com, Hodge Star Scientific Computing}, archivePrefix = {arXiv}, arxivId = {1905.00165v1}, author = {Poulson, Jack}, eprint = {1905.00165v1}, title = {{High-performance sampling of generic Determinantal Point Processes}}, url = {https://arxiv.org/pdf/1905.00165.pdf}, year = {2019} } @book{davey2002introduction, author = {Davey, B A and Priestley, H A}, publisher = {Cambridge Univ. Press}, title = {{Introduction to Lattices and Order}}, year = {2002} } @inproceedings{wen2017online, abstract = {We study the online influence maximization problem in social networks under the independent cascade model. Specifically, we aim to learn the set of " best influencers " in a social network online while repeatedly interacting with it. We address the challenges of (i) combinatorial action space, since the number of feasible influencer sets grows exponentially with the maximum number of influencers, and (ii) limited feedback, since only the influenced portion of the network is observed. Under a stochastic semi-bandit feedback, we propose and analyze IMLinUCB, a computationally efficient UCB-based algorithm. 
Our bounds on the cumulative regret are polynomial in all quantities of interest, achieve near-optimal dependence on the number of interactions and reflect the topology of the network and the activation probabilities of its edges, thereby giving insights on the problem complexity. To the best of our knowledge, these are the first such results. Our experiments show that in several representative graph topologies, the regret of IMLinUCB scales as suggested by our upper bounds. IMLinUCB permits linear generalization and thus is both statistically and computationally suitable for large-scale problems. Our experiments also show that IMLinUCB with linear generalization can lead to low regret in real-world online influence maximization.}, author = {Wen, Zheng and Kveton, Branislav and Valko, Michal and Vaswani, Sharan}, booktitle = {Neural Information Processing Systems}, title = {{Online influence maximization under independent cascade model with semi-bandit feedback}}, year = {2017} } @book{SNW11, editor = {Sra, S and Nowozin, S and Wright, S}, publisher = {MIT Press}, title = {{Optimization for Machine Learning}}, year = {2011} } @article{gilbert1952comparison, abstract = {Two channels are considered; a discrete channel which can transmit sequences of binary digits, and a continuous channel which can transmit band limited signals. 
The performance of a large number of simple signalling alphabets is computed and it is concluded that one cannot signal at rates near the channel capacity without using very complicated, alphabets.}, author = {Gilbert, Edgar Nelson}, journal = {Bell System Technical Journal}, number = {3}, pages = {504--522}, title = {{A comparison of signalling alphabets}}, volume = {31}, year = {1952} } @book{Led01, author = {Ledoux, M}, publisher = {American Mathematical Society}, title = {{The Concentration of Measure Phenomenon}}, year = {2001} } @article{scholkopf1999estimating, annote = {comps{\_}ano}, author = {Scholkopf, Bernhard and Platt, John C and Shawe-taylor, John and Smola, Alex J and Williamson, Robert C and Sch{\"{o}}lkopf, Bernhard}, journal = {Neural Computation}, pages = {2001}, title = {{Estimating the support of a high-dimensional distribution}}, url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.60.9423}, volume = {13}, year = {1999} } @article{Vov99, author = {Vovk, V}, journal = {Machine Learning}, number = {3}, pages = {247--282}, title = {{Derandomizing stochastic prediction strategies}}, volume = {35}, year = {1999} } @inproceedings{neu2014online, abstract = {Most work on sequential learning assumes a fixed set of actions that are available all the time. However, in practice, actions can consist of picking subsets of readings from sensors that may break from time to time, road segments that can be blocked or goods that are out of stock. In this paper we study learning algorithms that are able to deal with stochastic availability of such unreliable composite actions. We propose and analyze algorithms based on the Follow-The-Perturbed-Leader prediction method for several learning settings differing in the feedback provided to the learner. Our algorithms rely on a novel loss estimation technique that we call Counting Asleep Times. 
We deliver regret bounds for our algorithms for the previously studied full information and (semi-)bandit settings, as well as a natural middle point between the two that we call the restricted information setting. A special consequence of our results is a significant improvement of the best known performance guarantees achieved by an efficient algorithm for the sleeping bandit problem with stochastic availability. Finally, we evaluate our algorithms empirically and show their improvement over the known approaches.}, author = {Neu, Gergely and Valko, Michal}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Neu, Valko - 2014 - Online combinatorial optimization with stochastic decision sets and adversarial losses(2).pdf:pdf}, title = {{Online combinatorial optimization with stochastic decision sets and adversarial losses}}, year = {2014} } @article{burnetas1996optimal, abstract = {Consider the problem of sequential sampling from m statistical populations to maximize the expected sum of outcomes in the long run. Under suitable assumptions on the unknown parameters u gQpopulations to maximize the expected sum of outcomes in the lon, it is shown that there exists a class CR of adaptive policies with the following properties: i. The expected n horizon reward p 0Vn u . under any policy p 0 in CR is equal to nm*u .yMu .log nqolog n., as n{\textordfeminine}`, where m*u . is the largest population mean and Mu . is a constant. ii. Policies in CR are asymptotically optimal within a larger class CUF of ‘‘uniformly fast convergent'' policies in the sense that lim   . p 0 .. n{\textordfeminine}` nm* u y Vn u r nm*u .yVnp u ..F1, for any p gCUF and any u gQ such that Mu .)0. Policies in CR are specified via easily computable indices, defined as unique solutions to dual problems that arise naturally from the functional form of Mu .. 
In addition, the assumptions are verified for populations specified by nonparametric discrete univariate distributions with finite support. In the case of normal populations with unknown means and variances, we leave as an open problem the verification of one assumption.}, author = {Burnetas, Apostolos N. and Katehakis, Michael N.}, journal = {Advances in Applied Mathematics}, number = {2}, pages = {122--142}, title = {{Optimal adaptive policies for sequential allocation problems}}, volume = {17}, year = {1996} } @techreport{golding1996idleness, author = {Golding, Richard and Bosch, Peter and Wilkes, John}, institution = {Hewlett-Packard Laboratories}, number = {HPL-96-140}, title = {{Idleness Is Not Sloth}}, year = {1996} } @book{crame1999mathematical, author = {Cram{\'{e}}r, H}, isbn = {9780691005478}, publisher = {Princeton University Press}, series = {Princeton landmarks in mathematics and physics}, title = {{Mathematical methods of statistics}}, url = {http://books.google.com/books?id=CRTKKaJO0DYC}, year = {1999} } @techreport{jonsson2020planning, archivePrefix = {arXiv}, arxivId = {2006.05879}, author = {Jonsson, Anders and Kaufmann, Emilie and M{\'{e}}nard, Pierre and Domingues, Omar Darwiche and Leurent, Edouard and Valko, Michal}, eprint = {2006.05879}, title = {{Planning in Markov decision processes with gap-dependent sample complexity}}, year = {2020} } @article{hary12cooperation, author = {Hartmann, B and D{\'{a}}n, A}, journal = {IEEE Transactions on Sustainable Energy}, number = {1}, pages = {49--56}, title = {{Cooperation of a grid-connected wind farm and an energy storage unit demonstration of a simulation tool}}, volume = {3}, year = {2012} } @article{candes2009exact, abstract = {We consider a problem of considerable practical interest: the recovery of a data matrix from a sampling of its entries. Suppose that we observe m entries selected uniformly at random from a matrix M. Can we complete the matrix and recover the entries that we have not seen? 
We show that one can perfectly recover most low-rank matrices from what appears to be an incomplete set of entries. We prove that if the number m of sampled entries obeys m {\textgreater}= C n{\^{}}{\{}1.2{\}} r log n for some positive numerical constant C, then with very high probability, most n by n matrices of rank r can be perfectly recovered by solving a simple convex optimization program. This program finds the matrix with minimum nuclear norm that fits the data. The condition above assumes that the rank is not too large. However, if one replaces the 1.2 exponent with 1.25, then the result holds for all values of the rank. Similar results hold for arbitrary rectangular matrices as well. Our results are connected with the recent literature on compressed sensing, and show that objects other than signals and images can be perfectly reconstructed from very limited information.}, author = {Cand{\`{e}}s, Emmanuel J. and Recht, Benjamin}, journal = {Foundations of Computational Mathematics}, keywords = {Compressed sensing,Convex optimization,Decoupling,Duality in optimization,Low-rank matrices,Matrix completion,Noncommutative Khintchine inequality,Nuclear norm minimization,Random matrices}, number = {6}, pages = {717--772}, title = {{Exact matrix completion via convex optimization}}, volume = {9}, year = {2009} } @article{gilks1992derivative, author = {Gilks, W. R.}, journal = {Bayesian Statistics}, title = {{Derivative-free adaptive rejection sampling for Gibbs sampling}}, volume = {4}, year = {1992} } @inproceedings{Wang2017a, abstract = {Bayesian optimization (BO) has become an effective approach for black-box function optimization problems when function evaluations are expensive and the optimum can be achieved within a relatively small number of queries. However, many cases, such as the ones with high-dimensional inputs, may require a much larger number of observations for optimization. 
Despite an abundance of observations thanks to parallel experiments, current BO techniques have been limited to merely a few thousand observations. In this paper, we propose ensemble Bayesian optimization (EBO) to address three current challenges in BO simultaneously: (1) large-scale observations; (2) high dimensional input spaces; and (3) selections of batch queries that balance quality and diversity. The key idea of EBO is to operate on an ensemble of additive Gaussian process models, each of which possesses a randomized strategy to divide and conquer. We show unprecedented, previously impossible results of scaling up BO to tens of thousands of observations within minutes of computation.}, archivePrefix = {arXiv}, arxivId = {1706.01445}, author = {Wang, Zi and Gehring, Clement and Kohli, Pushmeet and Jegelka, Stefanie}, booktitle = {International Conference on Artificial Intelligence and Statistics}, eprint = {1706.01445}, title = {{Batched large-scale bayesian optimization in high-dimensional spaces}}, url = {https://arxiv.org/pdf/1706.01445.pdf}, year = {2017} } @inproceedings{garcke2005semi, author = {Garcke, Jochen and Griebel, Michael}, booktitle = {International Conference on Machine Learning}, title = {{Semi-supervised learning with sparse grids}}, year = {2005} } @incollection{gorban2009principal, author = {Gorban, Alexander and Zinovyev, Andrei}, booktitle = {Handbook of Research on Machine Learning Applications and Trends: Algorithms, Methods and Techniques}, pages = {28--59}, publisher = {Information Science Reference}, title = {{Principal Graphs and Manifolds}}, year = {2009} } @article{orlin2009faster, author = {Orlin, James B}, doi = {10.1007/s10107-007-0189-2}, isbn = {1010700701892}, issn = {00255610}, journal = {Mathematical Programming}, number = {2}, pages = {237--251}, publisher = {Springer}, title = {{A faster strongly polynomial time algorithm for submodular function minimization}}, url = 
{http://www.springerlink.com/index/10.1007/s10107-007-0189-2}, volume = {118}, year = {2009} } @incollection{he2012imitation, author = {He, He and III, Hal Daume and Eisner, Jason}, booktitle = {Advances in Neural Information Processing Systems 25}, editor = {Bartlett, P and Pereira, F C N and Burges, C J C and Bottou, L and Weinberger, K Q}, pages = {3158--3166}, title = {{Imitation Learning by Coaching}}, url = {http://books.nips.cc/papers/files/nips25/NIPS2012{\_}1449.pdf}, year = {2012} } @article{hastings1970monte, author = {Hastings, W K}, journal = {Biometrika}, pages = {97--109}, title = {{{\{}Monte Carlo{\}} Sampling Methods Using {\{}Markov{\}} Chains and Their Application}}, volume = {57}, year = {1970} } @inproceedings{ashbrook2008quickdraw:, author = {Ashbrook, Daniel L and Clawson, James R and Lyons, Kent and Starner, Thad E and Patel, Nirmal}, booktitle = {Proceeding of the twenty-sixth annual SIGCHI conference on Human factors in computing systems}, pages = {219--222}, series = {CHI '08}, title = {{Quickdraw: the impact of mobility and on-body placement on device access time}}, year = {2008} } @inproceedings{kyng2016framework, abstract = {A spectral sparsifier of a graph {\$}G{\$} is a sparser graph {\$}H{\$} that approximately preserves the quadratic form of {\$}G{\$}, i.e. for all vectors {\$}x{\$}, {\$}x{\^{}}T L{\_}G x \backslashapprox x{\^{}}T L{\_}H x{\$}, where {\$}L{\_}G{\$} and {\$}L{\_}H{\$} denote the respective graph Laplacians. Spectral sparsifiers generalize cut sparsifiers, and have found many applications in designing graph algorithms. In recent years, there has been interest in computing spectral sparsifiers in semi-streaming and dynamic settings. Natural algorithms in these settings often involve repeated sparsification of a graph, and accumulation of errors across these steps. 
We present a framework for analyzing algorithms that perform repeated sparsifications that only incur error corresponding to a single sparsification step, leading to better results for many resparsification-based algorithms. As an application, we show how to maintain a spectral sparsifier in the semi-streaming setting: We present a simple algorithm that, for a graph {\$}G{\$} on {\$}n{\$} vertices and {\$}m{\$} edges, computes a spectral sparsifier of {\$}G{\$} with {\$}O(n \backslashlog n){\$} edges in a single pass over {\$}G{\$}, using only {\$}O(n \backslashlog n){\$} space, and {\$}O(m \backslashlog{\^{}}2 n){\$} total time. This improves on previous best semi-streaming algorithms for both spectral and cut sparsifiers by a factor of {\$}\backslashlog{\{}n{\}}{\$} in both space and runtime. The algorithm extends to semi-streaming row sampling for general PSD matrices. We also use our framework to combine a spectral sparsification algorithm by Koutis with improved spanner constructions to give a parallel algorithm for constructing {\$}O(n\backslashlog{\^{}}2{\{}n{\}}\backslashlog\backslashlog{\{}n{\}}){\$} sized spectral sparsifiers in {\$}O(m\backslashlog{\^{}}2{\{}n{\}}\backslashlog\backslashlog{\{}n{\}}){\$} time. 
This is the best known combinatorial graph sparsification algorithm. The size of the sparsifiers is only a factor {\$}\backslashlog{\{}n{\}}\backslashlog\backslashlog{\{}n{\}}{\$} more than ones produced by numerical routines.}, author = {Kyng, Rasmus and Pachocki, Jakub and Peng, Richard and Sachdeva, Sushant}, booktitle = {Symposium on Discrete Algorithms}, title = {{A Framework for Analyzing Resparsification Algorithms}}, year = {2016} } @techreport{weissman03ineq, author = {Weissman, Tsachy and Ordentlich, Erik and Seroussi, Gadiel and Verdu, Sergio and Weinberger, Marcelo J}, institution = {HP Laboratories}, title = {{Inequalities for the L1 Deviation of the Empirical Distribution}}, year = {2003} } @inproceedings{fiegel2020adaptive, abstract = {In multi-fidelity optimization, biased approximations of varying costs of the target function are available. This paper studies the problem of optimizing a locally smooth function with a limited budget, where the learner has to make a tradeoff between the cost and the bias of these approximations. We first prove lower bounds for the simple regret under different assumptions on the fidelities, based on a cost-to-bias function. We then present the Kometo algorithm which achieves, with additional logarithmic factors, the same rates without any knowledge of the function smoothness and fidelity assumptions, and improves previously proven guarantees. We finally empirically show that our algorithm outperforms previous multi-fidelity optimization methods without the knowledge of problem-dependent parameters.}, author = {Fiegel, C{\^{o}}me and Gabillon, Victor and Valko, Michal}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Adaptive multi-fidelity optimization with fast learning rates}}, year = {2020} } @inproceedings{hazan2009online, abstract = {We consider an online decision problem over a discrete space in which the loss function is submodular.
We give algorithms which are computationally efficient and are Hannan-consistent in both the full information and bandit settings.}, author = {Hazan, Elad and Kale, Satyen}, booktitle = {Advances in Neural Information Processing Systems 22}, editor = {Bengio, Y and Schuurmans, D and Lafferty, J and Williams, C K I and Culotta, A}, pages = {700--708}, publisher = {Citeseer}, title = {{Beyond Convexity: Online Submodular Minimization}}, url = {http://www.satyenkale.com/papers/submodular.pdf http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.157.4365{\&}rep=rep1{\&}type=pdf}, year = {2009} } @inproceedings{szepes06learning, author = {Antos, A and Szepesv{\'{a}}ri, Cs and Munos, R}, doi = {10.1007/11776420_42}, pages = {574--588}, title = {{Learning near-optimal policies with {\{}B{\}}ellman-residual minimization based fitted policy iteration and a single sample path}}, url = {http://www.springerlink.com/content/x85884360133802l/?p=68d26700c320427caf7c027981b37b8f{\&}pi=41} } @book{shor85minimization, address = {New York, NY, USA}, author = {Shor, N Z and Kiwiel, K C and Ruszczynski, A}, isbn = {0-387-12763-1}, publisher = {Springer-Verlag New York, Inc.}, title = {{Minimization methods for non-differentiable functions}}, year = {1985} } @book{rockafellar81theory, author = {Rockafellar, R Tyrell}, publisher = {Heldermann Verlag, Berlin}, title = {{The theory of subgradients and its applications to problems of optimization: Convex and nonconvex functions}}, year = {1981} } @inproceedings{liu2010large, author = {Liu, W and He, Junfeng and Chang, Shih-Fu}, booktitle = {ICML}, title = {{Large Graph Construction for Scalable Semi-Supervised Learning}}, year = {2010} } @inproceedings{Jenatton2010, author = {Jenatton, R and Obozinski, G and Bach, F}, booktitle = {Proc.
AISTATS}, title = {{Structured sparse principal component analysis}}, year = {2009} } @article{dean1989model, author = {Dean, Thomas and Kanazawa, Keiji}, journal = {Computational Intelligence}, pages = {142--150}, title = {{A Model for Reasoning about Persistence and Causation}}, volume = {5}, year = {1989} } @inproceedings{catoni2012challenging, author = {Catoni, Olivier}, booktitle = {Annales de l'Institut Henri Poincar{\'{e}}, Probabilit{\'{e}}s et Statistiques}, number = {4}, pages = {1148--1185}, title = {{Challenging the empirical mean and empirical variance: A deviation study}}, volume = {48}, year = {2012} } @article{kivinen2004online, abstract = {Kernel-based algorithms such as support vector machines have achieved considerable success in various problems in batch setting, where all of the training data is available in advance. Support vector machines combine the so-called kernel trick with the large margin idea. There has been little use of these methods in an online setting suitable for real-time applications. In this paper, we consider online learning in a reproducing kernel Hilbert space. By considering classical stochastic gradient descent within a feature space and the use of some straightforward tricks, we develop simple and computationally efficient algorithms for a wide range of problems such as classification, regression, and novelty detection. In addition to allowing the exploitation of the kernel trick in an online setting, we examine the value of large margins for classification in the online setting with a drifting target. We derive worst-case loss bounds, and moreover, we show the convergence of the hypothesis to the minimizer of the regularized risk functional. We present some experimental results that support the theory as well as illustrating the power of the new algorithms for online novelty detection.}, author = {Kivinen, Jyrki and Smola, Alexander J. 
and Williamson, Robert C.}, journal = {IEEE Transactions on Signal Processing}, number = {8}, pages = {2165--2176}, title = {{Online learning with kernels}}, volume = {52}, year = {2004} } @article{ramachandran2007bayesian, abstract = {Inverse Reinforcement Learning (IRL) is the problem of learning the reward function underlying a Markov Decision Process given the dynamics of the system and the behaviour of an expert. IRL is motivated by situations where knowledge of the rewards is a goal by itself (as in preference elicitation) and by the task of apprenticeship learning (learning policies from an expert). In this paper we show how to combine prior knowledge and evidence from the expert's actions to derive a probability distribution over the space of reward functions. We present efficient algorithms that find solutions for the reward learning and apprenticeship learning tasks that generalize well over these distributions. Experimental results show strong improvement for our methods over previous heuristic-based approaches.}, author = {Ramachandran, Deepak and Amir, Eyal}, editor = {Shawe-Taylor, J and Zemel, R S and Bartlett, P and Pereira, F C N and Weinberger, K Q}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Ramachandran, Amir - 2007 - Bayesian Inverse Reinforcement Learning.pdf:pdf}, journal = {Learning}, keywords = {markov decision processes,reinforcement learning}, pages = {2586--2591}, publisher = {Morgan Kaufmann Publishers Inc.}, title = {{Bayesian Inverse Reinforcement Learning}}, url = {http://www.aaai.org/Papers/IJCAI/2007/IJCAI07-416.pdf}, volume = {51}, year = {2007} } @inproceedings{calandriello2017distributed, abstract = {Most kernel-based methods, such as kernel regression, kernel PCA, ICA, or k-means clustering, do not scale to large datasets, because constructing and storing the kernel matrix Kn requires at least O(n2) time and space for n samples. 
Recent works (Alaoui 2014, Musco 2016) show that sampling points with replacement according to their ridge leverage scores (RLS) generates small dictionaries of relevant points with strong spectral approximation guarantees for Kn. The drawback of RLS-based methods is that computing exact RLS requires constructing and storing the whole kernel matrix. In this paper, we introduce SQUEAK, a new algorithm for kernel approximation based on RLS sampling that sequentially processes the dataset, storing a dictionary which creates accurate kernel matrix approximations with a number of points that only depends on the effective dimension deffgamma of the dataset. Moreover since all the RLS estimations are efficiently performed using only the small dictionary, SQUEAK never constructs the whole matrix kermatrixn, runs in linear time widetildeO(ndeffgamma3) w.r.t.n, and requires only a single pass over the dataset. We also propose a parallel and distributed version of SQUEAK achieving similar accuracy in as little as widetildeO(log(n)deffgamma3) time.}, author = {Calandriello, Daniele and Lazaric, Alessandro and Valko, Michal}, booktitle = {International Conference on Artificial Intelligence and Statistics}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Calandriello, Lazaric, Valko - 2017 - Distributed adaptive sampling for kernel matrix approximation.pdf:pdf}, title = {{Distributed adaptive sampling for kernel matrix approximation}}, url = {http://researchers.lille.inria.fr/{~}valko/hp/publications/calandriello2017distributed.pdf}, year = {2017} } @inproceedings{doya95novel, author = {Doya, K and Sejnowski, T J}, pages = {101--108}, title = {{A Novel Reinforcement Model of Birdsong Vocalization Learning}}, url = {citeseer.ist.psu.edu/doya95novel.html} } @article{tsitsiklis1997analysis, author = {Tsitsiklis, John and {Van Roy}, Benjamin}, journal = {IEEE Transactions on Automatic Control}, number = {5}, pages = {674--690}, title = {{An Analysis of 
Temporal-Difference Learning with Function Approximation}}, volume = {42}, year = {1997} } @article{dreiseitl2002logistic, address = {San Diego, USA}, author = {Dreiseitl, Stephan and Ohno-Machado, Lucila}, doi = {10.1016/S1532-0464(03)00034-0}, issn = {1532-0464}, journal = {Journal of Biomedical Informatics}, number = {5/6}, pages = {352--359}, publisher = {Elsevier Science}, title = {{Logistic regression and artificial neural network classification models: a methodology review}}, volume = {35}, year = {2002} } @article{Batson:2013:SSG:2492007.2492029, author = {Batson, Joshua and Spielman, Daniel A and Srivastava, Nikhil and Teng, Shang-Hua}, journal = {Commun. ACM}, number = {8}, pages = {87--94}, title = {{Spectral Sparsification of Graphs: Theory and Algorithms}}, volume = {56}, year = {2013} } @article{boros2002pseudo, author = {Boros, E and Hammer, P L}, journal = {Discrete Applied Mathematics}, number = {1--3}, pages = {155--225}, publisher = {Elsevier}, title = {{Pseudo-{B}oolean optimization}}, volume = {123}, year = {2002} } @article{smith1956various, author = {Smith, Wayne E}, journal = {Naval Research Logistics}, number = {1--2}, pages = {59--66}, publisher = {Wiley Online Library}, title = {{Various optimizers for single-stage production}}, url = {https://pdfs.semanticscholar.org/f02e/1823cc1f80b129125ceb94af5f62f862b791.pdf}, volume = {3}, year = {1956} } @article{Zhao2009, author = {Zhao, P and Rocha, G and Yu, B}, journal = {Annals of Statistics}, number = {6A}, pages = {3468--3497}, title = {{The composite absolute penalties family for grouped and hierarchical variable selection}}, volume = {37}, year = {2009} } @inproceedings{Kathuria2016, abstract = {Gaussian Process bandit optimization has emerged as a powerful tool for optimizing noisy black box functions.
One example in machine learning is hyper-parameter optimization where each evaluation of the target function requires training a model which may involve days or even weeks of computation. Most methods for this so-called "Bayesian optimization" only allow sequential exploration of the parameter space. However, it is often desirable to propose batches or sets of parameter values to explore simultaneously, especially when there are large parallel processing facilities at our disposal. Batch methods require modeling the interaction between the different evaluations in the batch, which can be expensive in complex scenarios. In this paper, we propose a new approach for parallelizing Bayesian optimization by modeling the diversity of a batch via Determinantal point processes (DPPs) whose kernels are learned automatically. This allows us to generalize a previous result as well as prove better regret bounds based on DPP sampling. Our experiments on a variety of synthetic and real-world robotics and hyper-parameter optimization tasks indicate that our DPP-based methods, especially those based on DPP sampling, outperform state-of-the-art methods.}, archivePrefix = {arXiv}, arxivId = {1611.04088}, author = {Kathuria, Tarun and Deshpande, Amit and Kohli, Pushmeet}, booktitle = {Neural Information Processing Systems}, eprint = {1611.04088}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Kathuria, Deshpande, Kohli - 2016 - Batched Gaussian Process Bandit Optimization via Determinantal Point Processes.pdf:pdf}, month = {nov}, title = {{Batched Gaussian Process Bandit Optimization via Determinantal Point Processes}}, url = {http://arxiv.org/abs/1611.04088}, year = {2016} } @inproceedings{Jiang-2004-Mislabeled, author = {Jiang, Y and Zhou, Z.-H.}, booktitle = {Lecture Notes in Computer Science 3173}, pages = {356--361}, title = {{Editing Training Data for kNN Classifiers with Neural Network Ensemble.}}, year = {2004} } 
@inproceedings{hendrickson1995multilevel, author = {Hendrickson, B and Leland, R}, booktitle = {Proceedings of Supercomputing}, title = {{A multilevel algorithm for partitioning graphs}}, year = {1995} } @book{kearfott1996rigorous, author = {Kearfott, R Baker}, isbn = {9780792342380}, publisher = {Springer}, series = {Nonconvex Optimization and Its Applications}, title = {{Rigorous Global Search: Continuous Problems}}, url = {http://books.google.fr/books?id=GBVnnsN5yCYC}, year = {1996} } @article{post2008temporal, abstract = {Large-scale clinical databases provide a detailed perspective on patient phenotype in disease and the characteristics of health care processes. Important information is often contained in the relationships between the values and timestamps of sequences of clinical data. The analysis of clinical time sequence data across entire patient populations may reveal data patterns that enable a more precise understanding of disease presentation, progression, and response to therapy, and thus could be of great value for clinical and translational research. Recent work suggests that the combination of temporal data mining methods with techniques from artificial intelligence research on knowledge-based temporal abstraction may enable the mining of clinically relevant temporal features from these previously problematic general clinical data.}, author = {Post, Andrew R and Harrison, James H}, doi = {10.1016/j.cll.2007.10.005}, institution = {Division of Clinical Informatics, Department of Public Health Sciences, University of Virginia, Suite 3181 West Complex, 1335 Hospital Drive, Charlottesville, VA 22908-0717, USA. 
arp4m@virginia.edu}, journal = {Clinics in Laboratory Medicine}, keywords = {Algorithms; Artificial Intelligence; Databases as,Automated; Software; Time Factors,methods; Pattern Recognition}, month = mar, number = {1}, pages = {83--100, vii}, pmid = {18194720}, title = {{Temporal data mining.}}, url = {http://dx.doi.org/10.1016/j.cll.2007.10.005}, volume = {28}, year = {2008} } @book{CSV09, author = {Conn, A and Scheinberg, K and Vicente, L}, publisher = {Society for Industrial and Applied Mathematics (SIAM)}, title = {{Introduction to Derivative-Free Optimization}}, year = {2009} } @inproceedings{bartlettadaptive, author = {Bartlett, Peter L and Hazan, Elad and Rakhlin, Alexander}, booktitle = {Advances in Neural Information Processing Systems 20}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {65--72}, title = {{Adaptive Online Gradient Descent.}}, year = {2007} } @phdthesis{neu13thesis, author = {Neu, Gergely}, school = {Budapest University of Technology and Economics}, title = {{Online learning in non-stochastic {M}arkov decision processes}}, year = {2013} } @article{Zhang2015, abstract = {In this paper, we study a special bandit setting of online stochastic linear optimization, where only one-bit of information is revealed to the learner at each round. This problem has found many applications including online advertisement and online recommendation. We assume the binary feedback is a random variable generated from the logit model, and aim to minimize the regret defined by the unknown linear function. Although the existing method for generalized linear bandit can be applied to our problem, the high computational cost makes it impractical for real-world problems. To address this challenge, we develop an efficient online learning algorithm by exploiting particular structures of the observation model. Specifically, we adopt online Newton step to estimate the unknown parameter and derive a tight confidence region based on the exponential concavity of the logistic loss.
Our analysis shows that the proposed algorithm achieves a regret bound of {\$}O(d\backslashsqrt{\{}T{\}}){\$}, which matches the optimal result of stochastic linear bandits.}, archivePrefix = {arXiv}, arxivId = {1509.07728}, author = {Zhang, Lijun and Yang, Tianbao and Jin, Rong and Zhou, Zhi-Hua}, eprint = {1509.07728}, title = {{Online Stochastic Linear Optimization under One-bit Feedback}}, year = {2015} } @inproceedings{richemond2020byol, abstract = {Bootstrap Your Own Latent (BYOL) is a self-supervised learning approach for image representation. From an augmented view of an image, BYOL trains an online network to predict a target network representation of a different augmented view of the same image. Unlike contrastive methods, BYOL does not explicitly use a repulsion term built from negative pairs in its training objective. Yet, it avoids collapse to a trivial, constant representation. Thus, it has recently been hypothesized that batch normalization (BN) is critical to prevent collapse in BYOL. Indeed, BN flows gradients across batch elements, and could leak information about negative views in the batch, which could act as an implicit negative (contrastive) term. However, we experimentally show that replacing BN with a batch-independent normalization scheme (namely, a combination of group normalization and weight standardization) achieves performance comparable to vanilla BYOL ({\$}73.9\backslash{\%}{\$} vs. {\$}74.3\backslash{\%}{\$} top-1 accuracy under the linear evaluation protocol on ImageNet with ResNet-{\$}50{\$}). Our finding disproves the hypothesis that the use of batch statistics is a crucial ingredient for BYOL to learn useful representations.}, archivePrefix = {arXiv}, arxivId = {2010.10241}, author = {Richemond, Pierre H. 
and Grill, Jean-Bastien and Altch{\'{e}}, Florent and Tallec, Corentin and Strub, Florian and Brock, Andrew and Smith, Samuel and De, Soham and Pascanu, Razvan and Piot, Bilal and Valko, Michal}, booktitle = {NeurIPS 2020 Workshop on Self-Supervised Learning: Theory and Practice}, eprint = {2010.10241}, title = {{BYOL works even without batch statistics}}, url = {http://arxiv.org/abs/2010.10241}, year = {2020} } @article{donoho, author = {Donoho, D L and Johnstone, I M}, journal = {Journal of the American Statistical Association}, number = {432}, pages = {1200--1224}, publisher = {American Statistical Association}, title = {{Adapting to Unknown Smoothness Via Wavelet Shrinkage.}}, volume = {90}, year = {1995} } @inproceedings{kocak2016online, abstract = {We propose a new partial-observability model for online learning problems where the learner, besides its own loss, also observes some noisy feedback about the other actions, depending on the underlying structure of the problem. We represent this structure by a weighted directed graph, where the edge weights are related to the quality of the feedback shared by the connected nodes. Our main contribution is an efficient algorithm that guarantees a regret of O(sqrt(alpha{\^{}}* T) after T rounds, where alpha{\^{}}* is a novel graph property that we call the effective independence number. Our algorithm is completely parameter-free and does not require knowledge (or even estimation) of alpha{\^{}}*. For the special case of binary edge weights, our setting reduces to the partial-observability models of Mannor {\&} Shamir (2011) and Alon et al. 
(2013) and our algorithm recovers the near-optimal regret bounds.}, author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Neu, Gergely and Valko, Michal}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Online learning with noisy side observations}}, url = {http://proceedings.mlr.press/v51/kocak16-supp.pdf}, year = {2016} } @article{BMSS11, author = {Bubeck, S and Munos, R and Stoltz, G and Szepesv{\'{a}}ri, Csaba}, journal = {Journal of Machine Learning Research}, pages = {1587--1627}, title = {{{$\mathcal{X}$}-Armed Bandits}}, volume = {12}, year = {2011} } @inproceedings{zimmert2019optimal, abstract = {We derive an algorithm that achieves the optimal (up to constants) pseudo-regret in both adversarial and stochastic multi-armed bandits without prior knowledge of the regime and time horizon. The algorithm is based on online mirror descent with Tsallis entropy regularizer. We provide a complete characterization of such algorithms and show that Tsallis entropy with power {\$}\alpha= 1/2{\$} achieves the goal. In addition, the proposed algorithm enjoys improved regret guarantees in two intermediate regimes: the moderately contaminated stochastic regime defined by Seldin and Slivkins [22] and the stochastically constrained adversary studied by Wei and Luo [26]. The algorithm also obtains adversarial and stochastic optimality in the utility-based dueling bandit setting. We provide empirical evaluation of the algorithm demonstrating that it outperforms Ucb1 and Exp3 in stochastic environments.
In certain adversarial regimes the algorithm significantly outperforms Ucb1 and Thompson Sampling, which exhibit close to linear regret.}, author = {Zimmert, Julian and Seldin, Yevgeny}, booktitle = {International Conference on Artificial Intelligence and Statistics}, editor = {Chaudhuri, Kamalika and Sugiyama, Masashi}, pages = {467--475}, publisher = {PMLR}, series = {Proceedings of Machine Learning Research}, title = {{An optimal algorithm for stochastic and adversarial bandits}}, url = {http://proceedings.mlr.press/v89/zimmert19a.html}, volume = {89}, year = {2019} } @book{DeLu2001book, author = {Devroye, L and Lugosi, G}, publisher = {Springer}, title = {{Combinatorial methods in density estimation}}, year = {2001} } @inproceedings{koutis2012improved, author = {Koutis, Ioannis and Levin, Alex and Peng, Richard}, booktitle = {STACS'12 (29th Symposium on Theoretical Aspects of Computer Science)}, organization = {LIPIcs}, pages = {266--277}, title = {{Improved spectral sparsification and numerical algorithms for SDD matrices}}, volume = {14}, year = {2012} } @article{Sidney1975, abstract = {A one-machine deterministic job-shop sequencing problem is considered. Associated with each job is its processing time and linear deferral cost. In addition, the jobs are related by a general precedence relation. The objective is to order the jobs so as to minimize the sum of the deferral costs, subject to the constraint that the ordering must be consistent with the precedence relation. A decomposition algorithm is presented, and it is proved that a permutation is optimal if and only if it can be generated by this algorithm. Four special network structures are then considered, and specializations of the general algorithm are presented. 
[ABSTRACT FROM AUTHOR]}, author = {Sidney, J B}, doi = {10.1287/opre.23.2.283}, issn = {0030-364X}, journal = {Operations Research}, number = {2}, pages = {283--298}, title = {{Decomposition Algorithms for Single-Machine Sequencing with Precedence Relations and Deferral Costs}}, url = {http://or.journal.informs.org/cgi/doi/10.1287/opre.23.2.283}, volume = {23}, year = {1975} } @article{kempe2015maximizing, author = {Kempe, David and Kleinberg, Jon and Tardos, {\'{E}}va}, journal = {Theory of Computing}, number = {4}, pages = {105--147}, title = {{Maximizing the spread of influence through a social network}}, volume = {11}, year = {2015} } @inproceedings{dann2017unifying, author = {Dann, Christoph and Lattimore, Tor and Brunskill, Emma}, booktitle = {Neural Information Processing Systems}, title = {{Unifying PAC and regret: Uniform PAC bounds for episodic reinforcement learning}}, url = {https://arxiv.org/pdf/1703.07710.pdf}, year = {2017} } @inproceedings{oyallon2018compressing, abstract = {We study the first-order scattering transform as a candidate for reducing the signal processed by a convolutional neural network (CNN). We show theoretical and empirical evidence that in the case of natural images and sufficiently small translation invariance, this transform preserves most of the signal information needed for classification while substantially reducing the spatial resolution and total signal size. We demonstrate that cascading a CNN with this representation performs on par with ImageNet classification models, commonly used in downstream tasks, such as the ResNet-50. We subsequently apply our trained hybrid ImageNet model as a base model on a detection system, which has typically larger image inputs. 
On Pascal VOC and COCO detection tasks we demonstrate improvements in the inference speed and training memory consumption compared to models trained directly on the input image.}, author = {Oyallon, Edouard and Belilovsky, Eugene and Zagoruyko, Sergey and Valko, Michal}, booktitle = {European Conference on Computer Vision}, title = {{Compressing the input for CNNs with the first-order scattering transform}}, year = {2018} } @book{winograd72understanding, address = {Orlando, FL, USA}, author = {Winograd, Terry}, isbn = {0127597506}, publisher = {Academic Press, Inc.}, title = {{Understanding Natural Language}}, year = {1972} } @article{WK14, author = {Warmuth, Manfred and Koolen, Wouter}, journal = {COLT 2014 open problem}, title = {{Shifting experts on easy data}}, year = {2014} } @article{lowe1999object, author = {Lowe, David}, journal = {IEEE International Conference on Computer Vision}, pages = {1150--1157}, title = {{Object Recognition from Local Scale-Invariant Features}}, volume = {2}, year = {1999} } @inproceedings{ADX10, author = {Agarwal, Alekh and Dekel, Ofer and Xiao, Lin}, booktitle = {Proceedings of the 23rd Annual Conference on Learning Theory (COLT)}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {28--40}, title = {{Optimal Algorithms for Online Convex Optimization with Multi-Point Bandit Feedback}}, url = {http://www.cs.berkeley.edu/{~}alekh/bandits-colt.pdf}, year = {2010} } @inproceedings{boyan2001exact, author = {Boyan, Justin and Littman, Michael}, booktitle = {Advances in Neural Information Processing Systems 13}, pages = {1026--1032}, title = {{Exact Solutions to Time-Dependent {\{}MDPs{\}}}}, year = {2001} } @article{zhang2020task-agnostic, author = {Zhang, Xuezhou and Ma, Yuzhe and Singla, Adish}, journal = {arXiv preprint: arXiv:2006.09497}, title = {{Task-agnostic exploration in reinforcement learning}}, url = {https://arxiv.org/pdf/2006.09497.pdf}, year = {2020} } @article{globerson2007visualizing, annote = {comps{\_}distance}, 
author = {Globerson, A and Roweis, S}, journal = {The 11th International Workshop on Artificial Intelligence and Statistics Puerto-Rico}, title = {{Visualizing pairwise similarity via semidefinite programming}}, url = {http://people.csail.mit.edu/gamir/pubs/psde.pdf}, year = {2007} } @inproceedings{shelton2001policy, author = {Shelton, Christian}, booktitle = {Proceedings of the 17th Conference on Uncertainty in Artificial Intelligence}, pages = {496--503}, title = {{Policy Improvement for {\{}POMDPs{\}} Using Normalized Importance Sampling}}, year = {2001} } @article{subramanya2014graph, author = {Subramanya, Amarnag and Talukdar, Partha Pratim}, journal = {Synthesis Lectures on Artificial Intelligence and Machine Learning}, number = {4}, pages = {1--125}, publisher = {Morgan {\&} Claypool Publishers}, title = {{Graph-Based Semi-Supervised Learning}}, volume = {8}, year = {2014} } @inproceedings{cooper1988method, author = {Cooper, Gregory}, booktitle = {Proceedings of the Workshop on Uncertainty in Artificial Intelligence}, pages = {55--63}, title = {{A Method for Using Belief Networks as Influence Diagrams}}, year = {1988} } @inproceedings{rubin2005auctioning, address = {New York, NY, USA}, author = {Rubin, Shai and Christodorescu, Mihai and Ganapathy, Vinod and Giffin, Jonathon T and Kruger, Louis and Wang, Hao and Kidd, Nicholas}, booktitle = {Proceedings of the 12th ACM conference on Computer and communications security}, isbn = {1-59593-226-7}, keywords = {anomaly detection,auction,reputation system,shilling}, pages = {270--279}, publisher = {ACM}, series = {CCS '05}, title = {{An auctioning reputation system based on anomaly detection}}, year = {2005} } @inproceedings{aktolga2010detecting, author = {Aktolga, Elif and Ros, Irene and Assogba, Yannick}, booktitle = {Proceedings of SIGIR}, title = {{Detecting Outlier Sections in US Congressional Legislation}}, type = {IR}, year = {2010} } @article{Seldin2011, abstract = {We derive an instantaneous (per-round) 
data-dependent regret bound for stochastic multiarmed bandits with side information (also known as contextual bandits). The scaling of our regret bound with the number of states (contexts) N goes as sqrtN Irhot(S;A), where Irhot(S;A) is the mutual information between states and actions (the side information) used by the algorithm at round t. If the algorithm uses all the side information, the regret bound scales as sqrtN ln K, where K is the number of actions (arms). However, if the side information Irhot(S;A) is not fully used, the regret bound is significantly tighter. In the extreme case, when Irhot(S;A) = 0, the dependence on the number of states reduces from linear to logarithmic. Our analysis allows to provide the algorithm large amount of side information, let the algorithm to decide which side information is relevant for the task, and penalize the algorithm only for the side information that it is using de facto. We also present an algorithm for multiarmed bandits with side information with O(K) computational complexity per game round.}, author = {Seldin, Yevgeny and Auer, Peter and Laviolette, Fran{\c{c}}ois and Shawe-Taylor, John and Ortner, Ronald}, editor = {Shawe-Taylor, J and Zemel, R S and Bartlett, P and Pereira, F C N and Weinberger, K Q}, journal = {Computer}, keywords = {complacs,computational,information theoretic learning with statistics,learning,statistics {\&} optimisation,theory {\&} algorithms}, pages = {1--9}, title = {{PAC-Bayesian Analysis of Contextual Bandits}}, url = {http://eprints.pascal-network.org/archive/00008826/}, year = {2011} } @inproceedings{bartlett2019simple, abstract = {We study the problem of optimizing a function under a budgeted number of evaluations. We only assume that the function is locally smooth around one of its global optima. The difficulty of optimization is measured in terms of 1) the amount of noise b of the function evaluation and 2) the local smoothness, d, of the function. 
A smaller d results in smaller optimization error. We come with a new, simple, and parameter-free approach. First, for all values of b and d, this approach recovers at least the state-of-the-art regret guarantees. Second, our approach additionally obtains these results while being agnostic to the values of both b and d. This leads to the first algorithm that naturally adapts to an unknown range of noise b and leads to significant improvements in a moderate and low-noise regime. Third, our approach also obtains a remarkable improvement over the state-of-the-art SOO algorithm when the noise is very low which includes the case of optimization under deterministic feedback (b=0). There, under our minimal local smoothness assumption, this improvement is of exponential magnitude and holds for a class of functions that covers the vast majority of functions that practitioners optimize (d=0). We show that our algorithmic improvement is borne out in experiments as we empirically show faster convergence on common benchmarks.}, author = {Bartlett, Peter L and Gabillon, Victor and Valko, Michal}, booktitle = {Algorithmic Learning Theory}, title = {{A simple parameter-free and adaptive approach to optimization under a minimal local smoothness assumption}}, url = {http://researchers.lille.inria.fr/{~}valko/hp/serve.php?what=publications/bartlett2019simple.pdf}, year = {2019} } @misc{kaggle2013, title = {{Kaggle}}, url = {https://www.kaggle.com/}, year = {2013} } @inproceedings{Audibert2007, address = {Berlin, Heidelberg}, author = {Audibert, Jean-Yves and Munos, R{\'{e}}mi and Szepesv{\'{a}}ri, Csaba}, doi = {10.1007/978-3-540-75225-7}, editor = {Hutter, Marcus and Servedio, Rocco A. 
and Takimoto, Eiji}, isbn = {978-3-540-75224-0}, issn = {0302-9743}, month = {oct}, pages = {150--165}, publisher = {Springer Berlin Heidelberg}, series = {Lecture Notes in Computer Science}, title = {{Algorithmic Learning Theory}}, url = {http://dl.acm.org/citation.cfm?id=1422422.1422442}, volume = {4754}, year = {2007} } @article{kikuta1994initial, author = {Kikuta, Kensaku and Ruckle, William H}, doi = {10.1002/1520-6750(199410)41:6<821::AID-NAV3220410610>3.0.CO;2-Y}, journal = {Naval Research Logistics}, number = {6}, pages = {821--831}, publisher = {Wiley Online Library}, title = {{Initial point search on weighted trees}}, volume = {41}, year = {1994} } @inproceedings{manegueu2020stochastic, abstract = {Significant work has been recently dedicated to the stochastic delayed bandit setting because of its relevance in applications. The applicability of existing algorithms is however restricted by the fact that strong assumptions are often made on the delay distributions, such as full observability, restrictive shape constraints, or uniformity over arms. In this work, we weaken them significantly and only assume that there is a bound on the tail of the delay. In particular, we cover the important case where the delay distributions vary across arms, and the case where the delays are heavy-tailed. Addressing these difficulties, we propose a simple but efficient UCB-based algorithm called the PATIENTBANDITS. 
We provide both problem-dependent and problem-independent bounds on the regret as well as performance lower bounds.}, author = {Manegueu, Anne Gael and Vernade, Claire and Carpentier, Alexandra and Valko, Michal}, booktitle = {International Conference on Machine Learning}, title = {{Stochastic bandits with arm-dependent delays}}, year = {2020} } @techreport{kapoor1996assessment, author = {Kapoor, W N}, institution = {Agency for Health Policy and Research (AHCPR)}, title = {{Assessment of the Variantion and Outcomes of Pneumonia: Pneumonia Patient Outcomes Research Team ({\{}PORT{\}}) Final Report}}, year = {1996} } @inproceedings{singh2008unlabeled, author = {Singh, Aarti and Nowak, Robert D and Zhu, Xiaojin}, booktitle = {Advances in Neural Information Processing Systems 21}, title = {{Unlabeled data: Now it helps, now it doesn't}}, year = {2008} } @inproceedings{grudic2000localizing, author = {Grudic, Gregory and Ungar, Lyle}, booktitle = {Proceedings of 17th International Conference on Machine Learning}, pages = {343--350}, title = {{Localizing Policy Gradient Estimates to Action Transitions}}, year = {2000} } @book{rockafellar_subgrad, author = {Rockafellar, R T}, publisher = {Heldermann Verlag, Berlin, D}, title = {{The theory of subgradients and its applications to problems of optimization: {\{}C{\}}onvex and nonconvex functions}}, year = {1981} } @inproceedings{Flaounas2011, author = {Flaounas, I and Ali, O and Turchi, M and Snowsill, T and Nicart, F and {De Bie}, T and Cristianini, N}, booktitle = {Proceedings of the 2011 ACM SIGMOD international conference on Management of data}, pages = {1275--1278}, publisher = {ACM}, title = {{NOAM: News Outlets Analysis and Monitoring System}}, year = {2011} } @article{CBGe08, author = {Cesa-Bianchi, N and Gentile, C}, doi = {10.1109/TIT.2007.911292}, issn = {0018-9448}, journal = {IEEE Transactions on Information Theory}, keywords = {arbitrary learning algorithm;ensemble;incremental}, number = {1}, pages = {386--390}, 
title = {{Improved Risk Tail Bounds for On-Line Algorithms}}, volume = {54}, year = {2008} } @inproceedings{schulman2017equivalence, author = {Schulman, John and Chen, Xi and Abbeel, Pieter}, booktitle = {arXiv:1704.06440}, title = {{Equivalence between policy gradients and soft Q-learning}}, url = {https://ui.adsabs.harvard.edu/abs/2017arXiv170406440S}, year = {2017} } @article{aha1991instance-based, address = {Hingham, MA, USA}, author = {Aha, David W and Kibler, Dennis and Albert, Marc K}, doi = {http://dx.doi.org/10.1023/A:1022689900470}, issn = {0885-6125}, journal = {Mach. Learn.}, number = {1}, pages = {37--66}, publisher = {Kluwer Academic Publishers}, title = {{Instance-Based Learning Algorithms}}, volume = {6}, year = {1991} } @techreport{gelly2006modifications, author = {Gelly, Sylvain and Yizao, Wang and Munos, R{\'{e}}mi and Teytaud, Olivier}, institution = {Inria}, title = {{Modification of UCT with patterns in Monte-Carlo Go}}, url = {https://hal.inria.fr/inria-00117266}, year = {2006} } @article{bar-hillel2005learning, annote = {comps{\_}distance}, author = {Bar-Hillel, Aharon and Hertz, Tomer and Shental, Noam and Weinshall, Daphna}, journal = {Journal of Machine Learning Research}, pages = {937--965}, title = {{Learning a Mahalanobis Metric from Equivalence Constraints}}, url = {http://www.jmlr.org/papers/v6/bar-hillel05a.html}, volume = {6}, year = {2005} } @inproceedings{Vov90, author = {Vovk, V}, booktitle = {Proceedings of the third annual workshop on Computational learning theory (COLT)}, pages = {371--386}, title = {{Aggregating strategies}}, year = {1990} } @inproceedings{ratliff2007online, author = {Ratliff, Nathan and Bagnell, Andrew and Zinkevich, Martin}, booktitle = {Proceedings of the 11th International Conference on Artificial Intelligence and Statistics}, title = {{({\{}Online{\}}) Subgradient Methods for Structured Prediction}}, year = {2007} } @article{Zhaoyu, author = {Zhao, P and Yu, B}, journal = {Journal of Machine Learning 
Research}, pages = {2541--2563}, title = {{On Model Selection Consistency of {\{}L{\}}asso.}}, volume = {7}, year = {2006} } @inproceedings{Schmidt2010, author = {Schmidt, M and Murphy, K}, booktitle = {Proceedings of the International Conference on Artificial Intelligence and Statistics (AISTATS)}, title = {{Convex Structure Learning in Log-Linear Models: Beyond Pairwise Potentials}}, year = {2010} } @inproceedings{marecki2007fast, author = {Marecki, Janusz and Koenig, Sven and Tambe, Milind}, booktitle = {Proceedings of the 20th International Joint Conference on Artificial Intelligence}, title = {{A Fast Analytical Algorithm for Solving {\{}Markov{\}} Decision Processes with Continuous Resources}}, year = {2007} } @article{koltchinskii2011nuclear, author = {Koltchinskii, Vladimir and Lounici, Karim and Tsybakov, Alexandre B.}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Koltchinskii, Lounici, Tsybakov - 2011 - Nuclear-norm penalization and optimal rates for noisy low-rank matrix completion.pdf:pdf}, journal = {The Annals of Statistics}, keywords = {Lasso,Matrix completion,low-rank matrix estimation,noncommutative Bernstein inequality,optimal rate of convergence,recovery of the rank,statistical learning}, number = {5}, pages = {2302--2329}, publisher = {Institute of Mathematical Statistics}, title = {{Nuclear-norm penalization and optimal rates for noisy low-rank matrix completion}}, volume = {39}, year = {2011} } @inproceedings{Varoquaux2010a, author = {Varoquaux, G and Jenatton, R and Gramfort, A and Obozinski, G and Thirion, B and Bach, F}, booktitle = {NIPS Workshop on Practical Applications of Sparse Modeling: Open Issues and New Directions}, title = {{Sparse Structured Dictionary Learning for Brain Resting-State Activity Modeling}}, year = {2010} } @inproceedings{Huang2009, author = {Huang, J and Zhang, T and Metaxas, D}, booktitle = {Proceedings of the International Conference on Machine Learning (ICML)}, title = {{Learning 
with structured sparsity}}, year = {2009} } @inproceedings{grill2020monte-carlo, abstract = {The combination of Monte-Carlo tree search (MCTS) with deep reinforcement learning has led to groundbreaking results in artificial intelligence. However, AlphaZero, the current state-of-the-art MCTS algorithm still relies on handcrafted heuristics that are only partially understood. In this paper, we show that AlphaZero's search heuristic, along with other common ones, can be interpreted as an approximation to the solution of a specific regularized policy optimization problem. With this insight, we propose a variant of AlphaZero which uses the exact solution to this policy optimization problem, and show experimentally that it reliably outperforms the original algorithm in multiple domains.}, author = {Grill, Jean-Bastien and Altch{\'{e}}, Florent and Tang, Yunhao and Hubert, Thomas and Valko, Michal and Antonoglou, Ioannis and Munos, R{\'{e}}mi}, booktitle = {International Conference on Machine Learning}, title = {{Monte-Carlo tree search as regularized policy optimization}}, year = {2020} } @article{nagano2007strongly, author = {Nagano, K}, journal = {Discrete Optimization}, number = {3-4}, pages = {349--359}, publisher = {Elsevier}, title = {{A strongly polynomial algorithm for line search in submodular polyhedra}}, volume = {4}, year = {2007} } @book{neuts1981matrix-geometric, address = {Baltimore, MD}, author = {Neuts, Marcel}, publisher = {Johns Hopkins University Press}, title = {{Matrix-Geometric Solutions in Stochastic Models: An Algorithmic Approach}}, year = {1981} } @incollection{altun2006maximum, address = {Cambridge, MA}, author = {Altun, Yasemin and McAllester, David and Belkin, Mikhail}, booktitle = {Advances in Neural Information Processing Systems 18}, editor = {Weiss, Y and Sch{\"{o}}lkopf, B and Platt, J}, pages = {33--40}, publisher = {MIT Press}, title = {{Maximum Margin Semi-Supervised Learning for Structured Variables}}, year = {2006} } 
@article{gyorfi2007sequential, author = {Gy{\"{o}}rfi, L{\'{a}}szl{\'{o}} and Ottucs{\'{a}}k, Gy{\"{o}}rgy}, journal = {IEEE Transactions on Information Theory}, number = {5}, pages = {1866--1872}, title = {{Sequential prediction of unbounded stationary time series}}, volume = {53}, year = {2007} } @article{TW03, author = {Takimoto, E and Warmuth, M}, journal = {Journal of Machine Learning Research}, pages = {773--818}, title = {{Paths kernels and multiplicative updates}}, volume = {4}, year = {2003} } @inproceedings{levine2012continuous, author = {Levine, Sergey and Koltun, Vladlen}, booktitle = {ICML '12: Proceedings of the 29th International Conference on Machine Learning}, title = {{Continuous Inverse Optimal Control with Locally Optimal Examples}}, url = {http://graphics.stanford.edu/projects/cioc/cioc.pdf}, year = {2012} } @phdthesis{kakade2013on, author = {Kakade, Sham}, school = {University College London}, title = {{On the sample complexity of reinforcement learning}}, url = {https://homes.cs.washington.edu/{~}sham/papers/thesis/sham{\_}thesis.pdf}, year = {2003} } @inproceedings{guestrin2002multiagent, author = {Guestrin, Carlos and Koller, Daphne and Parr, Ronald}, booktitle = {Advances in Neural Information Processing Systems 14}, pages = {1523--1530}, title = {{Multiagent Planning with Factored {\{}MDPs{\}}}}, year = {2002} } @inproceedings{perrault2020covariance-adapting, abstract = {We investigate stochastic combinatorial semi-bandits, where the entire joint distribution of rewards impacts the complexity of the problem instance (unlike in the standard bandits). Typical distributions considered depend on specific parameter values, whose prior knowledge is required in theory but quite difficult to estimate in practice; an example is the commonly assumed sub-Gaussian family. We alleviate this issue by instead considering a new general family of sub-exponential distributions, which contains bounded and Gaussian ones. 
We prove a new lower bound on the expected regret on this family, that is parameterized by the unknown covariance matrix of rewards, a tighter quantity than the sub-Gaussian matrix. We then construct an algorithm that uses covariance estimates, and provide a tight asymptotic analysis of the regret. Finally, we apply and extend our results to the family of sparse rewards, which has applications in many recommender systems.}, author = {Perrault, Pierre and Perchet, Vianney and Valko, Michal}, booktitle = {Conference on Learning Theory}, title = {{Covariance-adapting algorithm for semi-bandits with application to sparse rewards}}, year = {2020} } @inproceedings{kearns1998finite-sample, author = {Kearns, Michael J. and Singh, Satinder P.}, booktitle = {Neural Information Processing Systems}, title = {{Finite-sample convergence rates for Q-learning and indirect algorithms}}, url = {http://papers.neurips.cc/paper/1531-finite-sample-convergence-rates-for-q-learning-and-indirect-algorithms.pdf}, year = {1998} } @inproceedings{kveton2014matroid, author = {Kveton, Branislav and Wen, Zheng and Ashkan, Azin and Eydgahi, Hoda and Eriksson, Brian}, booktitle = {Uncertainty in Artificial Intelligence}, title = {{Matroid bandits: Fast combinatorial optimization with learning}}, year = {2014} } @misc{seeger-submod, annote = {$\backslash$url{\{}http://lapmal.epfl.ch/papers/subm{\_}lindesign.pdf{\}}}, author = {Seeger, M}, title = {{On the Submodularity of Linear Experimental Design}}, year = {2009} } @article{Audibert2009, abstract = {Algorithms based on upper confidence bounds for balancing exploration and exploitation are gaining popularity since they are easy to implement, efficient and effective. This paper considers a variant of the basic algorithm for the stochastic, multi-armed bandit problem that takes into account the empirical variance of the different arms. In earlier experimental works, such algorithms were found to outperform the competing algorithms. 
We provide the first analysis of the expected regret for such algorithms. As expected, our results show that the algorithm that uses the variance estimates has a major advantage over its alternatives that do not use such estimates provided that the variances of the payoffs of the suboptimal arms are low. We also prove that the regret concentrates only at a polynomial rate. This holds for all the upper confidence bound based algorithms and for all bandit problems except those special ones where with probability one the payoff obtained by pulling the optimal arm is larger than the expected payoff for the second best arm. Hence, although upper confidence bound bandit algorithms achieve logarithmic expected regret rates, they might not be suitable for a risk-averse decision maker. We illustrate some of the results by computer simulations. {\textcopyright}2009 Elsevier B.V. All rights reserved.}, author = {Audibert, Jean Yves and Munos, R{\'{e}}mi and Szepesv{\'{a}}ri, Csaba}, doi = {10.1016/j.tcs.2009.01.016}, isbn = {0304-3975}, issn = {03043975}, journal = {Theoretical Computer Science}, keywords = {Bernstein's inequality,Exploration-exploitation tradeoff,High-probability bound,Multi-armed bandits,Risk analysis}, number = {19}, pages = {1876--1902}, title = {{Exploration-exploitation tradeoff using variance estimates in multi-armed bandits}}, volume = {410}, year = {2009} } @inproceedings{zolghadr2013online, author = {Zolghadr, Navid and Bartok, Gabor and Greiner, Russell and Gy{\"{o}}rgy, Andr{\'{a}}s and Szepesvari, Csaba}, booktitle = {Advances in Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Zolghadr et al. 
- 2013 - Online Learning with Costly Features and Labels.pdf:pdf}, pages = {1241--1249}, title = {{Online Learning with Costly Features and Labels}}, url = {http://papers.nips.cc/paper/5149-online-learning-with-costly-features-and-labels}, year = {2013} } @incollection{filippi2010parametric, author = {Filippi, Sarah and Cappe, Olivier and Garivier, Aur{\'{e}}lien and Szepesvari, Csaba}, booktitle = {Advances in Neural Information Processing Systems 23}, editor = {Lafferty, J and Williams, C K I and Shawe-Taylor, J and Zemel, R S and Culotta, A}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {586--594}, title = {{Parametric Bandits: The Generalized Linear Case}}, year = {2010} } @inproceedings{kleinbergregret, author = {Kleinberg, Robert D and Niculescu-Mizil, Alexandru and Sharma, Yogeshwer}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {425--436}, title = {{Regret Bounds for Sleeping Experts and Bandits}} } @inproceedings{drugan2013designing, author = {Drugan, Madalina and Nowe, Ann}, booktitle = {International Joint Conference on Neural Networks}, title = {{Designing multi-objective multi-armed bandits algorithms: a study}}, year = {2013} } @inproceedings{silver2010monte-carlo, abstract = {This paper introduces a Monte-Carlo algorithm for online planning in large POMDPs. The algorithm combines a Monte-Carlo update of the agent's belief state with a Monte-Carlo tree search from the current belief state. The new algorithm, POMCP, has two important properties. First, Monte-Carlo sampling is used to break the curse of dimensionality both during belief state updates and during planning. Second, only a black box simulator of the POMDP is required, rather than explicit probability distributions. These properties enable POMCP to plan effectively in significantly larger POMDPs than has previously been possible. We demonstrate its effectiveness in three large POMDPs. 
We scale up a well-known benchmark problem, rocksample, by several orders of magnitude. We also introduce two challenging new POMDPs: 10 X 10 battleship and partially observable PacMan, with approximately 10{\^{}}18 and 10{\^{}}56 states respectively. Our Monte-Carlo planning algorithm achieved a high level of performance with no prior knowledge, and was also able to exploit simple domain knowledge to achieve better results with less search. POMCP is the first general purpose planner to achieve high performance in such large and unfactored POMDPs.}, author = {Silver, David and Veness, Joel}, booktitle = {Neural Information Processing Systems}, title = {{Monte-Carlo planning in large POMDPs}}, year = {2010} } @article{song2007conditional, address = {Piscataway, NJ, USA}, annote = {Fellow-Sanjay Ranka{\textless}m:linebreak{\textgreater}{\textless}/m:linebreak{\textgreater}comps{\_}ano}, author = {Song, Xiuyao and Wu, Mingxi and Jermaine, Christopher}, doi = {10.1109/TKDE.2007.1009}, issn = {1041-4347}, journal = {IEEE Transactions on Knowledge and Data Engineering}, number = {5}, pages = {631--645}, publisher = {IEEE Educational Activities Department}, title = {{Conditional Anomaly Detection}}, volume = {19}, year = {2007} } @inproceedings{le2013fastfood, abstract = {Despite their successes, what makes kernel methods difficult to use in many large scale problems is the fact that computing the decision function is typically expensive, especially at prediction time. In this paper, we overcome this difficulty by proposing Fastfood, an approximation that accelerates such computation significantly. Key to Fastfood is the observation that Hadamard matrices when combined with diagonal Gaussian matrices exhibit properties similar to dense Gaussian random matrices. Yet unlike the latter, Hadamard and diagonal matrices are inexpensive to multiply and store. 
These two matrices can be used in lieu of Gaussian matrices in Random Kitchen Sinks (Rahimi {\&} Recht, 2007) and thereby speeding up the computation for a large range of ker- nel functions. Specifically, Fastfood requires O(n log d) time and O(n) storage to compute n non-linear basis functions in d dimensions, a significant improvement from O(nd) com- putation and storage, without sacrificing ac- curacy. We prove that the approximation is unbiased and has low variance. Extensive ex- periments show that we achieve similar accu- racy to full kernel expansions and Random Kitchen Sinks while being 100x faster and us- ing 1000x less memory. These improvements, especially in terms of memory usage, make kernel methods more practical for applica- tions that have large training sets and/or re- quire real-time prediction.}, author = {Le, Quoc and Sarl{\'{o}}s, Tam{\'{a}}s and Smola, Alex J}, booktitle = {International Conference on Machine Learning}, title = {{Fastfood --- Approximating kernel expansions in loglinear time}}, year = {2013} } @article{jegelka2011online, abstract = {Key processes during recruitment of Quercus petraea and Corylus avellana were investigated in abandoned calcareous grasslands and adjacent scrub using the following methods: (1) observation of hoarding animals during the main period of ripening of acorns and hazel nuts, (2) exposition of seeds on the soil surface and in 56 cm depth to test differences in predation and germination, and (3) mapping of seedlings in the grasslands. European jays (Garrulus glandarius) and mice were the main disperser of seeds. Jays preferred acorns, whereas the rodents were less selective, but probably more important for dispersal of nuts. The maximum dispersal distance was about 1020 m for mice and was estimated several hundred metres for jays. Mice collected hoards of several seeds in about 2 cm depth in the soil, whereas jays stored single seeds. 
Seed predation and probably hoarding by mice were highest under scrub and in unmown grassland, while jays preferred mown sites for hoarding. However, hiding of seeds in the soil reduced losses in all sites. Predation of nuts was slightly less intensive than that of acorns. Seeds of Corylus were more sensitive to desiccation than Quercus, but in both species germination was lower for seeds exposed on the soil surface and in drier sites. Quercus and Corylus were the most abundant woody species in the fallow grasslands, probably due to the effective multi-staged dispersal by jays and mice, whereas wind-dispersed and fleshy-fruited species were less common; the latter restricted to margins of adjacent scrubland. The study provides several examples for discordance in suitability of patches for seeds and seedlings due to different habitat requirements of successive developmental stages. This result emphasizes the need for studies in the multiple stages during recruitment of vertebrate-dispersed plants.}, author = {Jegelka, Stefanie and Bilmes, Jeff}, editor = {Getoor, Lise and Scheffer, Tobias}, isbn = {9781450306195}, journal = {Strategies}, number = {1}, pages = {345--352}, publisher = {ACM}, series = {ICML '11}, title = {{Online Submodular Minimization for Combinatorial Structures}}, url = {http://ssli.ee.washington.edu/{~}bilmes/mypubs/jegelka2011-online-submodular-min.extended.pdf}, volume = {125}, year = {2011} } @book{strongin2000global, author = {Strongin, Roman and Sergeyev, Yaroslav}, isbn = {9780792364900}, publisher = {Springer}, series = {Nonconvex Optimization and Its Applications}, title = {{Global Optimization with Non-Convex Constraints: Sequential and Parallel Algorithms}}, url = {http://books.google.fr/books?id=xh{\_}GF9Dor3AC}, year = {2000} } @inproceedings{goldberg2008online, author = {Goldberg, Andrew and Li, Ming and Zhu, Xiaojin}, booktitle = {Proceeding of European Conference on Machine Learning and Principles and Practice of Knowledge Discovery 
in Databases}, title = {{Online Manifold Regularization: A New Learning Setting and Empirical Study}}, year = {2008} } @inproceedings{zhu2003semi-supervised, author = {Zhu, Xiaojin and Ghahramani, Zoubin and Lafferty, John}, booktitle = {Proceedings of the 20th International Conference on Machine Learning}, pages = {912--919}, title = {{Semi-Supervised Learning Using Gaussian Fields and Harmonic Functions}}, year = {2003} } @inproceedings{ma2003online, address = {New York, NY, USA}, author = {Ma, Junshui and Perkins, Simon}, booktitle = {KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining}, doi = {http://doi.acm.org/10.1145/956750.956828}, isbn = {1-58113-737-0}, pages = {613--618}, publisher = {ACM}, title = {{Online novelty detection on temporal sequences}}, year = {2003} } @article{mannor2004sample, author = {Mannor, S and Tsitsiklis, J N}, journal = {Journal of Machine Learning Research}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {623--648}, title = {{The Sample Complexity of Exploration in the Multi-Armed Bandit Problem}}, volume = {5}, year = {2004} } @inproceedings{silander2006simple, address = {Arlington, Virginia}, author = {Silander, Tomi and Myllymaki, Petri}, booktitle = {Proceedings of the 22nd Annual Conference on Uncertainty in Artificial Intelligence (UAI-06)}, publisher = {AUAI Press}, title = {{A simple approach for finding the globally optimal Bayesian network structure}}, year = {2006} } @inproceedings{piot2013learning, abstract = {This paper provides a comparative study between Inverse Reinforcement Learning (IRL) and Apprenticeship Learning (AL). IRL and AL are two frameworks, using Markov Decision Processes (MDP), which are used for the imitation learning problem where an agent tries to learn from demonstrations of an expert. 
In the AL Framework, the agent tries to learn the expert policy whereas in the IRL Framework, the agent tries to learn a reward which can explain the behavior of the expert. This reward is then optimized to imitate the expert. One can wonder if it is worth estimating such a reward, or if estimating a Policy is sufficient. This quite natural question has not really been addressed in the literature right now. We provide partial answers, both from a theoretical and empirical point of view.}, address = {Prague (Czech Republic)}, author = {Piot, Bilal and Geist, Matthieu and Pietquin, Olivier}, booktitle = {Proceedings of the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML/PKDD 2013)}, doi = {10.1007/978-3-642-40988-2_2}, editor = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and Zelezny, Filip}, isbn = {978-3-642-40987-5}, month = sep, pages = {17--32}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {{Learning from demonstrations: Is it worth estimating a reward function?}}, url = {http://www.ecmlpkdd2013.org/wp-content/uploads/2013/07/384.pdf}, volume = {8188}, year = {2013} } @misc{sailing, annote = {http://www.sor.princeton.edu/{\~{}}rvdb/sail/sail.html}, author = {Vanderbei, R}, publisher = {University of Princeton}, title = {{Optimal sailing strategies, statistics and operations research program}}, year = {1996} } @inproceedings{Chapelle2014, abstract = {Download data set at http://labs.criteo.com/downloads/2014-conversion-logs-dataset/}, author = {Chapelle, Olivier}, doi = {10.1145/2623330.2623634}, title = {{Modeling delayed feedback in display advertising}}, year = {2014} } @article{whitney35abstract, author = {Whitney, Hassler}, journal = {American Journal of Mathematics}, number = {3}, pages = {509--533}, title = {{On the abstract properties of linear dependence}}, volume = {57}, year = {1935} } @inproceedings{jin2020reward-free, abstract = 
{Exploration is widely regarded as one of the most challenging aspects of reinforcement learning (RL), with many naive approaches succumbing to exponential sample complexity. To isolate the challenges of exploration, we propose a new "reward-free RL" framework. In the exploration phase, the agent first collects trajectories from an MDP {\$}\backslashmathcal{\{}M{\}}{\$} without a pre-specified reward function. After exploration, it is tasked with computing near-optimal policies under for {\$}\backslashmathcal{\{}M{\}}{\$} for a collection of given reward functions. This framework is particularly suitable when there are many reward functions of interest, or when the reward function is shaped by an external agent to elicit desired behavior. We give an efficient algorithm that conducts {\$}\backslashtilde{\{}\backslashmathcal{\{}O{\}}{\}}(S{\^{}}2A\backslashmathrm{\{}poly{\}}(H)/\backslashepsilon{\^{}}2){\$} episodes of exploration and returns {\$}\backslashepsilon{\$}-suboptimal policies for an arbitrary number of reward functions. We achieve this by finding exploratory policies that visit each "significant" state with probability proportional to its maximum visitation probability under any possible policy. Moreover, our planning procedure can be instantiated by any black-box approximate planner, such as value iteration or natural policy gradient. We also give a nearly-matching {\$}\backslashOmega(S{\^{}}2AH{\^{}}2/\backslashepsilon{\^{}}2){\$} lower bound, demonstrating the near-optimality of our algorithm in this setting.}, archivePrefix = {arXiv}, arxivId = {2002.02794}, author = {Jin, Chi and Krishnamurthy, Akshay and Simchowitz, Max and Yu, Tiancheng}, booktitle = {International Conference on Machine Learning}, eprint = {2002.02794}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Jin et al. 
- 2020 - Reward-Free Exploration for Reinforcement Learning.pdf:pdf}, title = {{Reward-free exploration for reinforcement learning}}, url = {http://arxiv.org/abs/2002.02794}, year = {2020} } @inproceedings{SST11, author = {Srebro, N and Sridharan, K and Tewari, A}, booktitle = {Advances in Neural Information Processing Systems (NIPS)}, title = {{On the Universality of Online Mirror Descent}}, year = {2011} } @article{jamakovic2006laplacian, abstract = {The set of all eigenvalues of a characteristic matrix of a graph, also referred to as the spectrum, is a well-known topology retrieval method. In this paper, we study the spectrum of the Laplacian matrix of an observable part of the Internet graph at the IP-level, extracted from traceroute measurements performed via RIPE NCC and PlanetLab. In order to investigate the factors influencing the Laplacian spectrum of the observed graphs, we study the following complex network models: the random graph of Erd{\H{o}}s-R{\'{e}}nyi, the small-world of Watts and Strogatz and the scale-free graph, derived from a Havel-Hakimi power-law degree sequence. Along with these complex network models, we also study the corresponding Minimum Spanning Tree (MST). Extensive simulations show that the Laplacian spectra of complex network models differ substantially from the spectra of the observed graphs. However, the Laplacian spectra of the MST in the Erd{\H{o}}s-R{\'{e}}nyi random graph with uniformly distributed link weights does bear resemblance to it. 
Furthermore, we discuss an extensive set of topological characteristics extracted from the Laplacian spectra of the observed real-world graphs as well as from complex network models.}, author = {Jamakovic, A and Van Mieghem, P}, journal = {European Conference on Complex Systems}, pages = {1--6}, title = {{The Laplacian spectrum of complex networks}}, url = {http://repository.tudelft.nl/assets/uuid:abe61d93-4e25-41ab-90d4-2a55cf2982f5/The Laplacian Spectrum of Complex Networks.pdf}, year = {2006} } @article{arora2012deterministic, author = {Arora, Raman and Dekel, Ofer and Tewari, Ambuj}, journal = {CoRR}, title = {{Deterministic {\{}MDP{\}}s with Adversarial Rewards and Bandit Feedback}}, volume = {abs/1210.4}, year = {2012} } @inproceedings{calandriello2016pack, abstract = {Most kernel-based methods, such as kernel regression, kernel PCA, ICA, or k-means clustering, do not scale to large datasets, because constructing and storing the kernel matrix Kn requires at least O(n2) time and space for n samples. Recent works (Alaoui 2014, Musco 2016) show that sampling points with replacement according to their ridge leverage scores (RLS) generates small dictionaries of relevant points with strong spectral approximation guarantees for Kn. The drawback of RLS-based methods is that computing exact RLS requires constructing and storing the whole kernel matrix. In this paper, we introduce SQUEAK, a new algorithm for kernel approximation based on RLS sampling that sequentially processes the dataset, storing a dictionary which creates accurate kernel matrix approximations with a number of points that only depends on the effective dimension deffgamma of the dataset. 
Moreover since all the RLS estimations are efficiently performed using only the small dictionary, SQUEAK never constructs the whole matrix kermatrixn, runs in linear time widetildeO(ndeffgamma3) w.r.t.n, and requires only a single pass over the dataset.}, author = {Calandriello, Daniele and Lazaric, Alessandro and Valko, Michal}, booktitle = {Adaptive and Scalable Nonparametric Methods in Machine Learning at Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Calandriello, Lazaric, Valko - 2016 - Pack only the essentials Adaptive dictionary learning for kernel ridge regression.pdf:pdf}, title = {{Pack only the essentials: Adaptive dictionary learning for kernel ridge regression}}, year = {2016} } @inproceedings{korda2016distributed, abstract = {We provide two distributed confidence ball algorithms for solving linear bandit problems in peer to peer networks with limited communication capabilities. For the first, we assume that all the peers are solving the same linear bandit problem, and prove that our algorithm achieves the optimal asymptotic regret rate of any centralised algorithm that can instantly communicate information between the peers. For the second, we assume that there are clusters of peers solving the same bandit problem within each cluster, and we prove that our algorithm discovers these clusters, while achieving the optimal asymptotic regret rate within each one. 
Through experiments on several real-world datasets, we demonstrate the performance of proposed algorithms compared to the state-of-the-art.}, author = {Korda, Nathan and Sz{\"{o}}r{\'{e}}nyi, Bal{\'{a}}zs and Li, Shuai}, booktitle = {International Conference on Machine Learning}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Korda, Sz{\"{o}}r{\'{e}}nyi, Li - 2016 - Distributed clustering of linear bandits in peer to peer networks.pdf:pdf}, month = {apr}, title = {{Distributed clustering of linear bandits in peer to peer networks}}, url = {http://proceedings.mlr.press/v48/korda16.pdf}, year = {2016} } @article{gray1998quantization, author = {Gray, Robert and Neuhoff, David}, journal = {IEEE Transactions on Information Theory}, number = {6}, pages = {2325--2383}, title = {{Quantization}}, volume = {44}, year = {1998} } @book{Spa03, author = {Spall, J}, publisher = {Wiley Interscience}, title = {{Introduction to stochastic search and optimization. Estimation, simulation, and control}}, year = {2003} } @phdthesis{kassel1995comparison, address = {Cambridge, MA, USA}, author = {Kassel, Robert Howard}, publisher = {Massachusetts Institute of Technology}, school = {MIT Spoken Language Systems Group}, title = {{A comparison of approaches to on-line handwritten character recognition}}, year = {1995} } @incollection{choi2011map, author = {Choi, Jaedeug and Kim, Kee-Eung}, booktitle = {Advances in Neural Information Processing Systems 24}, editor = {Shawe-Taylor, J and Zemel, R S and Bartlett, P and Pereira, F C N and Weinberger, K Q}, pages = {1989--1997}, title = {{MAP Inference for Bayesian Inverse Reinforcement Learning}}, year = {2011} } @inproceedings{he2007graph-based, address = {San Francisco, CA, USA}, author = {He, Jingrui and Carbonell, Jaime and Liu, Yan}, booktitle = {Proceedings of the 20th international joint conference on Artifical intelligence}, pages = {2492--2497}, publisher = {Morgan Kaufmann Publishers Inc.}, title = 
{{Graph-based semi-supervised learning as a generative model}}, url = {http://portal.acm.org/citation.cfm?id=1625275.1625677}, year = {2007} } @book{VW95, author = {van der Vaart, A and Wellner, J}, publisher = {Springer}, title = {{Weak Convergence and Empirical Processes}}, year = {1995} } @article{yedidia2005constructing, annote = {comps{\_}models}, author = {Yedidia, J S and Freeman, W T and Weiss, Y}, doi = {10.1109/TIT.2005.850085}, issn = {0018-9448}, journal = {Information Theory, IEEE Transactions on}, keywords = {GBP algorithm,Kikuchi free energy,backpropagation,belief networks,cluster variation method,factor graphs,free energy approximation,generalized belief propagation,graph theory,inference mechanisms,inference problem,junction graph method,message passing,message passing Bethe approximation,region graph method,sum-product algorithm}, month = {jul}, number = {7}, pages = {2282--2312}, title = {{Constructing free-energy approximations and generalized belief propagation algorithms}}, volume = {51}, year = {2005} } @inproceedings{gautier2017zonotope, abstract = {Determinantal point processes (DPPs) are distributions over sets of items that model diversity using kernels. Their applications in machine learning include summary extraction and recommendation systems. Yet, the cost of sampling from a DPP is prohibitive in large-scale applications, which has triggered an effort towards efficient approximate samplers. We build a novel MCMC sampler that combines ideas from combinatorial geometry, linear programming, and Monte Carlo methods to sample from DPPs with a fixed sample cardinality, also called projection DPPs. Our sampler leverages the ability of the hit-and-run MCMC kernel to efficiently move across convex bodies. Previous theoretical results yield a fast mixing time of our chain when targeting a distribution that is close to a projection DPP, but not a DPP in general. 
Our empirical results demonstrate that this extends to sampling projection DPPs, i.e., our sampler is more sample-efficient than previous approaches which in turn translates to faster convergence when dealing with costly-to-evaluate functions, such as summary extraction in our experiments.}, author = {Gautier, Guillaume and Bardenet, R{\'{e}}mi and Valko, Michal}, booktitle = {International Conference on Machine Learning}, title = {{Zonotope hit-and-run for efficient sampling from projection DPPs}}, year = {2017} } @article{baxter2001infinite-horizon, author = {Baxter, Jonathan and Bartlett, Peter and Weaver, Lex}, journal = {Journal of Artificial Intelligence Research}, pages = {319--350}, title = {{Infinite-Horizon Policy-Gradient Estimation}}, volume = {15}, year = {2001} } @article{gelly2012grand, author = {Gelly, Sylvain and Kocsis, Levente and Schoenauer, Marc and Sebag, Mich{\`{e}}le and Silver, David and Szepesv{\'{a}}ri, Csaba and Teytaud, Olivier}, journal = {Communications of the ACM}, month = {mar}, number = {3}, pages = {106--113}, publisher = {ACM}, title = {{The grand challenge of computer Go: Monte Carlo tree search and extensions}}, volume = {55}, year = {2012} } @inproceedings{daniel2012hreps, author = {Daniel, C and Neumann, G and Peters, J}, booktitle = {Proceedings of the Fifteenth International Conference on Artificial Intelligence and Statistics}, pages = {273--281}, series = {JMLR Workshop and Conference Proceedings}, title = {{Hierarchical Relative Entropy Policy Search}}, volume = {22}, year = {2012} } @article{streeter2006simple, author = {Streeter, Matthew J. 
and Smith, Stephen F.}, isbn = {3-540-46267-8}, journal = {Principles and Practice of Constraint Programming}, keywords = {dblp}, pages = {560--574}, title = {{A Simple Distribution-Free Approach to the Max k-Armed Bandit Problem.}}, url = {http://dblp.uni-trier.de/db/conf/cp/cp2006.html{\#}StreeterS06}, volume = {4204}, year = {2006} } @article{hodge2004survey, address = {Norwell, MA, USA}, annote = {comps{\_}ano}, author = {Hodge, Victoria and Austin, Jim}, doi = {http://dx.doi.org/10.1023/B:AIRE.0000045502.10941.a9}, issn = {0269-2821}, journal = {Artif. Intell. Rev.}, number = {2}, pages = {85--126}, publisher = {Kluwer Academic Publishers}, title = {{A Survey of Outlier Detection Methodologies}}, volume = {22}, year = {2004} } @article{carpentier2017adaptive, abstract = {In the present paper we study the problem of existence of honest and adaptive confidence sets for matrix completion. We consider two statistical models: the trace regression model and the Bernoulli model. In the trace regression model, we show that honest confidence sets that adapt to the unknown rank of the matrix exist even when the error variance is unknown. Contrary to this, we prove that in the Bernoulli model, honest and adaptive confidence sets exist only when the error variance is known a priori. 
In the course of our proofs we obtain bounds for the minimax rates of certain composite hypothesis testing problems arising in low rank inference.}, archivePrefix = {arXiv}, arxivId = {1608.04861}, author = {Carpentier, Alexandra and Klopp, Olga and L{\"{o}}ffler, Matthias and Nickl, Richard}, eprint = {1608.04861}, journal = {Bernoulli}, title = {{Adaptive confidence sets for matrix completion}}, year = {2017} } @book{chapelle2006semi-supervised, address = {Cambridge, MA}, editor = {Chapelle, O and Sch{\"{o}}lkopf, B and Zien, A}, publisher = {MIT Press}, title = {{Semi-Supervised Learning}}, url = {http://www.kyb.tuebingen.mpg.de/ssl-book}, year = {2006} } @inproceedings{russell1998learning, abstract = {This talk proposes a very simple 'baseline architecture' for a learning agent that can handle stochastic, partially observable environments. The architecture uses reinforcement learning together with a method for representing temporal processes as graphical models. I will discuss methods for learning the parameters and structure of such representations from sensory inputs, and for computing posterior probabilities. Some open problems remain before we can try out the complete agent; more arise when we consider scaling up. A second theme of the talk will be whether reinforcement learning can provide a good model of animal and human learning. To answer this question, we must do inverse reinforcement learning: given the observed behaviour, what reward signal, if any, is being optimized? 
This seems to be a very interesting problem for the COLT, UAI, and ML communities, and has been addressed in econometrics under the heading of structural estimation of Markov decision processes.}, author = {Russell, Stuart}, booktitle = {Proceedings of the 11th Annual Conference on Computational Learning Theory (COLT)}, doi = {10.1145/279943.279964}, isbn = {1581130570}, keywords = {Decision theory,Graphic methods,Inverse problems,Inverse reinforcement learning,Learning agents,Learning systems,Markov processes,Mathematical models,Optimization,Probability,Problem solving,Reinforcement learning}, pages = {101--103}, title = {{Learning agents for uncertain environments (extended abstract)}}, url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-0031640746{\&}partnerID=40}, year = {1998} } @book{GKKW02, author = {Gy{\"{o}}rfi, L and Kohler, M and Krzyzak, A and Walk, H}, publisher = {Springer}, title = {{A Distribution-Free Theory of Nonparametric Regression}}, year = {2001} } @inproceedings{collins04perceptron, address = {Morristown, NJ, USA}, author = {Collins, Michael and Roark, Brian}, booktitle = {ACL '04: Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics}, doi = {http://dx.doi.org/10.3115/1218955.1218970}, pages = {111--118}, publisher = {Association for Computational Linguistics}, title = {{Incremental parsing with the perceptron algorithm}}, year = {2004} } @article{schrijver2000combinatorial, author = {Schrijver, A}, journal = {Journal of Combinatorial Theory, Series B}, number = {2}, pages = {346--355}, publisher = {Elsevier}, title = {{A combinatorial algorithm minimizing submodular functions in strongly polynomial time}}, volume = {80}, year = {2000} } @book{scholkopf2001learning, author = {Sch{\"{o}}lkopf, Bernhard and Smola, Alexander J.}, publisher = {MIT Press}, title = {{Learning with kernels: Support vector machines, regularization, optimization, and beyond}}, year = {2001} } @inproceedings{ICML2011Nagano_506, 
author = {Nagano, K and Kawahara, Y and Aihara, K}, booktitle = {Proc. ICML}, title = {{Size-constrained Submodular Minimization through Minimum Norm Base}}, year = {2011} } @article{chen2009similarity, abstract = {This paper reviews and extends the field of similarity-based classification, presenting new analyses, algorithms, data sets, and a comprehensive set of experimental results for a rich collection of classification problems. Specifically, the generalizability of using similarities as features is analyzed, design goals and methods for weighting nearest-neighbors for similarity-based learning are proposed, and different methods for consistently converting similarities into kernels are compared. Experiments on eight real data sets compare eight approaches and their variants to similarity-based learning.}, author = {Chen, Yihua and Garcia, Eric K and Gupta, Maya R and Rahimi, Ali and Cazzanti, Luca}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Chen et al. - 2009 - Similarity-based Classification Concepts and Algorithms.pdf:pdf}, issn = {15324435}, journal = {Journal of Machine Learning Research}, number = {206}, pages = {747--776}, publisher = {JMLR. 
org}, title = {{Similarity-based Classification: Concepts and Algorithms}}, url = {http://jmlr.csail.mit.edu/papers/volume10/chen09a/chen09a.pdf}, volume = {10}, year = {2009} } @article{cunningham1985minimum, author = {Cunningham, W H}, journal = {Networks}, number = {2}, pages = {205--215}, publisher = {Wiley Online Library}, title = {{Minimum cuts, modular functions, and matroid polyhedra}}, volume = {15}, year = {1985} } @incollection{wahba1999support, address = {Cambridge, MA}, author = {Wahba, Grace}, booktitle = {Advances in Kernel Methods: Support Vector Learning}, pages = {69--88}, publisher = {MIT Press}, title = {{Support Vector Machines, Reproducing Kernel Hilbert Spaces, and Randomized GACV}}, year = {1999} } @article{blei2005variational, annote = {c{\_}omps{\_}models}, author = {Blei, David M and Jordan, Michael I}, journal = {Bayesian Analysis}, pages = {2006}, title = {{Variational inference for Dirichlet process mixtures}}, url = {http://stat-www.berkeley.edu/tech-reports/674.pdf}, volume = {1}, year = {2005} } @article{amari, author = {Amari, S}, journal = {Neural Computation}, number = {2}, pages = {251--276}, title = {{Natural Gradient Works Efficiently in Learning}}, volume = {10}, year = {1998} } @inproceedings{shang2019general, abstract = {Hierarchical bandits are an approach for global optimization of extremely irregular functions. This paper provides new elements regarding POO, an adaptive meta-algorithm that does not require the knowledge of local smoothness of the target function. We first highlight the fact that the subroutine algorithm used in POO should have a small regret under the assumption of local smoothness with respect to the chosen partitioning, which is unknown if it is satisfied by the standard subroutine HOO. In this work, we establish such regret guarantee for HCT, which is another hierarchical optimistic optimization algorithm that needs to know the smoothness. This confirms the validity of POO. 
We show that POO can be used with HCT as a subroutine with a regret upper bound that matches the one of best-known algorithms using the knowledge of smoothness up to a √ log n factor. On top of that, we propose a general wrapper, called GPO, that can cope with algorithms that only have simple regret guarantees. Finally, we complement our findings with experiments on difficult functions.}, author = {Shang, Xuedong and Kaufmann, Emilie and Valko, Michal}, booktitle = {Algorithmic Learning Theory}, title = {{General parallel optimization without metric}}, url = {http://researchers.lille.inria.fr/{~}valko/hp/publications/shang2019general.pdf}, year = {2019} } @inproceedings{jin2018is, author = {Jin, Chi and Allen-Zhu, Zeyuan and Bubeck, S{\'{e}}bastien and Jordan, Michael I.}, booktitle = {Neural Information Processing Systems}, title = {{Is Q-learning provably efficient?}}, url = {https://arxiv.org/pdf/1807.03765.pdf}, year = {2018} } @article{narasimhan2006q, author = {Narasimhan, M and Jojic, N and Bilmes, J}, journal = {Adv. NIPS}, title = {{Q-clustering}}, volume = {18}, year = {2006} } @book{pinter1995global, author = {Pint{\'{e}}r, J{\'{a}}nos}, isbn = {9780792337577}, publisher = {Springer}, series = {Nonconvex Optimization and Its Applications}, title = {{Global Optimization in Action: Continuous and Lipschitz Optimization: Algorithms, Implementations and Applications}}, url = {http://books.google.fr/books?id=G8pF982ckNsC}, year = {1995} } @inproceedings{sutton1996generalization, author = {Sutton, Richard}, booktitle = {Advances in Neural Information Processing Systems 8}, pages = {1038--1044}, title = {{Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding}}, year = {1996} } @inproceedings{Komiyama2015, abstract = {We discuss a multiple-play multi-armed bandit (MAB) problem in which several arms are selected at each round. 
Recently, Thompson sampling (TS), a randomized algorithm with a Bayesian spirit, has attracted much attention for its empirically excellent performance, and it is revealed to have an optimal regret bound in the standard single-play MAB problem. In this paper, we propose the multiple-play Thompson sampling (MP-TS) algorithm, an extension of TS to the multiple-play MAB problem, and discuss its regret analysis. We prove that MP-TS for binary rewards has the optimal regret upper bound that matches the regret lower bound provided by Anantharam et al. (1987). Therefore, MP-TS is the first computationally efficient algorithm with optimal regret. A set of computer simulations was also conducted, which compared MP-TS with state-of-the-art algorithms. We also propose a modification of MP-TS, which is shown to have better empirical performance.}, archivePrefix = {arXiv}, arxivId = {1506.00779}, author = {Komiyama, Junpei and Honda, Junya and Nakagawa, Hiroshi}, booktitle = {International Conference on Machine Learning}, eprint = {1506.00779}, title = {{Optimal regret analysis of Thompson sampling in stochastic multi-armed bandit problem with multiple plays}}, url = {http://arxiv.org/abs/1506.00779}, year = {2015} } @article{Yasin2015, abstract = {We present a new algorithm for the contextual bandit learning problem, where the learner repeatedly takes one of {\$}K{\$} actions in response to the observed context, and observes the reward only for that chosen action. Our method assumes access to an oracle for solving fully supervised cost-sensitive classification problems and achieves the statistically optimal regret guarantee with only {\$}\backslashtilde{\{}O{\}}(\backslashsqrt{\{}KT/\backslashlog N{\}}){\$} oracle calls across all {\$}T{\$} rounds, where {\$}N{\$} is the number of policies in the policy class we compete against. By doing so, we obtain the most practical contextual bandit learning algorithm amongst approaches that work for general policy classes. 
We further conduct a proof-of-concept experiment which demonstrates the excellent computational and prediction performance of (an online variant of) our algorithm relative to several baselines.}, archivePrefix = {arXiv}, arxivId = {cs/9605103}, author = {Yasin, M. A. and Al-Ashwal, W. A M and Shire, A. M. and Hamzah, S. A. and Ramli, K. N.}, doi = {10.1613/jair.301}, eprint = {9605103}, isbn = {0-7803-3213-X}, issn = {18196608}, journal = {ARPN Journal of Engineering and Applied Sciences}, keywords = {Bluetooth,GSM,PIFA,Tri-band}, number = {19}, pages = {8740--8744}, pmid = {17255001}, primaryClass = {cs}, title = {{Tri-band planar inverted F-antenna (PIFA) for GSM bands and bluetooth applications}}, volume = {10}, year = {2015} } @inproceedings{kalyanakrishnan2012pac, author = {Kalyanakrishnan, Shivaram and Tewari, Ambuj and Auer, Peter and Stone, Peter}, booktitle = {International Conference on Machine Learning}, title = {{PAC subset selection in stochastic multi-armed bandits}}, year = {2012} } @article{fill1998interruptible, abstract = {For a large class of examples arising in statistical physics known as attractive spin systems (e.g., the Ising model), one seeks to sample from a probability distribution $\pi$ on an enormously large state space, but elementary sampling is ruled out by the infeasibility of calculating an appropriate normalizing constant. The same difficulty arises in computer science problems where one seeks to sample randomly from a large finite distributive lattice whose precise size cannot be ascertained in any reasonable amount of time. The Markov chain Monte Carlo (MCMC) approximate sampling approach to such a problem is to construct and run "for a long time" a Markov chain with long-run distribution $\pi$. But determining how long is long enough to get a good approximation can be both analytically and empirically difficult. 
Recently, Propp and Wilson have devised an ingenious and efficient algorithm to use the same Markov chains to produce perfect (i.e., exact) samples from $\pi$. However, the running time of their algorithm is an unbounded random variable whose order of magnitude is typically unknown a priori and which is not independent of the state sampled, so a naive user with limited patience who aborts a long run of the algorithm will introduce bias. We present a new algorithm which (1) again uses the same Markov chains to produce perfect samples from $\pi$, but is based on a different idea (namely, acceptance/rejection sampling); and (2) eliminates user-impatience bias. Like the Propp-Wilson algorithm, the new algorithm applies to a general class of suitably monotone chains, and also (with modification) to "anti-monotone" chains. When the chain is reversible, naive implementation of the algorithm uses fewer transitions but more space than Propp-Wilson. When fine-tuned and applied with the aid of a typical pseudorandom number generator to an attractive spin system on n sites using a random site updating Gibbs sampler whose mixing time $\tau$ is polynomial in n, the algorithm runs in time of the same order (bound) as Propp-Wilson [expectation O($\tau$ log n)] and uses only logarithmically more space [expectation O(n log n), vs. 
O(n) for Propp-Wilson].}, author = {Fill, James Allen}, journal = {Annals of Applied Probability}, keywords = {Attractive spin system,Duality,Gibbs sampler,Ising model,Markov chain Monte Carlo,Monotone chain,Partially ordered set,Perfect simulation,Rejection sampling,Separation,Strong stationary time}, number = {1}, pages = {131--162}, title = {{An interruptible algorithm for perfect sampling via Markov chains}}, volume = {8}, year = {1998} } @article{bikel04intricacies, address = {Cambridge, MA, USA}, author = {Bikel, Daniel M}, doi = {http://dx.doi.org/10.1162/0891201042544929}, issn = {0891-2017}, journal = {Computational Linguistics}, number = {4}, pages = {479--511}, publisher = {MIT Press}, title = {{Intricacies of {\{}C{\}}ollins' Parsing Model}}, volume = {30}, year = {2004} } @inproceedings{Cevher2008, author = {Cevher, V and Duarte, M F and Hegde, C and Baraniuk, R G}, booktitle = {Adv. NIPS}, title = {{Sparse signal recovery using {\{}M{\}}arkov random fields}}, year = {2008} } @inproceedings{shental2003learning, annote = {comps{\_}distance}, author = {Shental, Noam and Weinshall, Daphna}, booktitle = {In Proceedings of the Twentieth International Conference on Machine Learning}, pages = {11--18}, title = {{Learning Distance Functions using Equivalence Relations}}, url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.7.8086}, year = {2003} } @article{casella1996rao-blackwellisation, author = {Casella, George and Robert, Christian}, journal = {Biometrika}, number = {1}, pages = {81--94}, title = {{{\{}Rao-Blackwellisation{\}} of Sampling Schemes}}, volume = {83}, year = {1996} } @article{walsh2010integrating, abstract = {Recent advancements in model-based reinforcement learn- ing have shown that the dynamics of many structured do- mains (e.g. DBNs) can be learned with tractable sample com- plexity, despite their exponentially large state spaces. 
Un- fortunately, these algorithms all require access to a planner that computes a near optimal policy, and while many tra- ditional MDP algorithms make this guarantee, their com- putation time grows with the number of states. We show how to replace these over-matched planners with a class of sample-based plannerswhose computation time is indepen- dent of the number of stateswithout sacrificing the sample- efficiency guarantees of the overall learning algorithms. To do so, we define sufficient criteria for a sample-based planner to be used in such a learning system and analyze two popu- lar sample-based approaches from the literature. We also in- troduce our own sample-based planner, which combines the strategies fromthese algorithms and stillmeets the criteria for integration into our learning system. In doing so, we define the first complete RL solution for compactly represented (ex- ponentially sized) state spaces with efficiently learnable dy- namics that is both sample efficient and whose computation time does not grow rapidly with the number of states.}, author = {Walsh, Thomas J and Goschin, Sergiu and Littman, Michael L}, journal = {AAAI Conference on Artificial Intelligence}, title = {{Integrating sample-based planning and model-based reinforcement learning}}, url = {https://www.aaai.org/ocs/index.php/AAAI/AAAI10/paper/view/1880/2049}, year = {2010} } @article{de2004self, author = {de la Pe{\~{n}}a, Victor H. and Klass, Michael J. and Lai, Tze Leung}, journal = {Annals of probability}, pages = {1902--1933}, title = {{Self-normalized processes: Exponential inequalities, moment bounds and iterated logarithm laws}}, url = {https://arxiv.org/pdf/math/0410102.pdf}, volume = {32}, year = {2004} } @inproceedings{cevher, author = {Krause, A and Cevher, V}, booktitle = {Proc. 
ICML}, title = {{Submodular dictionary selection for sparse representation}}, year = {2010} } @article{rusmevichientong2010linearly, address = {Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA}, author = {Rusmevichientong, Paat and Tsitsiklis, John N}, journal = {Math. Oper. Res.}, keywords = {bandits}, mendeley-tags = {bandits}, month = {may}, number = {2}, pages = {395--411}, publisher = {Informs}, title = {{Linearly Parameterized Bandits}}, volume = {35}, year = {2010} } @article{kendall1945treatment, author = {Kendall, M G}, journal = {Biometrika}, pages = {239--251}, title = {{The treatment of ties in ranking problems}}, volume = {33}, year = {1945} } @article{domingos1997optimality, author = {Domingos, Pedro and Pazzani, Michael J}, journal = {Machine Learning}, number = {2-3}, pages = {103--130}, title = {{On the Optimality of the Simple Bayesian Classifier under Zero-One Loss}}, url = {citeseer.ist.psu.edu/article/domingos97optimality.html}, volume = {29}, year = {1997} } @article{nino-nora2010computing, author = {Nino-Mora, J}, doi = {10.1287/ijoc.1100.0398}, issn = {10919856}, journal = {INFORMS Journal on Computing}, keywords = {accepted may 2010,accepted winfried grassmann,advance,analysis algorithms,area editor computational,bandits,computational complexity,dynamic programming,finite horizon,history,index policies,march 2009,markov,may 2010,probability analysis,published online articles,received,revised january 2010}, number = {2}, pages = {254--267}, title = {{Computing a Classic Index for Finite-Horizon Bandits}}, url = {http://joc.journal.informs.org/cgi/doi/10.1287/ijoc.1100.0398}, volume = {23}, year = {2010} } @article{edmonds70submodular, abstract = {The viewpoint of the subject of matroids, and related areas of lattice theory, has always been, in one way or another, abstraction$\backslash$nof algebraic dependence or, equivalently, abstraction of the incidence relations in geometric 
representations of algebra.$\backslash$nOften one of the main derived facts is that all bases have the same cardinality. (See Van der Waerden, Section 33.)}, author = {Edmonds, Jack}, journal = {Combinatorial Structures and Their Applications}, pages = {69--87}, title = {{Submodular functions, matroids, and certain polyhedra}}, year = {1970} } @inproceedings{propp1998coupling, abstract = {The Markov chain Monte Carlo method is a general technique for obtaining samples from a probability distribution. In earlier work, we showed that for many applications one can modify the Markov chain Monte Carlo method so as to remove all bias in the output resulting from the biased choice of an initial state for the chain; we have called this method coupling from the past (CFTP). Here we describe this method in a fashion that should make our ideas accessible to researchers from diverse areas. Our expository strategy is to avoid proofs and focus on sample applications.}, author = {Propp, James and Wilson, David}, booktitle = {Microsurveys in Discrete Probability}, title = {{Coupling from the past: A user's guide}}, year = {1998} } @article{boularias2013apprenticeship, author = {Boularias, Abdeslam and Chaib-draa, Brahim}, doi = {http://dx.doi.org/10.1016/j.neucom.2012.11.002}, issn = {0925-2312}, journal = {Neurocomputing}, keywords = {Bootstrapping,Imitation learning,Inverse reinforcement learning,Transfer learning}, number = {0}, pages = {83--96}, title = {{Apprenticeship learning with few examples}}, url = {http://www.sciencedirect.com/science/article/pii/S0925231212008363}, volume = {104}, year = {2013} } @phdthesis{valko2005evolving, abstract = {Real biological networks are able to make decisions. We will show that this behavior can be observed even in some simple architectures of biologically plausible neural models. 
The great interest of this thesis is also to contribute to methods of statistical decision theory by giving a lead how to evolve the neural networks to solve miscellaneous decision tasks.}, author = {Valko, Michal}, keywords = {misovalko}, mendeley-tags = {misovalko}, month = {jun}, school = {Comenius University, Bratislava, Slovakia}, title = {{Evolving Neural Networks for Statistical Decision Theory}}, year = {2005} } @incollection{sinha2009semi, author = {Sinha, K and M.Belkin}, booktitle = {Advances in Neural Information Processing Systems 22}, publisher = {NIPS Foundation (http://books.nips.cc)}, title = {{Semi-supervised Learning using Sparse Eigenfunction Bases}}, year = {2009} } @inproceedings{ortner08deterministic, author = {Ortner, Ronald}, booktitle = {Proceedings of the 19th International Conference on Algorithmic Learning Theory, ALT 2008}, title = {{Online Regret Bounds for {\{}M{\}}arkov Decision Processes with Deterministic Transitions}}, year = {2008} } @article{Sanchez-2003-Mislabeled, author = {Sanchez, J S and Barandela, R and Marques, A I and Alejo, R and J., Badenas.}, journal = {Pattern Recognition Letteres 24}, pages = {1015--1022}, title = {{Analysis of New Techniques to Obtain Quality Training Sets.}}, year = {2003} } @book{Bar02, author = {Barvinok, A}, publisher = {American Mathematical Society}, title = {{A Course in Convexity}}, year = {2002} } @article{levine2010feature, author = {Levine, S and Popovic, Z and Koltun, V}, journal = {Advances in Neural Information Processing Systems}, title = {{Feature construction for inverse reinforcement learning}}, url = {http://www.stanford.edu/{~}svlevine/papers/firl.pdf}, volume = {23}, year = {2010} } @article{neu2017unified, archivePrefix = {arXiv}, arxivId = {cs.LG/1705.07798}, author = {Neu, Gergely and Jonsson, Anders and G{\'{o}}mez, Vicen{\c{c}}}, eprint = {1705.07798}, journal = {arXiv:1705.07798}, month = {may}, primaryClass = {cs.LG}, title = {{A unified view of entropy-regularized 
Markov decision processes}}, url = {https://ui.adsabs.harvard.edu/abs/2017arXiv170507798N}, year = {2017} } @inproceedings{agarwal2020model, author = {Agarwal, Alekh and Kakade, Sham and Yang, Lin F}, booktitle = {Conference on Learning Theory}, title = {{Model-based reinforcement learning with a generative model is minimax optimal}}, url = {https://arxiv.org/pdf/1906.03804.pdf}, year = {2020} } @article{gorur2011concave, author = {G{\"{o}}r{\"{u}}r, Dilan and Teh, Yee Whye}, journal = {Journal of Computational and Graphical Statistics}, title = {{Concave-Convex adaptive rejection sampling}}, year = {2011} } @article{zong2016cascading, author = {Zong, Shi and Ni, Hao and Sung, Kenny and Ke, Nan Rosemary and Wen, Zheng and Kveton, Branislav}, journal = {arXiv preprint arXiv:1603.05359}, title = {{Cascading Bandits for Large-Scale Recommendation Problems}}, year = {2016} } @inproceedings{ghahramani2000graphical, annote = {comps{\_}models}, author = {Ghahramani, Zoubin and Beal, Matthew J}, booktitle = {Advanced Mean Field Methods - Theory and Practice}, publisher = {MIT Press}, title = {{Graphical models and variational methods}}, url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.7693}, year = {2000} } @article{WK08, author = {Warmuth, M and Kuzmin, D}, journal = {Journal of Machine Learning Research}, pages = {2287--2320}, title = {{Randomized Online {\{}PCA{\}} Algorithms with Regret Bounds that are Logarithmic in the Dimension}}, volume = {9}, year = {2008} } @inproceedings{shalev07pegasos, author = {Shalev-Shwartz, Shai and Singer, Yoram and Srebro, Nathan}, doi = {http://doi.acm.org/10.1145/1273496.1273598}, pages = {807--814}, title = {{{\{}P{\}}egasos: {\{}P{\}}rimal {\{}E{\}}stimated sub-{\{}G{\}}r{\{}A{\}}dient {\{}SO{\}}lver for {\{}SVM{\}}}} } @article{kumar2012sampling, author = {Kumar, Sanjiv and Mohri, Mehryar and Talwalkar, Ameet}, journal = {J. Mach. Learn. 
Res.}, number = {1}, pages = {981--1006}, title = {{Sampling Methods for the Nystr{\"{o}}m Method}}, volume = {13}, year = {2012} } @inproceedings{valko2011conditional, abstract = {Timely detection of concerning events is an important problem in clinical practice. In this paper, we consider the problem of conditional anomaly detection that aims to identify data instances with an unusual response, such as the omission of an important lab test. We develop a new non-parametric approach for conditional anomaly detection based on the soft harmonic solution, with which we estimate the confidence of the label to detect anomalous mislabeling. We further regularize the solution to avoid the detection of isolated examples and examples on the boundary of the distribution support. We demonstrate the efficacy of the proposed method in detecting unusual labels on a real-world electronic health record dataset and compare it to several baseline approaches.}, author = {Valko, Michal and Valizadegan, Hamed and Kveton, Branislav and Cooper, Gregory F and Hauskrecht, Milos}, booktitle = {The 28th International Conference on Machine Learning Workshop on Machine Learning for Global Challenges}, keywords = {misovalko}, mendeley-tags = {misovalko}, month = {jun}, title = {{Conditional Anomaly Detection Using Soft Harmonic Functions: An Application to Clinical Alerting}}, year = {2011} } @inproceedings{davis2007information-theoretic, address = {New York, NY, USA}, annote = {comps{\_}distancX}, author = {Davis, Jason V and Kulis, Brian and Jain, Prateek and Sra, Suvrit and Dhillon, Inderjit S}, booktitle = {ICML '07: Proceedings of the 24th international conference on Machine learning}, doi = {http://doi.acm.org/10.1145/1273496.1273523}, isbn = {978-1-59593-793-3}, pages = {209--216}, publisher = {ACM}, title = {{Information-theoretic metric learning}}, year = {2007} } @article{groenevelt1991two, author = {Groenevelt, H}, journal = {European Journal of Operational Research}, number = {2}, 
pages = {227--236}, publisher = {Elsevier}, title = {{Two algorithms for maximizing a separable concave function over a polymatroid feasible region}}, volume = {54}, year = {1991} } @book{kohn2000to, address = {Washington DC}, author = {Kohn, L and Corrigan, J and Donaldson, M}, keywords = {imported}, publisher = {National Academy Press}, title = {{To Err Is Human: Building a Safer Health System}}, year = {2000} } @article{Wil96, author = {Willems, F M J}, journal = {IEEE Transactions on Information Theory}, pages = {2210--2217}, title = {{Coding for a binary independent piecewise-identically-distributed source}}, volume = {IT-42}, year = {1996} } @inproceedings{feige2007maximizing, author = {Feige, U and Mirrokni, V S and Vondrak, J}, booktitle = {Proc. Symposium on Foundations of Computer Science}, organization = {IEEE Computer Society}, pages = {461--471}, title = {{Maximizing Non-Monotone Submodular Functions}}, year = {2007} } @inproceedings{dechter1997mini-buckets:, author = {Dechter, Rina}, booktitle = {Proceedings of the 15th International Joint Conference on Artificial Intelligence}, pages = {1297--1303}, title = {{Mini-Buckets: A General Scheme for Generating Approximations in Automated Reasoning}}, year = {1997} } @inproceedings{zhang1995reinforcement, author = {Zhang, Wei and Dietterich, Thomas}, booktitle = {Proceedings of the 14th International Joint Conference on Artificial Intelligence}, pages = {1114--1120}, title = {{A Reinforcement Learning Approach to Job-Shop Scheduling}}, year = {1995} } @inproceedings{ratliff2007boosting, address = {Cambridge, MA}, author = {Ratliff, Nathan and Bradley, David and Bagnell, J Andrew (Drew) and Chestnutt, Joel}, booktitle = {Advances in Neural Information Processing Systems 19}, editor = {Sch{\"{o}}lkopf, B and Platt, J C and Hofmann, T}, publisher = {MIT Press}, title = {{Boosting Structured Prediction for Imitation Learning}}, url = {http://www-clmc.usc.edu/publications/B/bagnell-NIPS2006.pdf}, year = {2007} } 
@inproceedings{Kavukcuoglu2009, author = {Kavukcuoglu, K and Ranzato, M A and Fergus, R and LeCun, Y}, booktitle = {Proc. CVPR}, title = {{Learning invariant features through topographic filter maps}}, year = {2009} } @book{BoLuMa13, author = {Boucheron, S and Lugosi, G and Massart, P}, publisher = {Oxford University Press}, title = {{Concentration inequalities: A Nonasymptotic Theory of Independence}}, year = {2013} } @inproceedings{orabona2017training, abstract = {Deep learning methods achieve state-of-the-art performance in many application scenarios. Yet, these methods require a significant amount of hyperparameters tuning in order to achieve the best results. In particular, tuning the learning rates in the stochastic optimization process is still one of the main bottlenecks. In this paper, we propose a new stochastic gradient descent procedure for deep networks that does not require any learning rate setting. Contrary to previous methods, we do not adapt the learning rates nor we make use of the assumed curvature of the objective function. Instead, we reduce the optimization process to a game of betting on a coin and propose a learning rate free optimal algorithm for this scenario. Theoretical convergence is proven for convex and quasi-convex functions and empirical evidence shows the advantage of our algorithm over popular stochastic gradient algorithms.}, archivePrefix = {arXiv}, arxivId = {1705.07795}, author = {Orabona, Francesco and Tommasi, Tatiana}, booktitle = {Neural Information Processing Systems}, eprint = {1705.07795}, title = {{Training deep networks without learning rates through coin betting}}, url = {http://papers.neurips.cc/paper/6811-training-deep-networks-without-learning-rates-through-coin-betting.pdf}, year = {2017} } @inproceedings{kocak2014spectral, abstract = {Thompson Sampling (TS) has surged a lot of interest due to its good empirical performance, in particular in the computational advertising. 
Though successful, the tools for its performance analysis appeared only recently. In this paper, we describe and analyze SpectralTS algorithm for a bandit problem, where the payoffs of the choices are smooth given an underlying graph. In this setting, each choice is a node of a graph and the expected payoffs of the neighboring nodes are assumed to be similar. Although the setting has application both in recommender systems and advertising, the traditional algorithms would scale poorly with the number of choices. For that purpose we consider an effective dimension d, which is small in real-world graphs. We deliver the analysis showing that the regret of SpectralTS scales as d$\backslash$sqrt(T $\backslash$ln N) with high probability, where T is the time horizon and N is the number of choices. Since a d$\backslash$sqrt(T $\backslash$ln N) regret is comparable to the known results, SpectralTS offers a computationally more efficient alternative. We also show that our algorithm is competitive on both synthetic and real-world data.}, author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Valko, Michal and Munos, R{\'{e}}mi and Agrawal, Shipra}, booktitle = {AAAI Conference on Artificial Intelligence}, title = {{Spectral Thompson sampling}}, url = {https://hal.inria.fr/hal-00981575v2/document}, year = {2014} } @techreport{chekuri2011submodular, author = {Chekuri, C and Vondr{\'{a}}k, J and Zenklusen, R}, institution = {Arxiv}, number = {1105.4593}, title = {{Submodular function maximization via the multilinear relaxation and contention resolution schemes}}, year = {2011} } @inproceedings{liu2011dynamic, abstract = {We consider a large-scale cyber network with N components (e.g., paths, servers, subnets). Each component is either in a healthy state (0) or an abnormal state (1). Due to random intrusions, the state of each component transits from 0 to 1 over time according to certain stochastic process. 
At each time, a subset of K (K {\textless} N) components are checked and those observed in abnormal states are fixed. The objective is to design the optimal scheduling for intrusion detection such that the long-term network cost incurred by all abnormal components is minimized. We formulate the problem as a special class of Restless Multi-Armed Bandit (RMAB) process. A general RMAB suffers from the curse of dimensionality (PSPACE-hard) and numerical methods are often inapplicable. We show that, for this class of RMAB, Whittle index exists and can be obtained in closed form, leading to a low-complexity implementation of Whittle index policy with a strong performance. For homogeneous components, Whittle index policy is shown to have a simple structure that does not require any prior knowledge on the intrusion processes. Based on this structure, Whittle index policy is further shown to be optimal over a finite time horizon with an arbitrary length. Beyond intrusion detection, these results also find applications in queuing networks with finite-size buffers.}, archivePrefix = {arXiv}, arxivId = {1112.0101}, author = {Liu, Keqin and Zhao, Qing}, booktitle = {IEEE International Symposium on Information Theory Proceedings}, eprint = {1112.0101}, title = {{Dynamic Intrusion Detection in Resource-Constrained Cyber Networks}}, url = {http://arxiv.org/abs/1112.0101}, year = {2012} } @incollection{scholkopf1999kernel, author = {Sch{\"{o}}lkopf, Bernhard and Smola, Alexander J. and M{\"{u}}ller, Klaus-Robert}, booktitle = {Advances in kernel methods}, pages = {327--352}, publisher = {MIT Press Cambridge, MA, USA}, title = {{Kernel principal component analysis}}, year = {1999} } @inproceedings{gopalan2013thompson, abstract = {We consider stochastic multi-armed bandit problems with complex actions over a set of basic arms, where the decision maker plays a complex action rather than a basic arm in each round. 
The reward of the complex action is some function of the basic arms' rewards, and the feedback observed may not necessarily be the reward per-arm. For instance, when the complex actions are subsets of the arms, we may only observe the maximum reward over the chosen subset. Thus, feedback across complex actions may be coupled due to the nature of the reward function. We prove a frequentist regret bound for Thompson sampling in a very general setting involving parameter, action and observation spaces and a likelihood function over them. The bound holds for discretely-supported priors over the parameter space and without additional structural properties such as closed-form posteriors, conjugate prior structure or independence across arms. The regret bound scales logarithmically with time but, more importantly, with an improved constant that non-trivially captures the coupling across complex actions due to the structure of the rewards. As applications, we derive improved regret bounds for classes of complex bandit problems involving selecting subsets of arms, including the first nontrivial regret bounds for nonlinear MAX reward feedback from subsets.}, author = {Gopalan, Aditya and Mannor, Shie and Mansour, Yishay}, booktitle = {International Conference on Machine Learning}, title = {{Thompson sampling for complex bandit problems}}, year = {2014} } @article{seeger2008bayesian, author = {Seeger, M W}, journal = {Journal of Machine Learning Research}, pages = {759--813}, publisher = {JMLR. 
org}, title = {{Bayesian inference and optimal design for the sparse linear model}}, volume = {9}, year = {2008} } @inproceedings{kanade2009sleeping, author = {Kanade, Varun and McMahan, H Brendan and Bryan, Brent}, booktitle = {International Conference on Artificial Intelligence and Statistics}, keywords = {bandits}, mendeley-tags = {bandits}, number = {5}, pages = {272--279}, series = {AI{\&}Stats '09}, title = {{Sleeping Experts and Bandits with Stochastic Action Availability and Adversarial Rewards}}, year = {2009} } @inproceedings{ashkan15optimal, author = {Ashkan, Azin and Kveton, Branislav and Berkovsky, Shlomo and Wen, Zheng}, booktitle = {International Joint Conferences on Artificial Intelligence}, title = {{Optimal greedy diversity for recommendation}}, year = {2015} } @article{madigan2002likelihood-based, author = {Madigan, David and Raghavan, Nandini and DuMouchel, William and Nason, Martha and Posse, Christian and Ridgeway, Greg}, journal = {Data Mining and Knowledge Discovery}, number = {2}, pages = {173--190}, title = {{Likelihood-based data squashing: a modeling approach to instance construction}}, volume = {6}, year = {2002} } @inproceedings{kveton2006learning, author = {Kveton, Branislav and Hauskrecht, Milos}, booktitle = {Proceedings of the 21st National Conference on Artificial Intelligence}, pages = {1161--1166}, title = {{Learning Basis Functions in Hybrid Domains}}, year = {2006} } @article{neu12o-mdp-full, annote = {From Duplicate 1 (Online {\{}M{\}}arkov Decision Processes under Bandit Feedback - Neu, G; Gy{\"{o}}rgy, A; Szepesv{\'{a}}ri, Cs.; Antos, A) Accepted for publication}, author = {Neu, Gergely and Gy{\"{o}}rgy, Andr{\'{a}}s and Szepesv{\'{a}}ri, Csaba
and Antos, Andr{\'{a}}s}, journal = {IEEE Transactions on Automatic Control}, pages = {1804--1812}, title = {{Online {\{}M{\}}arkov Decision Processes under Bandit Feedback}}, year = {2013} } @article{zhang1998characterization, author = {Zhang, Z and Yeung, R W}, journal = {IEEE Transactions on Information Theory}, number = {4}, pages = {1440--1452}, publisher = {IEEE}, title = {{On characterization of entropy function via information inequalities}}, volume = {44}, year = {1998} } @article{carpentier2013adaptive, abstract = {We consider the problem of estimating the tail index $\backslash$alpha of a distribution satisfying a ($\backslash$alpha, $\backslash$beta) second-order Pareto-type condition, where $\backslash$beta is the second-order coefficient. When $\backslash$beta is available, it was previously proved that $\backslash$alpha can be estimated with the oracle rate n{\^{}}{\{}-$\backslash$beta/(2$\backslash$beta+1){\}}. On the contrary, when $\backslash$beta is not available, estimating $\backslash$alpha with the oracle rate is challenging; so additional assumptions that imply the estimability of $\backslash$beta are usually made. In this paper, we propose an adaptive estimator of $\backslash$alpha, and show that this estimator attains the rate (n/$\backslash$log$\backslash$log n){\^{}}{\{}-$\backslash$beta/(2$\backslash$beta+1){\}} without a priori knowledge of $\backslash$beta and any additional assumptions. Moreover, we prove that this ($\backslash$log$\backslash$log n){\^{}}{\{}$\backslash$beta/(2$\backslash$beta+1){\}} factor is unavoidable by obtaining the companion lower bound.}, author = {Carpentier, Alexandra and Kim, Arlene K. 
H.}, journal = {Statistica Sinica}, title = {{Adaptive and minimax optimal estimation of the tail coefficient}}, year = {2014} } @article{CBMS07, author = {Cesa-Bianchi, Nicol{\`{o}} and Mansour, Yishay and Stoltz, Gilles}, journal = {Machine Learning}, number = {2-3}, pages = {321--352}, title = {{Improved second-order bounds for prediction with expert advice}}, volume = {66}, year = {2007} } @inproceedings{gao2006novel, annote = {comps{\_}anX}, author = {Gao, Jing and Cheng, Haibin and Tan, Pang-Ning}, booktitle = {SDM}, title = {{A Novel Framework for Incorporating Labeled Examples into Anomaly Detection.}}, url = {http://www.siam.org/meetings/sdm06/proceedings/068gaoj2.pdf}, year = {2006} } @inproceedings{dvijotham2010inverse, abstract = {We present new algorithms for inverse optimal control (or inverse$\backslash$nreinforcement learning, IRL) within the framework of linearlysolvable$\backslash$nMDPs (LMDPs). Unlike most prior IRL algorithms which recover only$\backslash$nthe control policy of the expert, we recover the policy, the value$\backslash$nfunction and the cost function. This is possible because here the$\backslash$ncost and value functions are uniquely deffined given the policy.$\backslash$nDespite these special properties, we can handle a wide variety of$\backslash$nproblems such as the grid worlds popular in RL and most of the nonlinear$\backslash$nproblems arising in robotics and control engineering. Direct comparisons$\backslash$nto prior IRL algorithms show that our new algorithms provide more$\backslash$ninformation and are orders of magnitude faster. Indeed our fastest$\backslash$nalgorithm is the first inverse algorithm which does not require solving$\backslash$nthe forward problem; instead it performs unconstrained optimization$\backslash$nof a convex and easy-to-compute log-likelihood. 
Our work also sheds$\backslash$nlight on the recent Maximum Entropy (MaxEntIRL) algorithm, which$\backslash$nwas dened in terms of density estimation and the corresponding forward$\backslash$nproblem was left unspecified. We show that MaxEntIRL is inverting$\backslash$nan LMDP, using the less efficient of the algorithms derived here.$\backslash$nUnlike all prior IRL algorithms which assume pre-existing features,$\backslash$nwe study feature adaptation and show that such adaptation is essential$\backslash$nin continuous state spaces.}, author = {Dvijotham, Krishnamurthy and Todorov, Emanuel}, booktitle = {ICML}, keywords = {dblp}, pages = {335--342}, title = {{Inverse Optimal Control with Linearly-Solvable MDPs.}}, year = {2010} } @inproceedings{ghashami2016streaming, abstract = {Kernel principal component analysis (KPCA) provides a concise set of basis vectors which capture non-linear structures within large data sets, and is a central tool in data analysis and learning. To allow for non-linear relations, typically a full {\$}n \backslashtimes n{\$} kernel matrix is constructed over {\$}n{\$} data points, but this requires too much space and time for large values of {\$}n{\$}. Techniques such as the Nystr$\backslash$"om method and random feature maps can help towards this goal, but they do not explicitly maintain the basis vectors in a stream and take more space than desired. We propose a new approach for streaming KPCA which maintains a small set of basis elements in a stream, requiring space only logarithmic in {\$}n{\$}, and also improves the dependence on the error parameter. 
Our technique combines together random feature maps with recent advances in matrix sketching, it has guaranteed spectral norm error bounds with respect to the original kernel matrix, and it compares favorably in practice to state-of-the-art approaches.}, author = {Ghashami, Mina and Perry, Daniel and Phillips, Jeff M.}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Streaming kernel principal component analysis}}, year = {2016} } @article{marbach2001simulation-based, author = {Marbach, Peter and Tsitsiklis, John}, journal = {IEEE Transactions on Automatic Control}, number = {2}, pages = {191--209}, title = {{Simulation-Based Optimization of {\{}Markov{\}} Reward Processes}}, volume = {46}, year = {2001} } @article{kalai2003efficient, author = {Kalai, Adam and Vempala, Santosh}, journal = {Journal of Computer and System Sciences}, number = {3}, pages = {291--307}, title = {{Efficient algorithms for online decision problems}}, volume = {71}, year = {2005} } @inproceedings{younes2004solving, author = {Younes, Hakan and Simmons, Reid}, booktitle = {Proceedings of the 19th National Conference on Artificial Intelligence}, pages = {742--747}, title = {{Solving Generalized Semi-{\{}Markov{\}} Decision Processes Using Continuous Phase-Type Distributions}}, year = {2004} } @inproceedings{neu2007apprenticeship, abstract = {In this paper we propose a novel gradient algorithm to learn a policy from an expert's observed behavior assuming that the expert behaves optimally with respect to some unknown reward function of a Markovian Decision Problem. The algorithm's aim is to find a reward function such that the resulting optimal policy matches well the expert's observed behavior. The main difficulty is that the mapping from the parameters to policies is both nonsmooth and highly redundant. Resorting to subdifferentials solves the first difficulty, while the second one is overcome by computing natural gradients. 
We tested the proposed method in two artificial domains and found it to be more reliable and efficient than some previous methods.}, author = {Neu, Gergely and Szepesv{\'{a}}ri, C}, booktitle = {Proceedings of the Twenty-Third Conference on Uncertainty in Artificial Intelligence (UAI-07)}, isbn = {0-9749039-3-0}, pages = {295--302}, title = {{Apprenticeship learning using inverse reinforcement learning and gradient methods}}, url = {http://arxiv.org/abs/1206.5264}, year = {2007} } @techreport{williams1993tight, author = {Williams, Ronald and {Baird III}, Leemon}, institution = {Northeastern University}, number = {NU-CCS-93-14}, title = {{Tight Performance Bounds on Greedy Policies Based on Imperfect Value Functions}}, year = {1993} } @book{haykin1994neural, address = {Upper Saddle River, NJ, USA}, author = {Haykin, Simon}, edition = {1st}, isbn = {0023527617}, publisher = {Prentice Hall PTR}, title = {{Neural Networks: A Comprehensive Foundation}}, year = {1994} } @article{bubeck2012regret, abstract = {Multi-armed bandit problems are the most basic examples of sequential decision problems with an exploration-exploitation trade-off. This is the balance between staying with the option that gave highest payoffs in the past and exploring new options that might give higher payoffs in the future. Although the study of bandit problems dates back to the Thirties, exploration-exploitation trade-offs arise in several modern applications, such as ad placement, website optimization, and packet routing. Mathematically, a multi-armed bandit is defined by the payoff process associated with each option. In this survey, we focus on two extreme cases in which the analysis of regret is particularly simple and elegant: i.i.d. payoffs and adversarial payoffs. 
Besides the basic setting of finitely many actions, we also analyze some of the most important variants and extensions, such as the contextual bandit model.}, archivePrefix = {arXiv}, arxivId = {1204.5721}, author = {Bubeck, S{\'{e}}bastien and Cesa-Bianchi, Nicol{\`{o}}}, eprint = {1204.5721}, journal = {Foundations and Trends in Machine Learning}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {1--122}, title = {{Regret Analysis of Stochastic and Nonstochastic Multi-armed Bandit Problems}}, url = {http://arxiv.org/abs/1204.5721}, volume = {5}, year = {2012} } @article{engel2004kernel, abstract = {We present a nonlinear version of the recursive least squares (RLS) algorithm. Our algorithm performs linear regression in a high-dimensional feature space induced by a Mercer kernel and can therefore be used to recursively construct minimum mean-squared-error solutions to nonlinear least-squares problems that are frequently encountered in signal processing applications. In order to regularize solutions and keep the complexity of the algorithm bounded, we use a sequential sparsification process that admits into the kernel representation a new input sample only if its feature space image cannot be sufficiently well approximated by combining the images of previously admitted samples. This sparsification procedure allows the algorithm to operate online, often in real time. We analyze the behavior of the algorithm, compare its scaling properties to those of support vector machines, and demonstrate its utility in solving two signal processing problems-time-series prediction and channel equalization.}, author = {Engel, Yaakov and Mannor, Shie and Meir, Ron}, journal = {IEEE Transactions on Signal Processing}, number = {8}, pages = {2275--2285}, title = {{The kernel recursive least-squares algorithm}}, volume = {52}, year = {2004} } @inproceedings{narasimhan2006submodular, author = {Narasimhan, M and Bilmes, J}, booktitle = {Adv. 
NIPS}, title = {{A submodular-supermodular procedure with applications to discriminative structure learning}}, volume = {19}, year = {2006} } @inproceedings{abbasi-yadkori2018best, abstract = {We study bandit best-arm identification with arbitrary and potentially adversarial rewards. A simple random uniform learner obtains the optimal rate of error in the adversarial scenario. However, this type of strategy is suboptimal when the rewards are sampled stochastically. Therefore, we ask: Can we design a learner that performs optimally in both the stochastic and adversarial problems while not being aware of the nature of the rewards? First, we show that designing such a learner is impossible in general. In particular, to be robust to adversarial rewards, we can only guarantee optimal rates of error on a subset of the stochastic problems. We give a lower bound that characterizes the optimal rate in stochastic problems if the strategy is constrained to be robust to adversarial rewards. Finally, we design a simple parameter-free algorithm and show that its probability of error matches (up to log factors) the lower bound in stochastic problems, and it is also robust to adversarial ones.}, author = {Abbasi-Yadkori, Yasin and Bartlett, Peter and Gabillon, Victor and Malek, Alan and Valko, Michal}, booktitle = {Conference on Learning Theory}, title = {{Best of both worlds: Stochastic {\&} adversarial best-arm identification}}, year = {2018} } @inproceedings{calandriello2020near-linear, abstract = {Gaussian processes (GP) are one of the most successful frameworks to model uncertainty. However , GP optimization (e.g., GP-UCB) suffers from major scalability issues. Experimental time grows linearly with the number of evaluations, unless candidates are selected in batches (e.g., using GP-BUCB) and evaluated in parallel. 
Furthermore , computational cost is often prohibitive since algorithms such as GP-BUCB require a time at least quadratic in the number of dimensions and iterations to select each batch. In this paper, we introduce BBKB (Batch Budgeted Kernel Bandits), the first no-regret GP optimization algorithm that provably runs in near-linear time and selects candidates in batches. This is obtained with a new guarantee for the tracking of the posterior variances that allows BBKB to choose increasingly larger batches, improving over GP-BUCB. Moreover , we show that the same bound can be used to adaptively delay costly updates to the sparse GP approximation used by BBKB, achieving a near-constant per-step amortized cost. These findings are then confirmed in several experiments, where BBKB is much faster than state-of-the-art methods.}, author = {Calandriello, Daniele and Carratino, Luigi and Lazaric, Alessandro and Valko, Michal and Rosasco, Lorenzo}, booktitle = {International Conference on Machine Learning}, title = {{Near-linear time Gaussian process optimization with adaptive batching and resparsification}}, year = {2020} } @inproceedings{black92, author = {Black, Ezra}, booktitle = {{\{}LINGUIST{\}} {\{}L{\}}ist 3.587, $\backslash$texttt{\{}http://www.linguistlist.org/issues/3/3-587.html{\}}}, title = {{Meeting of interest group on evaluation of broad-coverage parsers of English}}, year = {1992} } @inproceedings{grill2015black-box, abstract = {We study the problem of black-box optimization of a function f of any dimension, given function evaluations perturbed by noise. The function is assumed to be locally smooth around one of its global optima, but this smoothness is unknown. Our contribution is an adaptive optimization algorithm, POO or parallel optimistic optimization, that is able to deal with this setting. POO performs almost as well as the best known algorithms requiring the knowledge of the smoothness. 
Furthermore, POO works for a larger class of functions than what was previously considered, especially for functions that are difficult to optimize, in a very precise sense. We provide a finite-time analysis of POO's performance, which shows that its error after n evaluations is at most a factor of sqrt(ln n) away from the error of the best known optimization algorithms using the knowledge of the smoothness.}, author = {Grill, Jean-Bastien and Valko, Michal and Munos, R{\'{e}}mi}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Grill, Valko, Munos - 2015 - Black-box optimization of noisy functions with unknown smoothness.pdf:pdf}, title = {{Black-box optimization of noisy functions with unknown smoothness}}, url = {https://papers.nips.cc/paper/5721-black-box-optimization-of-noisy-functions-with-unknown-smoothness.pdf}, year = {2015} } @incollection{chickering1996learning, author = {Chickering, David M}, booktitle = {Learning from Data: Artificial Intelligence and Statistics V}, editor = {Fisher, D and Lenz, H}, keywords = {complexity,graphical-models}, pages = {121--130}, publisher = {Springer-Verlag}, title = {{Learning {\{}Bayesian{\}} Networks is {\{}NP{\}}-{\{}Complete{\}}}}, url = {http://research.microsoft.com/copyright/accept.asp?path=http://research.microsoft.com/{~}dmax/publications/lns96.pdf{\&}{\#}38;pub=15}, year = {1996} } @article{titsias2009efficient, abstract = {Sampling functions in Gaussian process (GP) models is challenging because of the highly correlated posterior distribution. We describe an efficient Markov chain Monte Carlo algorithm for sampling from the posterior process of the GP model. This algorithm uses control variables which are auxiliary function values that provide a low dimensional representation of the function. At each iteration, the algorithm proposes new values for the control variables and generates the function from the conditional GP prior. 
The control variable input locations are found by continuously minimizing an objective function. We demonstrate the algorithm on regression and classification problems and we use it to estimate the parameters of a differential equation model of gene regulation.}, author = {Titsias, Michalis K and Lawrence, Neil D and Rattray, Magnus}, editor = {Koller, D and Schuurmans, D and Bengio, Y and Bottou, L}, journal = {Advances in Neural Information Processing Systems 21}, pages = {1681--1688}, publisher = {Citeseer}, title = {{Efficient Sampling for Gaussian Process Inference using Control Variables}}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.159.74{\&}rep=rep1{\&}type=pdf}, volume = {21}, year = {2009} } @inproceedings{mnih2016asynchronous, author = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray}, booktitle = {International Conference on Machine Learning}, title = {{Asynchronous methods for deep reinforcement learning}}, url = {https://arxiv.org/pdf/1602.01783.pdf}, year = {2016} } @article{tsybakov1998pointwise, abstract = {The problem of nonparametric function estimation in the Gaussian white noise model is considered. It is assumed that the unknown function belongs to one of the Sobolev classes, with an unknown regularity parameter. Asymptotically exact adaptive estimators of functions are proposed on the scale of Sobolev classes, with respect to pointwise and sup-norm risks. It is shown that, unlike the case of L2-risk, a loss of efficiency under adaptation is inevitable here. Bounds on the value of the loss of efficiency are obtained.}, author = {Tsybakov, A. 
B.}, journal = {Annals of Statistics}, keywords = {Adaptive nonparametric estimation,Exact constants,Gaussian white noise,Loss of efficiency under adaptation,Minimax risk,Sobolev class}, number = {6}, pages = {2420--2469}, title = {{Pointwise and sup-norm sharp adaptive estimation of functions on the Sobolev classes}}, volume = {26}, year = {1998} } @article{drineas2011fast, abstract = {The statistical leverage scores of a matrix {\$}A{\$} are the squared row-norms of the matrix containing its (top) left singular vectors and the coherence is the largest leverage score. These quantities are of interest in recently-popular problems such as matrix completion and Nystr$\backslash$"{\{}o{\}}m-based low-rank matrix approximation as well as in large-scale statistical data analysis applications more generally; moreover, they are of interest since they define the key structural nonuniformity that must be dealt with in developing fast randomized matrix algorithms. Our main result is a randomized algorithm that takes as input an arbitrary {\$}n \backslashtimes d{\$} matrix {\$}A{\$}, with {\$}n \backslashgg d{\$}, and that returns as output relative-error approximations to all {\$}n{\$} of the statistical leverage scores. The proposed algorithm runs (under assumptions on the precise values of {\$}n{\$} and {\$}d{\$}) in {\$}O(n d \backslashlog n){\$} time, as opposed to the {\$}O(nd{\^{}}2){\$} time required by the na$\backslash$"{\{}i{\}}ve algorithm that involves computing an orthogonal basis for the range of {\$}A{\$}. Our analysis may be viewed in terms of computing a relative-error approximation to an underconstrained least-squares approximation problem, or, relatedly, it may be viewed as an application of Johnson-Lindenstrauss type ideas. 
Several practically-important extensions of our basic result are also described, including the approximation of so-called cross-leverage scores, the extension of these ideas to matrices with {\$}n \backslashapprox d{\$}, and the extension to streaming environments.}, author = {Drineas, Petros and Magdon-Ismail, Malik and Mahoney, Michael W and Woodruff, David P.}, journal = {Journal of Machine Learning Research}, keywords = {matrix coherence,randomized algorithm,statistical leverage}, pages = {3475--3506}, title = {{Fast approximation of matrix coherence and statistical leverage}}, volume = {13}, year = {2012} } @article{koutis_solving_2011, author = {Koutis, Ioannis and Miller, Gary L and Peng, Richard}, journal = {arXiv preprint arXiv:1102.4842}, title = {{Solving {SDD} linear systems in time {$\tilde{O}(m \log n \log(1/\epsilon))$}}}, url = {http://www.researchgate.net/profile/Richard{\_}Peng/publication/221499482{\_}A{\_}Nearly-m{\_}log{\_}n{\_}Time{\_}Solver{\_}for{\_}SDD{\_}Linear{\_}Systems/links/004635362a1ac2587f000000.pdf}, year = {2011} } @inproceedings{mahadevan2005samuel, author = {Mahadevan, Sridhar}, booktitle = {Proceedings of the 20th National Conference on Artificial Intelligence}, pages = {1000--1005}, title = {{Samuel Meets {\{}Amarel{\}}: Automating Value Function Approximation Using Global State Space Analysis}}, year = {2005} } @inproceedings{quattoni2009efficient, author = {Quattoni, A and Carreras, X and Collins, M and Darrell, T}, booktitle = {Proc. ICML}, title = {{An efficient projection for {$\ell_{1,\infty}$} regularization}}, year = {2009} } @inproceedings{Kim2009, author = {Kim, S and Xing, E P}, booktitle = {Proceedings of the International Conference on Machine Learning (ICML)}, title = {{Tree-Guided Group {\{}L{\}}asso for Multi-Task Regression with Structured Sparsity}}, year = {2010} } @article{gale2003bayesian, abstract = {We extend the standard model of social learning in two ways. 
First, we introduce a social network and assume that agents can only observe the actions of agents to whom they are connected by this network. Secondly, we allow agents to choose a different action at each date. If the network satisfies a connectedness assumption, the initial diversity resulting from diverse private information is eventually replaced by uniformity of actions, though not necessarily of beliefs, in finite time with probability one. We look at particular networks to illustrate the impact of network architecture on speed of convergence and the optimality of absorbing states. Convergence is remarkably rapid, so that asymptotic results are a good approximation even in the medium run. {\textcopyright} 2003 Elsevier Inc. All rights reserved.}, author = {Gale, Douglas and Kariv, Shachar}, journal = {Games and Economic Behavior}, number = {2}, pages = {329--346}, title = {{Bayesian learning in social networks}}, volume = {45}, year = {2003} } @article{williams1992simple, author = {Williams, Ronald}, journal = {Machine Learning}, number = {3-4}, pages = {229--256}, title = {{Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning}}, volume = {8}, year = {1992} } @inproceedings{auerlogarithmic, author = {Auer, Peter and Ortner, Ronald}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {49--56}, title = {{Logarithmic online regret bounds for undiscounted reinforcement learning}} } @inproceedings{black93history, address = {Morristown, NJ, USA}, author = {Black, Ezra and Jelinek, Fred and Lafferty, John and Magerman, David M and Mercer, Robert and Roukos, Salim}, booktitle = {ACL '93: Proceedings of the 31st Annual Meeting of the Association for Computational Linguistics}, pages = {31--37}, publisher = {Association for Computational Linguistics}, title = {{Towards history-based grammars: using richer models for probabilistic parsing}}, year = {1993} } @inproceedings{carpentier11active, author = {Carpentier, Alexandra and 
Lazaric, Alessandro and Ghavamzadeh, Mohammad and Munos, R{\'{e}}mi and Auer, Peter}, booktitle = {Algorithmic Learning Theory}, pages = {189--203}, title = {{Upper-Confidence-Bound algorithms for active learning in multi-armed bandits}}, year = {2011} } @inproceedings{erraqabi2017trading, abstract = {In multi-armed bandits, the most common objective is the maximization of the cumulative reward. Alternative settings include active exploration, where a learner tries to gain accurate estimates of the rewards of all arms. While these objectives are contrasting, in many scenarios it is desirable to trade off rewards and errors. For instance, in educational games the designer wants to gather generalizable knowledge about the behavior of the students and teaching strategies (small estimation errors) but, at the same time, the system needs to avoid giving a bad experience to the players, who may leave the system permanently (large reward). In this paper, we formalize this tradeoff and introduce the ForcingBalance algorithm whose performance is provably close to the best possible tradeoff strategy. Finally, we demonstrate on real-world educational data that ForcingBalance returns useful information about the arms without compromising the overall reward.}, author = {Erraqabi, Akram and Lazaric, Alessandro and Valko, Michal and Brunskill, Emma and Liu, Yun-en}, booktitle = {International Conference on Artificial Intelligence and Statistics}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Erraqabi et al. - 2017 - Trading off rewards and errors in multi-armed bandits.pdf:pdf}, title = {{Trading off rewards and errors in multi-armed bandits}}, year = {2017} } @article{erdos1960evolution, author = {Erdos, Paul and R{\'{e}}nyi, Alfr{\'{e}}d}, journal = {Publ. 
Mathematical Institute of the Hungarian Academy of Sciences}, title = {{On the evolution of random graphs}}, url = {http://www.citeulike.org/group/3072/article/1666220}, year = {1960} } @article{kveton2006solvinga, author = {Kveton, Branislav and Hauskrecht, Milos and Guestrin, Carlos}, journal = {Journal of Artificial Intelligence Research}, pages = {153--201}, title = {{Solving Factored {MDPs} with Hybrid State and Action Variables}}, volume = {27}, year = {2006} } @inproceedings{locatelli2018adaptive, abstract = {We present the first adaptive strategy for active learning in the setting of classification with smooth decision boundary. The problem of adaptivity (to unknown distributional parameters) has remained opened since the seminal work of Castro and Nowak (2007), which first established (active learning) rates for this setting. While some recent advances on this problem establish adaptive rates in the case of univariate data, adaptivity in the more practical setting of multivariate data has so far remained elusive. Combining insights from various recent works, we show that, for the multivariate case, a careful reduction to univariate-adaptive strategies yield near-optimal rates without prior knowledge of distributional parameters.}, archivePrefix = {arXiv}, arxivId = {1711.09294}, author = {Locatelli, Andrea and Carpentier, Alexandra and Kpotufe, Samory}, booktitle = {Algorithmic Learning Theory}, eprint = {1711.09294}, title = {{An Adaptive Strategy for Active Learning with Smooth Decision Boundary}}, year = {2018} } @inproceedings{rajeswaran2020game-theoretic, abstract = {Model-based reinforcement learning (MBRL) has recently gained immense interest due to its potential for sample efficiency and ability to incorporate off-policy data. However, designing stable and efficient MBRL algorithms using rich function approximators have remained challenging.
To help expose the practical challenges in MBRL and simplify algorithm design from the lens of abstraction, we develop a new framework that casts MBRL as a game between: (1) a policy player, which attempts to maximize rewards under the learned model; (2) a model player, which attempts to fit the real-world data collected by the policy player. For algorithm development, we construct a Stackelberg game between the two players, and show that it can be solved with approximate bi-level optimization. This gives rise to two natural families of algorithms for MBRL based on which player is chosen as the leader in the Stackelberg game. Together, they encapsulate, unify, and generalize many previous MBRL algorithms. Furthermore, our framework is consistent with and provides a clear basis for heuristics known to be important in practice from prior works. Finally, through experiments we validate that our proposed algorithms are highly sample efficient, match the asymptotic performance of model-free policy gradient, and scale gracefully to high-dimensional tasks like dexterous hand manipulation.}, archivePrefix = {arXiv}, arxivId = {2004.07804}, author = {Rajeswaran, Aravind and Mordatch, Igor and Kumar, Vikash}, booktitle = {International Conference on Machine Learning}, eprint = {2004.07804}, title = {{A game-theoretic framework for model-based reinforcement learning}}, url = {http://arxiv.org/abs/2004.07804}, year = {2020} } @misc{chang2001libsvm:, annote = {Software available at http://www.csie.ntu.edu.tw/{\~{}}cjlin/libsvm}, author = {Chang, Chih-Chung and Lin, Chih-Jen}, title = {{{\{}LIBSVM{\}}: a library for support vector machines}}, year = {2001} } @book{NY83, author = {Nemirovski, A and Yudin, D}, publisher = {Wiley Interscience}, title = {{Problem Complexity and Method Efficiency in Optimization}}, year = {1983} } @inproceedings{perrault2019exploiting, abstract = {We improve the efficiency of algorithms for stochastic combinatorial semi-bandits. 
In most interesting problems, state-of-the-art algorithms take advantage of structural properties of rewards, such as independence. However, while being minimax optimal in terms of regret, these algorithms are intractable. In our paper, we first reduce their implementation to a specific submodular maximization. Then, in case of matroid constraints, we design adapted approximation routines, thereby providing the first efficient algorithms that exploit the reward structure. In particular, we improve the state-of-the-art efficient gap-free regret bound by a factor sqrt(k), where k is the maximum action size. Finally, we show how our improvement translates to more general budgeted combinatorial semi-bandits.}, archivePrefix = {arXiv}, arxivId = {1902.03794}, author = {Perrault, Pierre and Perchet, Vianney and Valko, Michal}, booktitle = {International Conference on Machine Learning}, eprint = {1902.03794}, month = {feb}, title = {{Exploiting structure of uncertainty for efficient matroid semi-bandits}}, year = {2019} } @inproceedings{Kalai03efficient, address = {New York, NY, USA}, author = {Kalai, A and Vempala, S}, booktitle = {Proceedings of the 16th Annual Conference on Learning Theory and the 7th Kernel Workshop, COLT-Kernel 2003}, editor = {Sch{\"{o}}lkopf, B and Warmuth, M}, pages = {26--40}, publisher = {Springer}, title = {{Efficient algorithms for the online decision problem}}, year = {2003} } @inproceedings{turian06advances, address = {Morristown, NJ, USA}, author = {Turian, Joseph and Melamed, I Dan}, booktitle = {ACL '06: Proceedings of the 21st International Conference on Computational Linguistics and the 44th Annual Meeting of the Association for Computational Linguistics}, doi = {http://dx.doi.org/10.3115/1220175.1220285}, pages = {873--880}, publisher = {Association for Computational Linguistics}, title = {{Advances in discriminative parsing}}, year = {2006} } @article{Nem79, annote = {(In Russian)}, author = {Nemirovski, A}, journal = {Ekonomika i 
Matematicheskie Metody}, title = {{Efficient methods for large-scale convex optimization problems}}, volume = {15}, year = {1979} } @article{brafman2003r-max, author = {Brafman, Ronen and Tennenholtz, Moshe}, journal = {Journal of Machine Learning Research}, pages = {213--231}, title = {{R-MAX -- A General Polynomial Time Algorithm for Near-Optimal Reinforcement Learning}}, volume = {3}, year = {2003} } @inproceedings{szorenyi2014optimistic, abstract = {We consider the problem of online planning in a Markov decision process with discounted rewards for any given initial state. We consider the PAC sample com-plexity problem of computing, with probability $1-\delta$, an $\epsilon$-optimal action using the smallest possible number of calls to the generative model (which provides reward and next-state samples). We design an algorithm, called StOP (for Stochastic-Optimistic Planning), based on the " optimism in the face of uncertainty " princi-ple. StOP can be used in the general setting, requires only a generative model, and enjoys a complexity bound that only depends on the local structure of the MDP.}, author = {Sz{\"{o}}r{\'{e}}nyi, Bal{\'{a}}zs and Kedenburg, Gunnar and Munos, R{\'{e}}mi}, booktitle = {Neural Information Processing Systems}, title = {{Optimistic planning in Markov decision processes using a generative model}}, url = {https://papers.nips.cc/paper/5368-optimistic-planning-in-markov-decision-processes-using-a-generative-model.pdf}, year = {2014} } @article{steinwart2005classification, address = {Cambridge, MA, USA}, annote = {comps{\_}anX}, author = {Steinwart, Ingo and Hush, Don and Scovel, Clint}, issn = {1533-7928}, journal = {Journal of Machine Learning Research}, pages = {211--232}, publisher = {MIT Press}, title = {{A Classification Framework for Anomaly Detection}}, url = {http://jmlr.csail.mit.edu/papers/volume6/steinwart05a/steinwart05a.pdf}, volume = {6}, year = {2005} } @article{gilpin2012first, abstract = {We propose an iterated version of
Nesterov's first-order smoothing method for the two-person zero-sum game equilibrium problem minx∈q 1 maxy∈Q 2 x T Ay = minx∈q 2 maxy∈Q 1 x T Ay. This formulation applies to matrix games as well as sequential games. Our new algorithmic scheme computes an $\epsilon$-equilibrium to this min-max problem in O({\P}A{\P} /$\delta$(A) ln(1/$\epsilon$)) first-order iterations, where $\delta$(A) is a certain condition measure of the matrix A. This improves upon the previous first-order methods which required O(1/$\epsilon$)iterations, and it matches the iteration complexity bound of interior-point methods in terms of the algorithm's dependence on $\epsilon$. Unlike interior-point methods that are inapplicable to large games due to their memory requirements, our algorithm retains the small memory requirements of prior first-order methods. Our scheme supplements Nesterov's method with an outer loop that lowers the target $\epsilon$between iterations (this target affects the amount of smoothing in the inner loop). Computational experiments both in matrix games and sequential games show that a significant speed improvement is obtained in practice as well, and the relative speed improvement increases with the desired accuracy (as suggested by the complexity bounds). 
{\textcopyright} 2010 Springer and Mathematical Optimization Society.}, author = {Gilpin, Andrew and Pe{\~{n}}a, Javier and Sandholm, Tuomas}, journal = {Mathematical Programming}, title = {{First-order algorithm with O(ln(1/$\epsilon$)) convergence for $\epsilon$-equilibrium in two-person zero-sum games}}, url = {https://www.cs.cmu.edu/{~}sandholm/restart.MathProg12.pdf}, year = {2012} } @inproceedings{geulen10buffering, author = {Geulen, S and Voecking, B and Winkler, M}, title = {{Regret Minimization for Online Buffering Problems Using the Weighted Majority Algorithm}} } @techreport{bouttier2017convergence, abstract = {In this paper we propose a modified version of the simulated annealing algorithm for solving a stochastic global optimization problem. More precisely, we address the problem of finding a global minimizer of a function with noisy evaluations. We provide a rate of convergence and its optimized parametrization to ensure a minimal number of evaluations for a given accuracy and a confidence level close to 1. This work is completed with a set of numerical experimentations and assesses the practical performance both on benchmark test cases and on real world examples.}, archivePrefix = {arXiv}, arxivId = {1703.00329}, author = {Bouttier, Cl{\'{e}}ment and Gavra, Ioana}, eprint = {1703.00329}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Bouttier, Gavra - 2017 - Convergence rate of a simulated annealing algorithm with noisy observations.pdf:pdf}, title = {{Convergence rate of a simulated annealing algorithm with noisy observations}}, url = {http://arxiv.org/abs/1703.00329}, year = {2017} } @article{meila2001random, abstract = {We present a new view of clustering and segmentation by pairwise similarities. We interpret the similarities as edge flows in a Markov random walk and study the eigenvalues and eigenvectors of the walk's transition matrix.
This view shows that spectral methods for clustering and segmentation have a probabilistic foundation. We prove that the Normalized Cut method arises naturally from our framework and we provide a complete characterization of the cases when the Normalized Cut algorithm is exact. Then we discuss other spectral segmentation and clustering methods showing that several of them are essentially the same as NCut.}, author = {Meila, M. and Shi, J.}, journal = {International Conference on Artificial Intelligence and Statistics}, title = {{A random walks view of spectral segmentation}}, year = {2001} } @article{bach2011learning, archivePrefix = {arXiv}, arxivId = {1111.6453}, author = {Bach, Francis}, eprint = {1111.6453}, keywords = {Learning,Optimization and Control}, title = {{Learning with Submodular Functions: A Convex Optimization Perspective}}, url = {http://arxiv.org/abs/1111.6453}, year = {2011} } @inproceedings{smola2003kernels, author = {Smola, A J and Kondor, R}, booktitle = {Proceedings of the Annual Conference on Computational Learning Theory and Kernel Workshop}, editor = {Sch{\"{o}}lkopf, B and Warmuth, M}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {{Kernels and Regularization on Graphs}}, year = {2003} } @article{blei2003latent, address = {Cambridge, MA, USA}, annote = {comps{\_}models}, author = {Blei, David M and Ng, Andrew Y and Jordan, Michael I}, issn = {1533-7928}, journal = {J. Mach. Learn.
Res.}, pages = {993--1022}, publisher = {MIT Press}, title = {{Latent {Dirichlet} Allocation}}, url = {http://delivery.acm.org/10.1145/950000/944937/3-993-blei.pdf?key1=944937{\&}key2=6322955221{\&}coll=GUIDE{\&}dl=GUIDE{\&}CFID=8754681{\&}CFTOKEN=61191808}, volume = {3}, year = {2003} } @inproceedings{Pomerleau_1989_2055, author = {Pomerleau, Dean}, pages = {305--313}, title = {{{ALVINN}: An Autonomous Land Vehicle in a Neural Network}} } @inproceedings{lagree2016multiple-play, author = {Lagr{\'{e}}e, Paul and Vernade, Claire and Capp{\'{e}}, Olivier}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Lagr{\'{e}}e, Vernade, Cappe - 2016 - Multiple-play bandits in the position-based model.pdf:pdf}, title = {{Multiple-play bandits in the position-based model}}, url = {https://papers.nips.cc/paper/6546-multiple-play-bandits-in-the-position-based-model}, year = {2016} } @article{even-dar09OnlineMDP, address = {Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA}, author = {Even-Dar, Eyal and Kakade, Sham
M. and Mansour, Yishay}, doi = {10.1287/moor.1090.0396}, issn = {0364-765X}, journal = {Mathematics of Operations Research}, number = {3}, pages = {726--736}, publisher = {INFORMS}, title = {{Online {M}arkov Decision Processes}}, volume = {34}, year = {2009} } @book{chung1997spectral, author = {Chung, Fan}, keywords = {imported}, publisher = {American Mathematical Society}, title = {{Spectral Graph Theory}}, year = {1997} } @article{matus1995extreme, author = {Matus, F}, journal = {Discrete Mathematics}, number = {1}, pages = {177--192}, publisher = {Citeseer}, title = {{Extreme convex set functions with many nonnegative differences}}, volume = {135}, year = {1995} } @article{carpentier2013honest, abstract = {We study the problem of constructing honest and adaptive confidence intervals for the tail coefficient in the second order Pareto model, when the second order coefficient is unknown. This problem is translated into a testing problem on the second order parameter. By constructing an appropriate model and an associated test statistic, we provide a uniform and adaptive confidence interval for the first order parameter. We also provide an almost matching lower bound, which proves that the result is minimax optimal up to a logarithmic factor.}, archivePrefix = {arXiv}, arxivId = {1312.2968}, author = {Carpentier, Alexandra and Kim, Arlene K.
H.}, eprint = {1312.2968}, journal = {Electronic Journal of Statistics}, title = {{Honest and adaptive confidence interval for the tail coefficient in the Pareto model}}, year = {2014} } @article{klopp2015matrix, author = {Klopp, Olga}, journal = {Electronic journal of statistics}, number = {2}, pages = {2348--2369}, publisher = {The Institute of Mathematical Statistics and the Bernoulli Society}, title = {{Matrix completion by singular value thresholding: Sharp bounds}}, volume = {9}, year = {2015} } @phdthesis{meila1999learning, author = {Meila, Marina}, school = {Massachusetts Institute of Technology}, title = {{Learning with Mixtures of Trees}}, year = {1999} } @inproceedings{dechter1996bucket, author = {Dechter, Rina}, booktitle = {Proceedings of the 12th Conference on Uncertainty in Artificial Intelligence}, pages = {211--219}, title = {{Bucket Elimination: A Unifying Framework for Probabilistic Inference}}, year = {1996} } @article{hochbaum2001efficient, author = {Hochbaum, D S}, journal = {Journal of the ACM}, number = {4}, pages = {686--701}, publisher = {ACM}, title = {{An efficient algorithm for image segmentation, Markov random fields and related problems}}, volume = {48}, year = {2001} } @inproceedings{boularias2011relative, abstract = {We consider the problem of imitation learn- ing where the examples, demonstrated by an expert, cover only a small part of a large state space. Inverse Reinforcement Learning (IRL) provides an efficient tool for generaliz- ing the demonstration, based on the assump- tion that the expert is optimally acting in a Markov Decision Process (MDP). Most of the past work on IRL requires that a (near)- optimal policy can be computed for differ- ent reward functions. However, this require- ment can hardly be satisfied in systems with a large, or continuous, state space. 
In this pa- per, we propose a model-free IRL algorithm, where the relative entropy between the em- pirical distribution of the state-action trajec- tories under a uniform policy and their distri- bution under the learned policy is minimized by stochastic gradient descent. We compare this new approach to well-known IRL algo- rithms using approximate MDP models. Em- pirical results on simulated car racing, grid- world and ball-in-a-cup problems show that our approach is able to learn good policies from a small number of demonstrations.}, author = {Boularias, Abdeslam and Kober, Jens and Peters, Jan}, booktitle = {Proceedings of the 14th International Con- ference on Artificial Intelligence and Statistics}, pages = {182--189}, title = {{Relative Entropy Inverse Reinforcement Learning}}, volume = {15}, year = {2011} } @article{lazaric11stoch_adv, author = {Lazaric, A and Munos, R}, journal = {Journal of Computer and System Sciences (Special issue: Cloud Computing 2011)}, pages = {1516--1537}, title = {{Learning with Stochastic Inputs and Adversarial Outputs}}, volume = {78(5)}, year = {2012} } @inproceedings{cesa-bianchi2016delay, abstract = {We study networks of communicating learning agents that cooperate to solve a common nonstochastic bandit problem. Agents use an underlying communication network to get messages about actions selected by other agents, and drop messages that took more than {\$}d{\$} hops to arrive, where {\$}d{\$} is a delay parameter. 
We introduce $\backslash$textsc{\{}Exp3-Coop{\}}, a cooperative version of the {\{}$\backslash$sc Exp3{\}} algorithm and prove that with {\$}K{\$} actions and {\$}N{\$} agents the average per-agent regret after {\$}T{\$} rounds is at most of order {\$}\backslashsqrt{\{}\backslashbigl(d+1 + \backslashtfrac{\{}K{\}}{\{}N{\}}\backslashalpha{\_}{\{}\backslashle d{\}}\backslashbigr)(T\backslashln K){\}}{\$}, where {\$}\backslashalpha{\_}{\{}\backslashle d{\}}{\$} is the independence number of the {\$}d{\$}-th power of the connected communication graph {\$}G{\$}. We then show that for any connected graph, for {\$}d=\backslashsqrt{\{}K{\}}{\$} the regret bound is {\$}K{\^{}}{\{}1/4{\}}\backslashsqrt{\{}T{\}}{\$}, strictly better than the minimax regret {\$}\backslashsqrt{\{}KT{\}}{\$} for noncooperating agents. More informed choices of {\$}d{\$} lead to bounds which are arbitrarily close to the full information minimax regret {\$}\backslashsqrt{\{}T\backslashln K{\}}{\$} when {\$}G{\$} is dense. When {\$}G{\$} has sparse components, we show that a variant of $\backslash$textsc{\{}Exp3-Coop{\}}, allowing agents to choose their parameters according to their centrality in {\$}G{\$}, strictly improves the regret. Finally, as a by-product of our analysis, we provide the first characterization of the minimax regret for bandit learning with delay.}, author = {Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio and Mansour, Yishay and Minora, Alberto}, booktitle = {Conference on Learning Theory}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Cesa-Bianchi et al. - 2016 - Delay and cooperation in nonstochastic bandits.pdf:pdf}, title = {{Delay and cooperation in nonstochastic bandits}}, year = {2016} } @techreport{dymetman2012os, abstract = {Most current sampling algorithms for high-dimensional distributions are based on MCMC techniques and are approximate in the sense that they are valid only asymptotically. 
Rejection sampling, on the other hand, produces valid samples, but is unrealistically slow in high-dimension spaces. The OS* algorithm that we propose is a unified approach to exact optimization and sampling, based on incremental refinements of a functional upper bound, which combines ideas of adaptive rejection sampling and of A* optimization search. We show that the choice of the refinement can be done in a way that ensures tractability in high-dimension spaces, and we present first experiments in two different settings: inference in high-order HMMs and in large discrete graphical models.}, archivePrefix = {arXiv}, arxivId = {1207.0742}, author = {Dymetman, Marc and Bouchard, Guillaume and Carter, Simon}, eprint = {1207.0742}, institution = {http://arxiv.org/abs/1207.0742}, title = {{The OS* algorithm: A joint approach to exact optimization and sampling}}, year = {2012} } @article{hoda2010smoothing, abstract = {We develop first-order smoothing techniques for saddle-point problems that arise in finding a Nash equilibrium of sequential games. The crux of our work is a construction of suitable prox-functions for a certain class of polytopes that encode the sequential nature of the game. We also introduce heuristics that significantly speed up the algorithm, and decomposed game representations that reduce the memory requirements, enabling the application of the techniques to drastically larger games. An implementation based on our smoothing techniques computes approximate Nash equilibria for games that are more than four orders of magnitude larger than what prior approaches can handle. Finally, we show near-linear further speedups from parallelization. 
{\textcopyright} 2010 INFORMS.}, author = {Hoda, Samid and Gilpin, Andrew and Pe{\~{n}}a, Javier and Sandholm, Tuomas}, journal = {Mathematics of Operations Research}, keywords = {Nash equilibrium,Sequential games,Smoothing techniques}, title = {{Smoothing techniques for computing nash equilibria of sequential games}}, url = {https://kilthub.cmu.edu/ndownloader/files/12101699}, year = {2010} } @incollection{torresani2007large, address = {Cambridge, MA}, annote = {comps{\_}distance}, author = {Torresani, Lorenzo and Lee, Kuang-chih}, booktitle = {Advances in Neural Information Processing Systems 19}, editor = {Scholkopf, B and Platt, J and Hoffman, T}, pages = {1385--1392}, publisher = {MIT Press}, title = {{Large Margin Component Analysis}}, url = {http://books.nips.cc/papers/files/nips19/NIPS2006{\_}0791.pdf}, year = {2007} } @book{Sch03, author = {Schrijver, A}, publisher = {Springer}, title = {{Combinatorial Optimization}}, year = {2003} } @inproceedings{perrault2020budgeted, abstract = {We introduce a new budgeted framework for online influence maximization, considering the total cost of an advertising campaign instead of the common cardinality constraint on a chosen influencer set. Our approach models better the real-world setting where the cost of influencers varies and advertizers want to find the best value for their overall social advertising budget. We propose an algorithm assuming an independent cascade diffusion model and edge-level semi-bandit feedback, and provide both theoretical and experimental results. 
Our analysis is also valid for the cardinality-constraint setting and improves the state of the art regret bound in this case.}, author = {Perrault, Pierre and Wen, Zheng and Healey, Jennifer and Valko, Michal}, booktitle = {International Conference on Machine Learning}, title = {{Budgeted online influence maximization}}, year = {2020} } @inproceedings{demiris96imitative, author = {Demiris, John and Hayes, Gillian}, booktitle = {Proceedings of the 5th European Workshop on Learning Robots}, editor = {Klingspor, Volker}, pages = {9--16}, title = {{Imitative learning mechanisms in robots and humans}}, url = {citeseer.ist.psu.edu/demiris96imitative.html}, year = {1996} } @incollection{klein2012inverse, author = {Klein, Edouard and Geist, Matthieu and PIOT, BILAL and Pietquin, Olivier}, booktitle = {Advances in Neural Information Processing Systems 25}, editor = {Bartlett, P and Pereira, F C N and Burges, C J C and Bottou, L and Weinberger, K Q}, pages = {1016--1024}, title = {{Inverse Reinforcement Learning through Structured Classification}}, url = {http://books.nips.cc/papers/files/nips25/NIPS2012{\_}0491.pdf}, year = {2012} } @article{bala1998learning, abstract = {When payoffs from different actions are unknown, agents use their own past experience as well as the experience of their neighbours to guide their decision making. In this paper, we develop a general framework to study the relationship between the structure of these neighbourhoods and the process of social learning. We show that, in a connected society, local learning ensures that all agents obtain the same payoffs in the long run. Thus, if actions have different payoffs, then all agents choose the same action, and social conformism obtains. We develop conditions on the distribution of prior beliefs, the structure of neighbourhoods and the informativeness of actions under which this action is optimal. 
In particular, we identify a property of neighbourhood structures-local independence-which greatly facilitates social learning. Simulations of the model generate spatial and temporal patterns of adoption that are consistent with empirical work.}, author = {Bala, Venkatesh and Goyal, Sanjeev}, journal = {Review of Economic Studies}, number = {3}, pages = {595--621}, title = {{Learning from neighbours}}, volume = {65}, year = {1998} } @inproceedings{dhiman2006dynamic, author = {Dhiman, Gaurav and Simunic, Tajana}, booktitle = {Proceedings of the 2006 IEEE / ACM International Conference on Computer-Aided Design}, title = {{Dynamic Power Management Using Machine Learning}}, year = {2006} } @article{baxter2001infinite-horizon, author = {Baxter, Jonathan and Bartlett, Peter and Weaver, Lex}, journal = {Journal of Artificial Intelligence Research}, pages = {319--350}, title = {{Infinite-Horizon Policy-Gradient Estimation}}, volume = {15}, year = {2001} } @article{grant2006disciplined, abstract = {Training a support vector machine (SVM) leads to a quadratic optimization problem with bound constraints and one linear equality constraint. Despite the fact that this type of problem is well understood, there are many issues to be considered in designing an SVM learner. In particular, for large learning tasks with many training examples, o -the-shelf optimization techniques for general quadratic programs quickly become intractable in their memory and time requirements. S V Mlight1 is an implementation of an SVM learner which addresses the problem of large tasks. This chapter presents algorithmic and computational results developed for S V MlightV2.0, which make large-scale SVM training more practical. 
The results give guidelines for the application of SVMs to large domains}, author = {Grant, Michael and Boyd, Stephen and Ye, Yinyu}, doi = {10.1007/0-387-30528-9_7}, editor = {{Leo Liberti} and {Nelson Maculan}}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Grant, Boyd, Ye - 2006 - Disciplined Convex Programming and CVX.pdf:pdf}, institution = {Stanford}, isbn = {9780387282602}, journal = {Review Literature And Arts Of The Americas}, number = {3}, pages = {1--26}, publisher = {Springer}, series = {Nonconvex Optimization and Its Applications}, title = {{Disciplined Convex Programming and CVX}}, url = {http://www.springerlink.com/content/p00314m582n01743/}, volume = {C}, year = {2006} } @article{basu2017analysis, archivePrefix = {arXiv}, arxivId = {1705.06808}, author = {Basu, Kinjal and Ghosh, Souvik}, eprint = {1705.06808}, journal = {arXiv preprint arXiv:1705.06808}, month = {may}, title = {{Analysis of Thompson sampling for Gaussian process optimization in the bandit setting}}, url = {http://arxiv.org/abs/1705.06808}, year = {2018} } @article{abernethy2008efficient, abstract = {We introduce an efficient algorithm for the problem of online linear optimization in the bandit setting which achieves the optimal O (T regret. The setting is a natural generalization of the non-stochastic multi-armed bandit problem, and the existence of an efficient optimal algorithm has been posed as an open problem in a number of recent papers. We show how the difficulties encountered by previous approaches are overcome by the use of a self-concordant potential function. 
Our approach presents a novel connection between online learning and interior point methods.}, author = {Abernethy, Jacob Duncan and Hazan, Elad and Rakhlin, Alexander}, doi = {10.1080/09544820500115717}, institution = {EECS Department, University of California, Berkeley}, issn = {09544828}, journal = {Online}, number = {3}, pages = {540--543}, publisher = {Citeseer}, title = {{An Efficient Algorithm for Bandit Linear Optimization}}, url = {http://www.informaworld.com/openurl?genre=article{\&}doi=10.1080/09544820500115717{\&}magic=crossref}, volume = {1}, year = {2008} } @inproceedings{jensen1994from, author = {Jensen, Frank and Jensen, Finn and Dittmer, S{\o}ren}, booktitle = {Proceedings of the 10th Conference on Uncertainty in Artificial Intelligence}, pages = {367--373}, title = {{From Influence Diagrams to Junction Trees}}, year = {1994} } @article{gyorgy04efficient, author = {Gy{\"{o}}rgy, A and Linder, T and Lugosi, G}, journal = {IEEE Transactions on Signal Processing}, pages = {2337--2347}, title = {{Efficient Adaptive Algorithms and Minimax Bounds for Zero-Delay Lossy Source Coding}}, volume = {52}, year = {2004} } @article{negahban2012restricted, abstract = {We consider the matrix completion problem under a form of row/column weighted entrywise sam-pling, including the case of uniform entrywise sampling as a special case. We analyze the associated random observation operator, and prove that with high probability, it satisfies a form of restricted strong convexity with respect to weighted Frobenius norm. Using this property, we obtain as corol-laries a number of error bounds on matrix completion in the weighted Frobenius norm under noisy sampling and for both exact and near low-rank matrices. Our results are based on measures of the " spikiness " and " low-rankness " of matrices that are less restrictive than the incoherence con-ditions imposed in previous work. 
Our technique involves an M-estimator that includes controls on both the rank and spikiness of the solution, and we establish non-asymptotic error bounds in weighted Frobenius norm for recovering matrices lying with ℓ q - " balls " of bounded spikiness. Us-ing information-theoretic methods, we show that no algorithm can achieve better estimates (up to a logarithmic factor) over these same sets, showing that our conditions on matrices and associated rates are essentially optimal.}, author = {Negahban, Sahand and Wainwright, Martin J}, journal = {Journal of Machine Learning Research}, keywords = {collaborative filtering,convex optimization,matrix completion}, pages = {1665--1697}, title = {{Restricted strong convexity and weighted matrix completion: Optimal bounds with noise}}, volume = {13}, year = {2012} } @inproceedings{chau2011apolo, author = {Chau, Duen Horng and Kittur, Aniket and Hong, Jason I. and Faloutsos, Christos}, booktitle = {Conference on Human Factors in Computing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Chau et al. 
- 2011 - Apolo Making sense of large network data by combining rich user interaction and machine learning.pdf:pdf}, title = {{Apolo: Making sense of large network data by combining rich user interaction and machine learning}}, url = {https://www.cc.gatech.edu/{~}dchau/papers/11-chi-apolo.pdf}, year = {2011} } @article{Baraniuk2008, author = {Baraniuk, R G and Cevher, V and Duarte, M F and Hegde, C}, journal = {IEEE Transactions on Information Theory}, pages = {1982--2001}, title = {{Model-based compressive sensing}}, volume = {56}, year = {2010} } @inproceedings{kuniyoshi94visionbased, author = {Kuniyoshi, Y and Riekki, J and Ishii, M and Rougeaux, S and Kita, N and Sakane, S and Kakikura, M}, booktitle = {IEEE/RSJ IROS}, pages = {925--931}, title = {{Vision-based behaviors for multi-robot cooperation}}, url = {citeseer.ist.psu.edu/kuniyoshi94visionbased.html}, year = {1994} } @phdthesis{littman1996algorithms, author = {Littman, Michael}, school = {Brown University}, title = {{Algorithms for Sequential Decision Making}}, year = {1996} } @article{gilks1992adaptive, abstract = {We propose a method for rejection sampling from any univariate log-concave probability density function. The method is adaptive: as sampling proceeds, the rejection envelope and the squeezing function converge to the density function. The rejection envelope and squeezing function are piece-wise exponential functions, the rejection envelope touching the density at previously sampled points, and the squeezing function forming arcs between those points of contact. The technique is intended for situations where evaluation of the density is computationally expensive, in particular for applications of Gibbs sampling to Bayesian models with non-conjugacy. We apply the technique to a Gibbs sampling analysis of monoclonal antibody reactivity.}, author = {Gilks, W. R. and Wild, P.}, journal = {Journal of the Royal Statistical Society. 
Series C (Applied Statistics)}, keywords = {adaptive rejection sampling,bayesian inference,density,gibbs sampling,log-concave,non-conjugate bayesian models,simulation}, number = {2}, pages = {337--348}, title = {{Adaptive rejection sampling for Gibbs sampling}}, volume = {41}, year = {1992} } @inproceedings{jamali2010matrix, author = {Jamali, Mohsen and Ester, Martin}, booktitle = {Conference on Recommender systems}, title = {{A matrix factorization technique with trust propagation for recommendation in social networks}}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.459.691{\&}rep=rep1{\&}type=pdf}, year = {2010} } @incollection{pelleg2005active, address = {Cambridge, MA}, annote = {comps{\_}ano}, author = {Pelleg, Dan and Moore, Andrew W}, booktitle = {Advances in Neural Information Processing Systems 17}, editor = {Saul, Lawrence K and Weiss, Yair and Bottou, L{\'{e}}on}, pages = {1073--1080}, publisher = {MIT Press}, title = {{Active Learning for Anomaly and Rare-Category Detection}}, url = {http://books.nips.cc/papers/files/nips17/NIPS2004{\_}0438.pdf}, year = {2005} } @article{harchaoui2008catching, author = {Harchaoui, Z and L{\'{e}}vy-Leduc, C}, journal = {Adv. NIPS}, title = {{Catching change-points with {\{}L{\}}asso}}, volume = {20}, year = {2008} } @inproceedings{locatelli2019active, abstract = {In this work, we formulate a new multi-task active learning setting in which the learner's goal is to solve multiple matrix completion problems simultaneously. At each round, the learner can choose from which matrix it receives a sample from an entry drawn uniformly at random. Our main practical motivation is market segmentation, where the matrices represent different regions with different preferences of the customers. The challenge in this setting is that each of the matrices can be of a different size and also of a different rank which is unknown. 
We provide and analyze a new algorithm, MAlocate that is able to adapt to the unknown ranks of the different matrices. We then give a lower-bound showing that our strategy is minimax-optimal and demonstrate its performance with synthetic experiments.}, author = {Locatelli, Andrea and Carpentier, Alexandra and Valko, Michal}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Active multiple matrix completion with adaptive confidence sets}}, year = {2019} } @article{quinonero-candela2007approximation, author = {Quinonero-Candela, Joaquin and Rasmussen, Carl Edward and Williams, Christopher K. I.}, journal = {Large-scale kernel machines}, pages = {203--224}, title = {{Approximation methods for gaussian process regression}}, url = {https://homepages.inf.ed.ac.uk/ckiw/postscript/lskm{\_}chap.pdf}, year = {2007} } @inproceedings{rahimi2007random, abstract = {To accelerate the training of kernel machines, we propose to map the input data to a randomized low-dimensional feature space and then apply existing fast linear methods. Our randomized features are designed so that the inner products of the transformed data are approximately equal to those in the feature space of a user specified shift-invariant kernel. 
We explore two sets of random features, provide convergence bounds on their ability to approximate various radial basis kernels, and show that in large-scale classification and regression tasks linear machine learning algorithms that use these features outperform state-of-the-art large-scale kernel machines.}, author = {Rahimi, Ali and Recht, Ben}, booktitle = {Neural Information Processing Systems}, title = {{Random features for large-scale kernel machines}}, url = {https://people.eecs.berkeley.edu/{~}brecht/papers/07.rah.rec.nips.pdf}, year = {2007} } @article{kveton2016learning, abstract = {Many important optimization problems, such as the minimum spanning tree and minimum-cost flow, can be solved optimally by a greedy method. In this work, we study a learning variant of these problems, where the model of the problem is unknown and has to be learned by interacting repeatedly with the environment in the bandit setting. We formalize our learning problem quite generally, as learning how to maximize an unknown modular function on a known polymatroid. We propose a computationally efficient algorithm for solving our problem and bound its expected cumulative regret. Our gap-dependent upper bound is tight up to a constant and our gap-free upper bound is tight up to polylogarithmic factors. 
Finally, we evaluate our method on three problems and demonstrate that it is practical.}, author = {Kveton, Branislav and Wen, Zheng and Ashkan, Azin and Valko, Michal}, journal = {Journal of Machine Learning Research}, title = {{Learning to act greedily: Polymatroid semi-bandits}}, year = {2016} } @incollection{visweswaran2005instance-specific, address = {Cambridge, MA}, annote = {comps{\_}models}, author = {Visweswaran, Shyam and Cooper, Gregory F}, booktitle = {Advances in Neural Information Processing Systems 17}, editor = {Saul, Lawrence K and Weiss, Yair and Bottou, L{\'{e}}on}, pages = {1449--1456}, publisher = {MIT Press}, title = {{Instance-Specific {\{}B{\}}ayesian Model Averaging for Classification}}, url = {http://books.nips.cc/papers/files/nips17/NIPS2004{\_}0482.pdf}, year = {2005} } @article{megiddo1974optimal, author = {Megiddo, N}, journal = {Mathematical Programming}, number = {1}, pages = {97--107}, publisher = {Springer}, title = {{Optimal flows in networks with multiple sources and sinks}}, volume = {7}, year = {1974} } @phdthesis{osborne2010bayesian, author = {Osborne, Michael}, school = {University of Oxford}, title = {{Bayesian Gaussian processes for sequential prediction, optimisation and quadrature}}, year = {2010} } @book{chapelle2010semi-supervised, author = {Chapelle, Olivier and Schlkopf, Bernhard and Zien, Alexander}, publisher = {The MIT Press}, title = {{Semi-Supervised Learning}}, year = {2010} } @inproceedings{drineas2005nystr$o$m, author = {Drineas, P and Mahoney, M W}, booktitle = {Proceedings of COLT, 2005}, title = {{On the {\{}N{\}}ystr{\{}$\backslash$ddot{\{}$\backslash$text{\{}o{\}}{\}}{\}}m method for approximating a {\{}G{\}}ram matrix for improved kernel-based learning}}, year = {2005} } @article{dunn1980convergence, author = {Dunn, J C}, journal = {SIAM Journal on Control and Optimization}, pages = {473--487}, title = {{Convergence rates for conditional gradient sequences generated by implicit step length rules}}, 
volume = {18}, year = {1980} } @article{coulom2007efficient, abstract = {A Monte-Carlo evaluation consists in estimating a position by averaging the outcome of several random continuations. The method can serve as an evaluation function at the leaves of a min-max tree. This paper presents a new framework to combine tree search with Monte-Carlo evaluation, that does not separate between a min-max phase and a Monte-Carlo phase. Instead of backing-up the min-max value close to the root, and the average value at some depth, a more general backup operator is defined that progressively changes from averaging to min-max as the number of simulations grows. This approach provides a fine-grained control of the tree growth, at the level of individual simulations, and allows efficient selectivity. The resulting algorithm was implemented in a 9x9 Go-playing program, Crazy Stone, that won the 10th KGS computer-Go tournament.}, author = {Coulom, R{\'{e}}mi}, journal = {Computers and games}, pages = {72--83}, title = {{Efficient selectivity and backup operators in Monte-Carlo tree search}}, url = {https://hal.inria.fr/inria-00116992/document}, volume = {4630}, year = {2007} } @inproceedings{Kocak2014, abstract = {Copyright {\textcopyright} 2014, Association for the Advancement of Artificial Intelligence. Thompson Sampling (TS) has surged a lot of interest due to its good empirical performance, in particular in the computational advertising. Though successful, the tools for its performance analysis appeared only recently. In this paper, we describe and analyze SpectralTS algorithm for a bandit problem, where the payoffs of the choices are smooth given an underlying graph. In this setting, each choice is a node of a graph and the expected payoffs of the neighboring nodes are assumed to be similar. Although the setting has application both in recommender systems and advertising, the traditional algorithms would scale poorly with the number of choices. 
For that purpose we consider an effective dimension d, which is small in real-world graphs. We deliver the analysis showing that the regret of SpectralTS scales as $d\sqrt{T \ln N}$ with high probability, where T is the time horizon and N is the number of choices. Since a $d\sqrt{T \ln N}$ regret is comparable to the known results, SpectralTS offers a computationally more efficient alternative. We also show that our algorithm is competitive on both synthetic and real-world data.}, author = {Koc{\'{a}}k, T. and Munos, R. and Valko, M. and Agrawal, S.}, booktitle = {AAAI Conference on Artificial Intelligence}, isbn = {9781577356790}, title = {{Spectral thompson sampling}}, volume = {3}, year = {2014} } @article{neu09parsers, author = {Neu, G and Szepesv{\'{a}}ri, Csaba}, journal = {Machine Learning Journal}, number = {2}, pages = {303--337}, title = {{Training parsers by inverse reinforcement learning}}, volume = {77}, year = {2009} } @article{even2006action, author = {Even-Dar, Eyal and Mannor, Shie and Mansour, Yishay}, journal = {Journal of Machine Learning Research}, pages = {1079--1105}, title = {{Action elimination and stopping conditions for the multi-armed bandit and reinforcement learning problems}}, url = {https://jmlr.csail.mit.edu/papers/volume7/evendar06a/evendar06a.pdf}, volume = {7}, year = {2006} } @article{zivny2009expressive, author = {{\v{Z}}ivn{\'{y}}, S and Cohen, D A and Jeavons, P G}, journal = {Discrete Applied Mathematics}, number = {15}, pages = {3347--3358}, publisher = {Elsevier}, title = {{The expressive power of binary submodular functions}}, volume = {157}, year = {2009} } @inproceedings{weinberger2007metric, annote = {comps{\_}distance}, author = {Weinberger, K Q and Tesauro, G}, booktitle = {Proc.
of the 11th International Conference on Artificial Intelligence and Statistics}, title = {{Metric Learning for Kernel Regression}}, url = {http://www.stat.umn.edu/{~}aistat/proceedings/data/papers/077.pdf}, year = {2007} } @incollection{bengio2004out-of-sample, address = {Cambridge, MA}, author = {Bengio, Yoshua and Paiement, Jean-Fran{\c{c}}ois and Vincent, Pascal and Delalleau, Olivier and {Le Roux}, Nicolas and Ouimet, Marie}, booktitle = {Advances in Neural Information Processing Systems 16}, editor = {Thrun, Sebastian and Saul, Lawrence and Sch{\"{o}}lkopf, Bernhard}, keywords = {Isomap,Nystrom formula,dimensionality reduction,eigenfunctions learning,kernel PCA,locally linear embedding,spectral methods}, publisher = {MIT Press}, title = {{Out-of-Sample Extensions for {\{}LLE{\}}, Isomap, {\{}MDS{\}}, Eigenmaps, and Spectral Clustering}}, year = {2004} } @book{jeffreys1988methods, address = {Cambridge, United Kingdom}, author = {Jeffreys, Harold and Jeffreys, Bertha}, publisher = {Cambridge University Press}, title = {{Methods of Mathematical Physics}}, year = {1988} } @inproceedings{hillel2007learning, address = {New York, NY, USA}, annote = {comps{\_}distance}, author = {Hillel, Aharon Bar and Weinshall, Daphna}, booktitle = {ICML '07: Proceedings of the 24th international conference on Machine learning}, doi = {10.1145/1273496.1273505}, isbn = {978-1-59593-793-3}, pages = {65--72}, publisher = {ACM}, title = {{Learning distance function by coding similarity}}, year = {2007} } @article{hoeffding1963probability, author = {Hoeffding, W}, journal = {Journal of the American Statistical Association}, pages = {13--30}, title = {{Probability inequalities for sums of bounded random variables}}, url = {https://www.csee.umbc.edu/{~}lomonaco/f08/643/hwk643/Hoeffding.pdf}, volume = {58}, year = {1963} } @article{azar2013minimax, abstract = {We consider the problems of learning the optimal action-value function and the optimal policy in
discounted-reward Markov decision processes (MDPs). We prove new PAC bounds on the sample-complexity of two well-known model-based reinforcement learning (RL) algorithms in the presence of a generative model of the MDP: value iteration and policy iteration. The first result indicates that for an MDP with N state-action pairs and the discount factor $\gamma \in [0, 1)$ only $O(N\log(N/\delta)/((1-\gamma)^{3}\epsilon^{2}))$ state-transition samples are required to find an $\epsilon$-optimal estimation of the action-value function with the probability (w.p.) 1-$\delta$. Further, we prove that, for small values of $\epsilon$, an order of $O(N\log(N/\delta)/((1-\gamma)^{3}\epsilon^{2}))$ samples is required to find an $\epsilon$-optimal policy w.p. 1-$\delta$. We also prove a matching lower bound of $\Theta(N\log(N/\delta)/((1-\gamma)^{3}\epsilon^{2}))$ on the sample complexity of estimating the optimal action-value function with $\epsilon$ accuracy. To the best of our knowledge, this is the first minimax result on the sample complexity of RL: the upper bounds match the lower bound in terms of N, $\epsilon$, $\delta$ and 1/(1-$\gamma$) up to a constant factor. Also, both our lower bound and upper bound improve on the state-of-the-art in terms of their dependence on 1/(1-$\gamma$). 
{\textcopyright} 2013 The Author(s).}, author = {{Gheshlaghi Azar}, Mohammad and Munos, R{\'{e}}mi and Kappen, Hilbert J.}, doi = {10.1007/s10994-013-5368-1}, issn = {08856125}, journal = {Machine Learning}, keywords = {Learning theory,Markov decision processes,Reinforcement learning,Sample complexity}, number = {3}, pages = {325--349}, title = {{Minimax PAC bounds on the sample complexity of reinforcement learning with a generative model}}, url = {https://hal.archives-ouvertes.fr/hal-00831875}, volume = {91}, year = {2013} } @article{lovasz1983submodular, author = {Lov{\'{a}}sz, L{\'{a}}szl{\'{o}}}, editor = {Bachem, Armin and Gr{\"{o}}tschel, Martin and Korte, Bernhard H}, journal = {Mathematical programming the state of the art}, pages = {235--257}, publisher = {Berlin: Springer-Verlag}, title = {{Submodular functions and convexity}}, url = {http://www.cs.elte.hu/{~}lovasz/scans/submodular.pdf}, year = {1983} } @article{Whittle1988, abstract = { We consider a population of n projects which in general continue to evolve whether in operation or not (although by different rules). It is desired to choose the projects in operation at each instant of time so as to maximise the expected rate of reward, under a constraint upon the expected number of projects in operation. The Lagrange multiplier associated with this constraint defines an index which reduces to the Gittins index when projects not being operated are static. If one is constrained to operate m projects exactly then arguments are advanced to support the conjecture that, for m and n large in constant ratio, the policy of operating the m projects of largest current index is nearly optimal. The index is evaluated for some particular projects. 
}, author = {Whittle, P.}, doi = {10.1017/s0021900200040420}, issn = {0021-9002}, journal = {Journal of Applied Probability}, title = {{Restless bandits: activity allocation in a changing world}}, year = {1988} } @article{Fre75, author = {Freedman, D}, journal = {The Annals of Probability}, pages = {100--118}, title = {{On tail probabilities for martingales}}, volume = {3}, year = {1975} } @inproceedings{li2005lazy, author = {Li, Lihong and Littman, Michael}, booktitle = {Proceedings of the 20th National Conference on Artificial Intelligence}, pages = {1175--1180}, title = {{Lazy Approximation for Solving Continuous Finite-Horizon {\{}MDPs{\}}}}, year = {2005} } @inproceedings{moore1991variable, author = {Moore, Andrew}, booktitle = {Proceedings of the 8th International Conference on Machine Learning}, title = {{Variable Resolution Dynamic Programming: Efficiently Learning Action Maps in Multivariate Real-Valued State-Spaces}}, year = {1991} } @book{horn1990matrix, abstract = {Linear algebra and matrix theory have long been fundamental tools in mathematical disciplines as well as fertile fields for research. In this book the authors present classical and recent results of matrix analysis that have proved to be important to applied mathematics. Facts about matrices, beyond those found in an elementary linear algebra course, are needed to understand virtually any area of mathematical science, but the necessary material has appeared only sporadically in the literature and in university curricula. As interest in applied mathematics has grown, the need for a text and reference offering a broad selection of topics in matrix theory has become apparent, and this book meets that need. This volume reflects two concurrent views of matrix analysis. First, it encompasses topics in linear algebra that have arisen out of the needs of mathematical analysis. Second, it is an approach to real and complex linear algebraic problems that does not hesitate to use notions from analysis. 
Review and miscellanea -- Eigenvalues, eigenvectors, and similarity.}, author = {Horn, Roger A. and Johnson, Charles R.}, publisher = {Cambridge University Press}, title = {{Matrix analysis}}, year = {1990} } @book{rasmussen2006gaussian, abstract = {Regression -- Classification -- Covariance functions -- Model selection and adaptation of hyperparameters -- Relationships between GPs and other models -- Theoretical perspectives -- Approximation methods for large datasets -- Appendix A : Mathematical background -- Appendix B : Guassian Markov processes.}, author = {Rasmussen, Carl Edward. and Williams, Christopher K. I.}, isbn = {9780262182539}, pages = {248}, publisher = {MIT Press}, title = {{Gaussian processes for machine learning}}, url = {http://www.gaussianprocess.org/gpml/chapters/RW.pdf}, year = {2006} } @inproceedings{HuPo04, author = {Hutter, Marcus and Poland, Jan}, booktitle = {Algorithmic Learning Theory}, pages = {279--293}, title = {{Prediction with Expert Advice by Following the Perturbed Leader for General Weights}}, year = {2004} } @phdthesis{das2009detecting, author = {Das, Kaustav}, school = {Carnegie Mellon University}, title = {{Detecting Patterns of Anomalies}}, url = {http://www.cs.cmu.edu/{~}kaustav/thesis/kaustav{\_}thesis.pdf}, year = {2009} } @inproceedings{schulman2015trust, author = {Schulman, John and Levine, Sergey and Abbeel, Pieter and Jordan, Michael and Moritz, Philipp}, booktitle = {International Conference on Machine Learning}, title = {{Trust region policy optimization}}, url = {https://arxiv.org/pdf/1502.05477.pdf}, year = {2015} } @inproceedings{zanette2019tighter, author = {Zanette, Andrea and Brunskill, Emma}, booktitle = {International Conference on Machine Learning}, title = {{Tighter problem-dependent regret bounds in reinforcement learning without domain knowledge using value function bounds}}, url = {https://arxiv.org/pdf/1901.00210.pdf}, year = {2019} } @article{cap, author = {Zhao, P and Rocha, G and Yu, B}, journal = 
{Annals of Statistics}, number = {6A}, pages = {3468--3497}, title = {{Grouped and hierarchical model selection through composite absolute penalties}}, volume = {37}, year = {2009} } @phdthesis{daume06thesis, address = {Los Angeles, CA}, author = {{Daum{\'{e}} III}, Hal}, school = {University of Southern California}, title = {{Practical Structured Learning Techniques for Natural Language Processing}}, url = {http://pub.hal3.name/{\#}daume06thesis}, year = {2006} } @inproceedings{kveton2010online, abstract = {This paper proposes an algorithm for real-time learning without explicit feedback. The algorithm combines the ideas of semi-supervised learning on graphs and online learning. In particular, it iteratively builds a graphical representation of its world and updates it with observed examples. Labeled examples constitute the initial bias of the algorithm and are provided offline, and a stream of unlabeled examples is collected online to update this bias. We motivate the algorithm, discuss how to implement it efficiently, prove a regret bound on the quality of its solutions, and apply it to the problem of real-time face recognition. 
Our recognizer runs in real time, and achieves superior precision and recall on 3 challenging video datasets.}, address = {San Francisco, CA}, author = {Kveton, Branislav and Valko, Michal and Phillipose, Matthai and Huang, Ling}, booktitle = {The Fourth IEEE Online Learning for Computer Vision Workshop in The Twenty--Third IEEE Conference on Computer Vision and Pattern Recognition}, keywords = {misovalko}, mendeley-tags = {misovalko}, title = {{Online Semi-Supervised Perception: Real-Time Learning without Explicit Feedback}}, year = {2010} } @techreport{jaggi, author = {Jaggi, M}, institution = {Arxiv}, number = {1108.1170}, title = {{Convex Optimization without Projection Steps}}, year = {2011} } @inproceedings{sadhanala2016graph, abstract = {Given a statistical estimation problem where regularization is performed according to the structure of a large, dense graph G, we con-sider fitting the statistical estimate using a sparsified surrogate grap G, which shares the vertices of G but has far fewer edges, and is thus more tractable to work with compu-tationally. We examine three types of sparsi-fication: spectral sparsification, which can be seen as the result of sampling edges from the graph with probabilities proportional to their effective resistances, and two simpler sparsi-fiers, which sample edges uniformly from the graph, either globally or locally. 
We provide strong theoretical and experimental results, demonstrating that sparsification before es-timation can give statistically sensible solu-tions, with significant computational savings.}, author = {Sadhanala, Veeranjaneyulu and Wang, Yu-xiang and Tibshirani, Ryan J}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Graph Sparsification Approaches for Laplacian Smoothing}}, year = {2016} } @article{garivier2019explore, author = {Garivier, Aur{\'{e}}lien and M{\'{e}}nard, Pierre and Stoltz, Gilles}, journal = {Mathematics of Operations Research}, number = {2}, pages = {377--399}, title = {{Explore first, exploit next: The true shape of regret in bandit problems}}, url = {https://arxiv.org/pdf/1602.07182.pdf}, volume = {44}, year = {2019} } @article{haupt2006signal, author = {Haupt, J and Nowak, R}, journal = {IEEE Transactions on Information Theory}, number = {9}, pages = {4036--4048}, publisher = {IEEE}, title = {{Signal reconstruction from noisy random projections}}, volume = {52}, year = {2006} } @incollection{he2008nearest-neighbor-based, address = {Cambridge, MA}, annote = {comps{\_}ano}, author = {He, Jingrui and Carbonell, Jaime}, booktitle = {Advances in Neural Information Processing Systems 20}, editor = {Platt, J C and Koller, D and Singer, Y and Roweis, S}, pages = {633--640}, publisher = {MIT Press}, title = {{Nearest-Neighbor-Based Active Learning for Rare Category Detection}}, url = {http://books.nips.cc/papers/files/nips20/NIPS2007{\_}0051.pdf}, year = {2008} } @article{charniak1991bayesian, address = {Menlo Park, CA, USA}, annote = {comps{\_}models}, author = {Charniak, Eugene}, issn = {0738-4602}, journal = {AI Mag.}, number = {4}, pages = {50--63}, publisher = {American Association for Artificial Intelligence}, title = {{Bayesian networks without tears: making Bayesian networks more accessible to the probabilistically unsophisticated}}, url = 
{http://www.idi.ntnu.no/emner/it3704/lectures/papers/AIMag12-04-007.pdf}, volume = {12}, year = {1991} } @inproceedings{edmonds, author = {Edmonds, J}, booktitle = {Combinatorial optimization - Eureka, you shrink!}, pages = {11--26}, publisher = {Springer}, title = {{Submodular functions, matroids, and certain polyhedra}}, year = {2003} } @inproceedings{LaMu09, author = {Lazaric, Alessandro and Munos, R{\'{e}}mi}, title = {{Hybrid Stochastic-Adversarial On-line Learning}} } @article{hanley1982meaning, abstract = {A representation and interpretation of the area under a receiver operating characteristic (ROC) curve obtained by the "rating" method, or by mathematical predictions based on patient characteristics, is presented. It is shown that in such a setting the area represents the probability that a randomly chosen diseased subject is (correctly) rated or ranked with greater suspicion than a randomly chosen non-diseased subject. Moreover, this probability of a correct ranking is the same quantity that is estimated by the already well-studied nonparametric Wilcoxon statistic. 
These two relationships are exploited to (a) provide rapid closed-form expressions for the approximate magnitude of the sampling variability, i.e., standard error that one uses to accompany the area under a smoothed ROC curve, (b) guide in determining the size of the sample required to provide a sufficiently reliable estimate of this area, and (c) determine how large sample sizes should be to ensure that one can statistically detect differences in the accuracy of diagnostic techniques.}, author = {Hanley, J A and Mcneil, B J}, issn = {0033-8419}, journal = {Radiology}, keywords = {auc,auroc,hanley,mcneil,roc}, month = {apr}, number = {1}, pages = {29--36}, title = {{The meaning and use of the area under a receiver operating characteristic (ROC) curve.}}, volume = {143}, year = {1982} } @article{mallows, author = {Mallows, C L}, journal = {Technometrics}, number = {4}, pages = {661--675}, title = {{Some comments on {\{}C{\}}{\_}p}}, volume = {15}, year = {1973} } @article{Gopalan2013, abstract = {We consider stochastic multi-armed bandit problems with complex actions over a set of basic arms, where the decision maker plays a complex action rather than a basic arm in each round. The reward of the complex action is some function of the basic arms' rewards, and the feedback observed may not necessarily be the reward per-arm. For instance, when the complex actions are subsets of the arms, we may only observe the maximum reward over the chosen subset. Thus, feedback across complex actions may be coupled due to the nature of the reward function. We prove a frequentist regret bound for Thompson sampling in a very general setting involving parameter, action and observation spaces and a likelihood function over them. The bound holds for discretely-supported priors over the parameter space and without additional structural properties such as closed-form posteriors, conjugate prior structure or independence across arms. 
The regret bound scales logarithmically with time but, more importantly, with an improved constant that non-trivially captures the coupling across complex actions due to the structure of the rewards. As applications, we derive improved regret bounds for classes of complex bandit problems involving selecting subsets of arms, including the first nontrivial regret bounds for nonlinear MAX reward feedback from subsets.}, archivePrefix = {arXiv}, arxivId = {1311.0466}, author = {Gopalan, Aditya and Mannor, Shie and Mansour, Yishay}, eprint = {1311.0466}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Gopalan, Mannor, Mansour - 2013 - Thompson Sampling for Complex Bandit Problems.pdf:pdf}, month = {nov}, title = {{Thompson Sampling for Complex Bandit Problems}}, url = {http://arxiv.org/abs/1311.0466}, year = {2013} } @inproceedings{chapman2006comparison, abstract = {Automated syndromic surveillance systems often classify patients into syndromic categories based on free-text chief complaints. Chief complaints (CC) demonstrate low to moderate sensitivity in identify-ing syndromic cases. Emergency Department (ED) reports promise more detailed clinical information that may increase sensitivity of detection.}, author = {Chapman, Wendy W and Dowling, John N and Cooper, Gregory F and Hauskrecht, Milos and Valko, Michal}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Chapman et al. 
- 2006 - A Comparison of Chief Complaints and Emergency Department Reports for Identifying Patients with Acute Lower Resp.pdf:pdf}, keywords = {misovalko}, mendeley-tags = {misovalko}, title = {{A Comparison of Chief Complaints and Emergency Department Reports for Identifying Patients with Acute Lower Respiratory Syndrome}}, year = {2006} } @incollection{BL13, author = {Bubeck, Sebastien and Liu, Che-yu}, booktitle = {Advances in Neural Information Processing Systems 26}, editor = {Burges, C J C and Bottou, L and Welling, M and Ghahramani, Z and Weinberger, K Q}, pages = {638--646}, title = {{Prior-free and prior-dependent regret bounds for Thompson Sampling}}, year = {2013} } @article{bubeck2013optimal, author = {Bubeck, S{\'{e}}bastien and Ernst, Damien and Garivier, Aur{\'{e}}lien}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Bubeck, Ernst, Garivier - 2013 - Optimal discovery with probabilistic expert advice finite time analysis and macroscopic optimality.pdf:pdf}, journal = {Journal of Machine Learning Research}, pages = {601--623}, title = {{Optimal discovery with probabilistic expert advice: finite time analysis and macroscopic optimality}}, url = {http://www.jmlr.org/papers/volume14/bubeck13a/bubeck13a.pdf}, volume = {14}, year = {2013} } @inproceedings{yurtsever2017sketchy, abstract = {This paper concerns a fundamental class of convex matrix optimization problems. It presents the first algorithm that uses optimal storage and provably computes a low-rank approximation of a solution. In particular, when all solutions have low rank, the algorithm converges to a solution. This algorithm, SketchyCGM, modifies a standard convex optimization scheme, the conditional gradient method, to store only a small randomized sketch of the matrix variable. After the optimization terminates, the algorithm extracts a low-rank approximation of the solution from the sketch. 
In contrast to nonconvex heuristics, the guarantees for SketchyCGM do not rely on statistical models for the problem data. Numerical work demonstrates the benefits of SketchyCGM over heuristics.}, archivePrefix = {arXiv}, arxivId = {1702.06838}, author = {Yurtsever, Alp and Udell, Madeleine and Tropp, Joel A. and Cevher, Volkan}, booktitle = {International Conference on Artificial Intelligence and Statistics}, eprint = {1702.06838}, title = {{Sketchy decisions: Convex low-rank matrix optimization with optimal storage}}, year = {2017} } @inproceedings{kalenon, author = {Kale, Satyen and Reyzin, Lev and Schapire, Robert E}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {1054--1062}, title = {{Non-Stochastic Bandit Slate Problems}} } @article{zhang1996nonparametric, abstract = {Abstract Importance sampling is a widely used variance reduction simulation technique for the evaluation of high-dimensional integrals. A key step in the implementation of importance sampling is to choose a proper distribution function from which pseudorandom numbers are generated. Parametric sampling distributions, if available at all, are often inadequate for high-dimensional integrals over irregular regions. One possible remedy is to use a nonparametric method to estimate the unknown optimal sampling function. We show that the nonparametric approach yields integral estimates that converge faster than estimates obtained from parametric approaches. 
We also demonstrate that an adaptive method, which has been used successfully in parametric settings, does not yield better results than simple one-step methods in the nonparametric setting.}, author = {Zhang, Ping}, journal = {Journal of the American Statistical Association}, keywords = {Adaptive importance sampling,Integral evaluation,Kernel density estimation,Monte Carlo simulation,Variance reduction}, number = {435}, pages = {1245--1253}, title = {{Nonparametric importance sampling}}, volume = {91}, year = {1996} } @incollection{simunic2002dynamic, address = {New York, NY}, author = {Simunic, Tajana}, booktitle = {Power Aware Computing}, publisher = {Kluwer Academic Publishers}, title = {{Dynamic Management of Power Consumption}}, year = {2002} } @book{cover-thomas-1991, author = {Cover, T and Thomas, J A}, publisher = {Wiley}, title = {{Elements of Information Theory}}, year = {1991} } @book{stackelberg1934market, author = {von Stackelberg, Heinrich}, isbn = {9783642125867}, title = {{Market structure and equilibrium}}, url = {https://books.google.fr/books?id=dghH9OH5fDoC}, year = {1934} } @inproceedings{achlioptas2013near, author = {Achlioptas, Dimitris and Karnin, Zohar S. and Liberty, Edo}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Achlioptas, Karnin, Liberty - 2013 - Near-optimal entrywise sampling for data matrices.pdf:pdf}, title = {{Near-optimal entrywise sampling for data matrices}}, year = {2013} } @inproceedings{chung1999dynamic, author = {Chung, Eui-Young and Benini, Luca and de Micheli, Giovanni}, booktitle = {Proceedings of the 1999 IEEE / ACM International Conference on Computer-Aided Design}, pages = {274--279}, title = {{Dynamic Power Management Using Adaptive Learning Tree}}, year = {1999} } @inproceedings{saha2011improved, abstract = {The study of online convex optimization in the bandit setting was initiated by Kleinberg (2004) and Flaxman et al. 
(2005). Such a setting models a decision maker that has to make decisions in the face of adversari- ally chosen convex loss functions. Moreover, the only information the decision maker re- ceives are the losses. The identities of the loss functions themselves are not revealed. In this setting, we reduce the gap between the best known lower and upper bounds for the class of smooth convex functions, i.e. convex functions with a Lipschitz continuous gradi- ent. Building upon existing work on self- concordant regularizers and one-point gradi- ent estimation, we give the first algorithm whose expected regret is O(T2/3), ignoring constant and logarithmic factors.}, author = {Saha, Ankan and Tewari, Ambuj}, booktitle = {AISTATS}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Saha, Tewari - 2011 - Improved Regret Guarantees for Online Smooth Convex Optimization with Bandit Feedback.pdf:pdf}, title = {{Improved Regret Guarantees for Online Smooth Convex Optimization with Bandit Feedback}}, url = {http://jmlr.csail.mit.edu/proceedings/papers/v15/saha11a/saha11a.pdf}, volume = {15}, year = {2011} } @inproceedings{Vem10, author = {Vempala, S}, booktitle = {IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science (FSTTCS 2010)}, editor = {Lodaya, K and Mahajan, M}, pages = {42--64}, publisher = {Schloss Dagstuhl--Leibniz-Zentrum fur Informatik}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, title = {{Recent Progress and Open Problems in Algorithmic Convex Geometry}}, volume = {8}, year = {2010} } @article{wong2005what's, address = {Cambridge, MA, USA}, author = {Wong, Weng-Keen and Moore, Andrew and Cooper, Gregory and Wagner, Michael}, issn = {1533-7928}, journal = {J. Mach. Learn. 
Res.}, pages = {1961--1998}, publisher = {MIT Press}, title = {{What's Strange About Recent Events (WSARE): An Algorithm for the Early Detection of Disease Outbreaks}}, volume = {6}, year = {2005} } @article{hager1989updating, author = {Hager, W W}, journal = {SIAM review}, pages = {221--239}, publisher = {JSTOR}, title = {{Updating the inverse of a matrix}}, year = {1989} } @inproceedings{lussdecomposing, author = {Luss, R and Rosset, S and Shahar, M}, booktitle = {Adv. NIPS}, title = {{Decomposing Isotonic Regression for Efficiently Solving Large Problems}}, volume = {23}, year = {2010} } @book{DGL96, author = {Devroye, L and Gy{\"{o}}rfi, L and Lugosi, G}, publisher = {Springer}, title = {{A Probabilistic Theory of Pattern Recognition}}, year = {1996} } @article{abernethy2009beating, author = {Abernethy, Jacob Duncan and Rakhlin, A}, doi = {10.1109/ITA.2009.5044958}, isbn = {9781424439904}, journal = {2009 Information Theory and Applications Workshop}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {280--289}, publisher = {Ieee}, title = {{Beating the adaptive bandit with high probability}}, url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=5044958}, year = {2009} } @article{gaiffas2011sharp, abstract = {We observe (Xi,Yi)i=1n where the Yi 's are real valued outputs and the Xi's are m ? T matrices. We observe a new entry X and we want to predict the output Y associated with it. We focus on the high-dimensional setting, where m T ? n. This includes the matrix completion problem with noise, as well as other problems. We consider linear prediction procedures based on different penalizations, involving a mixture of several norms: the nuclear norm, the Frobenius norm and the ?1-norm. For these procedures, we prove sharp oracle inequalities, using a statistical learning theory point of view. A surprising fact in our results is that the rates of convergence do not depend on m and T directly. 
The analysis is conducted without the usually considered incoherency condition on the unknown matrix or restricted isometry condition on the sampling operator. Moreover, our results are the first to give for this problem an analysis of penalization (such as nuclear norm penalization) as a regularization algorithm: our oracle inequalities prove that these procedures have a prediction accuracy close to the deterministic oracle one, given that the reguralization parameters are well-chosen. ? 2011 IEEE.}, author = {Ga{\"{i}}ffas, St{\'{e}}phane and Lecu{\'{e}}, Guillaume}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Ga{\"{i}}ffas, Lecu{\'{e}} - 2011 - Sharp oracle inequalities for high-dimensional matrix prediction.pdf:pdf}, journal = {IEEE Transactions on Information Theory}, keywords = {Empirical process theory,Schatten norms,empirical risk minimization,high-dimensional matrix,matrix completion,nuclear norm,oracle inequalities,sparsity}, number = {10}, pages = {6942--6957}, title = {{Sharp oracle inequalities for high-dimensional matrix prediction}}, volume = {57}, year = {2011} } @inproceedings{minka2001expectation, abstract = {This paper presents a new deterministic approximation technique in Bayesian networks. This method, "Expectation Propagation", unifies two previous techniques: assumed-density filtering, an extension of the Kalman filter, and loopy belief propagation, an extension of belief propagation in Bayesian networks. All three algorithms try to recover an approximate distribution which is close in KL divergence to the true distribution. Loopy belief propagation, because it propagates exact belief states, is useful for a limited class of belief networks, such as those which are purely discrete. Expectation Propagation approximates the belief states by only retaining certain expectations, such as mean and variance, and iterates until these expectations are consistent throughout the network. 
This makes it applicable to hybrid networks with discrete and continuous nodes. Expectation Propagation also extends belief propagation in the opposite direction - it can propagate richer belief states that incorporate correlations between nodes. Experiments with Gaussian mixture models show Expectation Propagation to be convincingly better than methods with similar computational cost: Laplace's method, variational Bayes, and Monte Carlo. Expectation Propagation also provides an efficient algorithm for training Bayes point machine classifiers.}, author = {Minka, Tom}, booktitle = {Uncertainty in Artificial Intelligence}, title = {{Expectation propagation for approximate Bayesian inference}}, year = {2001} } @article{grotschel1981ellipsoid, abstract = {L. G. Khachiyan recently published a polynomial algorithm to check feasibility of a system of linear inequalities. The method is an adaptation of an algorithm proposed by Shor for non-linear optimization problems. In this paper we show that the method also yields interesting results in combinatorial optimization. Thus it yields polynomial algorithms for vertex packing in perfect graphs; for the matching and matroid intersection problems; for optimum covering of directed cuts of a digraph; for the minimum value of a submodular set function; and for other important combinatorial problems. 
On the negative side, it yields a proof that weighted fractional chromatic number is NP-hard.}, author = {Gr{\"{o}}tschel, M and Lov{\'{a}}sz, L and Schrijver, A}, doi = {10.1007/BF02579273}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Gr{\"{o}}tschel, Lov{\'{a}}sz, Schrijver - 1981 - The ellipsoid method and its consequences in combinatorial optimization.pdf:pdf}, issn = {02099683}, journal = {Combinatorica}, number = {2}, pages = {169--197}, publisher = {Springer}, title = {{The ellipsoid method and its consequences in combinatorial optimization}}, url = {http://www.springerlink.com/index/10.1007/BF02579273}, volume = {1}, year = {1981} } @inproceedings{hkl, author = {Bach, F}, booktitle = {Adv. NIPS}, title = {{Exploring Large Feature Spaces with Hierarchical Multiple Kernel Learning}}, year = {2008} } @inproceedings{koolen2013pareto, author = {{Wouter M. Koolen}}, booktitle = {Neural Information Processing Systems}, title = {{The Pareto regret frontier}}, year = {2013} } @article{BT03, author = {Beck, A and Teboulle, M}, journal = {Operations Research Letters}, number = {3}, pages = {167--175}, title = {{Mirror descent and nonlinear projected subgradient methods for convex optimization}}, volume = {31}, year = {2003} } @inproceedings{maillard2011finite, author = {Maillard, Odalric-Ambrym and Munos, R{\'{e}}mi and Stoltz, Gilles}, booktitle = {To appear in Proceedings of the 24th annual Conference On Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, series = {COLT '11}, title = {{Finite-Time Analysis of Multi-armed Bandits Problems with Kullback-Leibler Divergences}}, year = {2011} } @article{metropolis1953equation, author = {Metropolis, Nicholas and Rosenbluth, Arianna and Rosenbluth, Marshall and Teller, Augusta and Teller, Edward}, journal = {Journal of Chemical Physics}, pages = {1087--1092}, title = {{Equation of State Calculations by Fast Computing Machines}}, volume = {21}, year = {1953} } 
@inproceedings{leurent2019practical, abstract = {We consider the problem of online planning in a Markov Decision Process when given only access to a generative model, restricted to open-loop policies - i.e. sequences of actions - and under budget constraint. In this setting, the Open-Loop Optimistic Planning (OLOP) algorithm enjoys good theoretical guarantees but is overly conservative in practice, as we show in numerical experiments. We propose a modified version of the algorithm with tighter upper-confidence bounds, KLOLOP, that leads to better practical performances while retaining the sample complexity bound. Finally, we propose an efficient implementation that significantly improves the time complexity of both algorithms.}, author = {Leurent, Edouard and Maillard, Odalric-Ambrym}, booktitle = {European Conference on Machine Learning}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Leurent, Maillard - 2019 - Practical open-loop pptimistic planning.pdf:pdf}, keywords = {online learning,planning,tree search}, title = {{Practical open-loop optimistic planning}}, url = {https://arxiv.org/pdf/1904.04700.pdf}, year = {2019} } @article{fawcett1997adaptive, address = {Hingham, MA, USA}, annote = {comps{\_}ano}, author = {Fawcett, Tom and Provost, Foster}, doi = {http://dx.doi.org/10.1023/A:1009700419189}, issn = {1384-5810}, journal = {Data Min. Knowl. 
Discov.}, number = {3}, pages = {291--316}, publisher = {Kluwer Academic Publishers}, title = {{Adaptive Fraud Detection}}, volume = {1}, year = {1997} } @inproceedings{moonesignhe2006outlier, address = {Washington, DC, USA}, author = {Moonesignhe, H D K and Tan, Pang-Ning}, booktitle = {ICTAI '06: Proceedings of the 18th IEEE International Conference on Tools with Artificial Intelligence}, doi = {http://dx.doi.org/10.1109/ICTAI.2006.94}, isbn = {0-7695-2728-0}, pages = {532--539}, publisher = {IEEE Computer Society}, title = {{Outlier Detection Using Random Walks}}, year = {2006} } @inproceedings{blum2001learning, address = {San Francisco, CA, USA}, author = {Blum, Avrim and Chawla, Shuchi}, booktitle = {ICML '01: Proceedings of the Eighteenth International Conference on Machine Learning}, isbn = {1-55860-778-1}, pages = {19--26}, publisher = {Morgan Kaufmann Publishers Inc.}, title = {{Learning from Labeled and Unlabeled Data using Graph Mincuts}}, year = {2001} } @inproceedings{HaSe09, author = {Hazan, E and Seshadhri, C}, pages = {393--400}, title = {{Efficient learning algorithms for changing environments}} } @inproceedings{kocak2016onlinea, abstract = {We consider adversarial multi-armed bandit problems where the learner is allowed to observe losses of a number of arms beside the arm that it actually chose. We study the case where all non-chosen arms reveal their loss with an unknown probability rt, independently of each other and the action of the learner. Moreover, we allow rt to change in every round t, which rules out the possibility of estimating rt by a well-concentrated sample average. We propose an algorithm which operates under the assumption that rt is large enough to warrant at least one side observation with high probability. We show that after T rounds in a bandit problem with N arms, the expected regret of our algorithm is of order O(sqrt(sum(t=1)T (1/rt) log N )), given that rt less than log T / (2N-2) for all t. 
All our bounds are within logarithmic factors of the best achievable performance of any algorithm that is even allowed to know exact values of rt.}, author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Neu, Gergely and Valko, Michal}, booktitle = {Uncertainty in Artificial Intelligence}, title = {{Online learning with Erd{\H{o}}s-R{\'{e}}nyi side-observation graphs}}, url = {https://hal.inria.fr/hal-01320588/document}, year = {2016} } @phdthesis{kveton2006planning, author = {Kveton, Branislav}, school = {University of Pittsburgh}, title = {{Planning in Hybrid Structured Stochastic Domains}}, year = {2006} } @inproceedings{shang2018adaptive, abstract = {Hierarchical bandits is an approach for global optimization of extremely irregular functions. This paper provides new elements regarding POO, an adaptive meta-algorithm that does not require the knowledge of local smoothness of the target function. We first highlight the fact that the sub-routine algorithm used in POO should have a small regret under the assumption of local smoothness with respect to the chosen partitioning, which is unknown if it is satisfied by the standard sub-routine HOO. In this work, we establish such regret guarantee for HCT which is another hierarchical optimistic optimization algorithm that needs to know the smoothness. This confirms the validity of POO. 
We show that POO can be used with HCT as a sub-routine with a regret upper bound that matches that of best-known algorithms using the knowledge of smoothness up to a sqrt(log(n)) factor.}, author = {Shang, Xuedong and Kaufmann, Emilie and Valko, Michal}, booktitle = {European Workshop on Reinforcement Learning}, title = {{Adaptive black-box optimization got easier: HCT needs only local smoothness}}, url = {http://researchers.lille.inria.fr/{~}valko/hp/publications/shang2018adaptive.pdf}, year = {2018} } @inproceedings{Jenatton2011, author = {Jenatton, R and Gramfort, A and Michel, V and Obozinski, G and Bach, F and Thirion, B}, booktitle = {International Workshop on Pattern Recognition in Neuroimaging (PRNI)}, title = {{Multi-scale Mining of f{\{}MRI{\}} Data with Hierarchical Structured Sparsity}}, year = {2011} } @article{jaynes57maxent, author = {Jaynes, E T}, journal = {Physical Review}, number = {4}, pages = {620--630}, publisher = {American Physical Society}, title = {{Information Theory and Statistical Mechanics}}, volume = {106}, year = {1957} } @inproceedings{grabner2008semi-supervised, author = {Grabner, Helmut and Leistner, Christian and Bischof, Horst}, booktitle = {Proceedings of the 10th European Conference on Computer Vision}, pages = {234--247}, title = {{Semi-Supervised On-Line Boosting for Robust Tracking}}, year = {2008} } @inproceedings{koutis2011a-nearly-m, author = {Koutis, Ioannis and Miller, Gary L and Peng, Richard}, booktitle = {{\{}IEEE{\}} 52nd Annual Symposium on Foundations of Computer Science, {\{}FOCS{\}}}, pages = {590--598}, title = {{A Nearly-m log n Time Solver for {\{}SDD{\}} Linear Systems}}, year = {2011} } @article{hannan1957approximation, author = {Hannan, James}, journal = {Contributions to the theory of games}, pages = {97--139}, title = {{Approximation to Bayes risk in repeated play}}, volume = {3}, year = {1957} } @article{CBCoGe04, author = {Cesa-Bianchi, N and Conconi, A and Gentile, C}, journal = {IEEE Transactions on 
Information Theory}, pages = {2050--2057}, title = {{On the Generalization Ability of On-Line Learning Algorithms}}, volume = {50}, year = {2004} } @inproceedings{yu2005blockwise, author = {Yu, Kai and Yu, Shipeng}, booktitle = {Proc. of the 22nd ICML Workshop on Learning}, title = {{Blockwise supervised inference on large graphs}}, year = {2005} } @inproceedings{cohen2017input, author = {Cohen, Michael B. and Musco, Cameron and Musco, Christopher}, booktitle = {Symposium on Discrete Algorithms}, title = {{Input sparsity time low-rank approximation via ridge leverage score sampling}}, year = {2017} } @article{fujishige1980lexicographically, author = {Fujishige, Satoru}, journal = {Mathematics of Operations Research}, number = {2}, pages = {186--196}, publisher = {JSTOR}, title = {{Lexicographically optimal base of a polymatroid with respect to a weight vector}}, volume = {5}, year = {1980} } @article{NJLS09, author = {Nemirovski, A and Juditsky, A and Lan, G and Shapiro, A}, journal = {SIAM Journal on Optimization}, pages = {1574--1609}, title = {{Robust stochastic approximation approach to stochastic programming}}, volume = {19}, year = {2009} } @inproceedings{syed2007mwal, author = {Syed, Umar and Schapire, Robert}, pages = {1449--1456}, title = {{A Game-Theoretic Approach to Apprenticeship Learning}} } @article{agarwal2011stochastic, abstract = {This paper addresses the problem of minimizing a convex, Lipschitz function f over a convex, compact set xset under a stochastic bandit feedback model. In this model, the algorithm is allowed to observed noisy realizations of the function value f(x) at any query point x in xset. The quantity of interest is regret of the algorithm, which is the sum of the function values at algorithm's query points minus the optimal function value. We demonstrate a generalization of the ellipsoid algorithm that incurs otil(poly(d)sqrtT) regret. 
Since any algorithm has regret at least Omega(sqrtT) on this problem, our algorithm is optimal in terms of the scaling with T.}, author = {Agarwal, Alekh and Foster, Dean P and Hsu, Daniel and Kakade, Sham M and Rakhlin, Alexander}, journal = {Statistics}, pages = {1--24}, title = {{Stochastic convex optimization with bandit feedback}}, url = {http://arxiv.org/abs/1107.1744}, year = {2011} } @inproceedings{guestrin2004solving, author = {Guestrin, Carlos and Hauskrecht, Milos and Kveton, Branislav}, booktitle = {Proceedings of the 20th Conference on Uncertainty in Artificial Intelligence}, pages = {235--242}, title = {{Solving Factored {\{}MDPs{\}} with Continuous and Discrete Variables}}, year = {2004} } @article{Tibshirani1996, author = {Tibshirani, R}, journal = {Journal of the Royal Statistical Society. Series B}, pages = {267--288}, title = {{Regression shrinkage and selection via the {\{}L{\}}asso}}, year = {1996} } @article{Bubeck2013a, archivePrefix = {arXiv}, arxivId = {arXiv:1205.3181v1}, author = {Bubeck, S. and Wang, T. and Viswanathan, N.}, eprint = {arXiv:1205.3181v1}, journal = {Proceedings of the 30th International Conference on Machine Learning (ICML)}, title = {{Multiple identifications in multi-armed bandits}}, volume = {28}, year = {2013} } @inproceedings{valko2005evolutionary, abstract = {This paper presents an application of the biologically realistic JASTAP neural network model to classification tasks. The JASTAP neural network model is presented as an alternative to the basic multi-layer perceptron model. An evolutionary procedure previously applied to the simultaneous solution of feature selection and neural network training on standard multi-layer perceptrons is extended with JASTAP model. 
Preliminary results on IRIS standard data set give evidence that this extension allows the use of smaller neural networks that can handle noisier data without any degradation in classification accuracy.}, author = {Valko, Michal and Marques, Nuno Cavalheiro and Castelani, Marco}, booktitle = {Proceedings of 2005 Portuguese Conference on Artificial Intelligence}, editor = {Bento and others}, keywords = {misovalko}, mendeley-tags = {misovalko}, pages = {24--32}, publisher = {IEEE}, title = {{Evolutionary Feature Selection for Spiking Neural Network Pattern Classifiers}}, year = {2005} } @inproceedings{kveton2007adaptive, author = {Kveton, Branislav and Gandhi, Prashant and Theocharous, Georgios and Mannor, Shie and Rosario, Barbara and Shah, Nilesh}, booktitle = {Proceedings of the 22nd National Conference on Artificial Intelligence}, pages = {1795--1800}, title = {{Adaptive Timeout Policies for Fast Fine-Grained Power Management}}, year = {2007} } @article{jones1993lipschitzian, abstract = {We present a new algorithm for finding the global minimum of a multivariate function subject to simple bounds. The algorithm is a modification of the standard Lipschitzian approach that eliminates the need to specify a Lipschitz constant. This is done by carrying out simultaneous searches using all possible constants from zero to infinity. 
On nine standard test functions, the new algorithm converges in fewer function evaluations than most competing methods.}, author = {Jones, David and Perttunen, Cary and Stuckman, Bruce}, journal = {Journal of Optimization Theory and Applications}, number = {1}, pages = {157--181}, publisher = {Springer}, title = {{Lipschitzian optimization without the Lipschitz constant}}, volume = {79}, year = {1993} } @inproceedings{wong2003bayesian, annote = {comps{\_}ano}, author = {Wong, Weng-Keen and Moore, Andrew and Cooper, Gregory and Wagner, Michael}, booktitle = {Proceedings of the 20th International Conference on Machine Learning (ICML-2003)}, title = {{Bayesian Network Anomaly Pattern Detection for Disease Outbreaks}}, url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.5.1245}, year = {2003} } @inproceedings{gautier2019processus, abstract = {For the session ``Mathematical tools in machine learning'', we propose a short survey of determinantal point processes, a popular probabilistic model and tool in machine learning, which already has promising applications in signal processing.}, author = {Gautier, Guillaume and Bardenet, R{\'{e}}mi and Valko, Michal}, booktitle = {Symposium of the Signal and Image Processing Study Group}, title = {{Les processus ponctuels d{\'{e}}terminantaux en apprentissage automatique}}, year = {2019} } @article{burges1998tutorial, author = {Burges, Christopher J C}, journal = {Data Mining and Knowledge Discovery}, number = {2}, pages = {121--167}, title = {{A Tutorial on Support Vector Machines for Pattern Recognition}}, url = {citeseer.ist.psu.edu/burges98tutorial.html}, volume = {2}, year = {1998} } @inproceedings{narang2013signal, abstract = {In this paper, we propose a novel algorithm to interpolate data defined on graphs, using signal processing concepts. 
The interpolation of missing values from known samples appears in various applications, such as matrix/vector completion, sampling of high-dimensional data, semi-supervised learning etc. In this paper, we formulate the data interpolation problem as a signal reconstruction problem on a graph, where a graph signal is defined as the information attached to each node (scalar or vector values mapped to the set of vertices/edges of the graph). We use recent results for sampling in graphs to find classes of bandlimited (BL) graph signals that can be reconstructed from their partially observed samples. The interpolated signal is obtained by projecting the input signal into the appropriate BL graph signal space. Additionally, we impose a `bilateral' weighting scheme on the links between known samples, which further improves accuracy. We use our proposed method for collaborative filtering in recommendation systems. Preliminary results show a very favorable trade-off between accuracy and complexity, compared to state of the art algorithms.}, author = {Narang, Sunil K. 
and Gadde, Akshay and Ortega, Antonio}, booktitle = {International Conference on Acoustics, Speech and Signal Processing}, keywords = {Graph signal processing,recommendation systems,sampling in graphs,spectral graph theory}, title = {{Signal processing techniques for interpolation in graph structured data}}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.650.2525{\&}rep=rep1{\&}type=pdf}, year = {2013} } @inproceedings{Rao2011, author = {Rao, N S and Nowak, R D and Wright, S J and Kingsbury, N G}, booktitle = {International Conference on Image Processing (ICIP)}, title = {{Convex approaches to model wavelet sparsity patterns}}, year = {2011} } @article{Bull2015b, author = {Bull, Adam D.}, issn = {1350-7265}, journal = {Bernoulli}, keywords = {bandits on taxonomies,continuum-armed bandits,noisy global optimisation,tree-armed bandits,zooming dimension}, month = {nov}, number = {4}, pages = {2289--2307}, publisher = {Bernoulli Society for Mathematical Statistics and Probability}, title = {{Adaptive-treed Bandits}}, url = {http://projecteuclid.org/euclid.bj/1438777594}, volume = {21}, year = {2015} } @inproceedings{globerson07exponentiated, author = {Globerson, Amir and Koo, Terry Y and Carreras, Xavier and Collins, Michael}, doi = {http://doi.acm.org/10.1145/1273496.1273535}, pages = {305--312}, title = {{Exponentiated gradient algorithms for log-linear structured prediction}} } @inproceedings{SS07, author = {Shalev-Shwartz, S and Singer, Y}, booktitle = {Advances in Neural Information Processing Systems (NIPS)}, title = {{Convex Repeated Games and Fenchel Duality}}, year = {2007} } @inproceedings{combes2015combinatorial, author = {Combes, Richard and Talebi, Mohammad Sadegh and Prouti{\`{e}}re, Alexandre and Lelarge, Marc}, booktitle = {Neural Information Processing Systems}, title = {{Combinatorial bandits revisited}}, url = {https://papers.nips.cc/paper/5831-combinatorial-bandits-revisited.pdf}, year = {2015} } @inproceedings{sha2003shallow, 
address = {Morristown, NJ, USA}, annote = {c{\_}omps{\_}models}, author = {Sha, Fei and Pereira, Fernando}, booktitle = {NAACL '03: Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology}, doi = {http://dx.doi.org/10.3115/1073445.1073473}, pages = {134--141}, publisher = {Association for Computational Linguistics}, title = {{Shallow parsing with conditional random fields}}, year = {2003} } @article{turner2010fast, abstract = {We present methods to do fast online anomaly detection using scan statistics. Scan statistics have long been used to detect statistically significant bursts of events. We extend the scan statistics framework to handle many practical issues that occur in application: dealing with an unknown background rate of events, allowing for slow natural changes in background frequency, the inverse problem of finding an unusual lack of events, and setting the test parameters to maximize power. 
We demonstrate its use on real and synthetic data sets with comparison to other methods.}, author = {Turner, Ryan and Ghahramani, Zoubin and Bottone, Steven}, doi = {10.1109/MLSP.2010.5589151}, isbn = {978-1-4244-7876-7}, issn = {1551-2541}, journal = {IEEE Workshop on Machine Learning for Signal Processing}, keywords = {scan statistics}, mendeley-tags = {scan statistics}, title = {{Fast online anomaly detection using scan statistics}}, year = {2010} } @inproceedings{collins02discriminative, address = {Morristown, NJ, USA}, author = {Collins, Michael}, booktitle = {EMNLP '02: Proceedings of the ACL-02 Conference on Empirical Methods in Natural Language Processing}, doi = {http://dx.doi.org/10.3115/1118693.1118694}, pages = {1--8}, publisher = {Association for Computational Linguistics}, title = {{Discriminative training methods for hidden {\{}M{\}}arkov models: theory and experiments with perceptron algorithms}}, year = {2002} } @inproceedings{gleich2015robustifying, author = {Gleich, David F and Mahoney, Michael W}, booktitle = {Knowledge Discovery and Data Mining}, title = {{Using Local Spectral Methods to Robustify Graph-Based Learning Algorithms}}, year = {2015} } @misc{openmp2008openmp, author = {OpenMP}, institution = {{\{}OpenMP{\}} Architecture Review Board}, title = {{{\{}OpenMP{\}} Application Program Interface -- Version 3.0}}, year = {2008} } @article{markou2003novelty, address = {Amsterdam, The Netherlands, The Netherlands}, annote = {comps{\_}ano}, author = {Markou, Markos and Singh, Sameer}, doi = {http://dx.doi.org/10.1016/j.sigpro.2003.07.019}, issn = {0165-1684}, journal = {Signal Process.}, number = {12}, pages = {2481--2497}, publisher = {Elsevier North-Holland, Inc.}, title = {{Novelty detection: a review, part 1: statistical approaches}}, volume = {83}, year = {2003} } @article{hansen2013strategy, author = {Hansen, Thomas Dueholm and Miltersen, Peter Bro and Zwick, Uri}, journal = {Journal of the ACM}, title = {{Strategy iteration is strongly 
polynomial for 2-player turn-based stochastic games with a constant discount factor}}, url = {https://arxiv.org/pdf/1008.0530.pdf}, volume = {60}, year = {2013} } @article{krause11submodularity, author = {Krause, Andreas and Guestrin, Carlos}, journal = {ACM Transactions on Intelligent Systems and Technology}, number = {4}, title = {{Submodularity and its Applications in Optimized Information Gathering}}, volume = {2}, year = {2011} } @book{tsybakov2009introduction, address = {New York, NY}, author = {Tsybakov, Alexandre B.}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Tsybakov - 2009 - Introduction to Nonparametric Estimation.pdf:pdf}, publisher = {Springer New York}, series = {Springer Series in Statistics}, title = {{Introduction to Nonparametric Estimation}}, year = {2009} } @unpublished{jordan2003introduction, annote = {A textbook for a probabilistic graphical models class. comps{\_}models}, author = {Jordan, Michael I}, keywords = {bibtex-import}, title = {{An Introduction to Probabilistic Graphical Models}}, year = {2003} } @incollection{bookchapter, author = {Bach, F and Jenatton, R and Mairal, J and Obozinski, G}, booktitle = {Optimization for Machine Learning}, editor = {Sra, S and Nowozin, S and Wright, S J}, publisher = {MIT Press}, title = {{Convex optimization with sparsity-inducing norms}}, year = {2011} } @article{chollet1997some, author = {Chollet, J}, journal = {American Mathematical Monthly}, number = {7}, pages = {609--617}, publisher = {Mathematical Association of America}, title = {{Some inequalities for principal submatrices}}, volume = {104}, year = {1997} } @inproceedings{chang2009inferring, author = {Chang, Keng-hao and Hightower, Jeffrey and Kveton, Branislav}, booktitle = {Proceedings of the 7th International Conference on Pervasive Computing}, pages = {151--167}, title = {{Inferring Identity Using Accelerometers in 
Television Remote Controls}}, year = {2009} } @unpublished{Bub11, annote = {Lecture Notes}, author = {Bubeck, S}, title = {{Introduction to Online Optimization}}, year = {2011} } @article{durrett1977weak, author = {Durrett, Richard T. and Iglehart, Donald L. and Miller, Douglas R.}, journal = {The Annals of Probability}, number = {1}, pages = {117--129}, title = {{Weak convergence to Brownian meander and Brownian excursion}}, url = {https://projecteuclid.org/download/pdf{\_}1/euclid.aop/1176995895}, volume = {5}, year = {1977} } @inproceedings{KaSriTe08, author = {Kakade, S M and Sridharan, K and Tewari, A}, pages = {793--800}, title = {{On the complexity of linear prediction: Risk bounds, margin bounds, and regularization}} } @inproceedings{zinkevich2003online, author = {Zinkevich, Martin}, booktitle = {Proceedings of the 20th International Conference on Machine Learning}, pages = {928--936}, title = {{Online Convex Programming and Generalized Infinitesimal Gradient Ascent}}, year = {2003} } @book{GP, author = {Rasmussen, Carl Edward and Williams, Christopher}, publisher = {MIT Press}, title = {{Gaussian Processes for Machine Learning}}, year = {2006} } @article{hauskrecht2000value-function, author = {Hauskrecht, Milos}, journal = {Journal of Artificial Intelligence Research}, pages = {33--94}, title = {{Value-Function Approximations for Partially Observable {Markov} Decision Processes}}, volume = {13}, year = {2000} } @article{Prot2017, abstract = {This survey aims at demonstrating that the structure of precedence constraints plays a tremendous role on the complexity of scheduling problems. Indeed many problems can be NP-hard when considering general precedence constraints, while they become polynomially solvable for particular precedence constraints. 
We also show that there still are many very exciting challenges in this research area.}, archivePrefix = {arXiv}, arxivId = {1510.04833}, author = {Prot, D and Bellenguez-Morineau, O}, doi = {10.1007/s10951-017-0519-z}, eprint = {1510.04833}, issn = {10946136}, journal = {Journal of Scheduling}, keywords = {Complexity,Precedence constraints,Scheduling}, number = {1}, pages = {3--16}, title = {{A survey on how the structure of precedence constraints may change the complexity class of scheduling problems}}, volume = {21}, year = {2018} } @inproceedings{abbeel2004apprenticeship, author = {Abbeel, Pieter and Ng, Andrew}, booktitle = {Proceedings of the 21st international conference on machine learning}, doi = {http://doi.acm.org/10.1145/1015330.1015430}, isbn = {1-58113-838-5}, keywords = {irl}, mendeley-tags = {irl}, title = {{Apprenticeship learning via inverse reinforcement learning}}, url = {http://www.eecs.harvard.edu/{~}parkes/cs286r/spring06/papers/abeelng.pdf}, year = {2004} } @inproceedings{park2003solving, author = {Park, James and Darwiche, Adnan}, booktitle = {Proceedings of the 19th Conference on Uncertainty in Artificial Intelligence}, pages = {459--468}, title = {{Solving {\{}MAP{\}} Exactly Using Systematic Search}}, year = {2003} } @inproceedings{riquelme2017active, abstract = {We explore the sequential decision making problem where the goal is to estimate uniformly well a number of linear models, given a shared budget of random contexts independently sampled from a known distribution. The decision maker must query one of the linear models for each incoming context, and receives an observation corrupted by noise levels that are unknown, and depend on the model instance. We present Trace-UCB, an adaptive allocation algorithm that learns the noise levels while balancing contexts accordingly across the different linear functions, and derive guarantees for simple regret in both expectation and high-probability. 
Finally, we extend the algorithm and its guarantees to high dimensional settings, where the number of linear models times the dimension of the contextual space is higher than the total budget of samples. Simulations with real data suggest that Trace-UCB is remarkably robust, outperforming a number of baselines even when its assumptions are violated.}, author = {Riquelme, Carlos and Ghavamzadeh, Mohammad and Lazaric, Alessandro}, booktitle = {International Conference on Machine Learning}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Riquelme, Ghavamzadeh, Lazaric - 2017 - Active learning for accurate estimation of linear models.pdf:pdf}, title = {{Active learning for accurate estimation of linear models}}, year = {2017} } @inproceedings{abe2006outlier, author = {Abe, Naoki and Zadrozny, Bianca and Langford, John}, booktitle = {Proceedings of the 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, doi = {10.1145/1150402.1150459}, isbn = {1-59593-339-5}, keywords = {active learning,ensemble method,outlier detection}, pages = {504--509}, title = {{Outlier Detection by Active Learning}}, year = {2006} } @inproceedings{price-bayesian, author = {Price, Bob and Boutilier, Craig}, pages = {712--720}, title = {{A {\{}B{\}}ayesian Approach to Imitation in Reinforcement Learning}} } @article{klopp2012noisy, abstract = {In the present paper, we consider the problem of matrix completion with noise. Unlike previous works, we consider quite general sampling distribution and we do not need to know or to estimate the variance of the noise. Two new nuclear-norm penalized estimators are proposed, one of them of "square-root" type. We analyse their performance under high-dimensional scaling and provide non-asymptotic bounds on the Frobenius norm error. 
Up to a logarithmic factor, these performance guarantees are minimax optimal in a number of circumstances.}, author = {Klopp, Olga}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Klopp - 2014 - Noisy low-rank matrix completion with general sampling distribution.pdf:pdf}, journal = {Bernoulli}, title = {{Noisy low-rank matrix completion with general sampling distribution}}, year = {2014} } @book{papadimitriou1998combinatorial, author = {Papadimitriou, Christos and Steiglitz, Kenneth}, publisher = {Dover Publications}, title = {{Combinatorial Optimization}}, year = {1998} } @inproceedings{gyorgy13nearoptimal, author = {Gy{\"{o}}rgy, Andr{\'{a}}s and Neu, Gergely}, booktitle = {Submitted to the IEEE Transactions on Information Theory}, title = {{Near-Optimal Rates for Limited-Delay Universal Lossy Source Coding}}, year = {2013} } @article{neal2000markov, abstract = {This article reviews Markov chain methods for sampling from the posterior distribution of a Dirichlet process mixture model and presents two new classes of methods. One new approach is to make Metropolis-Hastings updates of the indicators specifying which mixture component is associated with each observation, perhaps supplemented with a partial form of Gibbs sampling. The other new approach extends Gibbs sampling for these indicators by using a set of auxiliary parameters. 
These methods are simple to implement and are more efficient than previous ways of handling general Dirichlet process mixture models with non-conjugate priors.}, annote = {comps{\_}models}, author = {Neal, Radford M}, journal = {Journal of Computational and Graphical Statistics}, keywords = {dirichlet,dp,gibbs-sampling,mcmc}, number = {2}, pages = {249--265}, title = {{Markov Chain Sampling Methods for Dirichlet Process Mixture Models}}, url = {http://www.jstor.org/stable/1390653}, volume = {9}, year = {2000} } @inproceedings{hazan2019provably, author = {Hazan, Elad and Kakade, Sham M and Singh, Karan and Van Soest, Abby}, booktitle = {International Conference on Machine Learning}, title = {{Provably efficient maximum entropy exploration}}, url = {https://arxiv.org/pdf/1812.02690.pdf}, year = {2019} } @article{russo2014learning, author = {Russo, Daniel and {Van Roy}, Benjamin}, journal = {Mathematics of Operations Research}, title = {{Learning to Optimize Via Posterior Sampling}}, year = {2014} } @article{feige1998threshold, author = {Feige, U}, journal = {Journal of the ACM (JACM)}, number = {4}, pages = {634--652}, publisher = {ACM}, title = {{A threshold of {$\ln$} n for approximating set cover}}, volume = {45}, year = {1998} } @article{iwata2009simple, abstract = {This paper presents a new simple algorithm for minimizing submodular functions. For integer valued submodular functions, the algorithm runs in O(n6EO log nM) time, where n is the cardinality of the ground set, M is the maximum absolute value of the function value, and EO is the time for function evaluation. The algorithm can be improved to run in O ((n4EO+n5)log nM) time. The strongly polynomial version of this faster algorithm runs in O((n5EO + n6) log n) time for real valued general submodular functions. These are comparable to the best known running time bounds for submodular function minimization. 
The algorithm can also be implemented in strongly polynomial time using only additions, subtractions, comparisons, and the oracle calls for function evaluation. This is the first fully combinatorial submodular function minimization algorithm that does not rely on the scaling method.}, author = {Iwata, Satoru and Orlin, James B}, doi = {10.1006/jctb.2001.2072}, issn = {00958956}, journal = {Journal of Combinatorial Theory Series B}, number = {2}, pages = {1230--1237}, publisher = {Society for Industrial and Applied Mathematics}, title = {{A simple combinatorial algorithm for submodular function minimization}}, url = {http://portal.acm.org/citation.cfm?id=1496770.1496903}, volume = {84}, year = {2009} } @article{kolar2010estimating, author = {Kolar, Mladen and Song, Le and Ahmed, Amr and Xing, Eric P}, doi = {10.1214/09-AOAS308}, journal = {Annals of Applied Statistics}, pages = {94--123}, title = {{Estimating time-varying networks}}, volume = {4}, year = {2010} } @inproceedings{valko2012semi-supervised, abstract = {In apprenticeship learning we aim to learn a good policy by observing the behavior of an expert or a set of experts. In particular, we consider the case where the expert acts so as to maximize an unknown reward function defined as a linear combination of a set of state features. In this paper, we consider the setting where we observe many sample trajectories (i.e., sequences of states) but only one or a few of them are labeled as experts' trajectories. We investigate the conditions under which the remaining unlabeled trajectories can help in learning a policy with a good performance. In particular, we define an extension to the max-margin inverse reinforcement learning proposed by Abbeel and Ng (2004) where, at each iteration, the max-margin optimization step is replaced by a semi-supervised optimization problem which favors classifiers separating clusters of trajectories. 
Finally, we report empirical results on two grid-world domains showing that the semi-supervised algorithm is able to output a better policy in fewer iterations than the related algorithm that does not take the unlabeled trajectories into account.}, author = {Valko, Michal and Ghavamzadeh, Mohammad and Lazaric, Alessandro}, booktitle = {The 24th Journal of Machine Learning Research Proceedings of the 10th European Workshop on Reinforcement Learning}, month = {jun}, pages = {131--241}, publisher = {Sparc}, title = {{Semi-supervised apprenticeship learning}}, url = {http://researchers.lille.inria.fr/{~}valko/hp/serve.php?what=publications/valko2012semi-supervised.pdf}, volume = {24}, year = {2012} } @article{hauskrecht2012outlier, abstract = {We develop and evaluate a data-driven approach for detecting unusual (anomalous) patient-management decisions using past patient cases stored in electronic health records (EHRs). Our hypothesis is that a patient-management decision that is unusual with respect to past patient care may be due to an error and that it is worthwhile to generate an alert if such a decision is encountered. We evaluate this hypothesis using data obtained from EHRs of 4486 post-cardiac surgical patients and a subset of 222 alerts generated from the data. We base the evaluation on the opinions of a panel of experts. The results of the study support our hypothesis that the outlier-based alerting can lead to promising true alert rates. 
We observed true alert rates that ranged from 25{\%} to 66{\%} for a variety of patient-management actions, with 66{\%} corresponding to the strongest outliers.}, author = {Hauskrecht, Milos and Batal, Iyad and Valko, Michal and Visweswaran, Shyam and Cooper, Gregory F and Clermont, Gilles}, doi = {10.1016/j.jbi.2012.08.004}, issn = {1532-0464}, journal = {Journal of Biomedical Informatics}, keywords = {Clinical alerting,Conditional outlier detection,Machine learning,Medical errors}, month = {feb}, number = {1}, pages = {47--55}, title = {{Outlier detection for patient monitoring and alerting}}, url = {http://www.sciencedirect.com/science/article/pii/S1532046412001281}, volume = {46}, year = {2013} } @article{auer2002adaptive, author = {Auer, Peter and Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio}, journal = {Journal of Computer and System Sciences}, pages = {48--75}, title = {{Adaptive and self-confident on-line learning algorithms}}, volume = {64}, year = {2002} } @article{REGK14, author = {de Rooij, Steven and van Erven, Tim and Gr{\"{u}}nwald, Peter D and Koolen, Wouter M}, journal = {Accepted to the Journal of Machine Learning Research}, title = {{Follow the Leader If You Can, Hedge If You Must}}, year = {2014} } @inproceedings{wipf, author = {Wipf, D and Nagarajan, S}, booktitle = {Adv. NIPS 22}, title = {{Sparse Estimation Using General Likelihoods and Non-Factorial Priors}}, year = {2009} } @inproceedings{calandriello2017efficient, abstract = {Online kernel learning (OKL) is a flexible framework to approach prediction problems, since the large approximation space provided by reproducing kernel Hilbert spaces can contain an accurate function for the problem. Nonetheless, optimizing over this space is computationally expensive. Not only first order methods accumulate O( sqrt T ) more loss than the optimal function, but the curse of kernelization results in a O(t) per step complexity. 
Second-order methods get closer to the optimum much faster, suffering only O( log(T)) regret, but second-order updates are even more expensive, with a O(t 2) per-step cost. Existing approximate OKL methods try to reduce this complexity either by limiting the Support Vectors (SV) introduced in the predictor, or by avoiding the kernelization process altogether using embedding. Nonetheless, as long as the size of the approximation space or the number of SV does not grow over time, an adversary can always exploit the approximation process. In this paper, we propose PROS-N-KONS, a method that combines Nystrom sketching to project the input point in a small, accurate embedded space, and performs efficient second-order updates in this space. The embedded space is continuously updated to guarantee that the embedding remains accurate, and we show that the per-step cost only grows with the effective dimension of the problem and not with T . Moreover, the second-order updated allows us to achieve the logarithmic regret. We empirically compare our algorithm on recent large-scales benchmarks and show it performs favorably.}, author = {Calandriello, Daniele and Lazaric, Alessandro and Valko, Michal}, booktitle = {Neural Information Processing Systems}, title = {{Efficient second-order online kernel learning with adaptive embedding}}, year = {2017} } @article{hein2007graph, author = {Hein, Matthias and Audibert, Jean-Yves and von Luxburg, Ulrike}, issn = {1532-4435}, journal = {J. Mach. Learn. 
Res.}, month = {dec}, pages = {1325--1370}, publisher = {JMLR.org}, title = {{Graph Laplacians and their Convergence on Random Neighborhood Graphs}}, url = {http://portal.acm.org/citation.cfm?id=1314498.1314544}, volume = {8}, year = {2007} } @incollection{pelikan2005hierarchical, author = {Pelikan, Martin}, booktitle = {Studies in Fuzziness and Soft Computing}, doi = {10.1007/978-3-540-32373-0_6}, pages = {105--129}, title = {{Hierarchical Bayesian optimization algorithm}}, url = {http://link.springer.com/10.1007/978-3-540-32373-0{\_}6}, year = {2005} } @article{Ipsen11ergodicitycoeff, author = {Ipsen, Ilse C F and Selee, Teresa M}, journal = {SIAM J. Matrix Analysis Applications}, number = {1}, pages = {153--200}, title = {{Ergodicity Coefficients Defined by Vector Norms}}, volume = {32}, year = {2011} } @book{Roc70, author = {Rockafellar, R}, publisher = {Princeton University Press}, title = {{Convex Analysis}}, year = {1970} } @article{khachiyan1979polynomial, author = {Khachiyan, Leonid}, journal = {Doklady Akademii Nauk SSSR}, pages = {1093--1096}, title = {{A Polynomial Algorithm in Linear Programming}}, volume = {244}, year = {1979} } @article{Ambuhl2009, abstract = {In this paper we study the singlemachine precedence constrained schedul- ing problem of minimizing the sum of weighted completion time. Specifically, we settle an open problem first raised by Chudak and Hochbaum and whose answer was subsequently conjectured by Correa and Schulz. As shown by Correa and Schulz, the proof of this conjecture implies that the addressed scheduling problem is a special case of the vertex cover problem. This means that previous results for the scheduling problem can be explained, and in some cases improved, by means of vertex cover theory. For example, the conjecture implies the existence of a polynomial time algo- rithm for the special case of two-dimensional partial orders. 
This considerably ex- tends Lawler's result from 1978 for series-parallel orders.}, author = {Amb{\"{u}}hl, Christoph and Mastrolilli, Monaldo}, doi = {10.1007/s00453-008-9251-6}, isbn = {3540388753}, issn = {01784617}, journal = {Algorithmica}, keywords = {Algorithms,Scheduling,Vertex cover}, number = {4}, pages = {488--503}, title = {{Single machine precedence constrained scheduling is a vertex cover problem}}, volume = {53}, year = {2009} } @techreport{submodular_tutorial, author = {Bach, F}, institution = {HAL}, number = {00527714}, title = {{Convex Analysis and Optimization with Submodular Functions: a Tutorial}}, year = {2010} } @inproceedings{grill2016blazing, abstract = {You are a robot and you live in a Markov decision process (MDP) with a finite or an infinite number of transitions from state-action to next states. You got brains and so you plan before you act. Luckily, your roboparents equipped you with a generative model to do some Monte-Carlo planning. The world is waiting for you and you have no time to waste. You want your planning to be efficient. Sample-efficient. Indeed, you want to exploit the possible structure of the MDP by exploring only a subset of states reachable by following near-optimal policies. You want guarantees on sample complexity that depend on a measure of the quantity of near-optimal states. You want something, that is an extension of Monte-Carlo sampling (for estimating an expectation) to problems that alternate maximization (over actions) and expectation (over next states). But you do not want to StOP with exponential running time, you want something simple to implement and computationally efficient. You want it all and you want it now. 
You want TrailBlazer.}, author = {Grill, Jean-Bastien and Valko, Michal and Munos, R{\'{e}}mi}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Grill, Valko, Munos - 2016 - Blazing the trails before beating the path Sample-efficient Monte-Carlo planning.pdf:pdf}, title = {{Blazing the trails before beating the path: Sample-efficient Monte-Carlo planning}}, url = {https://hal.inria.fr/hal-01389107v3/document}, year = {2016} } @article{guez2012efficient, abstract = {Bayesian model-based reinforcement learning is a formally elegant approach to learning optimal behaviour under model uncertainty, trading off exploration and exploitation in an ideal way. Unfortunately, finding the resulting Bayes-optimal policies is notoriously taxing, since the search space becomes enormous. In this paper we introduce a tractable, sample-based method for approximate Bayesoptimal planning which exploits Monte-Carlo tree search. Our approach outperformed prior Bayesian model-based RL algorithms by a significant margin on several well-known benchmark problems – because it avoids expensive applications of Bayes rule within the search tree by lazily sampling models from the current beliefs. We illustrate the advantages of our approach by showing it working in an infinite state space domain which is qualitatively out of reach of almost all previous work in Bayesian exploration.}, author = {Guez, Arthur and Silver, David and Dayan, Peter}, journal = {Neural Information Processing Systems}, title = {{Efficient Bayes-adaptive reinforcement learning using sample-based search}}, year = {2012} } @article{cardoso2003dependence, author = {Cardoso, J F}, journal = {The Journal of Machine Learning Research}, pages = {1177--1203}, publisher = {JMLR. 
org}, title = {{Dependence, correlation and Gaussianity in independent component analysis}}, volume = {4}, year = {2003} } @article{choquet1953theory, author = {Choquet, G}, journal = {Ann. Inst. Fourier}, pages = {131--295}, title = {{Theory of capacities}}, volume = {5}, year = {1954} } @inproceedings{Pol05, author = {Poland, Jan}, booktitle = {In 3rd Symposium on Stochastic Algorithms, Foundations and Applications (SAGA'05)}, pages = {58--69}, title = {{{\{}FPL{\}} analysis for adaptive bandits}}, year = {2005} } @inproceedings{ashkan14diversified, author = {Ashkan, Azin and Kveton, Branislav and Berkovsky, Shlomo and Wen, Zheng}, booktitle = {Conference on Recommender Systems}, title = {{Diversified utility maximization for recommendations}}, year = {2014} } @inproceedings{BLLRS11, author = {Beygelzimer, Alina and Langford, John and Li, Lihong and Reyzin, Lev and Schapire, Robert E}, pages = {19--26}, title = {{Contextual Bandit Algorithms with Supervised Learning Guarantees}} } @article{mcpherson2001birds, author = {McPherson, Miller and Smith-Lovin, Lynn and Cook, James}, journal = {Annual Review of Sociology}, pages = {415--444}, title = {{Birds of a feather: Homophily in social networks}}, url = {http://aris.ss.uci.edu/{~}lin/52.pdf}, volume = {27}, year = {2001} } @inproceedings{Babaioff09truthful, author = {Babaioff, Moshe and Sharma, Yogeshwer and Slivkins, Aleksandrs}, booktitle = {ACM-EC}, pages = {79--88}, title = {{Characterizing truthful multi-armed bandit mechanisms}}, year = {2009} } @article{belkin2006manifold, author = {Belkin, Mikhail and Niyogi, Partha and Sindhwani, Vikas}, journal = {Journal of Machine Learning Research}, pages = {2399--2434}, title = {{Manifold regularization: A geometric framework for learning from labeled and unlabeled examples}}, url = {http://www.jmlr.org/papers/volume7/belkin06a/belkin06a.pdf}, volume = {7}, year = {2006} } @book{press1992numerical, address = {Cambridge, MA}, author = {Press, William and Teukolsky, 
Saul and Vetterling, William and Flannery, Brian}, publisher = {Cambridge University Press}, title = {{Numerical Recipes in C}}, year = {1992} } @inproceedings{dams13wireless, author = {Dams, Johannes and Hoefer, Martin and Kesselheim, Thomas}, booktitle = {DISC}, editor = {Afek, Yehuda}, pages = {344--357}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {{Sleeping Experts in Wireless Networks.}}, volume = {8205}, year = {2013} } @inproceedings{amit02parametric, author = {Amit, R and Mataric, Maja J}, pages = {944--945}, title = {{A Correspondence Metric for Imitation}} } @inproceedings{kveton2010semi--supervised, abstract = {This paper proposes a novel algorithm for semisupervised learning. This algorithm learns graph cuts that maximize the margin with respect to the labels induced by the harmonic function solution. We motivate the approach, compare it to existing work, and prove a bound on its generalization error. The quality of our solutions is evaluated on a synthetic problem and three UCI ML repository datasets. In most cases, we outperform manifold regularization of support vector machines, which is a state-of-the-art approach to semi-supervised max-margin learning.}, author = {Kveton, Branislav and Valko, Michal and Rahimi, Ali and Huang, Ling}, booktitle = {International Conference on Artificial Intelligence and Statistics}, editor = {Teh, Y W and Titterington, M}, keywords = {misovalko}, mendeley-tags = {misovalko}, number = {W{\&}CP 9}, pages = {421--428}, title = {{Semi-Supervised Learning with Max-Margin Graph Cuts}}, volume = {9}, year = {2010} } @phdthesis{valko2011adaptive, abstract = {We develop graph-based methods for semi-supervised learning based on label propagation on a data similarity graph. When data is abundant or arrive in a stream, the problems of computation and data storage arise for any graph-based method. 
We propose a fast approximate online algorithm that solves for the harmonic solution on an approximate graph. We show, both empirically and theoretically, that good behavior can be achieved by collapsing nearby points into a set of local representative points that minimize distortion. Moreover, we regularize the harmonic solution to achieve better stability properties. We also present graph-based methods for detecting conditional anomalies and apply them to the identification of unusual clinical actions in hospitals. Our hypothesis is that patient-management actions that are unusual with respect to the past patients may be due to errors and that it is worthwhile to raise an alert if such a condition is encountered. Conditional anomaly detection extends standard unconditional anomaly framework but also faces new problems known as fringe and isolated points. We devise novel nonparametric graph-based methods to tackle these problems. Our methods rely on graph connectivity analysis and soft harmonic solution. 
Finally, we conduct an extensive human evaluation study of our conditional anomaly methods by 15 experts in critical care.}, author = {Valko, Michal}, keywords = {misovalko}, mendeley-tags = {misovalko}, month = {aug}, school = {University of Pittsburgh}, title = {{Adaptive Graph-Based Algorithms for Conditional Anomaly Detection and Semi-Supervised Learning}}, url = {http://researchers.lille.inria.fr/{~}valko/hp/serve.php?what=publications/valko2011adaptive.pdf}, year = {2011} } @inproceedings{CWY13, author = {Chen, Wei and Wang, Yajun and Yuan, Yang}, booktitle = {International Conference on Machine Learning}, pages = {151--159}, title = {{Combinatorial Multi-Armed Bandit: General Framework and Applications}}, year = {2013} } @article{mitra2002density-based, author = {Mitra, P and Murthy, C A and Pal, S K}, journal = {IEEE Transactions on PAMI}, number = {6}, pages = {1--14}, title = {{Density-based multiscale data condensation}}, volume = {24}, year = {2002} } @inproceedings{shang2020fixed-confidence, abstract = {We investigate and provide new insights on the sampling rule called Top-Two Thompson Sampling (TTTS). In particular, we justify its use for fixed-confidence best-arm identification. We further propose a variant of TTTS called Top-Two Transportation Cost (T3C), which disposes of the computational burden of TTTS. As our main contribution, we provide the first sample complexity analysis of TTTS and T3C when coupled with a very natural Bayesian stopping rule, for bandits with Gaussian rewards, solving one of the open questions raised by Russo (2016). 
We also provide new posterior convergence results for TTTS under two models that are commonly used in practice: bandits with Gaussian and Bernoulli rewards and conjugate priors.}, author = {Shang, Xuedong and de Heide, Rianne and Kaufmann, Emilie and M{\'{e}}nard, Pierre and Valko, Michal}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Fixed-confidence guarantees for Bayesian best-arm identification}}, year = {2020} } @article{badanidiyuru2014resourceful, abstract = {We study contextual bandits with ancillary constraints on resources, which are common in real-world applications such as choosing ads or dynamic pricing of items. We design the first algorithm for solving these problems, and prove a regret guarantee with near-optimal statistical properties.}, archivePrefix = {arXiv}, arxivId = {arXiv:1402.6779v2}, author = {Badanidiyuru, A and Langford, John and Slivkins, Aleksandrs}, eprint = {arXiv:1402.6779v2}, journal = {arXiv preprint arXiv:1402.6779}, pages = {1--22}, title = {{Resourceful Contextual Bandits}}, url = {http://arxiv.org/abs/1402.6779}, year = {2014} } @article{chen2015combinatorial, abstract = {We define a general framework for a large class of combinatorial multi-armed bandit (CMAB) problems, where subsets of base arms with unknown distributions form super arms. In each round, a super arm is played and the base arms contained in the super arm are played and their outcomes are observed. We further consider the extension in which more based arms could be probabilistically triggered based on the outcomes of already triggered arms. The reward of the super arm depends on the outcomes of all played arms, and it only needs to satisfy two mild assumptions, which allow a large class of nonlinear reward instances. 
We assume the availability of an offline ($\backslash$alpha,$\backslash$beta)-approximation oracle that takes the means of the outcome distributions of arms and outputs a super arm that with probability {\{}$\backslash$beta{\}} generates an {\{}$\backslash$alpha{\}} fraction of the optimal expected reward. The objective of an online learning algorithm for CMAB is to minimize ($\backslash$alpha,$\backslash$beta)-approximation regret, which is the difference between the $\backslash$alpha{\{}$\backslash$beta{\}} fraction of the expected reward when always playing the optimal super arm, and the expected reward of playing super arms according to the algorithm. We provide CUCB algorithm that achieves O(log n) distribution-dependent regret, where n is the number of rounds played, and we further provide distribution-independent bounds for a large class of reward functions. Our regret analysis is tight in that it matches the bound of UCB1 algorithm (up to a constant factor) for the classical MAB problem, and it significantly improves the regret bound in a earlier paper on combinatorial bandits with linear rewards. We apply our CMAB framework to two new applications, probabilistic maximum coverage and social influence maximization, both having nonlinear reward structures. In particular, application to social influence maximization requires our extension on probabilistically triggered arms.}, author = {Chen, Wei and Wang, Yajun and Yuan, Yang}, journal = {Journal of Machine Learning Research}, title = {{Combinatorial multi-armed bandit and its extension to probabilistically triggered arms}}, url = {http://www.jmlr.org/papers/volume17/14-298/14-298.pdf}, volume = {17}, year = {2016} } @article{ellison1993rules, abstract = {This paper studies agents who consider the experiences of their neighbors in deciding which of two technologies to use. 
We analyze two learning environments, one in which the same technology is optimal for all players and another in which each technology is better for some of them. In both environments, players use exogenously specified rules of thumb that ignore historical data but may incorporate a tendency to use the more popular technology. In some cases these naive rules can lead to fairly efficient decisions in the long run, but adjustment can be slow when a superior technology is first introduced.}, author = {Ellison, Glenn and Fudenberg, Drew}, journal = {Journal of Political Economy}, number = {4}, pages = {612--643}, title = {{Rules of thumb for social learning}}, volume = {101}, year = {1993} } @inproceedings{menzies2006bayesian, annote = {comps{\_}anX}, author = {Menzies, T and Allen, D and Orrego, A}, booktitle = {Proceedings of the Machine Learning Algorithms for Surveillance and Event Detection Workshop}, title = {{Bayesian Anomaly Detection}}, url = {http://menzies.us/pdf/06bad.pdf}, year = {2006} } @inproceedings{bagnell2010efficient, abstract = {Imitation Learning, while applied successfully on many large real-world$\backslash$nproblems, is typically addressed as a standard supervised learning$\backslash$nproblem, where it is assumed the training and testing data are i.i.d..$\backslash$nThis is not true in imitation learning as the learned policy influences$\backslash$nthe future test inputs (states) upon which it will be tested. We$\backslash$nshow that this leads to compounding errors and a regret bound that$\backslash$ngrows quadratically in the time horizon of the task. We propose two$\backslash$nalternative algorithms for imitation learning where training occurs$\backslash$nover several episodes of interaction. These two approaches share$\backslash$nin common that the learner's policy is slowly modified from executing$\backslash$nthe expert's policy to the learned policy. 
We show that this leads to stronger performance guarantees and demonstrate the improved performance on two challenging problems: training a learner to play 1) a 3D racing game (Super Tux Kart) and 2) Mario Bros.; given input images from the games and corresponding actions taken by a human expert and near-optimal planner respectively.
{\textcopyright}1978 North-Holland Publishing Company}, author = {Lawler, E L}, doi = {10.1016/S0167-5060(08)70323-6}, isbn = {9780720410433}, issn = {01675060}, journal = {Annals of Discrete Mathematics}, number = {C}, pages = {75--90}, title = {{Sequencing jobs to minimize total weighted completion time subject to precedence constraints}}, volume = {2}, year = {1978} } @inproceedings{haarnoja2017reinforcement, author = {Haarnoja, Tuomas and Tang, Haoran and Abbeel, Pieter and Levine, Sergey}, booktitle = {International Conference on Machine Learning}, month = {feb}, title = {{Reinforcement learning with deep energy-based policies}}, url = {https://arxiv.org/pdf/1702.08165.pdf}, year = {2017} } @article{mairal2011b, author = {Mairal, J and Jenatton, R and Obozinski, G and Bach, F}, journal = {Journal of Machine Learning Research}, pages = {2681--2720}, title = {{Convex and Network Flow Optimization for Structured Sparsity}}, volume = {12}, year = {2011} } @article{hastie1996discriminant, annote = {comps{\_}distance}, author = {Hastie, T and Tibshirani, R}, doi = {10.1109/34.506411}, issn = {0162-8828}, journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on}, keywords = {adaptive systems,approximation theory,centroid information,curse of dimensionality,global dimension reduction,linear discriminant analysis,local decision boundaries,neighbourhood-based classifier,pattern classification,pattern recognition,posterior probability,probabilityadaptive nearest neighbor classificati}, month = {jun}, number = {6}, pages = {607--616}, title = {{Discriminant adaptive nearest neighbor classification}}, volume = {18}, year = {1996} } @article{karypis1999fast, author = {Karypis, G and Kumar, V}, journal = {SIAM Journal on Scientific Computing}, pages = {359--392}, title = {{A fast and high quality multilevel scheme for partitioning irregular graphs}}, volume = {20}, year = {1999} } @article{srivastava1996predictive, author = {Srivastava, Mani and 
Chandrakasan, Anantha and Brodersen, Robert}, journal = {IEEE Transactions on Very Large Scale Integration Systems}, number = {1}, pages = {42--55}, title = {{Predictive System Shutdown and Other Architectural Techniques for Energy Effcient Programmable Computation}}, volume = {4}, year = {1996} } @article{JNTV05, author = {Juditsky, A and Nazin, A and Tsybakov, A and Vayatis, N}, journal = {Problems of Information Transmission}, number = {4}, pages = {368--384}, title = {{Recursive Aggregation of Estimators by the Mirror Descent Algorithm with Averaging}}, volume = {41}, year = {2005} } @article{queyranne1998minimizing, author = {Queyranne, M}, journal = {Mathematical Programming}, number = {1}, pages = {3--12}, publisher = {Springer}, title = {{Minimizing symmetric submodular functions}}, volume = {82}, year = {1998} } @article{erkan2009semi-supervised, abstract = {Various supervised inference methods can be analyzed as convex duals of the generalized maximum entropy (MaxEnt) framework. Generalized MaxEnt aims to find a distribution that maximizes an entropy function while respecting prior information represented as potential functions in miscellaneous forms of constraints and/or penalties. We extend this framework to semi-supervised learning by incorporating unlabeled data via modifications to these potential functions reflecting structural assumptions on the data geometry. The proposed approach leads to a family of discriminative semi-supervised algorithms, that are convex, scalable, inherently multi-class, easy to implement, and that can be kernelized naturally. 
Experimental evaluation of special cases shows the competitiveness of our methodology.}, author = {Erkan, Ayse Naz and Altun, Yasemin}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Erkan, Altun - 2009 - Semi-Supervised Learning via Generalized Maximum Entropy.pdf:pdf}, journal = {Proceedings of JMLR Workshop}, keywords = {computational,information theoretic learning with statistics}, number = {September}, pages = {209--216}, publisher = {New York University}, title = {{Semi-Supervised Learning via Generalized Maximum Entropy}}, url = {http://eprints.pascal-network.org/archive/00006122/}, volume = {9}, year = {2009} } @article{bates2003ten, abstract = {While evidence-based medicine has increasingly broad-based support in health care, it remains difficult to get physicians to actually practice it. Across most domains in medicine, practice has lagged behind knowledge by at least several years. The authors believe that the key tools for closing this gap will be information systems that provide decision support to users at the time they make decisions, which should result in improved quality of care. Furthermore, providers make many errors, and clinical decision support can be useful for finding and preventing such errors. Over the last eight years the authors have implemented and studied the impact of decision support across a broad array of domains and have found a number of common elements important to success. The goal of this report is to discuss these lessons learned in the interest of informing the efforts of others working to make the practice of evidence-based medicine a reality.}, author = {Bates, David W and Kuperman, Gilad J and Wang, Samuel and Gandhi, Tejal and Kittler, Anne and Volk, Lynn and Spurr, Cynthia and Khorasani, Ramin and Tanasijevic, Milenko and Middleton, Blackford}, doi = {10.1197/jamia.M1370}, institution = {Department of Medicine, Brigham and Women's Hospital, Boston, MA 02115, USA. 
dbates@partners.org}, journal = {J Am Med Inform Assoc}, keywords = {Clinical,Computer-Assisted,Decision Making,Decision Support Systems,Decision Support Techniques,Evidence-B}, number = {6}, pages = {523--530}, pmid = {12925543}, title = {{Ten commandments for effective clinical decision support: making the practice of evidence-based medicine a reality.}}, url = {http://dx.doi.org/10.1197/jamia.M1370}, volume = {10}, year = {2003} } @article{jaksch2010near, author = {Jaksch, Thomas and Ortner, Ronald and Auer, Peter}, journal = {Journal of Machine Learning Research}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {1563--1600}, title = {{Near-optimal regret bounds for reinforcement learning}}, url = {http://www.jmlr.org/papers/volume11/jaksch10a/jaksch10a.pdf}, volume = {99}, year = {2010} } @inproceedings{mnih2008, author = {Mnih, V and Szepesv{\'{a}}ri, $\backslash$textCs and Audibert, J.-Y.}, booktitle = {ICML}, pages = {672--679}, title = {{Empirical {\{}B{\}}ernstein stopping}}, year = {2008} } @article{azuma1967weighted, author = {Azuma, Kazuoki}, journal = {Tohoku Mathematical Journal}, keywords = {bound,math}, number = {3}, pages = {357--367}, title = {{Weighted sums of certain dependent random variables}}, url = {https://projecteuclid.org/download/pdf{\_}1/euclid.tmj/1178243286}, volume = {19}, year = {1967} } @book{rockafellar97, author = {Rockafellar, R T}, publisher = {Princeton University Press}, title = {{Convex Analysis}}, year = {1997} } @article{glynn1990likelihood, author = {Glynn, Peter}, journal = {Communications of the ACM}, number = {10}, pages = {75--84}, title = {{Likelihood Ratio Gradient Estimation for Stochastic Systems}}, volume = {33}, year = {1990} } @article{feldman2014simple, abstract = {We consider online planning in Markov decision processes (MDPs). 
In online planning, the agent focuses on its current state only, deliberates about the set of possible policies from that state onwards and, when interrupted, uses the outcome of that exploratory deliberation to choose what action to perform next. The performance of algorithms for online planning is assessed in terms of simple regret, which is the agent's expected performance loss when the chosen action, rather than an optimal one, is followed. To date, state-of-the-art algorithms for online planning in general MDPs are either best effort, or guarantee only polynomial-rate reduction of simple regret over time. Here we introduce a new Monte-Carlo tree search algorithm, BRUE, that guarantees exponential-rate reduction of simple regret and error probability. This algorithm is based on a simple yet non-standard state-space sampling scheme, MCTS2e, in which different parts of each sample are dedicated to different exploratory objectives. Our empirical evaluation shows that BRUE not only provides superior performance guarantees, but is also very effective in practice and favorably compares to state-of-the-art. We then extend BRUE with a variant of "learning by forgetting." The resulting set of algorithms, BRUE(alpha), generalizes BRUE, improves the exponential factor in the upper bound on its reduction rate, and exhibits even more attractive empirical performance.}, author = {Feldman, Zohar and Domshlak, Carmel}, journal = {Journal of Artificial Intelligence Research}, title = {{Simple regret optimization in online planning for Markov decision processes}}, url = {https://www.jair.org/index.php/jair/article/view/10905/26003}, year = {2014} } @inproceedings{valko2008distance, abstract = {Anomaly detection methods can be very useful in identifying unusual or interesting patterns in data. A recently proposed conditional anomaly detection framework extends anomaly detection to the problem of identifying anomalous patterns on a subset of attributes in the data. 
The anomaly always depends (is conditioned) on the value of remaining attributes. The work presented in this paper focuses on instance-based methods for detecting conditional anomalies. The methods depend heavily on the distance metric that lets us identify examples in the dataset that are most critical for detecting the anomaly. To optimize the performance of the anomaly detection methods we explore and study metric learning methods. We evaluate the quality of our methods on the Pneumonia PORT dataset by detecting unusual admission decisions for patients with the community-acquired pneumonia. The results of our metric learning methods show an improved detection performance over standard distance metrics, which is very promising for building automated anomaly detection systems for variety of intelligent monitoring applications.}, annote = {From Duplicate 1 ( Distance Metric Learning for Conditional Anomaly Detection - Valko, Michal; Hauskrecht, Milos ) From Duplicate 2 ( Distance Metric Learning for Conditional Anomaly Detection - Valko, Michal; Hauskrecht, Milos ) comps{\_}distances From Duplicate 2 ( Distance Metric Learning for Conditional Anomaly Detection - Valko, Michal; Hauskrecht, Milos ) comps{\_}distances}, author = {Valko, Michal and Hauskrecht, Milos}, booktitle = {Twenty-First International Florida Artificial Intelligence Research Society Conference}, keywords = {misovalko}, mendeley-tags = {misovalko}, publisher = {AAAI Press}, title = {{Distance metric learning for conditional anomaly detection}}, year = {2008} } @inproceedings{poupart2002piecewise, author = {Poupart, Pascal and Boutilier, Craig and Patrascu, Relu and Schuurmans, Dale}, booktitle = {Proceedings of the 18th National Conference on Artificial Intelligence}, pages = {292--299}, title = {{Piecewise Linear Value Function Approximation for Factored {\{}MDPs{\}}}}, year = {2002} } @inproceedings{kearns1999sparse, abstract = {A critical issue for the application of Markov decision processes 
(MDPs) to realistic problems is how the complexity of planning scales with the size of the MDP. In stochastic environments with very large or infinite state spaces, traditional planning and reinforcement learning algorithms may be inapplicable, since their running time typically grows linearly with the state space size in the worst case. In this paper we present a new algorithm that, given only a generative model (a natural and common type of simulator) for an arbitrary MDP, performs on-line, near-optimal planning with a per-state running time that has no dependence on the number of states. The running time is exponential in the horizon time (which depends only on the discount factor $\gamma$ and the desired degree of approximation to the optimal policy). Our algorithm thus provides a different complexity trade-off than classical algorithms such as value iteration—rather than scaling linearly in both horizon time and state space size, our running time trades an exponential dependence on the former in exchange for no dependence on the latter. Our algorithm is based on the idea of sparse sampling. We prove that a randomly sampled look-ahead tree that covers only a vanishing fraction of the full look-ahead tree nevertheless suffices to compute near-optimal actions from any state of an MDP. 
Practical implementations of the algorithm are discussed, and we draw ties to our related recent results on finding a near-best strategy from a given class of strategies in very large partially observable MDPs.}, author = {Kearns, Michael and Mansour, Yishay and Ng, Andrew Y.}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{A sparse sampling algorithm for near-optimal planning in large Markov decision processes}}, url = {https://www.cis.upenn.edu/{~}mkearns/papers/sparsesampling-journal.pdf}, year = {1999} } @book{jannach2010recommender, author = {Jannach, Dietmar and Zanker, Markus and Felfernig, Alexander and Friedrich, Gerhard}, publisher = {Cambridge University Press}, title = {{Recommender systems: An introduction}}, url = {www.scholat.com/teamwork/teamworkdownloadscholar.html?id=542{\&}teamId=316}, year = {2010} } @inproceedings{kveton2005mcmc, author = {Kveton, Branislav and Hauskrecht, Milos}, booktitle = {Proceedings of the 19th International Joint Conference on Artificial Intelligence}, pages = {1346--1351}, title = {{An {\{}MCMC{\}} Approach to Solving Hybrid Factored {\{}MDPs{\}}}}, year = {2005} } @article{koivisto2004exact, annote = {comps{\_}models}, author = {Koivisto, M and Sood, K}, journal = {Journal of Machine Learning Research}, pages = {549--573}, title = {{Exact {\{}B{\}}ayesian Structure Discovery in {\{}B{\}}ayesian Networks}}, url = {http://citeseer.ist.psu.edu/article/koivisto04exact.html}, volume = {5}, year = {2004} } @article{friedman2010note, author = {Friedman, J and Hastie, T and Tibshirani, R}, journal = {preprint}, title = {{A note on the group lasso and a sparse group lasso}}, year = {2010} } @article{wolsey, author = {Wolsey, Laurence A}, journal = {Mathematics of Operations Research}, number = {3}, pages = {pp. 
410--425}, publisher = {INFORMS}, title = {{Maximising Real-Valued Submodular Functions: Primal and Dual Heuristics for Location Problems}}, volume = {7}, year = {1982} } @article{Kar2011, abstract = {This paper studies the multi-agent bandit problem in a distributed networked setting. The setting considered assumes only one bandit (the major bandit) has accessible reward information from its samples, whereas the rest (the minor bandits) have unobservable rewards. Under the assumption that the minor bandits are aware of the sampling pattern of the major bandit (but with no direct access to its rewards), a lower bound on the expected average network regret is obtained. The lower bound resembles the logarithmic optimal regret attained in single (classical) bandit problems, but in addition is shown to scale down with the number of agents. A collaborative and adaptive distributed allocation rule DA is proposed and is shown to achieve the lower bound on the expected average regret for a connected inter-bandit communication network. In particular, it is shown that under the DA allocation rule, the minor bandits attain sub-logarithmic expected regrets as opposed to logarithmic in the single agent setting.}, author = {Kar, Soummya and Poor, H. 
Vincent and Cui, Shuguang}, doi = {10.1109/CDC.2011.6160719}, isbn = {9781612848006}, issn = {01912216}, journal = {Proceedings of the IEEE Conference on Decision and Control}, keywords = {Asymptotically Efficient,Distributed Allocation Rules,Networked Bandit Problems,Partially Observable Rewards}, pages = {1771--1778}, title = {{Bandit problems in networks: Asymptotically efficient distributed allocation rules}}, year = {2011} } @inproceedings{Jenatton2010a, author = {Jenatton, R and Mairal, J and Obozinski, G and Bach, F}, booktitle = {Proceedings of the International Conference on Machine Learning (ICML)}, title = {{Proximal Methods for Sparse Hierarchical Dictionary Learning}}, year = {2010} } @article{gine2010adaptive, author = {Gin{\'{e}}, Evarist and Nickl, Richard}, journal = {Bernoulli}, number = {4}, pages = {1137--1163}, title = {{Adaptive estimation of a distribution function and its density in sup-norm loss by wavelet and spline projections}}, volume = {16}, year = {2010} } @article{thompson1933likelihood, author = {Thompson, William R.}, journal = {Biometrika}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {285--294}, title = {{On the likelihood that one unknown probability exceeds another in view of the evidence of two samples}}, url = {https://www.jstor.org/stable/pdf/2332286.pdf}, volume = {25}, year = {1933} } @incollection{boularias2012algorithms, author = {Boularias, Abdeslam and Kroemer, Oliver and Peters, Jan}, booktitle = {Advances in Neural Information Processing Systems 25}, editor = {Bartlett, P and Pereira, F C N and Burges, C J C and Bottou, L and Weinberger, K Q}, pages = {2186--2194}, title = {{Algorithms for Learning Markov Field Policies}}, url = {http://books.nips.cc/papers/files/nips25/NIPS2012{\_}1084.pdf}, year = {2012} } @article{gyorgy06adaptive, address = {Oxford, UK}, author = {Gy{\"{o}}rgy, Andr{\'{a}}s and Ottucs{\'{a}}k, $\backslash$textGy{\"{o}}rgy}, doi = {10.1093/comjnl/bxh168}, issn = {0010-4620}, journal = 
{Computer Journal}, keywords = {adaptive routing,machine learning,network routing}, number = {2}, pages = {180--189}, publisher = {Oxford University Press}, title = {{Adaptive Routing Using Expert Advice}}, url = {http://dx.doi.org/10.1093/comjnl/bxh168}, volume = {49}, year = {2006} } @article{chatterjee2015matrix, abstract = {Consider the problem of estimating the entries of a large matrix, when the observed entries are noisy versions of a small random fraction of the original entries. This problem has received widespread attention in recent times, especially after the pioneering works of Emmanuel Candes and collaborators. Typically, it is assumed that the underlying matrix has low rank. This paper introduces a simple estimation procedure, called Universal Singular Value Thresholding (USVT), that works for any matrix that has `a little bit of structure'. In particular, the matrix need not be of low rank. The procedure is very simple and fast, works under minimal assumptions, and is applicable for very large matrices. Surprisingly, this simple estimator achieves the minimax error rate up to a constant factor. 
The method is applied to give simple solutions to difficult questions in low rank matrix estimation, blockmodels, distance matrix completion, latent space models, positive definite matrix completion, problems related to graph limits, and generalized Bradley-Terry models for pairwise comparison.}, author = {Chatterjee, Sourav}, journal = {Annals of Statistics}, keywords = {Covariance matrix,Distance matrix,Graphons,Latent space model,Low rank matrices,Matrix completion,Matrix estimation,Singular value decomposition,Sochastic blockmodel}, number = {1}, pages = {177--214}, title = {{Matrix estimation by universal singular value thresholding}}, volume = {43}, year = {2015} } @techreport{krause2005note, author = {Krause, Andreas and Guestrin, Carlos}, booktitle = {Technical Rep No CMUCALD}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Krause, Guestrin - 2005 - A Note on the Budgeted Maximization of Submodular Functions.pdf:pdf}, institution = {CMU}, keywords = {constraints,entropy maximization,optimization,submodular functions}, number = {June}, pages = {1--7}, publisher = {Citeseer}, title = {{A Note on the Budgeted Maximization of Submodular Functions}}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.91.9721{\&}rep=rep1{\&}type=pdf}, volume = {5}, year = {2005} } @article{Sho67, annote = {(In Russian)}, author = {Shor, N}, journal = {Kibernetika}, pages = {53--55}, title = {{Generalized gradient descent with application to block programming}}, volume = {3}, year = {1967} } @inproceedings{mannor2006online, author = {Mannor, Shie and Tsitsiklis, John}, booktitle = {Proceedings of 19th Annual Conference on Learning Theory}, pages = {529--543}, title = {{Online Learning with Constraints}}, year = {2006} } @inproceedings{bezdek2002some, address = {London, UK}, author = {Bezdek, James C and Hathaway, Richard J}, booktitle = {Proceedings of the 2002 AFSS International Conference on Fuzzy Systems. 
Calcutta: Advances in Soft Computing}, isbn = {3-540-43150-0}, pages = {288--300}, publisher = {Springer-Verlag}, series = {AFSS '02}, title = {{Some Notes on Alternating Optimization}}, url = {http://portal.acm.org/citation.cfm?id=647300.721144}, year = {2002} } @inproceedings{bartlett2009regal, author = {Bartlett, Peter L. and Tewari, Ambuj}, booktitle = {Uncertainty in Artificial Intelligence}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{REGAL: A regularization based algorithm for reinforcement learning in weakly communicating MDPs}}, url = {https://arxiv.org/pdf/1205.2661.pdf}, year = {2009} } @inproceedings{zhu2005harmonic, address = {New York, NY, USA}, author = {Zhu, Xiaojin and Lafferty, John}, booktitle = {Proceedings of the 22nd international conference on Machine learning}, doi = {http://doi.acm.org/10.1145/1102351.1102484}, isbn = {1-59593-180-5}, pages = {1052--1059}, publisher = {ACM}, series = {ICML '05}, title = {{Harmonic mixtures: combining mixture models and graph-based methods for inductive and scalable semi-supervised learning}}, url = {http://doi.acm.org/10.1145/1102351.1102484}, year = {2005} } @inproceedings{sanner2005approximate, author = {Sanner, Scott and Boutilier, Craig}, booktitle = {Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence}, title = {{Approximate Linear Programming for First-Order {\{}MDPs{\}}}}, year = {2005} } @article{koutis2011combinatorial, abstract = {Several algorithms for problems including image segmentation, gradient inpainting and total variation are based on solving symmetric diagonally dominant (SDD) linear systems. These algorithms generally produce results of high quality. However, existing solvers are not always efficient, and in many cases they operate only on restricted topologies. 
The unavailability of reliably efficient solvers has arguably hindered the adoptability of approaches and algorithms based on SDD systems, especially in applications involving very large systems. A central claim of this paper is that SDD-based approaches can now be considered practical and reliable. To support our claim we present Combinatorial Multigrid (CMG), the first reliably efficient SDD solver that tackles problems in general and arbitrary weighted topologies. The solver borrows the structure and operators of multigrid algorithms, but embeds into them powerful and algebraically sound combinatorial preconditioners, based on novel tools from support graph theory. In order to present the derivation of CMG, we review and exemplify key notions of support graph theory that can also guide the future development of specialized solvers. We validate our claims on very large systems derived from imaging applications. Finally, we outline two new reductions of non-linear filtering problems to SDD systems and review the integration of SDD systems into selected algorithms. {\textcopyright} 2011 Elsevier Inc. All rights reserved.}, author = {Koutis, Ioannis and Miller, Gary L. 
and Tolliver, David}, journal = {Computer Vision and Image Understanding}, number = {12}, pages = {1638--1646}, title = {{Combinatorial preconditioners and multilevel solvers for problems in computer vision and image processing}}, url = {http://www.cs.cmu.edu/{~}./jkoutis/papers/cviu{\_}preprint.pdf}, volume = {115}, year = {2011} } @inproceedings{peters10reps, author = {Peters, Jan and M{\"{u}}lling, Katharina and Altun, Yasemin}, pages = {1607--1612}, title = {{Relative Entropy Policy Search}} } @article{luxburg2007tutorial, author = {von Luxburg, Ulrike}, journal = {Statistics and Computing}, number = {4}, pages = {395--416}, title = {{A tutorial on spectral clustering}}, url = {http://www.kyb.mpg.de/fileadmin/user{\_}upload/files/publications/attachments/Luxburg07{\_}tutorial{\_}4488{\%}5B0{\%}5D.pdf}, volume = {17}, year = {2007} } @inproceedings{besbes2014optimal, abstract = {In a multi-armed bandit (MAB) problem a gambler needs to choose at each round of play one of K arms, each characterized by an unknown reward distribution. Reward realizations are only observed when an arm is selected, and the gambler's objective is to maximize his cumulative expected earnings over some given horizon of play T. To do this, the gambler needs to acquire information about arms (exploration) while simultaneously optimizing immediate rewards (exploitation); the price paid due to this trade off is often referred to as the regret, and the main question is how small can this price be as a function of the horizon length T. This problem has been studied extensively when the reward distributions do not change over time; an assumption that supports a sharp characterization of the regret, yet is often violated in practical settings. In this paper, we focus on a MAB formulation which allows for a broad range of temporal uncertainties in the rewards, while still maintaining mathematical tractability. 
We fully characterize the (regret) complexity of this class of MAB problems by establishing a direct link between the extent of allowable reward "variation" and the minimal achievable regret. Our analysis draws some connections between two rather disparate strands of literature: the adversarial and the stochastic MAB frameworks.}, archivePrefix = {arXiv}, arxivId = {1405.3316}, author = {Besbes, Omar and Gur, Yonatan and Zeevi, Assaf}, booktitle = {Neural Information Processing Systems}, eprint = {1405.3316}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Besbes, Gur, Zeevi - 2014 - Stochastic multi-armed bandit problem with non-stationary rewards.pdf:pdf}, month = {may}, title = {{Stochastic multi-armed bandit problem with non-stationary rewards}}, url = {http://arxiv.org/abs/1405.3316}, year = {2014} } @article{yuille2003concave, author = {Yuille, A L and Rangarajan, A}, journal = {Neural Computation}, number = {4}, pages = {915--936}, publisher = {MIT Press}, title = {{The concave-convex procedure}}, volume = {15}, year = {2003} } @article{lane1999temporal, address = {New York, NY, USA}, annote = {comps{\_}anX}, author = {Lane, Terran and Brodley, Carla E}, doi = {http://doi.acm.org/10.1145/322510.322526}, issn = {1094-9224}, journal = {ACM Trans. Inf. Syst. 
Secur.}, number = {3}, pages = {295--331}, publisher = {ACM}, title = {{Temporal sequence learning and data reduction for anomaly detection}}, volume = {2}, year = {1999} } @misc{urlhttp://mplab.ucsd.edumplab, author = {$\backslash$urlhttp://mplab.ucsd.edu}, title = {{MPLab GENKI Database}} } @article{Al-Mharmah1996a, author = {Al-Mharmah, Hisham and Calvin, James M.}, journal = {Journal of Global Optimization}, month = {jan}, number = {1}, pages = {81--90}, title = {{Optimal random non-adaptive algorithm for global optimization of Brownian motion}}, url = {https://link.springer.com/article/10.1007/BF00229303}, volume = {8}, year = {1996} } @article{diaconis1991geometric, abstract = {Predicting the binding mode of flexible polypeptides to proteins is an important task that falls outside the domain of applicability of most small molecule and protein−protein docking tools. Here, we test the small molecule flexible ligand docking program Glide on a set of 19 non-$\alpha$-helical peptides and systematically improve pose prediction accuracy by enhancing Glide sampling for flexible polypeptides. In addition, scoring of the poses was improved by post-processing with physics-based implicit solvent MM- GBSA calculations. Using the best RMSD among the top 10 scoring poses as a metric, the success rate (RMSD ≤ 2.0 {\AA} for the interface backbone atoms) increased from 21{\%} with default Glide SP settings to 58{\%} with the enhanced peptide sampling and scoring protocol in the case of redocking to the native protein structure. This approaches the accuracy of the recently developed Rosetta FlexPepDock method (63{\%} success for these 19 peptides) while being over 100 times faster. Cross-docking was performed for a subset of cases where an unbound receptor structure was available, and in that case, 40{\%} of peptides were docked successfully. 
We analyze the results and find that the optimized polypeptide protocol is most accurate for extended peptides of limited size and number of formal charges, defining a domain of applicability for this approach.}, author = {Diaconis, Persi and Stroock, Daniel}, doi = {10.1214/aoap/1177005980}, issn = {1050-5164}, journal = {The Annals of Applied Probability}, title = {{Geometric Bounds for Eigenvalues of Markov Chains}}, year = {1991} } @article{kolmogorov2004energy, author = {Kolmogorov, V and Zabih, R}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, number = {2}, pages = {147--159}, publisher = {Published by the IEEE Computer Society}, title = {{What energy functions can be minimized via graph cuts?}}, volume = {26}, year = {2004} } @inproceedings{li2017hyperband, author = {Li, Lisha and Jamieson, Kevin and DeSalvo, Giulia and Talwalkar, Afshin Rostamizadeh Ameet}, booktitle = {International Conference on Learning Representations}, title = {{Hyperband: Bandit-based configuration evaluation for hyperparameter optimization}}, url = {https://openreview.net/pdf?id=ry18Ww5ee}, year = {2017} } @inproceedings{geist2019theory, author = {Geist, Matthieu and Scherrer, Bruno and Pietquin, Olivier}, booktitle = {International Conference on Machine Learning}, pages = {2160--2169}, title = {{A Theory of regularized Markov decision processes}}, url = {https://arxiv.org/pdf/1901.11275.pdf}, year = {2019} } @book{fedorov1972theory, author = {Fedorov, V V}, publisher = {Academic press}, title = {{Theory of optimal experiments}}, year = {1972} } @article{sutton1988learning, author = {Sutton, Richard}, journal = {Machine Learning}, pages = {9--44}, title = {{Learning to Predict by the Methods of Temporal Differences}}, volume = {3}, year = {1988} } @inproceedings{Kveton2010a, abstract = {This paper proposes an algorithm for real-time learning without explicit feedback. 
The algorithm combines the ideas of semi-supervised learning on graphs and online learning. In particular, it iteratively builds a graphical representation of its world and updates it with observed examples. Labeled examples constitute the initial bias of the algorithm and are provided offline, and a stream of unlabeled examples is collected online to update this bias. We motivate the algorithm, discuss how to implement it efficiently, prove a regret bound on the quality of its solutions, and apply it to the problem of real-time face recognition. Our recognizer runs in real time, and achieves superior precision and recall on 3 challenging video datasets. {\textcopyright} 2010 IEEE.}, author = {Kveton, B. and Philipose, M. and Valko, M. and Huang, L.}, booktitle = {2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition - Workshops, CVPRW 2010}, doi = {10.1109/CVPRW.2010.5543877}, isbn = {9781424470297}, title = {{Online semi-supervised perception: Real-time learning without explicit feedback}}, year = {2010} } @article{RP11, author = {Ryzhov, I and Powell, W}, journal = {Operations Research}, pages = {188--201}, title = {{Information Collection on a Graph}}, volume = {59}, year = {2011} } @techreport{statscience, author = {Bach, F and Jenatton, R and Mairal, J and Obozinski, G}, institution = {HAL}, number = {00621245}, title = {{Structured sparsity through convex optimization}}, year = {2011} } @inproceedings{grill2020bootstrap, abstract = {We introduce Bootstrap Your Own Latent (BYOL), a new approach to self-supervised image representation learning. BYOL relies on two neural networks, referred to as online and target networks, that interact and learn from each other. From an augmented view of an image, we train the online network to predict the target network representation of the same image under a different augmented view. At the same time, we update the target network with a slow-moving average of the online network. 
While state-of-the art methods intrinsically rely on negative pairs, BYOL achieves a new state of the art without them. BYOL reaches 74.3 per cent top-1 classification accuracy on ImageNet using the standard linear evaluation protocol with a ResNet-50 architecture and 79.6 per cent; with a larger ResNet. We show that BYOL performs on par or better than the current state of the art on both transfer and semi-supervised benchmarks.}, archivePrefix = {arXiv}, arxivId = {2006.07733}, author = {Grill, Jean-Bastien and Strub, Florian and Altch{\'{e}}, Florent and Tallec, Corentin and Richemond, Pierre H. and Buchatskaya, Elena and Doersch, Carl and Pires, Bernardo Avila and Guo, Zhaohan Daniel and Azar, Mohammad Gheshlaghi and Piot, Bilal and Kavukcuoglu, Koray and Munos, R{\'{e}}mi and Valko, Michal}, booktitle = {Neural Information Processing Systems}, eprint = {2006.07733}, title = {{Bootstrap your own latent: A new approach to self-supervised learning}}, year = {2020} } @inproceedings{carpentier2016revealing, abstract = {We study a graph bandit setting where the objective of the learner is to detect the most influential node of a graph by requesting as little information from the graph as possible. One of the relevant applications for this setting is marketing in social networks, where the marketer aims at finding and taking advantage of the most influential customers. The existing approaches for bandit problems on graphs require either partial or complete knowledge of the graph. In this paper, we do not assume any knowledge of the graph, but we consider a setting where it can be gradually discovered in a sequential and active way. At each round, the learner chooses a node of the graph and the only information it receives is a stochastic set of the nodes that the chosen node is currently influencing. 
To address this setting, we propose BARE, a bandit strategy for which we prove a regret guarantee that scales with the detectable dimension, a problem dependent quantity that is often much smaller than the number of nodes.}, author = {Carpentier, Alexandra and Valko, Michal}, booktitle = {International Conference on Artificial Intelligence and Statistics}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Carpentier, Valko - 2016 - Revealing graph bandits for maximizing local influence.pdf:pdf}, title = {{Revealing graph bandits for maximizing local influence}}, year = {2016} } @article{Audibert-Bubeck-Munos-2010, abstract = {We consider the problem of finding the best arm in a stochastic multi-armed bandit game. The regret of a forecaster is here defined by the gap between the mean reward of the optimal arm and the mean reward of the ultimately chosen arm. We propose a highly exploring UCB policy and a new algorithm based on successive rejects. We show that these algorithms are essentially optimal since their regret decreases exponentially at a rate which is, up to a logarithmic factor, the best possible. However, while the UCB policy needs the tuning of a parameter depending on the unobservable hardness of the task, the successive rejects policy benefits from being parameter-free, and also independent of the scaling of the rewards.}, author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi}, journal = {Order A Journal On The Theory Of Ordered Sets And Its Applications}, keywords = {learning,statistics {\&} optimisation,theory {\&} algorithms}, pages = {1--17}, title = {{Best arm identification in multi-armed bandits}}, url = {http://eprints.pascal-network.org/archive/00007409/}, year = {2010} } @inproceedings{perrault2020statistical, abstract = {We investigate stochastic combinatorial multi-armed bandit with semi-bandit feedback (CMAB). 
In CMAB, the question of the existence of an efficient policy with an optimal asymptotic regret (up to a factor poly-logarithmic with the action size) is still open for many families of distributions, including mutually independent outcomes, and more generally the multivariate sub-Gaussian family. We propose to answer the above question for these two families by analyzing variants of the Combinatorial Thompson Sampling policy (CTS). For mutually independent outcomes in [0,1] , we propose a tight analysis of CTS using Beta priors. We then look at the more general setting of multivariate sub-Gaussian outcomes and propose a tight analysis of CTS using Gaussian priors. This last result gives us an alternative to the Efficient Sampling for Combinatorial Bandit policy (ESCB), which, although optimal, is not computationally efficient.}, archivePrefix = {arXiv}, arxivId = {2006.06613}, author = {Perrault, Pierre and Boursier, Etienne and Perchet, Vianney and Valko, Michal}, booktitle = {Neural Information Processing Systems}, eprint = {2006.06613}, title = {{Statistical efficiency of Thompson sampling for combinatorial semi-bandits}}, url = {http://arxiv.org/abs/2006.06613}, year = {2020} } @book{catch-22, author = {Heller, Joseph}, publisher = {Simon {\&} Schuster}, title = {{Catch-22}}, year = {1961} } @book{manning99foundations, address = {Cambridge, Massachusetts}, author = {Manning, Christopher D and Sch{\"{u}}tze, Hinrich}, publisher = {The {\{}MIT{\}} Press}, title = {{Foundations of Statistical Natural Language Processing}}, url = {citeseer.ist.psu.edu/635422.html}, year = {1999} } @article{ghahramani1997factorial, address = {Hingham, MA, USA}, annote = {comps{\_}models}, author = {Ghahramani, Zoubin and Jordan, Michael I}, issn = {0885-6125}, journal = {Mach. 
Learn.}, number = {2-3}, pages = {245--273}, publisher = {Kluwer Academic Publishers}, title = {{Factorial Hidden Markov Models}}, url = {http://www.springerlink.com/content/w3523227075k34t4/}, volume = {29}, year = {1997} } @article{abernethy2015fighting, abstract = {We define a novel family of algorithms for the adversarial multi-armed bandit problem, and provide a simple analysis technique based on convex smoothing. We prove two main results. First, we show that regularization via the $\backslash$emph{\{}Tsallis entropy{\}}, which includes EXP3 as a special case, achieves the {\$}\backslashTheta(\backslashsqrt{\{}TN{\}}){\$} minimax regret. Second, we show that a wide class of perturbation methods achieve a near-optimal regret as low as {\$}O(\backslashsqrt{\{}TN \backslashlog N{\}}){\$} if the perturbation distribution has a bounded hazard rate. For example, the Gumbel, Weibull, Frechet, Pareto, and Gamma distributions all satisfy this key property.}, archivePrefix = {arXiv}, arxivId = {1512.04152}, author = {Abernethy, Jacob and Lee, Chansoo and Tewari, Ambuj}, eprint = {1512.04152}, title = {{Fighting Bandits with a New Kind of Smoothness}}, year = {2015} } @incollection{combettes2010proximal, author = {Combettes, P L and Pesquet, J C}, chapter = {Proximal S}, publisher = {New York: Springer-Verlag}, title = {{Fixed-Point Algorithms for Inverse Problems in Science and Engineering}}, year = {2010} } @article{rivas99dynamic, address = {Department of Genetics, Washington University, St. Louis, MO, 63110, USA.}, author = {Rivas, E and Eddy, S R}, doi = {10.1006/jmbi.1998.2436}, issn = {0022-2836}, journal = {Journal of Molecular Biology}, keywords = {folding,pseudoknot,rna}, number = {5}, pages = {2053--2068}, title = {{A dynamic programming algorithm for {\{}RNA{\}} structure prediction including pseudoknots.}}, url = {http://dx.doi.org/10.1006/jmbi.1998.2436}, volume = {285}, year = {1999} } @article{kelner2012spectral, author = {Kelner, Jonathan A. 
and Levin, Alex}, journal = {Theory of Computing Systems}, number = {2}, pages = {243--262}, title = {{Spectral sparsification in the semi-streaming setting}}, volume = {53}, year = {2012} } @inproceedings{audiffren2014messi, abstract = {A popular approach to apprenticeship learning (AL) is to formulate it as an inverse reinforcement learning (IRL) problem. The MaxEnt-IRL algorithm successfully integrates the maximum entropy principle into IRL and unlike its predecessors, it resolves the ambiguity arising from the fact that a possibly large number of policies could match the expert's behavior. In this paper, we study an AL setting in which in addition to the expert's trajectories, a number of unsupervised trajectories is available. We introduce MESSI, a novel algorithm that combines MaxEnt-IRL with principles coming from semi-supervised learning. In particular, MESSI integrates the unsupervised data into the MaxEnt-IRL framework using a pairwise penalty on trajectories. Empirical results in a highway driving and grid-world problems indicate that MESSI is able to take advantage of the unsupervised trajectories and improve the performance of MaxEnt-IRL.}, author = {Audiffren, Julien and Valko, Michal and Lazaric, Alessandro and Ghavamzadeh, Mohammad}, booktitle = {NIPS Workshop on Novel Trends and Applications in Reinforcement Learning}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Audiffren et al. 
- 2014 - MESSI Maximum entropy semi-supervised inverse reinforcement learning.pdf:pdf}, title = {{MESSI: Maximum entropy semi-supervised inverse reinforcement learning}}, year = {2014} } @phdthesis{gordon1999approximate, author = {Gordon, Geoffrey}, school = {Carnegie Mellon University}, title = {{Approximate Solutions to {\{}Markov{\}} Decision Processes}}, year = {1999} } @inproceedings{blum1996on-line, author = {Blum, Avrim}, booktitle = {Online Algorithms}, pages = {306--325}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {{On-line Algorithms in Machine Learning}}, volume = {1442}, year = {1996} } @article{hazel2000multivariate, annote = {comps{\_}anX}, author = {Hazel, G G}, doi = {10.1109/36.843012}, issn = {0196-2892}, journal = {Geoscience and Remote Sensing, IEEE Transactions on}, keywords = {Markov processes,anomaly detection,first-order isotropic texture model,geophysical measurement technique,geophysical signal processing,geophysical techniques,image processing,image segmentation,image texture,joint spatial-spectral modeling,land surface,multidimensional signal processing,multispectral imagery,multispectral scene segmentation,multivariate Gaussian MRF,multivariate method,receiver operating characteristic,remote sensing,terrain mapping,terrain mappingGaussian Markov random field textu,vector observations}, month = {may}, number = {3}, pages = {1199--1211}, title = {{Multivariate Gaussian MRF for multispectral scene segmentation and anomaly detection}}, volume = {38}, year = {2000} } @article{iwata2001combinatorial, author = {Iwata, S and Fleischer, L and Fujishige, Satoru}, journal = {Journal of the ACM}, number = {4}, pages = {761--777}, publisher = {ACM}, title = {{A combinatorial strongly polynomial algorithm for minimizing submodular functions}}, volume = {48}, year = {2001} } @inproceedings{shalev2007pegasos, author = {Shalev-Shwartz, S and Singer, Y and Srebro, N}, booktitle = {Proc. 
ICML}, title = {{Pegasos: Primal estimated sub-gradient solver for svm}}, year = {2007} } @book{filar1996competitive, address = {New York, NY}, author = {Filar, Jerzy and Vrieze, Koos}, isbn = {1461284813 9781461284819}, publisher = {Springer New York}, title = {{Competitive Markov decision processes}}, year = {2012} } @inproceedings{sprechmann2010collaborative, author = {Sprechmann, P and Ramirez, I and Sapiro, G and Eldar, Y}, booktitle = {Conf. Information Sciences and Systems (CISS)}, title = {{Collaborative hierarchical sparse modeling}}, year = {2010} } @article{lecchini2009stochastic, abstract = {We introduce bounds on the finite-time performance of Markov chain Monte Carlo algorithms in approaching the global solution of stochastic optimization problems over continuous domains. A comparison with other state-of-the-art methods having finite-time guarantees for solving stochastic programming problems is included.}, archivePrefix = {arXiv}, arxivId = {0906.1055}, author = {Lecchini-Visintini, A. and Lygeros, J. and Maciejowski, J.}, eprint = {0906.1055}, month = {jun}, pages = {29}, title = {{Stochastic optimization on continuous domains with finite-time guarantees by Markov chain Monte Carlo methods}}, url = {http://arxiv.org/abs/0906.1055}, year = {2009} } @inproceedings{KP11, author = {Kapralov, Michael and Panigrahy, Rina}, pages = {828--836}, title = {{Prediction strategies without loss}}, year = {2011} } @incollection{hauskrecht2006fundamentals, abstract = {Finding reliable, meaningful patterns in data with high numbers of attributes can be extremely difficult. Feature selection helps us to decide what attributes or combination of attributes are most important for finding these patterns. In this chapter, we study feature selection methods for building classification models from high-throughput genomic (microarray) and proteomic (mass spectrometry) data sets. Thousands of feature candidates must be analyzed, compared and combined in such data sets. 
We describe the basics of four different approaches used for feature selection and illustrate their effects on an MS cancer proteomic data set. The closing discussion provides assistance in performing an analysis in high-dimensional genomic and proteomic data.}, author = {Hauskrecht, Milos and Pelikan, Richard and Valko, Michal and Lyons-Weiler, James}, booktitle = {Fundamentals of Data Mining in Genomics and Proteomics}, keywords = {misovalko}, mendeley-tags = {misovalko}, publisher = {Springer}, title = {{Feature selection and dimensionality reduction in genomics and proteomics}}, year = {2006} } @inproceedings{das2008anomaly, address = {New York, NY, USA}, author = {Das, Kaustav and Schneider, Jeff and Neill, Daniel B}, booktitle = {Proceeding of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining}, isbn = {978-1-60558-193-4}, keywords = {anomaly detection,machine learning,pattern detection}, pages = {169--176}, publisher = {ACM}, series = {KDD '08}, title = {{Anomaly pattern detection in categorical datasets}}, year = {2008} } @techreport{zhu2003semi-superviseda, author = {Zhu, Xiaojin and Ghahramani, Zoubin and Lafferty, John}, booktitle = {Proceedings of the 20th International Conference on Machine Learning}, institution = {School of CS, CMU}, pages = {912--919}, title = {{Semi-Supervised Learning: From Gaussian Fields to Gaussian Processes}}, year = {2003} } @article{lovasz1982submodular, author = {Lov{\'{a}}sz, L}, journal = {Mathematical programming: The state of the art, Bonn}, pages = {235--257}, title = {{Submodular functions and convexity}}, year = {1982} } @article{daniel1973stability, author = {Daniel, James}, journal = {Mathematical Programming}, pages = {41--53}, title = {{Stability of the solution of definite quadratic programs}}, volume = {5}, year = {1973} } @incollection{mccormick2006submodular, author = {{S. 
Thomas McCormick}}, booktitle = {Handbook on Discrete Optimization}, chapter = {Submodular}, editor = {Nemhauser, G. and Aardal, K. and Weismantel, R.}, pages = {321--391}, publisher = {Elsevier}, title = {{Submodular function minimization}}, year = {2006} } @article{pazzani2007content, abstract = {This chapter discusses content-based recommendation systems, i.e., systems that recommend an item to a user based upon a description of the item and a profile of the users interests. Content-based recommendation systems may be used in a variety of domains ranging from recommending web pages, news articles, restaurants, television programs, and items for sale. Although the details of various systems differ, content-based recommendation systems share in common a means for describing the items that may be recommended, a means for creating a profile of the user that describes the types of items the user likes, and a means of comparing items to the user profile to determine what to re commend. The profile is often created and updated automatically in response to feedback on the desirability of items that have been presented to the user.}, author = {Pazzani, Michael J and Billsus, Daniel}, journal = {The adaptive web}, title = {{Content-Based Recommendation Systems}}, year = {2007} } @article{girvan2002community, abstract = {A number of recent studies have focused on the statistical properties of networked systems such as social networks and the Worldwide Web. Researchers have concentrated particularly on a few properties that seem to be common to many networks: the small-world property, power-law degree distributions, and network transitivity. In this article, we highlight another property that is found in many networks, the property of community structure, in which network nodes are joined together in tightly knit groups, between which there are only looser connections. 
We propose a method for detecting such communities, built around the idea of using centrality indices to find community boundaries. We test our method on computer-generated and real-world graphs whose community structure is already known and find that the method detects this known structure with high sensitivity and reliability. We also apply the method to two networks whose community structure is not well known--a collaboration network and a food web--and find that it detects significant and informative community divisions in both cases.}, author = {Girvan, Michelle and Newman, Mark E J}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Girvan, Newman - 2002 - Community structure in social and biological networks.pdf:pdf}, journal = {National Academy of Sciences of the United States of America}, keywords = {Algorithms,Animals,Community Networks,Computer Simulation,Humans,Models,Nerve Net,Nerve Net: physiology,Neural Networks (Computer),Social Behavior,Theoretical}, number = {12}, pages = {7821--6}, title = {{Community structure in social and biological networks.}}, volume = {99}, year = {2002} } @techreport{domingues2020regret, abstract = {We consider the exploration-exploitation dilemma in finite-horizon reinforcement learning problems whose state-action space is endowed with a metric. We introduce Kernel-UCBVI, a model-based optimistic algorithm that leverages the smoothness of the MDP and a non-parametric kernel estimator of the rewards and transitions to efficiently balance exploration and exploitation. Unlike existing approaches with regret guarantees, it does not use any kind of partitioning of the state-action space. For problems with K episodes and horizon H, we provide a regret bound of O H 3 K max(1 2 , 2d 2d+1) , where d is the covering dimension of the joint state-action space. 
We empirically validate Kernel-UCBVI on discrete and continuous MDPs.}, archivePrefix = {arXiv}, arxivId = {2004.05599}, author = {Domingues, Omar Darwiche and M{\'{e}}nard, Pierre and Pirotta, Matteo and Kaufmann, Emilie and Valko, Michal}, eprint = {2004.05599}, title = {{Regret bounds for kernel-based reinforcement learning}}, url = {http://arxiv.org/abs/2004.05599}, year = {2020} } @article{Haasdonk2010, author = {Haasdonk, Bernard and Pekalska, Elzbieta}, journal = {Advances in Data Analysis, Data Handling and Business Intelligence}, pages = {351--361}, publisher = {Springer}, title = {{Classification with kernel Mahalanobis distance classifiers}}, year = {2010} } @inproceedings{valko2013finite, abstract = {We tackle the problem of online reward maximisation over a large finite set of actions described by their contexts. We focus on the case when the number of actions is too big to sample all of them even once. However we assume that we have access to the similarities between actions' contexts and that the expected reward is an arbitrary linear function of the contexts' images in the related reproducing kernel Hilbert space (RKHS). We propose KernelUCB, a kernelised UCB algorithm, and give a cumulative regret bound through a frequentist analysis. For contextual bandits, the related algorithm GP-UCB turns out to be a special case of our algorithm, and our finite-time analysis improves the regret bound of GP-UCB for the agnostic case, both in the terms of the kernel-dependent quantity and the RKHS norm of the reward function. 
Moreover, for the linear kernel, our regret bound matches the lower bound for contextual linear bandits.}, author = {Valko, Michal and Korda, Nathan and Munos, R{\'{e}}mi and Flaounas, Ilias and Cristianini, Nelo}, booktitle = {Uncertainty in Artificial Intelligence}, title = {{Finite-time analysis of kernelised contextual bandits}}, url = {https://hal.inria.fr/hal-00826946/document}, year = {2013} } @techreport{garivier2017thresholding, abstract = {We analyze the sample complexity of the thresholding bandit problem, with and without the assumption that the mean values of the arms are increasing. In each case, we provide a lower bound valid for any risk $\delta$ and any $\delta$-correct algorithm; in addition, we propose an algorithm whose sample complexity is of the same order of magnitude for small risks. This work is motivated by phase 1 clinical trials, a practically important setting where the arm means are increasing by nature, and where no satisfactory solution is available so far.}, archivePrefix = {arXiv}, arxivId = {1711.04454}, author = {Garivier, Aur{\'{e}}lien and M{\'{e}}nard, Pierre and Rossi, Laurent}, eprint = {1711.04454}, month = {nov}, title = {{Thresholding Bandit for Dose-ranging: The Impact of Monotonicity}}, url = {http://arxiv.org/abs/1711.04454}, year = {2017} } @article{freund99perceptron, address = {Hingham, MA, USA}, author = {Freund, Yoav and Schapire, Robert E}, doi = {10.1023/A:1007662407062}, issn = {0885-6125}, journal = {Machine Learning}, number = {3}, pages = {277--296}, publisher = {Kluwer Academic Publishers}, title = {{Large Margin Classification Using the Perceptron Algorithm}}, volume = {37}, year = {1999} } @inproceedings{huggins2019scalable, author = {Huggins, Jonathan H. 
and Campbell, Trevor and Kasprzak, Miko{\l}aj and Broderick, Tamara}, booktitle = {International Conference on Artificial Intelligence and Statistics}, month = {apr}, title = {{Scalable Gaussian process inference with finite-data mean and variance guarantees}}, url = {http://proceedings.mlr.press/v89/huggins19a/huggins19a.pdf}, year = {2019} } @incollection{easley2010networks, abstract = {Are all film stars linked to Kevin Bacon? Why do the stock markets rise and fall sharply on the strength of a vague rumour? How does gossip spread so quickly? Are we all related through six degrees of separation? There is a growing awareness of the complex networks that pervade modern society. We see them in the rapid growth of the Internet, the ease of global communication, the swift spread of news and information, and in the way epidemics and financial crises develop with startling speed and intensity. This introductory book on the new science of networks takes an interdisciplinary approach, using economics, sociology, computing, information science and applied mathematics to address fundamental questions about the links that connect us, and the ways that our decisions can have consequences for others.}, author = {Easley, David and Kleinberg, Jon}, publisher = {Cambridge University Press}, title = {{Networks, Crowds, and Markets: Reasoning About a Highly Connected World}}, year = {2010} } @phdthesis{bubeck2010bandits, author = {Bubeck, S{\'{e}}bastien}, keywords = {bandits}, mendeley-tags = {bandits}, school = {Universit{\{}{\'{e}}{\}} de Lille 1}, title = {{Bandits Games and Clustering Foundations}}, year = {2010} } @article{beygelzimer2010contextual, abstract = {We address the problem of learning in an online, bandit setting where the learner must repeatedly select among K actions, but only receives partial feedback based on its choices. 
We establish two new facts: First, using a new algorithm called Exp4.P, we show that it is possible to compete with the best in a set of N experts with probability {\$}1-delta while incurring regret at most O(sqrtKTln(N/delta)) over T time steps. The new algorithm is tested empirically in a large-scale, real-world dataset. Second, we give a new algorithm called VE that competes with a possibly infinite set of policies of VC-dimension d while incurring regret at most O(sqrtT(dln(T) + ln (1/delta))) with probability {\$}1-delta. These guarantees improve on those of all previous algorithms, whether in a stochastic or adversarial environment, and bring us closer to providing supervised learning type guarantees for the contextual bandit setting.}, author = {Beygelzimer, Alina and Langford, John and Li, Lihong and Reyzin, Lev and Schapire, Robert E}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Beygelzimer et al. - 2010 - Contextual Bandit Algorithms with Supervised Learning Guarantees.pdf:pdf}, journal = {Machine Learning}, pages = {14}, title = {{Contextual Bandit Algorithms with Supervised Learning Guarantees}}, url = {http://arxiv.org/abs/1002.4058}, volume = {15}, year = {2010} } @article{Pol67, author = {Polyak, B}, journal = {Soviet Math. Doklady}, pages = {33--36}, title = {{A general method for solving extremal problems}}, volume = {174}, year = {1967} } @inproceedings{smola2000sparse, author = {Smola, A and Scholkopf, B}, booktitle = {Proceedings of the 17th International Conference on Machine Learning}, title = {{Sparse greedy matrix approximation for machine learning}}, year = {2000} } @article{gomez2003immuno-fuzzy, author = {Gomez, J and Gonzalez, F and Dasgupta, D}, doi = {10.1109/FUZZ.2003.1206605}, journal = {Fuzzy Systems, 2003. FUZZ '03. 
The 12th IEEE International Conference on}, keywords = {fuzzy logic,fuzzy rules,fuzzy set theory,immuno fuzzy approach,real data sets,security of data anomaly detection,synthetic sets}, month = {may}, pages = {1219--1224 vol.2}, title = {{An immuno-fuzzy approach to anomaly detection}}, volume = {2}, year = {2003} } @inproceedings{BMSS09, author = {Bubeck, S and Munos, R and Stoltz, G and Szepesvari, Cs.}, booktitle = {Advances in Neural Information Processing Systems (NIPS)}, pages = {201--208}, title = {{Online Optimization in $\mathcal{X}$-Armed Bandits}}, year = {2009} } @article{akoglu2015graph, author = {Akoglu, Leman and Tong, Hanghang and Koutra, Danai}, doi = {10.1007/s10618-014-0365-y}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Akoglu, Tong, Koutra - 2015 - Graph based anomaly detection and description a survey.pdf:pdf}, issn = {1384-5810}, journal = {Data Mining and Knowledge Discovery}, month = {may}, number = {3}, pages = {626--688}, publisher = {Springer US}, title = {{Graph based anomaly detection and description: a survey}}, volume = {29}, year = {2015} } @article{browne2012survey, author = {Browne, Cameron B. and Powley, Edward and Whitehouse, Daniel and Lucas, Simon M. and Cowling, Peter I. and Rohlfshagen, Philipp and Tavener, Stephen and Perez, Diego and Samothrakis, Spyridon and Colton, Simon}, journal = {IEEE Transactions on Computational Intelligence and AI in Games}, number = {1}, pages = {1--43}, title = {{A survey of Monte Carlo tree search methods}}, volume = {4}, year = {2012} } @misc{TheMendeleySupportTeam2011b, abstract = {A quick introduction to Mendeley. 
Learn how Mendeley creates your personal digital library, how to organize and annotate documents, how to collaborate and share with colleagues, and how to generate citations and bibliographies.}, address = {London}, author = {{The Mendeley Support Team}}, booktitle = {Mendeley Desktop}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/The Mendeley Support Team - 2011 - Getting Started with Mendeley.pdf:pdf}, keywords = {Mendeley,how-to,user manual}, pages = {1--16}, publisher = {Mendeley Ltd.}, title = {{Getting Started with Mendeley}}, url = {http://www.mendeley.com}, year = {2011} } @book{good1994permutation, author = {Good, P}, publisher = {Springer-Verlag}, title = {{Permutation Tests: A Practical Guide to Resampling Methods for Testing Hypothesis}}, year = {1994} } @book{shawe2004kernel, author = {Shawe-Taylor, John and Cristianini, Nelo}, publisher = {Cambridge University Press}, title = {{Kernel methods for pattern analysis}}, year = {2004} } @article{gyorgy07sp, author = {Gy{\"{o}}rgy, A and Linder, T and Lugosi, G and Ottucs{\'{a}}k, Gy.}, issn = {1532-4435}, journal = {Journal of Machine Learning Research}, pages = {2369--2403}, publisher = {JMLR.org}, title = {{The On-Line Shortest Path Problem Under Partial Monitoring}}, volume = {8}, year = {2007} } @inproceedings{hwang1997predictive, author = {Hwang, Chi-Hong and Wu, Allen}, booktitle = {Proceedings of the 1997 IEEE / ACM International Conference on Computer-Aided Design}, pages = {28--32}, title = {{A Predictive System Shutdown Method for Energy Saving of Event-Driven Computation}}, year = {1997} } @article{barabasi1999emergence, abstract = {Systems as diverse as genetic networks or the world wide web are best described as networks with complex topology. A common property of many large networks is that the vertex connectivities follow a scale-free power-law distribution. 
This feature is found to be a consequence of the two generic mechanisms that networks expand continuously by the addition of new vertices, and new vertices attach preferentially to already well connected sites. A model based on these two ingredients reproduces the observed stationary scale-free distributions, indicating that the development of large networks is governed by robust self-organizing phenomena that go beyond the particulars of the individual systems.}, author = {Barab{\'{a}}si, Albert-L{\'{a}}szl{\'{o}} and Albert, R{\'{e}}ka}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Barab{\'{a}}si, Albert - 1999 - Emergence of scaling in random networks.pdf:pdf}, journal = {Science}, keywords = {complex networks,network,networks}, mendeley-tags = {complex networks,network,networks}, pages = {11}, title = {{Emergence of scaling in random networks}}, volume = {286}, year = {1999} } @inproceedings{haarnoja2018soft, archivePrefix = {arXiv}, arxivId = {1801.01290}, author = {Haarnoja, Tuomas and Zhou, Aurick and Abbeel, Pieter and Levine, Sergey}, booktitle = {International Conference on Machine Learning}, eprint = {1801.01290}, title = {{Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor}}, url = {https://arxiv.org/abs/1801.01290}, year = {2018} } @article{Martinet1978, author = {Martinet, B}, journal = {ESAIM: Mathematical Modelling and Numerical Analysis - Mod{\'{e}}lisation Math{\'{e}}matique et Analyse Num{\'{e}}rique}, number = {2}, pages = {153--171}, publisher = {EDP Sciences}, title = {{Perturbation des m{\'{e}}thodes d'optimisation. 
Applications}}, url = {http://eudml.org/doc/193317}, volume = {12}, year = {1978} } @inproceedings{dash2002exact, address = {San Francisco, CA, USA}, annote = {comps{\_}models}, author = {Dash, Denver and Cooper, Gregory F}, booktitle = {ICML '02: Proceedings of the Nineteenth International Conference on Machine Learning}, isbn = {1-55860-873-7}, pages = {91--98}, publisher = {Morgan Kaufmann Publishers Inc.}, title = {{Exact model averaging with naive {\{}B{\}}ayesian classifiers}}, url = {http://www.pittsburgh.intel-research.net/{~}dhdash//docs/icml{\_}02.pdf}, year = {2002} } @inproceedings{valko2008learning, abstract = {Multiple technologies that measure expression levels of protein mixtures in the human body offer a potential for detection and understanding the disease. The recent increase of these technologies prompts researchers to evaluate the individual and combined utility of data generated by the technologies. In this work, we study two data sources to measure the expression of protein mixtures in the human body: whole-sample MS profiling and multiplexed protein arrays. We investigate the individual and combined utility of these technologies by learning and testing a variety of classification models on the data from a pancreatic cancer study. We show that for the combination of these two (heterogeneous) datasets, classification models that work well on one of them individually fail on the combination of the two datasets. 
We study and propose a class of model fusion methods that acknowledge the differences and try to reap most of the benefits from their combination.}, author = {Valko, Michal and Pelikan, Richard and Hauskrecht, Milos}, booktitle = {AMIA Summit on Translational Bioinformatics}, keywords = {misovalko}, mendeley-tags = {misovalko}, month = {mar}, title = {{Learning predictive models for multiple heterogeneous proteomic datasources}}, year = {2008} } @inproceedings{zhu2009some, address = {Piscataway, NJ, USA}, author = {Zhu, Xiaojin and Goldberg, Andrew B and Khot, Tushar}, booktitle = {ICME'09: Proceedings of the 2009 IEEE international conference on Multimedia and Expo}, isbn = {978-1-4244-4290-4}, pages = {1504--1507}, publisher = {IEEE Press}, title = {{Some new directions in graph-based semi-supervised learning}}, year = {2009} } @techreport{malherbe2017global, abstract = {The goal of the paper is to design sequential strategies which lead to efficient optimization of an unknown function under the only assumption that it has a finite Lipschitz constant. We first identify sufficient conditions for the consistency of generic sequential algorithms and formulate the expected minimax rate for their performance. We introduce and analyze a first algorithm called LIPO which assumes the Lipschitz constant to be known. Consistency, minimax rates for LIPO are proved, as well as fast rates under an additional H{\"o}lder like condition. An adaptive version of LIPO is also introduced for the more realistic setup where the Lipschitz constant is unknown and has to be estimated along with the optimization. 
Similar theoretical guarantees are shown to hold for the adaptive LIPO algorithm and a numerical assessment is provided at the end of the paper to illustrate the potential of this strategy with respect to state-of-the-art methods over typical benchmark problems for global optimization.}, author = {Malherbe, C{\'{e}}dric and Vayatis, Nicolas}, month = {mar}, title = {{Global optimization of Lipschitz functions}}, url = {http://arxiv.org/abs/1703.02628}, year = {2017} } @inproceedings{audiffren2015maximum, abstract = {A popular approach to apprenticeship learning (AL) is to formulate it as an inverse reinforcement learning (IRL) problem. The MaxEnt-IRL algorithm successfully integrates the maximum entropy principle into IRL and unlike its predecessors, it resolves the ambiguity arising from the fact that a possibly large number of policies could match the expert's behavior. In this paper, we study an AL setting in which in addition to the expert's trajectories, a number of unsupervised trajectories is available. We introduce MESSI, a novel algorithm that combines MaxEnt-IRL with principles coming from semi-supervised learning. In particular, MESSI integrates the unsupervised data into the MaxEnt-IRL framework using a pairwise penalty on trajectories. Empirical results in a highway driving and grid-world problems indicate that MESSI is able to take advantage of the unsupervised trajectories and improve the performance of MaxEnt-IRL.}, author = {Audiffren, Julien and Valko, Michal and Lazaric, Alessandro and Ghavamzadeh, Mohammad}, booktitle = {International Joint Conferences on Artificial Intelligence}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Audiffren et al. 
- 2015 - MESSI Maximum entropy semi-supervised inverse reinforcement learning.pdf:pdf}, title = {{MESSI: Maximum entropy semi-supervised inverse reinforcement learning}}, year = {2015} } @book{puterman1994markov, address = {New York, NY}, author = {Puterman, Martin L.}, howpublished = {Hardcover}, isbn = {0471619779}, publisher = {John Wiley {\&} Sons}, title = {{Markov decision processes: Discrete stochastic dynamic programming}}, url = {https://onlinelibrary.wiley.com/doi/book/10.1002/9780470316887}, year = {1994} } @misc{asuncion2007uci, author = {Asuncion, A and Newman, D J}, institution = {University of California, Irvine, School of Information and Computer Sciences}, title = {{{\{}UCI{\}} Machine Learning Repository}}, url = {http://www.ics.uci.edu/{~}mlearn/MLRepository.html}, year = {2007} } @inproceedings{xing2003distance, annote = {comps{\_}distance}, author = {Xing, Eric P and Ng, Andrew Y and Jordan, Michael I and Russell, Stuart}, booktitle = {Advances in Neural Information Processing Systems 15}, pages = {505--512}, publisher = {MIT Press}, title = {{Distance metric learning, with application to clustering with side-information}}, url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.58.3667}, year = {2003} } @incollection{rakhlin12rr, author = {Rakhlin, Sasha and Shamir, Ohad and Sridharan, Karthik}, booktitle = {Advances in Neural Information Processing Systems 25}, pages = {2150--2158}, title = {{Relax and Randomize: From Value to Algorithms}}, year = {2012} } @inproceedings{papadimitriou2003cross-outlier, author = {Papadimitriou, Spiros and Faloutsos, Christos}, booktitle = {Advances in Spatial and Temporal Databases, 8th International Symposium, SSTD 2003, Santorini Island, Greece, July 24-27, 2003, Proceedings}, editor = {Hadzilacos, Thanasis and Manolopoulos, Yannis and Roddick, John F and Theodoridis, Yannis}, pages = {199--213}, title = {{Cross-Outlier Detection}}, volume = {2750}, year = {2003} } 
@inproceedings{babenko2009visual, author = {Babenko, Boris and Yang, Ming-Hsuan and Belongie, Serge}, booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition}, title = {{Visual Tracking with Online Multiple Instance Learning}}, year = {2009} } @inproceedings{mahadevan2006learning, author = {Mahadevan, Sridhar and Maggioni, Mauro and Ferguson, Kimberly and Osentoski, Sarah}, booktitle = {Proceedings of the 21st National Conference on Artificial Intelligence}, title = {{Learning Representation and Control in Continuous {\{}Markov{\}} Decision Processes}}, year = {2006} } @article{goldengorin1999data, author = {Goldengorin, B and Sierksma, G and Tijssen, G A and Tso, M}, journal = {Management Science}, pages = {1539--1551}, publisher = {JSTOR}, title = {{The data-correcting algorithm for the minimization of supermodular functions}}, year = {1999} } @article{ahmed2011maximizing, author = {Ahmed, S and Atamt{\"{u}}rk, A}, journal = {Mathematical Programming: Series A and B}, number = {1-2}, pages = {149--169}, publisher = {Springer-Verlag New York, Inc.}, title = {{Maximizing a class of submodular utility functions}}, volume = {128}, year = {2011} } @book{dubitzky2007fundamentals, abstract = {Finding reliable, meaningful patterns in data with high numbers of attributes can be extremely difficult. Feature selection helps us to decide what attributes or combination of attributes are most important for finding these patterns. In this chapter, we study feature selection methods for building classification models from high-throughput genomic (microarray) and proteomic (mass spectrometry) data sets. Thousands of feature candidates must be analyzed, compared and combined in such data sets. We describe the basics of four different approaches used for feature selection and illustrate their effects on an MS cancer proteomic data set. 
The closing discussion provides assistance in performing an analysis in high-dimensional genomic and proteomic data.}, author = {Dubitzky, W and Granzow, M and Berrar, Dp}, booktitle = {Vasa}, pages = {149--172}, title = {{Fundamentals of data mining in genomics and proteomics}}, year = {2007} } @inproceedings{syed2010unsupervised, author = {Syed, Zeeshan and Rubinfeld, Ilan}, booktitle = {ICML}, editor = {F{\"{u}}rnkranz, Johannes and Joachims, Thorsten}, pages = {1023--1030}, publisher = {Omnipress}, title = {{Unsupervised Risk Stratification in Clinical Datasets: Identifying Patients at Risk of Rare Outcomes}}, year = {2010} } @misc{leskovec2014snapnets, author = {Leskovec, Jure and Krevl, Andrej}, howpublished = {http://snap.stanford.edu/data}, month = {jun}, title = {{SNAP datasets: Stanford large network dataset collection}}, year = {2014} } @article{lanczos1950iteration, abstract = {The present investigation designs a systematic method for finding the latent roots and the principal axes of a matrix, without reducing the order of the matrix. It is characterized by a wide field of applicability and great accuracy, since the accumulation of rounding errors is avoided, through the process of "minimized iterations". 
Moreover, the method leads to a well convergent successive approximation procedure by which the solution of integral equations of the Fredholm type and the solution of the eigenvalue problem of linear differential and integral operators may be accomplished.}, author = {Lanczos, C.}, doi = {10.6028/jres.045.026}, issn = {0091-0635}, journal = {Journal of Research of the National Bureau of Standards}, number = {4}, pages = {255}, title = {{An iteration method for the solution of the eigenvalue problem of linear differential and integral operators}}, url = {http://nvlpubs.nist.gov/nistpubs/jres/045/jresv45n4p255{\_}A1b.pdf}, volume = {45}, year = {1950} } @inproceedings{jin2001mining, address = {New York, NY, USA}, author = {Jin, Wen and Tung, Anthony K H and Han, Jiawei}, booktitle = {KDD '01: Proceedings of the seventh ACM SIGKDD international conference on Knowledge discovery and data mining}, doi = {http://doi.acm.org/10.1145/502512.502554}, isbn = {1-58113-391-X}, pages = {293--298}, publisher = {ACM Press}, title = {{Mining top-n local outliers in large databases}}, year = {2001} } @article{hoorfar2008inequalities, author = {Hoorfar, Abdolhossein and Hassani, Mehdi}, journal = {Journal of Inequalities in Pure and Applied Mathematics}, number = {2}, pages = {5--9}, title = {{Inequalities on the Lambert W function and hyperpower function}}, url = {https://www.emis.de/journals/JIPAM/images/107{\_}07{\_}JIPAM/107{\_}07.pdf}, volume = {9}, year = {2008} } @techreport{Nesterov2007, author = {Nesterov, Y}, institution = {Center for Operations Research and Econometrics (CORE), Catholic University of Louvain}, title = {{Gradient methods for minimizing composite objective function}}, year = {2007} } @article{gine2010confidence, author = {Gin{\'{e}}, Evarist and Nickl, Richard}, journal = {The Annals of Statistics}, number = {2}, pages = {1122--1170}, title = {{Confidence bands in density estimation}}, volume = {38}, year = {2010} } @article{audibert2010regret, author = 
{Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien}, journal = {Journal of Machine Learning Research}, keywords = {bandits}, mendeley-tags = {bandits}, month = {dec}, pages = {2785--2836}, publisher = {JMLR.org}, title = {{Regret bounds and minimax policies under partial monitoring}}, volume = {11}, year = {2010} } @inproceedings{ng2000algorithms, abstract = {This paper addresses the problem of inverse reinforcement learning (IRL) in Markov decision processes, that is, the problem of extracting a reward function given observed, optimal behaviour. IRL may be useful for apprenticeship learning to acquire skilled behaviour, and for ascertaining the reward function being optimized by a natural system. We rst characterize the set of all reward functions for which a given policy is optimal. We then derive three algorithms for IRL. The rst two deal with the case where the entire policy is known; we handle tabulated reward functions on a nite state space and linear functional approximation of the reward function over a potentially in- nite state space. The third algorithm deals with the more realistic case in which the policy is known only through a nite set of observed trajectories. In all cases, a key issue is degeneracythe existence of a large set of reward functions for which the observed policy is optimal. 
To remove...}, author = {Ng, Andrew and Russell, Stuart}, booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning}, doi = {10.2460/ajvr.67.2.323}, editor = {{De Sousa}, Jorge Pinho}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Ng, Russell - 2000 - Algorithms for inverse reinforcement learning.pdf:pdf}, issn = {00029645}, pages = {663--670}, pmid = {16454640}, publisher = {Morgan Kaufmann Publishers Inc.}, title = {{Algorithms for inverse reinforcement learning}}, url = {http://www-cs.stanford.edu/people/ang/papers/icml00-irl.pdf}, year = {2000} } @inproceedings{audibert2011minimax, author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien and Lugosi, Gabor}, booktitle = {Proceedings of the 24th annual Conference On Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, series = {COLT '11}, title = {{Minimax Policies for Combinatorial Prediction Games}}, year = {2011} } @inproceedings{ratliff07subgradient, author = {Ratliff, Nathan and Bagnell, James (Drew) and Zinkevich, Martin}, pages = {2:380--387}, title = {{({\{}O{\}}nline) {\{}S{\}}ubgradient Methods for Structured Prediction}} } @inproceedings{yu09Modulated, author = {Yu, Jia Yuan and Mannor, Shie}, booktitle = {Joint 48th IEEE Conference on Decision and Control and 28th Chinese Control Conference}, pages = {2946--2953}, publisher = {IEEE Press}, title = {{Arbitrarily modulated {\{}M{\}}arkov decision processes}}, year = {2009} } @article{Pis75, author = {Pisier, G}, journal = {Israel Journal of Mathematics}, pages = {326--350}, title = {{Martingales with values in uniformly convex spaces}}, volume = {20}, year = {1975} } @article{spielman_spectral_2011, author = {Spielman, Daniel A and Teng, Shang-Hua}, journal = {SIAM Journal on Computing}, number = {4}, pages = {981--1025}, title = {{Spectral sparsification of graphs}}, url = {http://epubs.siam.org/doi/abs/10.1137/08074489X}, volume = {40}, year = {2011} } 
@inproceedings{balluchi00automotiveengine, author = {Balluchi, A and Benvenuti, L and {Di Benedetto}, M D and Pinello, C and Sangiovanni-Vincentelli, A L}, booktitle = {Proceedings of the IEEE}, pages = {888--912}, title = {{Automotive engine control and hybrid systems: challenges and opportunities}}, year = {2000} } @inproceedings{klein03a*parsing, address = {Morristown, NJ, USA}, author = {Klein, Dan and Manning, Christopher D}, booktitle = {NAACL '03: Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology}, keywords = {algorithm,nlp,parsing,viterbi}, pages = {40--47}, publisher = {Association for Computational Linguistics}, title = {{{\{}A{\}}{\^{}}* parsing: fast exact Viterbi parse selection}}, url = {http://portal.acm.org/citation.cfm?id=1073461}, year = {2003} } @article{gallo1989fast, author = {Gallo, G and Grigoriadis, M D and Tarjan, R E}, journal = {SIAM Journal on Computing}, number = {1}, pages = {30--55}, title = {{A fast parametric maximum flow algorithm and applications}}, volume = {18}, year = {1989} } @incollection{fergus2009semi-supervised, author = {Fergus, Rob and Weiss, Yair and Torralba, Antonio}, booktitle = {Neural Information Processing Systems}, title = {{Semi-Supervised Learning in Gigantic Image Collections}}, year = {2009} } @inproceedings{hazan2011beyond, author = {Hazan, Elad and Kale, Satyen}, booktitle = {Conference on Learning Theory}, title = {{Beyond the regret minimization barrier: an optimal algorithm for stochastic strongly-convex optimization.}}, year = {2011} } @inproceedings{talwalkar2008large-scale, author = {Talwalkar, Ameet and Kumar, Sanjiv and Rowley, Henry A}, booktitle = {Computer Vision and Pattern Recognition (CVPR)}, title = {{Large-Scale Manifold Learning}}, year = {2008} } @book{Bertsekas, author = {Bertsekas, D}, publisher = {Athena Scientific}, title = {{Nonlinear programming}}, year = {1995} } @book{HL01, author = 
{Hiriart-Urruty, J.-B. and Lemar{\'{e}}chal, C}, publisher = {Springer}, title = {{Fundamentals of Convex Analysis}}, year = {2001} } @inproceedings{koller1999computing, author = {Koller, Daphne and Parr, Ronald}, booktitle = {Proceedings of the 16th International Joint Conference on Artificial Intelligence}, pages = {1332--1339}, title = {{Computing Factored Value Functions for Policies in Structured {\{}MDPs{\}}}}, year = {1999} } @article{queyranne1995scheduling, author = {Queyranne, M and Schulz, A}, journal = {Integer Programming and Combinatorial Optimization}, pages = {307--320}, publisher = {Springer}, title = {{Scheduling unit jobs with compatible release dates on parallel machines with nonstationary speeds}}, volume = {920}, year = {1995} } @article{rust1997using, author = {Rust, John}, journal = {Econometrica}, number = {3}, pages = {487--516}, title = {{Using Randomization to Break the Curse of Dimensionality}}, volume = {65}, year = {1997} } @article{montague, author = {Montague, P R and Dayan, P and Person, C and Sejnowski, T J}, journal = {Nature}, pages = {725--728}, title = {{Bee foraging in uncertain environments using predictive Hebbian learning}}, volume = {377}, year = {1995} } @article{breunig2000lof:, address = {New York, NY, USA}, author = {Breunig, Markus M and Kriegel, Hans-Peter and Ng, Raymond T and Sander, J{\"{o}}rg}, doi = {http://doi.acm.org/10.1145/335191.335388}, issn = {0163-5808}, journal = {SIGMOD Rec.}, number = {2}, pages = {93--104}, publisher = {ACM}, title = {{LOF: identifying density-based local outliers}}, volume = {29}, year = {2000} } @inproceedings{maddison2014a, author = {Maddison, Chris J and Tarlow, Daniel and Minka, Tom}, booktitle = {Neural Information Processing Systems}, title = {{A* sampling}}, year = {2014} } @inproceedings{erraqabi2017trading, abstract = {In multi-armed bandits, the most common objective is the maximization of the cumulative reward. 
Alternative settings include active exploration, where a learner tries to gain accurate estimates of the rewards of all arms. While these objectives are contrasting, in many scenarios it is desirable to trade off rewards and errors. For instance, in educational games the designer wants to gather generalizable knowledge about the behavior of the students and teaching strategies (small estimation errors) but, at the same time, the system needs to avoid giving a bad experience to the players, who may leave the system permanently (large reward). In this paper, we formalize this tradeoff and introduce the ForcingBalance algorithm whose performance is provably close to the best possible tradeoff strategy. Finally, we demonstrate on real-world educational data that ForcingBalance returns useful information about the arms without compromising the overall reward.}, author = {Erraqabi, Akram and Lazaric, Alessandro and Valko, Michal and Brunskill, Emma and Liu, Yun-en}, booktitle = {International Conference on Artificial Intelligence and Statistics}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Erraqabi et al. 
- 2017 - Trading off rewards and errors in multi-armed bandits(2).pdf:pdf}, title = {{Trading off rewards and errors in multi-armed bandits}}, year = {2017} } @article{heard2010bayesian, author = {Heard, Nicholas A and Weston, David J and Platanioti, Kiriaki and Hand, David J}, doi = {10.1214/10-AOAS329SUPPB}, journal = {Annals of Applied Statistics}, pages = {645--662}, title = {{Bayesian anomaly detection methods for social networks}}, volume = {4}, year = {2010} } @book{boucheron2013concentration, author = {Boucheron, St{\'{e}}phane and Lugosi, G{\'{a}}bor and Massart, Pascal}, publisher = {Oxford University Press}, title = {{Concentration inequalities}}, url = {https://www.hse.ru/data/2016/11/24/1113029206/Concentration inequalities.pdf}, year = {2013} } @inproceedings{MB04, author = {McMahan, H Brendan and Blum, Avrim}, booktitle = {In Proceedings of the 17th Annual Conference on Learning Theory (COLT)}, pages = {109--123}, title = {{Online geometric optimization in the bandit setting against an adaptive adversary}}, year = {2004} } @inproceedings{huang2017structured, abstract = {We study the problem of identifying the best action among a set of possible options when the value of each action is given by a mapping from a number of noisy micro-observables in the so-called fixed confidence setting. Our main motivation is the application to the minimax game search, which has been a major topic of interest in artificial intelligence. In this paper we introduce an abstract setting to clearly describe the essential properties of the problem. While previous work only considered a two-move game tree search problem, our abstract setting can be applied to the general minimax games where the depth can be non-uniform and arbitrary, and transpositions are allowed. We introduce a new algorithm (LUCB-micro) for the abstract setting, and give its lower and upper sample complexity results. 
Our bounds recover some previous results, which were only available in more limited settings, while they also shed further light on how the structure of minimax problems influence sample complexity.}, archivePrefix = {arXiv}, arxivId = {1706.05198}, author = {Huang, Ruitong and Ajallooeian, Mohammad M. and Szepesv{\'{a}}ri, Csaba and M{\"{u}}ller, Martin}, booktitle = {Algorithmic Learning Theory}, eprint = {1706.05198}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Huang et al. - 2017 - Structured Best Arm Identification with Fixed Confidence.pdf:pdf}, title = {{Structured best-arm identification with fixed confidence}}, url = {https://arxiv.org/pdf/1706.05198.pdf}, year = {2017} } @book{boyd94lmi, address = {Philadelphia, PA}, author = {Boyd, S and {El Ghaoui}, L and Feron, E and Balakrishnan, V}, isbn = {0-89871-334-X}, publisher = {SIAM}, series = {Studies in Applied Mathematics}, title = {{Linear Matrix Inequalities in System and Control Theory}}, volume = {15}, year = {1994} } @article{munos2002variable, author = {Munos, Remi and Moore, Andrew}, journal = {Machine Learning}, pages = {291--323}, title = {{Variable Resolution Discretization in Optimal Control}}, volume = {49}, year = {2002} } @book{duda73pattern, author = {Duda, Richard and Hart, Peter}, publisher = {Wiley}, title = {{Pattern Classification and Scene Analysis}}, year = {1973} } @inproceedings{yuan2004annealed, author = {Yuan, Changhe and Lu, Tsai-Ching and Druzdzel, Marek}, booktitle = {Proceedings of the 20th Conference on Uncertainty in Artificial Intelligence}, pages = {628--635}, title = {{Annealed {\{}MAP{\}}}}, year = {2004} } @article{manne1960linear, author = {Manne, Alan}, journal = {Management Science}, number = {3}, pages = {259--267}, title = {{Linear Programming and Sequential Decisions}}, volume = {6}, year = {1960} } @article{castro2008minimax, abstract = {This paper analyzes the potential advantages and theoretical challenges of "active 
learning" algorithms. Active learning involves sequential sampling procedures that use information gleaned from previous samples in order to focus the sampling and accelerate the learning process relative to "passive learning" algorithms, which are based on nonadaptive (usually random) samples. There are a number of empirical and theoretical results suggesting that in certain situations active learning can be significantly more effective than passive learning. However, the fact that active learning algorithms are feedback systems makes their theoretical analysis very challenging. This paper aims to shed light on achievable limits in active learning. Using minimax analysis techniques, we study the achievable rates of classification error convergence for broad classes of distributions characterized by decision boundary regularity and noise conditions. The results clearly indicate the conditions under which one can expect significant gains through active learning. Furthermore, we show that the learning rates derived are tight for "boundary fragment" classes in d-dimensional feature spaces when the feature marginal density is bounded from above and below.}, author = {Castro, Rui M. and Nowak, Robert D.}, journal = {IEEE Transactions on Information Theory}, keywords = {Active learning,Adaptive sampling,Minimax lower bounds,Statistical learning theory}, number = {5}, pages = {2339--2353}, title = {{Minimax bounds for active learning}}, volume = {54}, year = {2008} } @article{ghavamzadeh2016bayesian, abstract = {Policy gradient methods are reinforcement learning algorithms that adapt a parameterized policy by following a performance gradient estimate. Many conventional policy gradient methods use Monte-Carlo techniques to estimate this gradient. The policy is improved by adjusting the parameters in the direction of the gradient estimate. 
Since Monte-Carlo methods tend to have high variance, a large number of samples is required to attain accurate estimates, resulting in slow convergence. In this paper, we first propose a Bayesian framework for policy gradient, based on modeling the policy gradient as a Gaussian process. This reduces the number of samples needed to obtain accurate gradient estimates. Moreover, estimates of the natural gradient as well as a measure of the uncertainty in the gradient estimates, namely, the gradient covariance, are provided at little extra cost. Since the proposed Bayesian framework considers system trajectories as its basic observable unit, it does not require the dynamics within trajectories to be of any particular form, and thus, can be easily extended to partially observable problems. On the downside, it cannot take advantage of the Markov property when the system is Markovian. To address this issue, we proceed to supplement our Bayesian policy gradient framework with a new actor-critic learning model in which a Bayesian class of non- parametric critics, based on Gaussian process temporal difference learning, is used. Such critics model the action- value function as a Gaussian process, allowing Bayes' rule to be used in computing the posterior distribution over action-value functions, conditioned on the observed data. Appropriate choices of the policy parameterization and of the prior covariance (kernel) between action-values allow us to obtain closed-form expressions for the posterior distribution of the gradient of the expected return with respect to the policy parameters. 
We perform detailed experimental comparisons of the proposed Bayesian policy gradient and actor-critic algorithms with classic Monte-Carlo based policy gradient methods, as well as with each other, on a number of reinforcement learning problems.}, author = {Ghavamzadeh, Mohammad and Engel, Yaakov and Valko, Michal}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Ghavamzadeh, Engel, Valko - 2016 - Bayesian policy gradient and actor-critic algorithms.pdf:pdf}, journal = {Journal of Machine Learning Research}, number = {66}, pages = {1--53}, title = {{Bayesian policy gradient and actor-critic algorithms}}, volume = {17}, year = {2016} } @inproceedings{tsang2006large-scale, author = {Tsang, Ivor W and Kwok, James T}, booktitle = {NIPS}, title = {{Large-Scale Sparsified Manifold Regularization.}}, year = {2006} } @book{golub83matrix, author = {Golub, G H and Loan, C F Van}, publisher = {Johns Hopkins University Press}, title = {{Matrix Computations}}, year = {1996} } @article{Auer2010, abstract = {In the stochastic multi-armed bandit problem we consider a modification of the UCB algorithm of Auer et al. 4. For this modified algorithm we give an improved bound on the regret with respect to the optimal reward. 
While for the original UCB algorithm the regret in K-armed bandits after T trials is bounded by const K log(T)/Delta, where Delta measures the distance between a suboptimal arm and the optimal arm, for the modified UCB algorithm we show an upper bound on the regret of const K log (T/Delta 2) Delta.}, author = {Auer, Peter and Ortner, Ronald}, doi = {10.1007/s10998-010-3055-6}, journal = {Periodica Mathematica Hungarica}, keywords = {computational,information theoretic learning with statistics,learning,statistics {\&} optimisation,theory {\&} algorithms}, pages = {1--11}, title = {{UCB revisited: Improved regret bounds for the stochastic multi-armed bandit problem}}, volume = {61}, year = {2010} } @article{shi2000normalized, abstract = {We propose a novel approach for solving the perceptual grouping problem in vision. Rather than focusing on local features and their consistencies in the image data, our approach aims at extracting the global impression of an image. We treat image segmentation as a graph}, author = {Shi, J and Malik, J}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, pages = {888--905}, title = {{Normalized Cuts and Image Segmentation}}, volume = {22}, year = {2000} } @article{megiddo74optimal, abstract = {The concept of an optimal flow in a multiple source, multiple sink network is defined. It generalizes maximal flow in a single source, single sink network. 
An existence proof and an algorithm are given.}, author = {Megiddo, Nimrod}, journal = {Mathematical Programming}, number = {1}, pages = {97--107}, title = {{Optimal flows in networks with multiple sources and sinks}}, volume = {7}, year = {1974} } @book{bellman1957dynamic, address = {Princeton, NJ}, author = {Bellman, Richard}, publisher = {Princeton University Press}, title = {{Dynamic Programming}}, year = {1957} } @article{Hemelrijk1966, author = {Hemelrijk, J}, doi = {10.1111/j.1467-9574.1966.tb00488.x}, issn = {14679574}, journal = {Statistica Neerlandica}, number = {1}, pages = {1--7}, title = {{Underlining random variables}}, volume = {20}, year = {1966} } @article{chopra1989spanning, author = {Chopra, S}, journal = {Operations Research Letters}, number = {1}, pages = {25--29}, publisher = {Elsevier}, title = {{On the spanning tree polyhedron}}, volume = {8}, year = {1989} } @inproceedings{guillou2015collaborative, abstract = {Recommender Systems (RS) aim at suggesting to users one or several items in which they might have interest. Following the feedback they receive from the user, these systems have to adapt their model in order to improve future recommendations. The repetition of these steps defines the RS as a sequential process. This sequential aspect raises an exploration-exploitation dilemma, which is surprisingly rarely taken into account for RS without contextual information. In this paper we present an explore-exploit collaborative filtering RS, based on Matrix Factor-ization and Bandits algorithms. Using experiments on artificial and real datasets, we show the importance and practicability of using sequential approaches to perform recommendation. 
We also study the impact of the model update on both the quality and the computation time of the recommendation procedure.}, author = {Guillou, Fr{\'{e}}d{\'{e}}ric and Gaudel, Romaric and Preux, Philippe}, booktitle = {NIPS Workshop on Machine Learning for eCommerce}, keywords = {Collaborative Filtering,Matrix Factorization,Multi-Armed Bandtis,Recommender Systems,sequential Recommender Systems}, month = {dec}, title = {{Collaborative filtering as a multi-armed bandit}}, year = {2015} } @book{Cao07, address = {New York}, author = {Cao, Xi-Ren}, publisher = {Springer}, title = {{Stochastic Learning and Optimization: A Sensitivity-Based Approach}}, year = {2007} } @inproceedings{shapinglevelsets, author = {Bach, F}, booktitle = {Adv. NIPS}, title = {{Shaping Level Sets with Submodular Functions}}, year = {2011} } @article{bertsekas1995counterexample, author = {Bertsekas, Dimitri}, journal = {Neural Computation}, number = {2}, pages = {270--279}, title = {{A Counterexample for Temporal Differences Learning}}, volume = {7}, year = {1995} } @book{Powell07, author = {Powell, Warren B}, publisher = {John Wiley and Sons, New York}, title = {{Approximate Dynamic Programming: Solving the curses of dimensionality}}, year = {2007} } @inproceedings{charniak05ctf, address = {Morristown, NJ, USA}, author = {Charniak, Eugene and Johnson, Mark}, booktitle = {ACL '05: Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics}, doi = {http://dx.doi.org/10.3115/1219840.1219862}, pages = {173--180}, publisher = {Association for Computational Linguistics}, title = {{Coarse-to-fine n-best parsing and {\{}M{\}}ax{\{}E{\}}nt discriminative reranking}}, year = {2005} } @inproceedings{shental2002adjustment, address = {London, UK}, author = {Shental, Noam and Hertz, Tomer and Weinshall, Daphna and Pavel, Misha}, booktitle = {ECCV '02: Proceedings of the 7th European Conference on Computer Vision-Part IV}, isbn = {3-540-43748-7}, pages = {776--792}, publisher = 
{Springer-Verlag}, title = {{Adjustment Learning and Relevant Component Analysis}}, year = {2002} } @article{kulesza2011kdpp, abstract = {Determinantal point processes ( DPPs ) have recently been proposed$\backslash$nas models for set selection problems where diversity is pre- ferred.$\backslash$nFor example, they can be used to select diverse sets of sentences$\backslash$nto form doc- ument summaries, or to find multiple non- overlapping$\backslash$nhuman ...}, author = {Kulesza, Alex and Taskar, Ben}, isbn = {978-1-4503-0619-5}, issn = {{\textless}null{\textgreater}}, journal = {International Conference on Machine Learning}, keywords = {To Read Urgently}, pages = {1193--1200}, title = {{k-DPPs: Fixed-Size Determinantal Point Processes}}, url = {http://158.130.69.163/{~}taskar/pubs/kdpps{\_}icml11.pdf{\%}5Cnpapers2://publication/uuid/49BE74F9-9BC7-433A-8533-9508003732F8}, year = {2011} } @inproceedings{mutny2018efficient, author = {Mutn{\'{y}}, Mojm{\'{i}}r and Krause, Andreas}, booktitle = {Neural Information Processing Systems}, title = {{Efficient high-dimensional Bayesian optimization with additivity and quadrature Fourier features}}, url = {https://papers.nips.cc/paper/8115-efficient-high-dimensional-bayesian-optimization-with-additivity-and-quadrature-fourier-features.pdf}, year = {2018} } @inproceedings{tewari08optimistic, author = {Tewari, Ambuj and Bartlett, Peter L}, pages = {1505--1512}, title = {{Optimistic Linear Programming gives Logarithmic Regret for Irreducible {\{}MDPs{\}}}} } @book{fujishige2005submodular, author = {Fujishige, Satoru}, series = {Annals of discrete mathematics}, title = {{Submodular functions and optimization}}, year = {2005} } @article{wainwright2008graphical, author = {Wainwright, M J and Jordan, M I}, journal = {Foundations and Trends{\{}$\backslash$textregistered{\}} in Machine Learning}, number = {1-2}, pages = {1--305}, publisher = {Now Publishers Inc.}, title = {{Graphical models, exponential families, and variational 
inference}}, volume = {1}, year = {2008} } @inproceedings{Cevher2009, author = {Cevher, V and Indyk, P and Hegde, C and Baraniuk, R G}, booktitle = {Sampling Theory and Applications (SAMPTA)}, title = {{Recovery of clustered sparse signals from compressive measurements}}, year = {2009} } @phdthesis{valko2016bandits, abstract = {We investigate the structural properties of certain sequential decision-making problems with limited feedback (bandits) in order to bring the known algorithmic solutions closer to a practical use. In the first part, we put a special emphasis on structures that can be represented as graphs on actions, in the second part we study the large action spaces that can be of exponential size in the number of base actions or even infinite. We show how to take advantage of structures over the actions and (provably) learn faster.}, author = {Valko, Michal}, school = {{\'{E}}cole normale sup{\'{e}}rieure de Cachan}, title = {{Bandits on graphs and structures}}, type = {habilitation}, url = {https://hal.inria.fr/tel-01359757/document}, year = {2016} } @book{koller2009bayesian, annote = {comps{\_}models}, author = {Koller, Daphne and Friedman, Nir}, keywords = {bibtex-import}, publisher = {MIT Press}, title = {{Probabilistic Graphical Models: Principles and Techniques}}, year = {2009} } @inproceedings{joshi2005investigating, address = {New York, NY, USA}, annote = {comps{\_}anX}, author = {Joshi, Shrijit S and Phoha, Vir V}, booktitle = {ACM-SE 43: Proceedings of the 43rd annual Southeast regional conference}, doi = {http://doi.acm.org/10.1145/1167350.1167387}, isbn = {1-59593-059-0}, pages = {98--103}, publisher = {ACM}, title = {{Investigating hidden Markov models capabilities in anomaly detection}}, year = {2005} } @inproceedings{keshavan2009matrix, author = {Keshavan, Raghunandan and Oh, Sewoong and Montanari, Andrea}, booktitle = {International Symposium on Information Theory}, title = {{Matrix completion from a few entries}}, url = 
{https://arxiv.org/pdf/0901.3150.pdf}, year = {2009} } @article{alpern2013mining, abstract = {We show how to optimize the search for a hidden object, terrorist, or simply Hider, located at a point H according to a known or unknown distribution {\$}\nu{\$} on a rooted network Q. We modify the traditional “pathwise search” approach to a more general notion of “expanding search.” When the Hider is restricted to the nodes of Q, an expanding search S consists of an ordering {\$}\backslash{\$}documentclass{\{}aastex{\}} {\$}\backslash{\$}usepackage{\{}amsbsy{\}} {\$}\backslash{\$}usepackage{\{}amsfonts{\}} {\$}\backslash{\$}usepackage{\{}amssymb{\}} {\$}\backslash{\$}usepackage{\{}bm{\}} {\$}\backslash{\$}usepackage{\{}mathrsfs{\}} {\$}\backslash{\$}usepackage{\{}pifont{\}} {\$}\backslash{\$}usepackage{\{}stmaryrd{\}} {\$}\backslash{\$}usepackage{\{}textcomp{\}} {\$}\backslash{\$}usepackage{\{}portland,xspace{\}} {\$}\backslash{\$}usepackage{\{}amsmath,amsxtra{\}} {\$}\backslash{\$}pagestyle{\{}empty{\}} {\$}\backslash{\$}DeclareMathSizes{\{}10{\}}{\{}9{\}}{\{}7{\}}{\{}6{\}} {\$}\backslash{\$}begin{\{}document{\}} {\$}(a{\_}{\{}1{\}},a{\_}{\{}2{\}},\backslashbackslashldots){\$} {\$}\backslash{\$}end{\{}document{\}} of the arcs of a spanning subtree such that the root node is in a1 and every arc ai is adjacent to a previous arc aj, j {\textless}i. 
If ak contains H, the search time T is {\$}\backslash{\$}documentclass{\{}aastex{\}} {\$}\backslash{\$}usepackage{\{}amsbsy{\}} {\$}\backslash{\$}usepackage{\{}amsfonts{\}} {\$}\backslash{\$}usepackage{\{}amssymb{\}} {\$}\backslash{\$}usepackage{\{}bm{\}} {\$}\backslash{\$}usepackage{\{}mathrsfs{\}} {\$}\backslash{\$}usepackage{\{}...{\}}}, author = {Alpern, Steve and Lidbetter, Thomas}, doi = {10.1287/opre.1120.1134}, issn = {0030-364X}, journal = {Operations Research}, keywords = {games/group decisions,networks/graphs,search/surveillance,teams,tree algorithms}, month = {apr}, number = {2}, pages = {265--279}, publisher = {INFORMS}, title = {{Mining coal or finding terrorists: The expanding search paradigm}}, url = {http://pubsonline.informs.org/doi/abs/10.1287/opre.1120.1134}, volume = {61}, year = {2013} } @inproceedings{xia2016budgeted, author = {Xia, Yingce and Qin, Tao and Ma, Weidong and Yu, Nenghai and Liu, Tie-Yan}, booktitle = {International Joint Conference on Artificial Intelligence}, title = {{Budgeted multi-armed bandits with multiple plays}}, url = {https://www.ijcai.org/Proceedings/16/Papers/315.pdf}, year = {2016} } @inproceedings{negahban2008joint, author = {Negahban, S and Wainwright, M J}, booktitle = {Adv. 
NIPS}, title = {{Joint support recovery under high-dimensional scaling: Benefits and perils of $\backslash$ell{\_}1-$\backslash$ell{\_}$\backslash$infty-regularization}}, year = {2008} } @article{freund1997decision, author = {Freund, Y and Schapire, R E}, journal = {Journal of Computer and System Sciences}, pages = {119--139}, title = {{A decision-theoretic generalization of on-line learning and an application to boosting}}, volume = {55}, year = {1997} } @inproceedings{Venkataraman-2004-Mislabeled, author = {Venkataraman, S and Fradkin, D and Metaxas, D and Kulikowski, C}, booktitle = {16th IEEE International Conference on Tools with Artificial Intelligence}, pages = {356--361}, title = {{Distinguishing Mislabeled Data from Correctly Labeled Data in Classifier Design.}}, year = {2004} } @techreport{liu2018gaussian, archivePrefix = {arXiv}, arxivId = {1807.01065}, author = {Liu, Haitao and Ong, Yew-Soon and Shen, Xiaobo and Cai, Jianfei}, eprint = {1807.01065}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Liu et al. - 2018 - When Gaussian process meets big data A Review of scalable GPs.pdf:pdf}, month = {jul}, title = {{When Gaussian process meets big data: A Review of scalable GPs}}, url = {https://arxiv.org/abs/1807.01065}, year = {2018} } @incollection{asadpour2008stochastic, author = {Asadpour, Arash and Nazerzadeh, Hamid and Saberi, Amin}, booktitle = {Internet and Network Economics}, pages = {477--489}, publisher = {Springer}, title = {{Stochastic submodular maximization}}, year = {2008} } @article{denisov1984random, author = {Денисов, И. 
В.}, journal = {Теория вероятностей и ее применения}, number = {4}, pages = {785--788}, title = {{Случайное блуждание и винеровский процесс, рассматриваемые из точки максимума}}, url = {http://www.mathnet.ru/links/a507288f44b12fc6bf77d226477a60a3/tvp2231.pdf}, volume = {28}, year = {1983} } @inproceedings{hoey1999spudd:, author = {Hoey, Jesse and St-Aubin, Robert and Hu, Alan and Boutilier, Craig}, booktitle = {Proceedings of the 15th Conference on Uncertainty in Artificial Intelligence}, pages = {279--288}, title = {{{\{}SPUDD{\}}: Stochastic Planning using Decision Diagrams}}, year = {1999} } @inproceedings{gautier2019two, author = {Gautier, Guillaume and Bardenet, R{\'{e}}mi and Valko, Michal}, booktitle = {Neural Information Processing Systems}, title = {{On two ways to use determinantal point processes for Monte-Carlo integration}}, year = {2019} } @inproceedings{augustin, author = {Lef{\`{e}}vre, A and Bach, F and F{\'{e}}votte, C}, booktitle = {Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP)}, title = {{Itakura-{\{}S{\}}aito Nonnegative Matrix Factorization With Group Sparsity}}, year = {2011} } @incollection{JN11a, author = {Juditsky, A and Nemirovski, A}, booktitle = {Optimization for Machine Learning}, editor = {Sra, S and Nowozin, S and Wright, S}, pages = {121--147}, publisher = {MIT press}, title = {{First-Order Methods for Nonsmooth Convex Large-Scale Optimization, I: General Purpose Methods}}, year = {2011} } @article{hart2011identification, abstract = {The purpose of this study was to provide a univariate and multivariate analysis of genomic microbial data and salivary mass-spectrometry proteomic profiles for dental caries outcomes. In order to determine potential useful biomarkers for dental caries, a multivariate classification analysis was employed to build predictive models capable of classifying microbial and salivary sample profiles with generalization performance. 
We used high-throughput methodologies including multiplexed microbial arrays and SELDI-TOF-MS profiling to characterize the oral flora and salivary proteome in 204 children aged 1-8 years (n = 118 caries-free, n = 86 caries-active). The population received little dental care and was deemed at high risk for childhood caries. Findings of the study indicate that models incorporating both microbial and proteomic data are superior to models of only microbial or salivary data alone. Comparison of results for the combined and independent data suggests that the combination of proteomic and microbial sources is beneficial for the classification accuracy and that combined data lead to improved predictive models for caries-active and caries-free patients. The best predictive model had a 6{\%} test error, {\textgreater}92{\%} sensitivity, and {\textgreater}95{\%} specificity. These findings suggest that further characterization of the oral microflora and the salivary proteome associated with health and caries may provide clinically useful biomarkers to better predict future caries experience.}, author = {Hart, Thomas C and Corby, Patricia M and Hauskrecht, Milos and {Hee Ryu}, Ok and Pelikan, Richard and Valko, Michal and Oliveira, Maria B and Hoehn, Gerald T and Bretz, Walter A}, doi = {10.1155/2011/196721}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Hart et al. - 2011 - Identification of microbial and proteomic biomarkers in early childhood caries.pdf:pdf}, institution = {Department of Periodontics, College of Dentistry, University of Illinois at Chicago, 801 S. 
Paulina Street, Chicago, IL 60612, USA.}, journal = {Int J Dent}, keywords = {misovalko}, mendeley-tags = {misovalko}, pages = {196721}, pmid = {22013442}, title = {{Identification of microbial and proteomic biomarkers in early childhood caries.}}, url = {http://dx.doi.org/10.1155/2011/196721}, volume = {2011}, year = {2011} } @techreport{vaswani2016adaptive, abstract = {Most previous work on influence maximization in social networks is limited to the non-adaptive setting in which the marketer is supposed to select all of the seed users, to give free samples or discounts to, up front. A disadvantage of this setting is that the marketer is forced to select all the seeds based solely on a diffusion model. If some of the selected seeds do not perform well, there is no opportunity to course-correct. A more practical setting is the adaptive setting in which the marketer initially selects a batch of users and observes how well seeding those users leads to a diffusion of product adoptions. Based on this market feedback, she formulates a policy for choosing the remaining seeds. In this paper, we study adaptive offline strategies for two problems: (a) MAXSPREAD -- given a budget on number of seeds and a time horizon, maximize the spread of influence and (b) MINTSS -- given a time horizon and an expected number of target users to be influenced, minimize the number of seeds that will be required. In particular, we present theoretical bounds and empirical results for an adaptive strategy and quantify its practical benefit over the non-adaptive strategy. We evaluate adaptive and non-adaptive policies on three real data sets. We conclude that while benefit of going adaptive for the MAXSPREAD problem is modest, adaptive policies lead to significant savings for the MINTSS problem.}, archivePrefix = {arXiv}, arxivId = {1604.08171}, author = {Vaswani, Sharan and Lakshmanan, Laks V. 
S.}, eprint = {1604.08171}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Vaswani, Lakshmanan - 2016 - Adaptive influence maximization in social networks Why commit when you can adapt.pdf:pdf}, title = {{Adaptive influence maximization in social networks: Why commit when you can adapt?}}, year = {2016} } @article{boucheron2012concentration, abstract = {This note describes non-asymptotic variance and tail bounds for order statistics of samples of independent identically distributed random variables. Those bounds are checked to be asymptotically tight when the sampling distribution belongs to a maximum domain of attraction. If the sampling distribution has non-decreasing hazard rate (this includes the Gaussian distribution), we derive an exponential Efron-Stein inequality for order statistics: an inequality connecting the logarithmic moment generating function of centered order statistics with exponential moments of Efron-Stein (jackknife) estimates of variance. We use this general connection to derive variance and tail bounds for order statistics of Gaussian sample. Those bounds are not within the scope of the Tsirelson-Ibragimov-Sudakov Gaussian concentration inequality. Proofs are elementary and combine R$\backslash$'enyi's representation of order statistics and the so-called entropy approach to concentration inequalities popularized by M. 
Ledoux.}, archivePrefix = {arXiv}, arxivId = {1207.7209}, author = {Boucheron, St{\'{e}}phane and Thomas, Maud}, doi = {10.1214/ECP.v17-2210}, eprint = {1207.7209}, issn = {1083589X}, journal = {Electronic Communications in Probability}, keywords = {Concentration inequalities,Efron-Stein inequalities,Entropy method,Order statistics,Renyi's representation}, title = {{Concentration inequalities for order statistics}}, volume = {17}, year = {2012} } @inproceedings{sun2012size, author = {Sun, Yi and Schmidhuber, J{\"{u}}rgen and Gomez, Faustino J.}, booktitle = {International Conference on Machine Learning}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Sun, Schmidhuber, Gomez - 2012 - On the size of the online kernel sparsification dictionary.pdf:pdf}, title = {{On the size of the online kernel sparsification dictionary}}, year = {2012} } @inproceedings{EvDaKaMa04, author = {Even-Dar, E and Kakade, S M and Mansour, Y}, pages = {401--408}, title = {{Experts in a {\{}M{\}}arkov Decision Process}} } @article{metropolis1949monte, abstract = {Abstract We shall present here the motivation and a general description of a method dealing with a class of problems in mathematical physics. The method is, essentially, a statistical approach to the study of differential equations, or more generally, of integro-differential equations that occur in various branches of the natural sciences. Abstract We shall present here the motivation and a general description of a method dealing with a class of problems in mathematical physics. 
The method is, essentially, a statistical approach to the study of differential equations, or more generally, of integro-differential equations that occur in various branches of the natural sciences.}, author = {Metropolis, Nicholas and Ulam, S.}, journal = {Journal of the American Statistical Association}, number = {247}, pages = {335--341}, title = {{The Monte Carlo method}}, volume = {44}, year = {1949} } @inproceedings{Sanchez-2000-Mislabeled, author = {Sanchez, J S and Barandela, R and Marques, A I and Alejo, R and Badenas, J}, booktitle = {Advances in Pattern Recognition Lecture Notes in Computer Science 1876}, pages = {621--630}, title = {{Decontamination of Training Data for Supervised Pattern Recognition.}}, year = {2000} } @incollection{neal2003density, abstract = {I introduce a family of prior distributions over multivariate distributions, based on the use of a " Dirichlet diffusion tree " to generate exchangeable data sets. These priors can be viewed as generalizations of Dirichlet processes and of Dirichlet process mixtures, but unlike simple mixtures, they can capture the hierarchical structure present in many distributions, by means of the latent diffusion tree underlying the data. This latent tree also provides a hierarchical clustering of the data, which, unlike ad hoc clustering methods, comes with probabilistic indications of uncertainty. The relevance of each variable to the clustering can also be determined. Although Dirichlet diffusion trees are defined in terms of a continuous-time process, posterior inference involves only finite-dimensional quantities, allowing computation to be performed by reasonably efficient Markov chain Monte Carlo methods. The methods are demonstrated on problems of modeling a two-dimensional density and of clustering gene expression data. 1. INTRODUCTION Unknown distributions are encountered when estimating the density of observed data and when modeling the distribution of random effects or other latent variables. 
Exploratory data analysis can also be viewed in terms of finding features of the data, such as clusters, that are useful in modeling its distribution. A Bayesian model involving an unknown distribution requires a prior distribution over distributions. For such a model to be useful in practice, the prior must be an adequate approximation to our actual prior beliefs about the unknown distribution, and it must be possible to compute the predictive distribution for new data with reasonable efficiency. The Dirichlet process (Ferguson 1973) is a simple and computationally tractable prior for an unknown distribution. However, it produces distributions that are discrete with probability one, making it unsuitable for density modeling. This can be avoided by convolving the distribution with some continuous kernel, or more generally, by using a Dirichlet process to define a mixture distribution with infinitely many components, of some simple parametric form (Antoniak 1973; Ferguson 1983). Such Dirichlet process mixture models are not always ideal, however, because they use a prior distribution in which the parameters of one mixture component are independent of the parameters of other components. For many problems, we would expect instead that the components will be hierarchically organized, in ways analogous to the hierarchical grouping of organisms belonging to various species. Even if no obvious hierarchy is present, modeling a complex distribution by a mixture of simple distributions will require that each mode of the distribution be modeled using many of these simple mixture components, which will have similar parameters, and therefore form clusters themselves. 
Since Dirichlet process mixture models don't capture this hierarchical structure, inference using these models will be inefficient}, author = {Neal, Radford M.}, booktitle = {Bayesian Statistics}, pages = {619--629}, title = {{Density modeling and clustering using Dirichlet diffusion trees}}, volume = {7}, year = {2003} } @article{dani2008price, author = {Dani, Varsha and Hayes, Thomas and Kakade, Sham}, editor = {Platt, J C and Koller, D and Singer, Y and Roweis, S}, issn = {00368075}, journal = {Advances in Neural Information Processing Systems 20}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {1--8}, publisher = {MIT Press}, title = {{The Price of Bandit Information for Online Optimization}}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.71.4607{\&}rep=rep1{\&}type=pdf}, volume = {20}, year = {2008} } @article{boutilier1999decision-theoretic, author = {Boutilier, Craig and Dean, Thomas and Hanks, Steve}, journal = {Journal of Artificial Intelligence Research}, pages = {1--94}, title = {{Decision-Theoretic Planning: Structural Assumptions and Computational Leverage}}, volume = {11}, year = {1999} } @incollection{GP91, author = {Gupta, S and Panchapakesan, S}, booktitle = {Handbook of Sequential Analysis}, editor = {Ghosh, B and Sen, P}, pages = {363--380}, title = {{On sequential ranking and selection procedures}}, year = {1991} } @inproceedings{Steinberger2009, author = {Steinberger, R and Pouliquen, B and {Van der Goot}, E}, booktitle = {Information Access in a Multilingual World-Proceedings of the SIGIR 2009 Workshop (SIGIR-CLIR'2009)}, pages = {1--8}, title = {{An Introduction to the {\{}Europe Media Monitor{\}} Family of Applications}}, year = {2009} } @inproceedings{GGLB11, author = {Gabillon, V and Ghavamzadeh, M and Lazaric, A and Bubeck, S}, booktitle = {Neural Information Processing Systems (NIPS)}, title = {{Multi-Bandit Best Arm Identification}}, year = {2011} } @article{andrieu2003introduction, abstract = {This purpose 
of this introductory paper is threefold. First, it introduces the Monte Carlo method with emphasis on probabilistic machine learning. Second, it reviews the main building blocks of modern Markov chain Monte Carlo simulation, thereby providing and introduction to the remaining papers of this special issue. Lastly, it discusses new interesting research horizons.}, author = {Andrieu, Christophe and {De Freitas}, Nando and Doucet, Arnaud and Jordan, Michael I.}, journal = {Machine Learning}, keywords = {MCMC,Markov chain Monte Carlo,Sampling,Stochastic algorithms}, number = {1-2}, pages = {5--43}, title = {{An introduction to MCMC for machine learning}}, volume = {50}, year = {2003} } @book{Nesterov2004, author = {Nesterov, Yurii}, publisher = {Kluwer Academic Publishers}, title = {{Introductory lectures on convex optimization: A basic course}}, year = {2004} } @inproceedings{valko2013stochastic, abstract = {We study the problem of global maximization of a function f given a finite number of evaluations perturbed by noise. We consider a very weak assumption on the function, namely that it is locally smooth (in some precise sense) with respect to some semi-metric, around one of its global maxima. Compared to previous works on bandits in general spaces (Kleinberg et al., 2008; Bubeck et al., 2011a) our algorithm does not require the knowledge of this semi-metric. Our algorithm, StoSOO, follows an optimistic strategy to iteratively construct upper confidence bounds over the hierarchical partitions of the function domain to decide which point to sample next. 
A finite-time analysis of StoSOO shows that it performs almost as well as the best specifically-tuned algorithms even though the local smoothness of the function is not known.}, author = {Valko, Michal and Carpentier, Alexandra and Munos, R{\'{e}}mi}, booktitle = {International Conference on Machine Learning}, title = {{Stochastic simultaneous optimistic optimization}}, url = {http://proceedings.mlr.press/v28/valko13.pdf}, year = {2013} } @article{leskovec2007graph, author = {Leskovec, Jure and Kleinberg, Jon and Faloutsos, Christos}, journal = {Knowledge Discovery from Data}, month = {mar}, title = {{Graph evolution: Densification and shrinking diameters}}, year = {2007} } @article{Seznec2018, abstract = {In bandits, arms' distributions are stationary. This is often violated in practice, where rewards change over time. In applications as recommendation systems, online advertising, and crowdsourcing, the changes may be triggered by the pulls, so that the arms' rewards change as a function of the number of pulls. In this paper, we consider the specific case of non-parametric rotting bandits, where the expected reward of an arm may decrease every time it is pulled. We introduce the filtering on expanding window average (FEWA) algorithm that at each round constructs moving averages of increasing windows to identify arms that are more likely to return high rewards when pulled once more. We prove that, without any knowledge on the decreasing behavior of the arms, FEWA achieves similar anytime problem-dependent, {\$}\backslashwidetilde{\{}\backslashmathcal{\{}O{\}}{\}}(\backslashlog{\{}(KT){\}}),{\$} and problem-independent, {\$}\backslashwidetilde{\{}\backslashmathcal{\{}O{\}}{\}}(\backslashsqrt{\{}KT{\}}){\$}, regret bounds of near-optimal stochastic algorithms as UCB1 of Auer et al. (2002a). This result substantially improves the prior result of Levine et al. 
(2017) which needed knowledge of the horizon and decaying parameters to achieve problem-independent bound of only {\$}\backslashwidetilde{\{}\backslashmathcal{\{}O{\}}{\}}(K{\^{}}{\{}1/3{\}}T{\^{}}{\{}2/3{\}}){\$}. Finally, we report simulations confirming the theoretical improvements of FEWA.}, archivePrefix = {arXiv}, arxivId = {1811.11043}, author = {Seznec, Julien and Locatelli, Andrea and Carpentier, Alexandra and Lazaric, Alessandro and Valko, Michal}, eprint = {1811.11043}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Seznec et al. - 2018 - Rotting bandits are no harder than stochastic ones.pdf:pdf}, number = {2016}, title = {{Rotting bandits are no harder than stochastic ones}}, url = {http://arxiv.org/abs/1811.11043}, year = {2018} } @article{jenatton2009structured, author = {Jenatton, R and Audibert, J-Y. and Bach, F}, journal = {Journal of Machine Learning Research}, pages = {2777--2824}, title = {{Structured Variable Selection with Sparsity-Inducing Norms}}, volume = {12}, year = {2011} } @incollection{joshi97treeadjoining, address = {Berlin, New York}, author = {Joshi, Aravind K and Schabes, Yves}, booktitle = {Handbook of Formal Languages}, pages = {69--124}, publisher = {Springer}, title = {{Tree-Adjoining Grammars}}, url = {citeseer.ist.psu.edu/joshi97treeadjoining.html}, volume = {3}, year = {1997} } @inproceedings{badanidiyuru2013bandits, abstract = {Multi-armed bandit problems are the predominant theoretical model of exploration-exploitation tradeoffs in learning, and they have countless applications ranging from medical trials, to communication networks, to Web search and advertising. In many of these application domains the learner may be constrained by one or more supply (or budget) limits, in addition to the customary limitation on the time horizon. The literature lacks a general model encompassing these sorts of problems. 
We introduce such a model, called "bandits with knapsacks", that combines aspects of stochastic integer programming with online learning. A distinctive feature of our problem, in comparison to the existing regret-minimization literature, is that the optimal policy for a given latent distribution may significantly outperform the policy that plays the optimal fixed arm. Consequently, achieving sub linear regret in the bandits-with-knapsacks problem is significantly more challenging than in conventional bandit problems. We present two algorithms whose reward is close to the information-theoretic optimum: one is based on a novel "balanced exploration" paradigm, while the other is a primal-dual algorithm that uses multiplicative updates. Further, we prove that the regret achieved by both algorithms is optimal up to polylogarithmic factors. We illustrate the generality of the problem by presenting applications in a number of different domains including electronic commerce, routing, and scheduling. 
As one example of a concrete application, we consider the problem of dynamic posted pricing with limited supply and obtain the first algorithm whose regret, with respect to the optimal dynamic policy, is sublinear in the supply.}, archivePrefix = {arXiv}, arxivId = {1305.2545}, author = {Badanidiyuru, Ashwinkumar and Kleinberg, Robert and Slivkins, Aleksandrs}, booktitle = {Proceedings - Annual IEEE Symposium on Foundations of Computer Science, FOCS}, doi = {10.1109/FOCS.2013.30}, eprint = {1305.2545}, isbn = {9780769551357}, issn = {02725428}, keywords = {Dynamic ad allocation,Dynamic pricing,Dynamic procurement,Exploration}, pages = {207--216}, title = {{Bandits with knapsacks}}, year = {2013} } @article{geman1984stochastic, author = {Geman, Stuart and Geman, Donald}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, number = {6}, pages = {721--741}, title = {{Stochastic Relaxation, {\{}Gibbs{\}} Distribution, and the {\{}Bayesian{\}} Restoration of Images}}, volume = {6}, year = {1984} } @inproceedings{valko2011conditionala, abstract = {In this paper, we consider the problem of conditional anomaly detection that aims to identify data instances with an unusual response or a class label. We develop a new non-parametric approach for conditional anomaly detection based on the soft harmonic solution, with which we estimate the confidence of the label to detect anomalous mislabeling. We further regularize the solution to avoid the detection of isolated examples and examples on the boundary of the distribution support. We demonstrate the efficacy of the proposed method on several synthetic and UCI ML datasets in detecting unusual labels when compared to several baseline approaches. 
We also evaluate the performance of our method on a real-world electronic health record dataset where we seek to identify unusual patient-management decisions.}, author = {Valko, Michal and Kveton, Branislav and Valizadegan, Hamed and Cooper, Gregory F and Hauskrecht, Milos}, booktitle = {Proceedings of the 2011 IEEE International Conference on Data Mining}, keywords = {misovalko}, mendeley-tags = {misovalko}, month = jun, title = {{Conditional anomaly detection with soft harmonic functions}}, year = {2011} } @inproceedings{atkeson, author = {Atkeson, C G and Schaal, S}, pages = {12--20}, title = {{Robot learning from demonstration}} } @inproceedings{colt2010, author = {Golovin, Daniel and Krause, Andreas}, booktitle = {23rd Annual Conference on Learning Theory}, pages = {333--345}, title = {{Adaptive Submodularity: A New Approach to Active Learning and Stochastic Optimization}}, year = {2010} } @techreport{C2020, abstract = {We study sampling algorithms for $\beta$-ensembles with time complexity less than cubic in the cardinality of the ensemble. Following Dumitriu and Edelman (2002), we see the ensemble as the eigenvalues of a random tridiagonal matrix, namely a random Jacobi matrix. First, we provide a unifying and elementary treatment of the tridiagonal models associated to the three classical Hermite, Laguerre and Jacobi ensembles. For this purpose, we use simple changes of variables between successive reparametrizations of the coefficients defining the tridiagonal matrix. Second, we derive an approximate sampler for the simulation of $\beta$-ensembles, and illustrate how fast it can be for polynomial potentials. This method combines a Gibbs sampler on Jacobi matrices and the diagonalization of these matrices. In practice, even for large ensembles, only a few Gibbs passes suffice for the marginal distribution of the eigenvalues to fit the expected theoretical distribution. 
When the conditionals in the Gibbs sampler can be simulated exactly, the same fast empirical convergence is observed for the fluctuations of the largest eigenvalue. Our experimental results support a conjecture by Krishnapur et al. (2016), that the Gibbs chain on Jacobi matrices of size N mixes in O(log(N)).}, archivePrefix = {arXiv}, arxivId = {2003.02344}, author = {Gautier, Guillaume and Bardenet, R{\'{e}}mi and Valko, Michal}, eprint = {2003.02344}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/C, Bardenet, Valko - 2020 - Fast sampling from {\$}beta{\$}-ensembles.pdf:pdf}, keywords = {gautier2020fast}, mendeley-tags = {gautier2020fast}, month = mar, title = {{Fast sampling from $\beta$-ensembles}}, url = {http://arxiv.org/abs/2003.02344}, year = {2020} } @inproceedings{LV06, author = {Lovasz, L and Vempala, S}, booktitle = {Proceedings of the 47th Annual IEEE Symposium on Foundations of Computer Science (FOCS)}, pages = {57--68}, title = {{Fast algorithms for logconcave functions: sampling, rounding, integration and optimization}}, year = {2006} } @inproceedings{carpentier2014extreme, abstract = {In many areas of medicine, security, and life sciences, we want to allocate limited resources to different sources in order to detect extreme values. In this paper, we study an efficient way to allocate these resources sequentially under limited feedback. While sequential design of experiments is well studied in bandit theory, the most commonly optimized property is the regret with respect to the maximum mean reward. However, in other problems such as network intrusion detection, we are interested in detecting the most extreme value output by the sources. Therefore, in our work we study extreme regret which measures the efficiency of an algorithm compared to the oracle policy selecting the source with the heaviest tail. 
We propose the ExtremeHunter algorithm, provide its analysis, and evaluate it empirically on synthetic and real-world experiments.}, author = {Carpentier, Alexandra and Valko, Michal}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Carpentier, Valko - 2014 - Extreme bandits(2).pdf:pdf}, title = {{Extreme bandits}}, year = {2014} } @article{ratliff2006maximum, abstract = {Imitation learning of sequential, goal-directed behavior by standard supervised techniques is often difficult. We frame learning such behaviors as a maximum margin structured prediction problem over a space of policies. In this approach, we learn mappings from features to cost so an optimal policy in an MDP with these cost mimics the expert's behavior. Further, we demonstrate a simple, provably efficient approach to structured maximum margin learning, based on the subgradient method, that leverages existing fast algorithms for inference. Although the technique is general, it is particularly relevant in problems where A and dynamic programming approaches make learning policies tractable in problems beyond the limitations of a QP formulation. 
We demonstrate our approach applied to route planning for outdoor mobile robots, where the behavior a designer wishes a planner to execute is often clear, while specifying cost functions that engender this behavior is a much more difficult task.}, author = {Ratliff, Nathan D and Bagnell, J Andrew and Zinkevich, Martin A}, doi = {10.1145/1143844.1143936}, isbn = {1595933832}, issn = {17458358}, journal = {Proceedings of the 23rd ICML}, number = {10}, pmid = {17914344}, publisher = {ACM Press}, title = {{Maximum margin planning}}, url = {http://webdocs.cs.ualberta.ca/{~}maz/publications/maximummarginplanning.pdf}, volume = {3}, year = {2006} } @book{AGZ10, author = {Anderson, G and Guionnet, A and Zeitouni, O}, publisher = {Cambridge University Press}, title = {{An Introduction to Random Matrices}}, year = {2010} } @techreport{heckerman1995tutorial, address = {Redmond, Washington}, annote = {Revised June 96}, author = {Heckerman, D}, institution = {Microsoft Research}, title = {{A Tutorial on Learning with Bayesian Networks}}, year = {1995} } @article{spielman_nearly_2014, author = {Spielman, Daniel A and Teng, Shang-Hua}, journal = {SIAM Journal on Matrix Analysis and Applications}, number = {3}, pages = {835--885}, title = {{Nearly linear time algorithms for preconditioning and solving symmetric, diagonally dominant linear systems}}, url = {http://epubs.siam.org/doi/abs/10.1137/090771430}, volume = {35}, year = {2014} } @inproceedings{UNK10, author = {Uchiya, T and Nakamura, A and Kudo, M}, booktitle = {Proceedings of the 21st International Conference on Algorithmic Learning Theory (ALT)}, title = {{Algorithms for Adversarial Bandit Problems with Multiple Plays}}, year = {2010} } @inproceedings{jamieson2014lilUCB, author = {Jamieson, Kevin and Malloy, Matthew and Nowak, Robert and Bubeck, S{\'{e}}bastien}, booktitle = {Conference on Learning Theory}, title = {{lil'UCB: An Optimal Exploration Algorithm for Multi-Armed Bandits}}, year = {2014} } 
@article{spielman2007spectral, abstract = {Spectral partitioning methods use the Fiedler vector-the eigenvector of the second-smallest eigenvalue of the Laplacian matrix-to find a small separator of a graph. These methods are important components of many scientific numerical algorithms and have been demonstrated by experiment to work extremely well. In this paper, we show that spectral partitioning methods work well on bounded-degree planar graphs and finite element meshes-the classes of graphs to which they are usually applied. While naive spectral bisection does not necessarily work, we prove that spectral partitioning techniques can be used to produce separators whose ratio of vertices removed to edges cut is O (sqrt(n)) for bounded-degree planar graphs and two-dimensional meshes and O(n1/d) for well-shaped d-dimensional meshes. The heart of our analysis is an upper bound on the second-smallest eigenvalues of the Laplacian matrices of these graphs: we prove a bound of O(1/n) for bounded-degree planar graphs and O(1/n2/d) for well-shaped d-dimensional meshes. ?? 2006 Elsevier Inc. All rights reserved.}, author = {Spielman, Daniel A. 
and Teng, Shang H.}, journal = {Linear Algebra and Its Applications}, keywords = {Eigenvalue problems,Graph embedding,Graph partitioning,Spectral analysis,Spectral methods}, pages = {284--305}, title = {{Spectral partitioning works: Planar graphs and finite element meshes}}, volume = {421}, year = {2007} } @article{farias2003linear, author = {de Farias, Daniela Pucci and {Van Roy}, Benjamin}, journal = {Operations Research}, number = {6}, pages = {850--856}, title = {{The Linear Programming Approach to Approximate Dynamic Programming}}, volume = {51}, year = {2003} } @misc{nvidia-cusparse, title = {{NVIDIA CUSPARSE and CUBLAS Libraries}}, url = {http://www.nvidia.com/object/cuda{\_}develop.html}, year = {2012} } @article{Jenatton2010b, author = {Jenatton, R and Mairal, J and Obozinski, G and Bach, F}, journal = {Journal of Machine Learning Research}, pages = {2297--2334}, title = {{Proximal Methods for Hierarchical Sparse Coding}}, volume = {12}, year = {2011} } @inproceedings{zhang1996high-performance, author = {Zhang, Wei and Dietterich, Thomas}, booktitle = {Advances in Neural Information Processing Systems 8}, pages = {1024--1030}, title = {{High-Performance Job-Shop Scheduling with a Time-Delay {\{}TD{\}}($\lambda$) Network}}, year = {1996} } @inproceedings{EWK14, author = {van Erven, Tim and Warmuth, Manfred and Kot{\l}owski, Wojciech}, pages = {949--974}, title = {{Follow the Leader with Dropout Perturbations}} } @inproceedings{yan2009fast, author = {Yan, Donghui and Huang, Ling and Jordan, Michael}, booktitle = {Proceedings of the 15th ACM SIGKDD Conference on Knowledge Discovery and Data Mining}, title = {{Fast Approximate Spectral Clustering}}, year = {2009} } @inproceedings{ng2004inverted, author = {Ng, Andrew and Coates, Adam and Diel, Mark and Ganapathi, Varun and Schulte, Jamie and Tse, Ben and Berger, Eric and Liang, Eric}, booktitle = {International Symposium on Experimental Robotics}, title = {{Inverted Autonomous Helicopter Flight via 
Reinforcement Learning}}, year = {2004} } @article{goldberg2009multi-manifold, author = {Goldberg, Andrew B and Zhu, Xiaojin and Singh, Aarti and Xu, Zhiting and Nowak, Robert}, journal = {Journal of Machine Learning Research}, pages = {169--176}, title = {{Multi-Manifold Semi-Supervised Learning}}, volume = {5}, year = {2009} } @inproceedings{gabillon2020derivative-free, abstract = {In this paper, we formalise order-robust optimisation as an instance of online learning minimising simple regret, and propose Vroom, a zero'th order optimisation algorithm capable of achieving vanishing regret in non-stationary environments, while recovering favorable rates under stochastic reward-generating processes. Our results are the first to target simple regret definitions in adversarial scenarios unveiling a challenge that has been rarely considered in prior work.}, author = {Gabillon, Victor and Tutunov, Rasul and Ammar, Haitham Bou and Valko, Michal}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Derivative-free {\&} order-robust optimisation}}, year = {2020} } @article{spectralbandits, author = {Authors}, journal = {Supplementary Material}, title = {{Spectral Bandits for Smooth Graph Functions}} } @inproceedings{meek2003uai, booktitle = {UAI}, editor = {Meek, Christopher and Kj{\ae}rulff, Uffe}, isbn = {0-127-05664-5}, publisher = {Morgan Kaufmann}, title = {{UAI '03, Proceedings of the 19th Conference in Uncertainty in Artificial Intelligence, August 7-10 2003, Acapulco, Mexico}}, year = {2003} } @inproceedings{goldberger2004neighbourhood, author = {Goldberger, Jacob and Roweis, Sam T and Hinton, Geoffrey E and Salakhutdinov, Ruslan}, booktitle = {NIPS}, title = {{Neighbourhood Components Analysis.}}, url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.108.7841}, year = {2004} } @techreport{Warmuth97continuousand, author = {Warmuth, M K and Jagota, A K}, institution = {In Fifth International Symposium on Artificial 
Intelligence and Mathematics}, title = {{Continuous and discrete-time nonlinear gradient descent: Relative loss bounds and convergence}}, year = {1997} } @inproceedings{kaufmann2020adaptive, abstract = {Reward-free exploration is a reinforcement learning setting recently studied by Jin et al., who address it by running several algorithms with regret guarantees in parallel. In our work, we instead propose a more adaptive approach for reward-free exploration which directly reduces upper bounds on the maximum MDP estimation error. We show that, interestingly, our reward-free UCRL algorithm can be seen as a variant of an algorithm of Fiechter from 1994, originally proposed for a different objective that we call best-policy identification. We prove that RF-UCRL needs O((SAH4/$\epsilon$2)ln(1/$\delta$)) episodes to output, with probability 1−$\delta$, an $\epsilon$-approximation of the optimal policy for any reward function. We empirically compare it to oracle strategies using a generative model.}, author = {Kaufmann, Emilie and M{\'{e}}nard, Pierre and Domingues, Omar Darwiche and Jonsson, Anders and Leurent, Edouard and Valko, Michal}, booktitle = {Algorithmic Learning Theory}, title = {{Adaptive reward-free exploration}}, url = {https://arxiv.org/pdf/2006.06294.pdf}, year = {2021} } @article{hohzaki2016search, author = {Hohzaki, Ryusuke}, journal = {Journal of the Operations Research Society of Japan}, number = {1}, pages = {1--34}, title = {{Search games: Literature and survey}}, url = {http://www.orsj.or.jp/{~}archive/pdf/e{\_}mag/Vol.59{\_}01{\_}001.pdf}, volume = {59}, year = {2016} } @article{schweitzer1985generalized, author = {Schweitzer, Paul and Seidmann, Abraham}, journal = {Journal of Mathematical Analysis and Applications}, pages = {568--582}, title = {{Generalized Polynomial Approximations in {\{}Markovian{\}} Decision Processes}}, volume = {110}, year = {1985} } @techreport{meuleau2001exploration, author = {Meuleau, Nicolas and Peshkin, Leonid and Kim, 
Kee-Eung}, institution = {Massachusetts Institute of Technology}, number = {1713 (AI Memo 2001-003)}, title = {{Exploration in Gradient-Based Reinforcement Learning}}, year = {2001} } @article{fujishige2006minimum, author = {Fujishige, Satoru and Isotani, S}, journal = {Pacific Journal of Optimization}, pages = {3--17}, title = {{A Submodular Function Minimization Algorithm Based on the Minimum-Norm Base}}, volume = {7}, year = {2011} } @inproceedings{yang2007bayesian, annote = {comps{\_}distancX}, author = {Yang, Liu and Jin, Rong and Sukthankar, Rahul}, booktitle = {Proceedings of Uncertainty in AI}, title = {{Bayesian Active Distance Metric Learning.}}, url = {http://www.cs.cmu.edu/{~}rahuls/pub/uai2007-rahuls.pdf}, year = {2007} } @inproceedings{blelloch2010hierarchical, author = {Blelloch, Guy E and Koutis, Ioannis and Miller, Gary L and Tangwongsan, Kanat}, booktitle = {High Performance Computing, Networking, Storage and Analysis (SC), 2010 International Conference for}, organization = {IEEE}, pages = {1--12}, title = {{Hierarchical diagonal blocking and precision reduction applied to combinatorial multigrid}}, year = {2010} } @inproceedings{bao2016online, abstract = {Social networks have been popular platforms for information propagation. An important use case is viral marketing: given a promotion budget, an advertiser can choose some influential users as the seed set and provide them free or discounted sample products; in this way, the advertiser hopes to increase the popularity of the product in the users' friend circles by the world-of-mouth effect, and thus maximizes the number of users that information of the production can reach. There has been a body of literature studying the influence maximization problem. Nevertheless, the existing studies mostly investigate the problem on a one-off basis, assuming fixed known influence probabilities among users, or the knowledge of the exact social network topology. 
In practice, the social network topology and the influence probabilities are typically unknown to the advertiser, which can be varying over time, i.e., in cases of newly established, strengthened or weakened social ties. In this paper, we focus on a dynamic non-stationary social network and design a randomized algorithm, RSB, based on multi-armed bandit optimization, to maximize influence propagation over time. The algorithm produces a sequence of online decisions and calibrates its explore-exploit strategy utilizing outcomes of previous decisions. It is rigorously proven to achieve an upper-bounded regret in reward and applicable to large-scale social networks. Practical effectiveness of the algorithm is evaluated using both synthetic and real-world datasets, which demonstrates that our algorithm outperforms previous stationary methods under non-stationary conditions.}, author = {Bao, Yixin and Wang, Xiaoke and Wang, Zhi and Wu, Chuan and Lau, Francis C. M.}, booktitle = {International Symposium on Quality of Service}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Bao et al. - 2016 - Online influence maximization in non-stationary social networks.pdf:pdf}, month = {apr}, title = {{Online influence maximization in non-stationary social networks}}, year = {2016} } @inproceedings{GW98, author = {Gentile, C and Warmuth, M}, booktitle = {Advances in Neural Information Processing Systems (NIPS)}, pages = {225--231}, title = {{Linear hinge loss and average margin}}, year = {1998} } @article{Lin2015, abstract = {In decision-theoretic troubleshooting, we are given a Bayesian network model of a malfunctioning device and our task is to find a repair strategy with minimal expected cost. The troubleshooting problem has received considerable attention over the past two decades. We show that several troubleshooting scenarios proposed in the literature are equivalent to well-studied machine scheduling problems. 
This immediately yields new complexity-theoretic and algorithmic results for troubleshooting. We also apply scheduling results to multi-agent troubleshooting. Further, we examine the so-called call service action which is often used in troubleshooting but has no natural counterpart in machine scheduling. We show that adding the call service action to basic troubleshooting models does not make the problem intractable.}, author = {L{\'{i}}n, V{\'{a}}clav}, doi = {10.1016/j.ijar.2014.08.004}, issn = {0888613X}, journal = {International Journal of Approximate Reasoning}, keywords = {Algorithms,Computational complexity,Decision-theoretic troubleshooting,Single machine scheduling with weighted flowtime}, number = {PA}, pages = {87--107}, title = {{Scheduling results applicable to decision-theoretic troubleshooting}}, volume = {56}, year = {2015} } @article{bellman1963polynomial, author = {Bellman, Richard and Kalaba, Robert and Kotkin, Bella}, journal = {Mathematics of Computation}, number = {82}, pages = {155--161}, title = {{Polynomial Approximation -- A New Computational Technique in Dynamic Programming: Allocation Processes}}, volume = {17}, year = {1963} } @phdthesis{vanroy1998planning, author = {{Van Roy}, Benjamin}, school = {Massachusetts Institute of Technology}, title = {{Planning Under Uncertainty in Complex Structured Environments}}, year = {1998} } @article{harisson1978, author = {Harrison, D and Rubinfeld, D L}, journal = {J. Environ. Economics {\&} Management}, pages = {81--102}, title = {{Hedonic prices and the demand for clean air}}, volume = {5}, year = {1978} } @inproceedings{bifet2007learning, abstract = {We present a new approach for dealing with distribution change and concept drift when learning from data sequences that may vary with time. We use sliding windows whose size, instead of being fixed a priori, is recomputed online according to the rate of change observed from the data in the window itself. 
This delivers the user or programmer from having to guess a time-scale for change. Contrary to many related works, we provide rigorous guarantees of performance, as bounds on the rates of false positives and false negatives. Using ideas from data stream algorithmics, we develop a time- and memory-efficient version of this algorithm, called ADWIN2. We show how to combine ADWIN2 with the Naive Bayes (NB) predictor, in two ways: one, using it to monitor the error rate of the current model and declare when revision is necessary and, two, putting it inside the NB predictor to maintain up-to-date estimations of conditional probabilities in the data. We test our approach using synthetic and real data streams.}, author = {Bifet, Albert and Gavald{\`{a}}, Ricard}, booktitle = {International Conference on Data Mining}, title = {{Learning from time-changing data with adaptive windowing}}, url = {https://pdfs.semanticscholar.org/fea2/14dd4c4050d96e00fd4bf45b564274efef04.pdf}, year = {2007} } @inproceedings{calandriello2018improved, abstract = {The representation and learning benefits of methods based on graph Laplacians, such as Laplacian smoothing or harmonic function solution for semi-supervised learning (SSL), are empirically and theoretically well supported. Nonetheless, the exact versions of these methods scale poorly with the number of nodes n of the graph. In this paper, we combine a spectral sparsification routine with Laplacian learning. Given a graph G as input, our algorithm computes a sparsifier in a distributed way in O(nlog3(n)) time, O(mlog3(n)) work and O(nlog(n)) memory, using only log(n) rounds of communication. Furthermore, motivated by the regularization often employed in learning algorithms, we show that constructing sparsifiers that preserve the spectrum of the Laplacian only up to the regularization level may drastically reduce the size of the final graph. 
By constructing a spectrally-similar graph, we are able to bound the error induced by the sparsification for a variety of downstream tasks (e.g., SSL). We empirically validate the theoretical guarantees on Amazon co-purchase graph and compare to the state-of-the-art heuristics.}, author = {Calandriello, Daniele and Koutis, Ioannis and Lazaric, Alessandro and Valko, Michal}, booktitle = {International Conference on Machine Learning}, title = {{Improved large-scale graph learning through ridge spectral sparsification}}, year = {2018} } @article{rosset, author = {Rosset, S and Zhu, J}, journal = {Ann. Statist.}, number = {3}, pages = {1012--1030}, title = {{Piecewise linear regularized solution paths}}, volume = {35}, year = {2007} } @inproceedings{neu10o-mdp, author = {Neu, G and Gy{\"{o}}rgy, A and Szepesv{\'{a}}ri, Csaba and Antos, A}, pages = {1804--1812}, title = {{Online {\{}M{\}}arkov Decision Processes under Bandit Feedback}} } @article{ripeanu2002mapping, abstract = {We studied the topology and protocols of the public Gnutella network. Its substantial user base and open architecture make it a good large-scale, if uncontrolled, testbed. We captured the network's topology, generated traffic, and dynamic behavior to determine its connectivity structure and how well (if at all) Gnutella's overlay network topology maps to the physical Internet infrastructure. Our analysis of the network allowed us to evaluate costs and benefits of the peer-to-peer (P2P) approach and to investigate possible improvements that would allow better scaling and increased reliability in Gnutella and similar networks. 
A mismatch between Gnutella's overlay network topology and the Internet infrastructure has critical performance implications}, author = {Ripeanu, Matei and Iamnitchi, Adriana and Foster, Ian}, doi = {10.1109/4236.978369}, issn = {10897801}, journal = {IEEE Internet Computing}, number = {1}, pages = {50--57}, title = {{Mapping the Gnutella network}}, volume = {6}, year = {2002} } @article{dekel13switching, author = {Dekel, Ofer and Ding, Jian and Koren, Tomer and Peres, Yuval}, journal = {CoRR}, title = {{Bandits with Switching Costs: {\{}T{\^{}}{\{}2/3{\}}{\}} Regret}}, volume = {abs/1310.2}, year = {2013} } @techreport{cohen_online_2016, abstract = {Finding a small spectral approximation for a tall {\$}n \backslashtimes d{\$} matrix {\$}A{\$} is a fundamental numerical primitive. For a number of reasons, one often seeks an approximation whose rows are sampled from those of {\$}A{\$}. Row sampling improves interpretability, saves space when {\$}A{\$} is sparse, and preserves row structure, which is especially important, for example, when {\$}A{\$} represents a graph. However, correctly sampling rows from {\$}A{\$} can be costly when the matrix is large and cannot be stored and processed in memory. Hence, a number of recent publications focus on row sampling in the streaming setting, using little more space than what is required to store the outputted approximation [KL13, KLM+14]. Inspired by a growing body of work on online algorithms for machine learning and data analysis, we extend this work to a more restrictive online setting: we read rows of {\$}A{\$} one by one and immediately decide whether each row should be kept in the spectral approximation or discarded, without ever retracting these decisions. 
We present an extremely simple algorithm that approximates {\$}A{\$} up to multiplicative error {\$}\backslashepsilon{\$} and additive error {\$}\backslashdelta{\$} using {\$}O(d \backslashlog d \backslashlog(\backslashepsilon||A||{\_}2/\backslashdelta)/\backslashepsilon{\^{}}2){\$} online samples, with memory overhead proportional to the cost of storing the spectral approximation. We also present an algorithm that uses {\$}O(d{\^{}}2{\$}) memory but only requires {\$}O(d\backslashlog(\backslashepsilon||A||{\_}2/\backslashdelta)/\backslashepsilon{\^{}}2){\$} samples, which we show is optimal. Our methods are clean and intuitive, allow for lower memory usage than prior work, and expose new theoretical properties of leverage score based matrix approximation.}, annote = {arXiv: 1604.05448}, archivePrefix = {arXiv}, arxivId = {1604.05448}, author = {Cohen, Michael B and Musco, Cameron and Pachocki, Jakub}, eprint = {1604.05448}, title = {{Online row sampling}}, url = {http://arxiv.org/abs/1604.05448}, year = {2016} } @article{tibshirani2005sparsity, author = {Tibshirani, R and Saunders, M and Rosset, S and Zhu, J and Knight, K}, journal = {Journal of the Royal Statistical Society. Series B, Statistical Methodology}, pages = {91--108}, title = {{Sparsity and smoothness via the fused {\{}L{\}}asso}}, year = {2005} } @inproceedings{poupart2002piecewise, author = {Poupart, Pascal and Boutilier, Craig and Patrascu, Relu and Schuurmans, Dale and Guestrin, Carlos}, booktitle = {Proceedings of the 18th National Conference on Artificial Intelligence}, pages = {292--299}, title = {{Greedy Linear Value-Approximation for Factored {\{}Markov{\}} Decision Processes}}, year = {2002} } @inproceedings{ross2013normalized, abstract = {We introduce online learning algorithms which are independent of feature scales, proving regret bounds dependent on the ratio of scales existent in the data rather than the absolute scale. 
This has several useful effects: there is no need to pre-normalize data, the test-time and test-space complexity are reduced, and the algorithms are more robust.}, archivePrefix = {arXiv}, arxivId = {1305.6646}, author = {Ross, Stephane and Mineiro, Paul and Langford, John}, booktitle = {Uncertainty in Artificial Intelligence}, eprint = {1305.6646}, month = {may}, title = {{Normalized online learning}}, url = {http://arxiv.org/abs/1305.6646}, year = {2013} } @article{heckerman1995learning, author = {Heckerman, D and Geiger, D and Chickering, D M}, journal = {Machine Learning}, number = {3}, pages = {197--243}, publisher = {Springer}, title = {{Learning {\{}B{\}}ayesian networks: The combination of knowledge and statistical data}}, volume = {20}, year = {1995} } @inproceedings{hoi2006learning, address = {Washington, DC, USA}, annote = {comps{\_}distancX}, author = {Hoi, Steven C H and Liu, Wei and Lyu, Michael R and Ma, Wei-Ying}, booktitle = {CVPR '06: Proceedings of the 2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition}, doi = {http://dx.doi.org/10.1109/CVPR.2006.167}, isbn = {0-7695-2597-0}, pages = {2072--2078}, publisher = {IEEE Computer Society}, title = {{Learning Distance Metrics with Contextual Constraints for Image Retrieval}}, year = {2006} } @inproceedings{bilenko2004integrating, address = {New York, NY, USA}, annote = {comps{\_}distancX}, author = {Bilenko, Mikhail and Basu, Sugato and Mooney, Raymond J}, booktitle = {ICML '04: Proceedings of the twenty-first international conference on Machine learning}, doi = {http://doi.acm.org/10.1145/1015330.1015360}, isbn = {1-58113-828-5}, pages = {11}, publisher = {ACM}, title = {{Integrating constraints and metric learning in semi-supervised clustering}}, year = {2004} } @inproceedings{calandriello2018statistical, author = {Calandriello, Daniele and Rosasco, Lorenzo}, booktitle = {Neural Information Processing Systems}, title = {{Statistical and computational trade-offs in kernel 
k-means}}, url = {https://papers.nips.cc/paper/8147-statistical-and-computational-trade-offs-in-kernel-k-means.pdf}, year = {2018} } @article{foster11neigh, author = {Foster, Dean P and Rakhlin, Alexander}, journal = {CoRR}, title = {{No Internal Regret via Neighborhood Watch}}, volume = {abs/1108.6}, year = {2011} } @inproceedings{bartlett05exponentiated, author = {Bartlett, Peter L and Collins, Michael and Taskar, Ben and McAllester, David}, pages = {113--120}, title = {{Exponentiated Gradient Algorithms for Large-margin Structured Classification}} } @article{ghahramani2000variational, address = {Cambridge, MA, USA}, annote = {c{\_}omps{\_}models}, author = {Ghahramani, Zoubin and Hinton, Geoffrey E}, doi = {http://dx.doi.org/10.1162/089976600300015619}, issn = {0899-7667}, journal = {Neural Comput.}, number = {4}, pages = {831--864}, publisher = {MIT Press}, title = {{Variational Learning for Switching State-Space Models}}, volume = {12}, year = {2000} } @incollection{ORVR13, author = {Osband, Ian and Russo, Dan and {Van Roy}, Benjamin}, booktitle = {Advances in Neural Information Processing Systems 26}, editor = {Burges, C J C and Bottou, L and Welling, M and Ghahramani, Z and Weinberger, K Q}, pages = {3003--3011}, title = {{({\{}M{\}}ore) Efficient Reinforcement Learning via Posterior Sampling}}, year = {2013} } @misc{berkeley-parser, annote = {http://nlp.cs.berkeley.edu/Main.html{\#}Parsing}, author = {Petrov, Slav}, publisher = {University of California}, title = {{Berkeley Parser}}, year = {2007} } @inproceedings{akoglu2010oddball:, author = {Akoglu, Leman and McGlohon, Mary and Faloutsos, Christos}, booktitle = {Advances in Knowledge Discovery and Data Mining, 14th Pacific-Asia Conference, PAKDD 2010, Hyderabad, India, June 21-24, 2010. Proceedings. 
Part II}, pages = {410--421}, title = {{Oddball: Spotting Anomalies in Weighted Graphs}}, year = {2010} } @inproceedings{dearden1999model, author = {Dearden, Richard and Friedman, Nir and Andre, David}, booktitle = {Proceedings of the 15th Conference on Uncertainty in Artificial Intelligence}, pages = {150--159}, title = {{Model Based {\{}Bayesian{\}} Exploration}}, year = {1999} } @article{guestrin2003efficient, author = {Guestrin, Carlos and Koller, Daphne and Parr, Ronald and Venkataraman, Shobha}, journal = {Journal of Artificial Intelligence Research}, pages = {399--468}, title = {{Efficient Solution Algorithms for Factored {\{}MDPs{\}}}}, volume = {19}, year = {2003} } @inproceedings{aggarwal2001outlier, address = {New York, NY, USA}, annote = {comps{\_}ano}, author = {Aggarwal, Charu C and Yu, Philip S}, booktitle = {SIGMOD '01: Proceedings of the 2001 ACM SIGMOD international conference on Management of data}, doi = {http://doi.acm.org/10.1145/375663.375668}, isbn = {1-58113-332-4}, pages = {37--46}, publisher = {ACM}, title = {{Outlier detection for high dimensional data}}, year = {2001} } @article{spielman_graph_2011, author = {Spielman, Daniel A and Srivastava, Nikhil}, journal = {Journal on Computing}, number = {6}, pages = {1913--1926}, title = {{Graph sparsification by effective resistances}}, volume = {40}, year = {2011} } @article{warkentin2004heparin-induced, abstract = {This chapter about the recognition, treatment, and prevention of heparin-induced thrombocytopenia (HIT) is part of the Seventh ACCP Conference on Antithrombotic and Thrombolytic Therapy: Evidence Based Guidelines. Grade 1 recommendations are strong and indicate that the benefits do, or do not, outweigh risks, burden, and costs. Grade 2 suggests that individual patients' values may lead to different choices (for a full understanding of the grading, see Guyatt et al, CHEST 2004; 126:179S-187S). 
Among the key recommendations in this chapter are the following: For patients in whom the risk of HIT is considered to be {\textgreater} 0.1{\%}, we recommend platelet count monitoring (Grade 1C). For patients who are receiving therapeutic-dose unfractionated heparin (UFH), we suggest at least every-other-day platelet count monitoring until day 14, or until UFH is stopped, whichever occurs first (Grade 2C). For patients who are receiving postoperative antithrombotic prophylaxis with UFH (HIT risk {\textgreater} 1{\%}), we suggest at least every-other-day platelet count monitoring between postoperative days 4 to 14 (or until UFH is stopped, whichever occurs first) [Grade 2C]. For medical/obstetric patients who are receiving prophylactic-dose UFH, postoperative patients receiving prophylactic-dose low molecular weight heparin (LMWH), postoperative patients receiving intravascular catheter UFH "flushes," or medical/obstetrical patients receiving LMWH after first receiving UFH (risk, 0.1 to 1{\%}), we suggest platelet count monitoring every 2 days or 3 days from day 4 to day 14, or until heparin is stopped, whichever occurs first (Grade 2C). For medical/obstetrical patients who are only receiving LMWH, or medical patients who are receiving only intravascular catheter UFH flushes (risk {\textless} 0.1{\%}), we suggest clinicians do not use routine platelet count monitoring (Grade 2C). For patients with strongly suspected (or confirmed) HIT, whether or not complicated by thrombosis, we recommend use of an alternative anticoagulant, such as lepirudin (Grade 1C+), argatroban (Grade 1C), bivalirudin (Grade 2C), or danaparoid (Grade 1B). 
For patients with strongly suspected (or confirmed) HIT, we recommend routine ultrasonography of the lower-limb veins for investigation of deep venous thrombosis (Grade 1C); against the use of vitamin K antagonist (VKA) [coumarin] therapy until after the platelet count has substantially recovered; that the VKA antagonist be administered only during overlapping alternative anticoagulation (minimum 5-day overlap); and begun with low, maintenance doses (all Grade 2C). For patients receiving VKAs at the time of diagnosis of HIT, we recommend use of vitamin K (Grade 2C) [corrected] For patients with a history of HIT who are HIT antibody negative and require cardiac surgery, we recommend use of UFH (Grade 1C).}, author = {Warkentin, Theodore E and Greinacher, Andreas}, doi = {10.1378/chest.126.3_suppl.311S}, institution = {Hamilton Regional Laboratory Medicine Program, Hamilton Health Sciences, General Site, 237 Barton St E, Hamilton, Ontario L8L 2X2, Canada. twarken@mcmaster.ca}, journal = {Chest}, keywords = {Dose-Response Relationship,Drug; Drug Monitoring; Evidence-Based Medicine; F,Low-Molecular-Weight; Humans; Platelet Count; Pre}, month = {sep}, number = {3 Suppl}, pages = {311S--337S}, pmid = {15383477}, title = {{Heparin-induced thrombocytopenia: recognition, treatment, and prevention: the Seventh ACCP Conference on Antithrombotic and Thrombolytic Therapy.}}, url = {http://dx.doi.org/10.1378/chest.126.3{\_}suppl.311S}, volume = {126}, year = {2004} } @inproceedings{dai2018sbeed, author = {Dai, Bo and Shaw, Albert and Li, Lihong and Xiao, Lin and He, Niao and Liu, Zhen and Chen, Jianshu and Song, Le}, booktitle = {International Conference on Machine Learning}, title = {{SBEED: Convergent reinforcement learning with nonlinear function approximation}}, url = {https://arxiv.org/pdf/1712.10285.pdf}, year = {2018} } @inproceedings{viola2001robust, author = {Viola, Paul A and Jones, Michael J}, booktitle = {ICCV}, pages = {747}, title = {{Robust Real-Time Face 
Detection}}, year = {2001} } @article{zhou2003probabilistic, author = {Zhou, Shaohua and Kruger, Volker and Chellappa, Rama}, journal = {Computer Vision and Image Understanding}, number = {1-2}, pages = {214--245}, title = {{Probabilistic Recognition of Human Faces from Video}}, volume = {91}, year = {2003} } @inproceedings{erraqabi2016pliable, abstract = {Rejection sampling is a known technique for sampling from difficult distributions. However, its use is limited due to a high rejection rate. Common adaptive rejection sampling methods either work for very specific distributions or without performance guarantees. In this paper, we present pliable rejection sampling (PRS), a new approach to rejection sampling, where we adapt the sampling envelope using a kernel estimator. Since our method builds on rejection sampling, the samples obtained are i.i.d. and exactly distributed according to f. Another benefit of PRS is that it comes with a guarantee on the number of accepted samples.}, author = {Erraqabi, Akram and Valko, Michal and Carpentier, Alexandra and Maillard, Odalric-Ambrym}, booktitle = {International Conference on Machine Learning}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Erraqabi et al. - 2016 - Pliable rejection sampling.pdf:pdf}, title = {{Pliable rejection sampling}}, year = {2016} } @article{astrom1965optimal, author = {Astrom, Karl}, journal = {Journal of Mathematical Analysis and Applications}, number = {1}, pages = {174--205}, title = {{Optimal Control of {\{}Markov{\}} Processes with Incomplete State Information}}, volume = {10}, year = {1965} } @article{Ratliff2007, abstract = {Decision making in robotics often involves computing an optimal action for a given state, where the space of actions under consideration can potentially be large and state dependent. 
Many of these decision making problems can be naturally formalized in the multiclass classification framework, where actions are regarded as labels for states. One powerful approach to multiclass classification relies on learning a function that scores each action; action selection is done by returning the action with maximum score. In this work, we focus on two imitation learning problems in particular that arise in robotics. The first problem is footstep prediction for quadruped locomotion, in which the system predicts next footstep locations greedily given the current four-foot configuration of the robot over a terrain height map. The second problem is grasp prediction, in which the system must predict good grasps of complex free-form objects given an approach direction for a robotic hand. We present experimental results of applying a recently developed functional gradient technique for optimizing a structured margin formulation of the corresponding large non-linear multiclass classification problems.}, author = {Ratliff, Nathan and Bagnell, J Andrew and Srinivasa, Siddhartha S}, doi = {10.1109/ICHR.2007.4813899}, institution = {Robotics Institute}, isbn = {9781424418619}, journal = {2007 7th IEEERAS International Conference on Humanoid Robots}, pages = {392--397}, publisher = {Ieee}, title = {{Imitation learning for locomotion and manipulation}}, url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4813899}, year = {2007} } @inproceedings{auer1995gambling, author = {Auer, Peter and Cesa-Bianchi, Nicol{\`{o}} and Freund, Yoav and Schapire, Robert}, booktitle = {Proceedings of the 36th Annual Symposium on Foundations of Computer Science}, pages = {322--331}, title = {{Gambling in a Rigged Casino: The Adversarial Multi-Armed Bandit problem}}, year = {1995} } @article{candes2006near-optimal, author = {Cand{\`{e}}s, Emmanuel J. 
and Tao, Terence}, journal = {IEEE Transactions on Information Theory}, number = {12}, pages = {5406--5425}, title = {{Near-optimal signal recovery from random projections: universal encoding strategies?}}, volume = {52}, year = {2006} } @article{bala2001conformism, abstract = {When there are competing technologies or products with unknown payoffs an important question is which technology will prevail and whether technologies with different payoffs can coexist in the long run. In this paper, we use a social learning model with local interactions to study this question. We show that the adoption of technologies as well as the prospects of conformism/diversity depend crucially on the nature of interaction between individuals and the heterogeneity of preferences in a society.}, author = {Bala, Venkatesh and Goyal, Sanjeev}, journal = {Economic Theory}, pages = {101--120}, title = {{Conformism and diversity under social learning}}, volume = {17}, year = {2001} } @inproceedings{mahadevan2006value, author = {Mahadevan, Sridhar and Maggioni, Mauro}, booktitle = {Advances in Neural Information Processing Systems 18}, pages = {843--850}, title = {{Value Function Approximation with Diffusion Wavelets and {\{}Laplacian{\}} Eigenfunctions}}, year = {2006} } @inproceedings{das2008algorithms, author = {Das, A and Kempe, D}, booktitle = {Proceedings of the 40th annual ACM symposium on Theory of computing}, organization = {ACM}, title = {{Algorithms for subset selection in linear regression}}, year = {2008} } @techreport{musco2016provably, abstract = {We give the first algorithms for kernel matrix approximation that run in time linear in the number of data points and output an approximation which gives provable guarantees when used in many downstream learning tasks, including kernel principal component analysis, kernel {\$}k{\$}-means clustering, kernel ridge regression, and kernel canonical correlation analysis. 
Our methods require just {\$}\backslashtilde O(n\backslashcdot k){\$} kernel evaluations and {\$}\backslashtilde O(n \backslashcdot k{\^{}}2){\$} additional runtime, where {\$}n{\$} is the number of training data points and {\$}k{\$} is a target rank or effective dimensionality parameter. These runtimes are significantly sub-linear in the size of the {\$}n \backslashtimes n{\$} kernel matrix and apply to any kernel matrix, without assuming regularity or incoherence conditions. The algorithms are based on a ridge leverage score Nystr$\backslash$"om sampling scheme (RLS-Nystr$\backslash$"om) which was recently shown to yield strong kernel approximations, but which had no efficient implementation. We address this shortcoming by introducing fast recursive sampling methods for RLS-Nystr$\backslash$"om, while at the same time proving extended approximation guarantees for this promising new method.}, archivePrefix = {arXiv}, arxivId = {1605.07583}, author = {Musco, Cameron and Musco, Christopher}, eprint = {1605.07583}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Musco, Musco - 2016 - Provably useful kernel matrix approximation in linear time.pdf:pdf}, month = {may}, title = {{Provably useful kernel matrix approximation in linear time}}, url = {http://arxiv.org/abs/1605.07583}, year = {2016} } @inproceedings{guestrin2002context, author = {Guestrin, Carlos and Venkataraman, Shobha and Koller, Daphne}, booktitle = {Proceedings of the 18th National Conference on Artificial Intelligence}, pages = {253--259}, title = {{Context Specific Multiagent Coordination and Planning with Factored {\{}MDPs{\}}}}, year = {2002} } @phdthesis{collins99thesis, author = {Collins, Michael}, school = {University of Pennsylvania}, title = {{Head-Driven Statistical Models for Natural Language Processing}}, year = {1999} } @article{fisher1928limiting, abstract = {SummaryThe limiting distribution, when n is large, of the greatest or least of a sample of n, must 
satisfy a functional equation which limits its form to one of two main types. Of these one has, apart from size and position, a single parameter h, while the other is the limit to which it tends when h tends to zero.The appropriate limiting distribution in any case may be found from the manner in which the probability of exceeding any value x tends to zero as x is increased. For the normal distribution the limiting distribution has h = 0.From the normal distribution the limiting distribution is approached with extreme slowness; the final series of forms passed through as the ultimate form is approached may be represented by the series of limiting distributions in which h tends to zero in a definite manner as n increases to infinity.Numerical values are given for the comparison of the actual with the penultimate distributions for samples of 60 to 1000, and of the penultimate with the ultimate distributions for larger samples.}, author = {Fisher, Ronald Aylmer and Tippett, Leonard Henry Caleb}, doi = {10.1017/S0305004100015681}, isbn = {1469-8064}, issn = {0305-0041}, journal = {Mathematical Proceedings of the Cambridge Philosophical Society}, pages = {180}, title = {{Limiting forms of the frequency distribution of the largest or smallest member of a sample}}, volume = {24}, year = {1928} } @book{munkres1984elements, author = {Munkres, J R}, publisher = {Addison-Wesley Reading, MA}, title = {{Elements of algebraic topology}}, volume = {2}, year = {1984} } @inproceedings{hauskrecht2006approximate, author = {Hauskrecht, Milos and Kveton, Branislav}, booktitle = {Proceedings of the 9th International Symposium on Artificial Intelligence and Mathematics}, pages = {114--120}, title = {{Approximate Linear Programming for Solving Hybrid Factored {\{}MDPs{\}}}}, year = {2006} } @inproceedings{chang2004locally, address = {New York, NY, USA}, annote = {comps{\_}distancX}, author = {Chang, Hong and Yeung, Dit-Yan}, booktitle = {ICML '04: Proceedings of the twenty-first 
international conference on Machine learning}, doi = {http://doi.acm.org/10.1145/1015330.1015391}, isbn = {1-58113-828-5}, pages = {20}, publisher = {ACM}, title = {{Locally linear metric adaptation for semi-supervised clustering}}, year = {2004} } @inproceedings{farajtabar2016multistage, abstract = {We consider the problem of how to optimize multi-stage campaigning over social networks. The dynamic programming framework is employed to balance the high present reward and large penalty on low future outcome in the presence of extensive uncertainties. In particular, we establish theoretical foundations of optimal campaigning over social networks where the user activities are modeled as a multivariate Hawkes process, and we derive a time dependent linear relation between the intensity of exogenous events and several commonly used objective functions of campaigning. We further develop a convex dynamic programming framework for determining the optimal intervention policy that prescribes the required level of external drive at each stage for the desired campaigning result. Experiments on both synthetic data and the real-world MemeTracker dataset show that our algorithm can steer the user activities for optimal campaigning much more accurately than baselines.}, author = {Farajtabar, Mehrdad and Ye, Xiaojing and Harati, Sahar and Song, Le and Zha, Hongyuan}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Farajtabar et al. 
- 2016 - Multistage campaigning in social networks.pdf:pdf}, title = {{Multistage campaigning in social networks}}, year = {2016} } @incollection{Bal97, author = {Ball, K}, booktitle = {Flavors of Geometry}, editor = {Levy, S}, pages = {1--58}, publisher = {Cambridge University Press}, title = {{An Elementary Introduction to Modern Convex Geometry}}, year = {1997} } @inproceedings{valko2010online, author = {Valko, Michal and Kveton, Branislav and Huang, Ling and Ting, Daniel}, booktitle = {Uncertainty in Artificial Intelligence}, keywords = {misovalko}, mendeley-tags = {misovalko}, title = {{Online semi-supervised learning on quantized graphs}}, url = {http://researchers.lille.inria.fr/{~}valko/hp/serve.php?what=publications/valko2010online.pdf}, year = {2010} } @inproceedings{saha2020improved, abstract = {In this paper, we consider the problem of sleeping bandits with stochastic action sets and adversarial rewards. In this setting, in contrast to most work in bandits, the actions may not be available at all times. For instance, some products might be out of stock in item recommendation. The best existing efficient (i.e., polynomial-time) algorithms for this problem only guarantee a O(T**2/3) upper-bound on the regret. Yet, inefficient algorithms based on EXP4 can achieve O(sqrt(T)). In this paper, we provide a new computationally efficient algorithm inspired by EXP3 satisfying a regret of order O(sqrt(T)) when the availabilities of each action i in A are independent. We then study the most general version of the problem where at each round available sets are generated from some unknown arbitrary distribution (i.e., without the independence assumption) and propose an efficient algorithm with O(sqrt(2**KT)) regret guarantee. 
Our theoretical results are corroborated with experimental evaluations.}, archivePrefix = {arXiv}, arxivId = {2004.06248}, author = {Saha, Aadirupa and Gaillard, Pierre and Valko, Michal}, booktitle = {International Conference on Machine Learning}, eprint = {2004.06248}, title = {{Improved sleeping bandits with stochastic action sets and adversarial rewards}}, url = {http://arxiv.org/abs/2004.06248}, year = {2020} } @inproceedings{valizadegan2007kernel, author = {Valizadegan, Hamed and Tan, Pang-Ning}, booktitle = {Proceedings of the Seventh SIAM International Conference on Data Mining, April 26-28, 2007, Minneapolis, Minnesota, USA}, title = {{Kernel Based Detection of Mislabeled Training Examples}}, year = {2007} } @incollection{suehiro12submodular, author = {Suehiro, Daiki and Hatano, Kohei and Kijima, Shuji and Takimoto, Eiji and Nagano, Kiyohito}, booktitle = {Algorithmic Learning Theory}, isbn = {978-3-642-34105-2}, pages = {260--274}, publisher = {Springer Berlin Heidelberg}, series = {Lecture Notes in Computer Science}, title = {{Online Prediction under Submodular Constraints}}, volume = {7568}, year = {2012} } @incollection{Combettes2010, author = {Combettes, P L and Pesquet, J.-C.}, booktitle = {Fixed-Point Algorithms for Inverse Problems in Science and Engineering}, publisher = {Springer}, title = {{Proximal splitting methods in signal processing}}, year = {2010} } @article{variant2011shamir, author = {Shamir, Ohad}, journal = {CoRR}, title = {{A Variant of Azuma's Inequality for Martingales with Subgaussian Tails}}, volume = {abs/1110.2}, year = {2011} } @inproceedings{lafferty2001conditional, address = {San Francisco, CA, USA}, annote = {comps{\_}models}, author = {Lafferty, John D and McCallum, Andrew and Pereira, Fernando C N}, booktitle = {ICML '01: Proceedings of the Eighteenth International Conference on Machine Learning}, isbn = {1-55860-778-1}, pages = {282--289}, publisher = {Morgan Kaufmann Publishers Inc.}, title = {{Conditional Random 
Fields: Probabilistic Models for Segmenting and Labeling Sequence Data}}, url = {http://www.cis.upenn.edu/{~}pereira/papers/crf.pdf}, year = {2001} } @inproceedings{eskin2000detecting, address = {Seattle}, annote = {comps{\_}ano}, author = {Eskin, Eleazar}, booktitle = {Proc. 17th International Conf. on Machine Learning}, month = {apr}, pages = {255--262}, publisher = {Morgan Kaufmann, San Francisco, {\{}CA{\}}}, title = {{Anomaly Detection over Noisy Data using Learned Probability Distributions}}, url = {http://citeseer.ist.psu.edu/eskin00anomaly.html http://citeseer.comp.nus.edu.sg/542907.html}, year = {2000} } @article{Beck2009, author = {Beck, A and Teboulle, M}, journal = {SIAM Journal on Imaging Sciences}, number = {1}, pages = {183--202}, title = {{A fast iterative shrinkage-thresholding algorithm for linear inverse problems}}, volume = {2}, year = {2009} } @article{manevitz2002one-class, author = {Manevitz, Larry M and Yousef, Malik}, issn = {1532-4435}, journal = {J. Mach. Learn. 
Res.}, pages = {139--154}, publisher = {JMLR.org}, title = {{One-class svms for document classification}}, volume = {2}, year = {2002} } @article{KW01, author = {Kivinen, J and Warmuth, M}, journal = {Machine Learning}, pages = {301--329}, title = {{Relative loss bounds for multidimensional regression problems}}, volume = {45}, year = {2001} } @inproceedings{calandriello2015large-scale, abstract = {We introduce Sparse-HFS, a scalable algorithm that can compute solutions to SSL problems using only O(n polylog(n)) space and O(m polylog(n)) time.}, author = {Calandriello, Daniele and Lazaric, Alessandro and Valko, Michal}, booktitle = {Resource-Efficient Machine Learning workshop at International Conference on Machine Learning}, title = {{Large-scale semi-supervised learning with online spectral graph sparsification}}, year = {2015} } @article{fowlkes2004spectral, author = {Fowlkes, Charless and Belongie, Serge and Chung, Fan and Malik, Jitendra}, journal = {IEEE Transactions on PAMI}, number = {2}, title = {{Spectral Grouping Using the Nystrom Method}}, volume = {26}, year = {2004} } @inproceedings{melo2010analysis, author = {Melo, F S and Lopes, M and Ferreira, R}, booktitle = {Proceedings of the 2010 conference on ECAI 2010: 19th European Conference on Artificial Intelligence}, organization = {IOS Press}, pages = {349--354}, title = {{Analysis of inverse reinforcement learning with perturbed demonstrations}}, url = {http://flowers.inria.fr/mlopes/myrefs/10-ecai.pdf}, year = {2010} } @inproceedings{narasimhan2015learnability, author = {Narasimhan, Harikrishna and Parkes, David C. 
and Singer, Yaron}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Narasimhan, Parkes, Singer - 2015 - Learnability of influence in networks.pdf:pdf}, title = {{Learnability of influence in networks}}, year = {2015} } @inproceedings{tarbouriech2020improved, abstract = {We investigate the exploration of an unknown environment when no reward function is provided. Building on the incremental exploration setting introduced by Lim and Auer [1], we define the objective of learning the set of ϵ-optimal goal-conditioned policies attaining all states that are incrementally reachable within L steps (in expectation) from a reference state s0. In this paper, we introduce a novel model-based approach that interleaves discovering new states from s0 and improving the accuracy of a model estimate that is used to compute goal-conditioned policies to reach newly discovered states. The resulting algorithm, DisCo, achieves a sample complexity scaling as Õ (L5SL+ϵ$\Gamma$L+ϵAϵ−2), where A is the number of actions, SL+ϵ is the number of states that are incrementally reachable from s0 in L+ϵ steps, and $\Gamma$L+ϵ is the branching factor of the dynamics over such states. This improves over the algorithm proposed in [1] in both ϵ and L at the cost of an extra $\Gamma$L+ϵ factor, which is small in most environments of interest. Furthermore, DisCo is the first algorithm that can return an ϵ/cmin-optimal policy for any cost-sensitive shortest-path problem defined on the L-reachable states with minimum cost cmin. 
Finally, we report preliminary empirical results confirming our theoretical findings.}, archivePrefix = {arXiv}, arxivId = {2012.14755}, author = {Tarbouriech, Jean and Pirotta, Matteo and Valko, Michal and Lazaric, Alessandro}, booktitle = {Neural Information Processing Systems}, eprint = {2012.14755}, month = {dec}, title = {{Improved sample complexity for incremental autonomous exploration in MDPs}}, url = {http://arxiv.org/abs/2012.14755}, year = {2020} } @article{cooper1992bayesian, abstract = {This paper presents a Bayesian method for constructing probabilistic networks from databases. In particular, we focus on constructing Bayesian belief networks. Potential applications include computer-assisted hypothesis testing, automated scientific discovery, and automated construction of probabilistic expert systems. We extend the basic method to handle missing data and hidden (latent) variables. We show how to perform probabilistic inference by averaging over the inferences of multiple belief networks. Results are presented of a preliminary evaluation of an algorithm for constructing a belief network from a database of cases. Finally, we relate the methods in this paper to previous work, and we discuss open problems.}, author = {Cooper, G F and Herskovits, E}, issn = {0885-6125}, journal = {Machine Learning}, keywords = {algorithm,bayesian,induction,learning,network,structure}, month = {oct}, number = {4}, pages = {309--347}, title = {{A Bayesian Method for the Induction of Probabilistic Networks from Data}}, url = {http://www.ingentaconnect.com/content/klu/mach/1992/00000009/00000004/00422779}, volume = {09}, year = {1992} } @inproceedings{calandriello2017second, abstract = {Kernel online convex optimization (KOCO) is a framework combining the expressiveness of non-parametric kernel models with the regret guarantees of online learning. 
First-order KOCO methods such as functional gradient descent require only O(t) time and space per iteration, and, when the only information on the losses is their convexity, achieve a minimax optimal O(sqrtT) regret. Nonetheless, many common losses in kernel problems, such as squared loss, logistic loss, and squared hinge loss posses stronger curvature that can be exploited. In this case, second-order KOCO methods achieve O(log(Det(K))) regret, which we show scales as O(deff log T), where deff is the effective dimension of the problem and is usually much smaller than O(sqrtT). The main drawback of second-order methods is their much higher O(t 2) space and time complexity. In this paper, we introduce kernel online Newton step (KONS), a new second-order KOCO method that also achieves O(defflog T) regret. To address the computational complexity of second-order methods, we introduce a new matrix sketching algorithm for the kernel matrix K, and show that for a chosen parameter gamma leq 1 our Sketched-KONS reduces the space and time complexity by a factor of gamma 2 to O(t 2gamma 2) space and time per iteration, while incurring only 1/gamma times more regret.}, author = {Calandriello, Daniele and Lazaric, Alessandro and Valko, Michal}, booktitle = {International Conference on Machine Learning}, title = {{Second-order kernel online convex optimization with adaptive sketching}}, url = {http://proceedings.mlr.press/v70/calandriello17a/calandriello17a.pdf}, year = {2017} } @inproceedings{boyan1995generalization, author = {Boyan, Justin and Moore, Andrew}, booktitle = {Advances in Neural Information Processing Systems 7}, pages = {369--376}, title = {{Generalization in Reinforcement Learning: Safely Approximating the Value Function}}, year = {1995} } @article{gittins1979bandit, abstract = {The paper aims to give a unified account of the central concepts in recent work on bandit processes and dynamic allocation indices; to show how these reduce some previously intractable 
problems to the problem of calculating such indices; and to describe how these calculations may be carried out. Applications to stochastic scheduling, sequential clinical trials and a class of search problems are discussed.}, author = {Gittins, J C}, doi = {10.2307/2985029}, issn = {00359246}, journal = {Journal of the Royal Statistical Society Series B Methodological}, number = {2}, pages = {148--177}, publisher = {JSTOR}, series = {B}, title = {{Bandit processes and dynamic allocation indices}}, url = {http://www.jstor.org/stable/2985029}, volume = {41}, year = {1979} } @article{pickland1975statistical, abstract = {A method is presented for making statistical inferences about the upper tail of a distribution function. It is useful for estimating the probabilities of future extremely large observations. The method is applicable if the underlying distribution function satisfies a condition which holds for all common continuous distribution functions.}, author = {Pickands, James III}, journal = {The Annals of Statistics}, pages = {119--131}, title = {{Statistical Inference Using Extreme Order Statistics}}, volume = {3}, year = {1975} } @article{Bubeck2013, abstract = {The stochastic multiarmed bandit problem is well understood when the reward distributions are sub-Gaussian. In this paper, we examine the bandit problem under the weaker assumption that the distributions have moments of order 1 + $\epsilon$, for some $\epsilon$ ∈ (0,1]. Surprisingly, moments of order 2 (i.e., finite variance) are sufficient to obtain regret bounds of the same order as under sub-Gaussian reward distributions. In order to achieve such regret, we define sampling strategies based on refined estimators of the mean such as the truncated empirical mean, Catoni's M-estimator, and the median-of-means estimator. We also derive matching lower bounds that also show that the best achievable regret deteriorates when $\epsilon$ {\textless}; 1. 
View full abstract}, archivePrefix = {arXiv}, arxivId = {arXiv:1209.1727v1}, author = {Bubeck, S{\'{e}}bastien and Cesa-Bianchi, Nicol{\`{o}} and Lugosi, G{\'{a}}bor}, doi = {10.1109/TIT.2013.2277869}, eprint = {arXiv:1209.1727v1}, issn = {00189448}, journal = {IEEE Transactions on Information Theory}, keywords = {Heavy-tailed distributions,regret bounds,robust estimators,stochastic multi-armed bandit}, number = {11}, pages = {7711--7717}, title = {{Bandits with heavy tail}}, volume = {59}, year = {2013} } @article{nagamochi1998note, author = {Nagamochi, H and Ibaraki, T}, journal = {Information Processing Letters}, number = {5}, pages = {239--244}, publisher = {Elsevier}, title = {{A note on minimizing submodular functions}}, volume = {67}, year = {1998} } @inproceedings{maes07sequencelabeling, author = {Maes, Francis and Denoyer, Ludovic and Gallinari, Patrick}, pages = {648--657}, title = {{Sequence Labeling with Reinforcement Learning and Ranking Algorithms}} } @incollection{Haz11, author = {Hazan, E}, booktitle = {Optimization for Machine Learning}, editor = {Sra, S and Nowozin, S and Wright, S}, pages = {287--303}, publisher = {MIT press}, title = {{The convex optimization approach to regret minimization}}, year = {2011} } @book{pearl1988probabilistic, address = {San Francisco, CA, USA}, author = {Pearl, Judea}, isbn = {0-934613-73-7}, publisher = {Morgan Kaufmann Publishers Inc.}, title = {{Probabilistic reasoning in intelligent systems: networks of plausible inference}}, year = {1988} } @article{cornuejols1977uncapacitated, author = {Cornuejols, G and Fisher, M and Nemhauser, G L}, journal = {Annals of Discrete Mathematics}, pages = {163--177}, publisher = {Elsevier}, title = {{On the Uncapacitated Location Problem}}, volume = {1}, year = {1977} } @article{gnedenko1943distribution, author = {Gnedenko, Boris}, journal = {The Annals of Mathematics}, number = {3}, pages = {423--453}, publisher = {JSTOR}, title = {{Sur la distribution limite du terme maximum 
d'une s{\'{e}}rie al{\'{e}}atoire}}, volume = {44}, year = {1943} } @article{andrieu2003introduction, author = {Andrieu, Christophe and de Freitas, Nando and Doucet, Arnaud and Jordan, Michael}, journal = {Machine Learning}, pages = {5--43}, title = {{An Introduction to {MCMC} for Machine Learning}}, volume = {50}, year = {2003} } @article{spielman2011graph, author = {Spielman, Daniel A and Srivastava, Nikhil}, journal = {SIAM Journal on Computing}, number = {6}, title = {{Graph sparsification by effective resistances}}, volume = {40}, year = {2011} } @inproceedings{lafferty01conditional, address = {San Francisco, CA, USA}, annote = {From Duplicate 1 (Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data - Lafferty, John D; McCallum, Andrew; Pereira, Fernando C N) comps{\_}models}, author = {Lafferty, John D and McCallum, Andrew and Pereira, Fernando C N}, booktitle = {ICML '01: Proceedings of the Eighteenth International Conference on Machine Learning}, isbn = {1-55860-778-1}, pages = {282--289}, publisher = {Morgan Kaufmann Publishers Inc.}, title = {{Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data}}, url = {http://www.cis.upenn.edu/{~}pereira/papers/crf.pdf}, year = {2001} } @article{bates2003improving, author = {Bates, David W and Gawande, Atul A.}, journal = {New England Journal of Medicine}, number = {25}, pages = {2526--2534}, title = {{Improving Safety with Information Technology}}, volume = {348}, year = {2003} } @inproceedings{grill2018optimistic, abstract = {We address the problem of optimizing a Brownian motion. We consider a (random) realization W of a Brownian motion with input space in [0,1]. Given W, our goal is to return an epsilon-approximation of its maximum using the smallest possible number of function evaluations, the sample complexity of the algorithm. 
We provide an algorithm with sample complexity of order log2(1/epsilon). This improves over previous results of Al-Mharmah and Calvin (1996) and Calvin et al. (2017) which provided only polynomial rates. Our algorithm is adaptive---each query depends on previous values---and is an instance of the optimism-in-the-face-of-uncertainty principle.}, author = {Grill, Jean-Bastien and Valko, Michal and Munos, R{\'{e}}mi}, booktitle = {Neural Information Processing Systems}, title = {{Optimistic optimization of a Brownian}}, year = {2018} } @inproceedings{dann2015sample, author = {Dann, Christoph and Brunskill, Emma}, booktitle = {Neural Information Processing Systems}, title = {{Sample complexity of episodic fixed-horizon reinforcement learning}}, url = {https://arxiv.org/pdf/1510.08906.pdf}, year = {2015} } @inproceedings{bartlett2019scale-free, abstract = {We address the problem of planning in an environment with deterministic dynamics and stochastic discounted rewards under a limited numerical budget where the ranges of both rewards and noise are unknown. We introduce PlaTypOOS, an adaptive, robust, and efficient alternative to the OLOP (open-loop optimistic planning) algorithm. Whereas OLOP requires a priori knowledge of the ranges of both rewards and noise, PlaTypOOS dynamically adapts its behavior to both. This allows PlaTypOOS to be immune to two vulnerabilities of OLOP: failure when given underestimated ranges of noise and rewards and inefficiency when these are overestimated. PlaTypOOS additionally adapts to the global smoothness of the value function. PlaTypOOS acts in a provably more efficient manner vs. 
OLOP when OLOP is given an overestimated reward and show that in the case of no noise, PlaTypOOS learns exponentially faster.}, author = {Bartlett, Peter L and Gabillon, Victor and Healey, Jennifer and Valko, Michal}, booktitle = {International Conference on Machine Learning}, title = {{Scale-free adaptive planning for deterministic dynamics {\&} discounted rewards}}, url = {http://researchers.lille.inria.fr/{~}valko/hp/publications/bartlett2019scale-free}, year = {2019} } @inproceedings{priebe2005scan, abstract = {We introduce a theory of scan statistics on graphs and apply the ideas to the problem of anomaly detection in a time series of Enron email graphs.}, author = {Priebe, Carey E. and Conroy, John M. and Marchette, David J. and Park, Youngser}, booktitle = {Computational and Mathematical Organization Theory}, doi = {10.1007/s10588-005-5378-z}, issn = {1381-298X}, pages = {229--247}, title = {{Scan Statistics on Enron Graphs}}, volume = {11}, year = {2005} } @inproceedings{IgelHusken00:iRprop, author = {Igel, Christian and H{\"{u}}sken, Michael}, booktitle = {Proceedings of the Second International ICSC Symposium on Neural Computation (NC 2000)}, pages = {115--121}, publisher = {ICSC Academic Press}, title = {{Improving the {\{}R{\}}prop Learning Algorithm}}, url = {citeseer.ist.psu.edu/igel00improving.html}, year = {2000} } @article{gilks1995adaptive, abstract = {Gibbs sampling is a powerful technique for statistical inference. It involves little more than sampling from full conditional distributions, which can be both complex and computationally expensive to evaluate. Gilks and Wild have shown that in practice full conditionals are often log-concave, and they proposed a method of adaptive rejection sampling for efficiently sampling from univariate log-concave distributions. In this paper, to deal with non-log-concave full conditional distributions, we generalize adaptive rejection sampling to include a Hastings-Metropolis algorithm step. 
One important field of application in which statistical models may lead to non-log-concave full conditionals is population pharmacokinetics. Here, the relationship between drug dose and blood or plasma concentration in a group of patients typically is modelled by using non-linear mixed effects models. Often, the data used for analysis are routinely collected hospital measurements, which tend to be noisy and irregular. Consequently, a robust (t-distributed) error structure is appropriate to account for outlying observations and/or patients. We propose a robust non-linear full probability model for population pharmacokinetic data. We demonstrate that our method enables Bayesian inference for this model, through an analysis of antibiotic administration in new-born babies.}, author = {Gilks, W. R. and Best, N. G. and Tan, K. K. C.}, journal = {Journal of the Royal Statistical Society. Series C (Applied Statistics)}, keywords = {bayesian computation,gibbs sampling,markov chain monte carlo,method,metropolis algorithm,pharmacokinetic model,random variate generation}, number = {4}, pages = {455--472}, title = {{Adaptive rejection metropolis sampling within Gibbs sampling}}, volume = {44}, year = {1995} } @inproceedings{valko2014spectral, abstract = {Smooth functions on graphs have wide applications in manifold and semi-supervised learning. In this paper, we study a bandit problem where the payoffs of arms are smooth on a graph. This framework is suitable for solving online learning problems that involve graphs, such as content-based recommendation. In this problem, each item we can recommend is a node and its expected rating is similar to its neighbors. The goal is to recommend items that have high expected ratings. We aim for the algorithms where the cumulative regret with respect to the optimal policy would not scale poorly with the number of nodes. 
In particular, we introduce the notion of an effective dimension, which is small in real-world graphs, and propose two algorithms for solving our problem that scale linearly and sublinearly in this dimension. Our experiments on real-world content recommendation problem show that a good estimator of user preferences for thousands of items can be learned from just tens of nodes evaluations.}, author = {Valko, Michal and Munos, R{\'{e}}mi and Kveton, Branislav and Koc{\'{a}}k, Tom{\'{a}}{\v{s}}}, booktitle = {International Conference on Machine Learning}, title = {{Spectral bandits for smooth graph functions}}, url = {http://proceedings.mlr.press/v32/valko14.pdf}, year = {2014} } @inproceedings{ziebart2008maximum, abstract = {Recent research has shown the benefit of framing problems of imitation learning as solutions to Markov Decision Problems. This approach reduces the problem of learning to recovering a utility function that makes the behavior induced by a near-optimal policy closely mimic demonstrated behavior. In this work, we develop a probabilistic approach based on the principle of maximum entropy. Our approach provides a well-defined, globally normalized distribution over decisions, while providing the same performance guarantees as existing methods.We develop our technique in the context of modeling real-world navigation and driving behaviors where collected data is inherently noisy and imperfect. Our probabilistic approach enables modeling of route preferences as well as a powerful new approach to inferring destinations and routes based on partial trajectories.}, author = {Ziebart, Brian and Maas, Andrew L and Bagnell, J Andrew and Dey, Anind K}, booktitle = {Proc AAAI}, editor = {Archer, M}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Ziebart et al. 
- 2008 - Maximum Entropy Inverse Reinforcement Learning.pdf:pdf}, isbn = {9781577353683}, keywords = {irl}, mendeley-tags = {irl}, pages = {1433--1438}, publisher = {AAAI Press}, title = {{Maximum Entropy Inverse Reinforcement Learning}}, url = {http://www.aaai.org/Papers/AAAI/2008/AAAI08-227.pdf}, year = {2008} } @inproceedings{azar2014online, abstract = {In this paper we consider the problem of online stochastic optimization of a locally smooth function under bandit feedback. We introduce the high-confidence tree (HCT) algorithm, a novel any-time {\$}\backslashmathcal{\{}X{\}}{\$}-armed bandit algorithm, and derive regret bounds matching the performance of existing state-of-the-art in terms of dependency on number of steps and smoothness factor. The main advantage of HCT is that it handles the challenging case of correlated rewards, whereas existing methods require that the reward-generating process of each arm is an identically and independent distributed (iid) random process. HCT also improves on the state-of-the-art in terms of its memory requirement as well as requiring a weaker smoothness assumption on the mean-reward function in compare to the previous anytime algorithms. Finally, we discuss how HCT can be applied to the problem of policy search in reinforcement learning and we report preliminary empirical results.}, author = {Azar, Mohammad Gheshlaghi and Lazaric, Alessandro and Brunskill, Emma}, booktitle = {International Conference on Machine Learning}, title = {{Online stochastic optimization under correlated bandit feedback}}, url = {http://proceedings.mlr.press/v32/azar14.pdf}, year = {2014} } @inproceedings{tarbouriech2020no-regret, abstract = {Many popular reinforcement learning problems (e.g., navigation in a maze, some Atari games, mountain car) are instances of the episodic setting under its stochastic shortest path (SSP) formulation, where an agent has to achieve a goal state while minimizing the cumulative cost. 
Despite the popularity of this setting, the exploration-exploitation dilemma has been sparsely studied in general SSP problems, with most of the theoretical literature focusing on different problems (i.e., fixed-horizon and infinite-horizon) or making the restrictive loop-free SSP assumption (i.e., no state can be visited twice during an episode). In this paper, we study the general SSP problem with no assumption on its dynamics (some policies may actually never reach the goal). We introduce UC-SSP, the first no-regret algorithm in this setting, and prove a regret bound scaling as O(DSsqrt(ADK)) after K episodes for any unknown SSP with S states, A actions, positive costs and SSP-diameter D, defined as the smallest expected hitting time from any starting state to the goal. We achieve this result by crafting a novel stopping rule, such that UC-SSP may interrupt the current policy if it is taking too long to achieve the goal and switch to alternative policies that are designed to rapidly terminate the episode.}, archivePrefix = {arXiv}, arxivId = {1912.03517}, author = {Tarbouriech, Jean and Garcelon, Evrard and Valko, Michal and Pirotta, Matteo and Lazaric, Alessandro}, booktitle = {International Conference on Machine Learning}, eprint = {1912.03517}, title = {{No-regret exploration in goal-oriented reinforcement learning}}, url = {https://arxiv.org/pdf/1912.03517.pdf}, year = {2020} } @article{Syrgkanis2016, abstract = {We give an oracle-based algorithm for the adversarial contextual bandit problem, where either contexts are drawn i.i.d. or the sequence of contexts is known a priori, but where the losses are picked adversarially. 
Our algorithm is computationally efficient, assuming access to an offline optimization oracle, and enjoys a regret of order {\$}O((KT){\^{}}{\{}\backslashfrac{\{}2{\}}{\{}3{\}}{\}}(\backslashlog N){\^{}}{\{}\backslashfrac{\{}1{\}}{\{}3{\}}{\}}){\$}, where {\$}K{\$} is the number of actions, {\$}T{\$} is the number of iterations and {\$}N{\$} is the number of baseline policies. Our result is the first to break the {\$}O(T{\^{}}{\{}\backslashfrac{\{}3{\}}{\{}4{\}}{\}}){\$} barrier that is achieved by recently introduced algorithms. Breaking this barrier was left as a major open problem. Our analysis is based on the recent relaxation based approach of (Rakhlin and Sridharan, 2016).}, archivePrefix = {arXiv}, arxivId = {1606.00313}, author = {Syrgkanis, Vasilis and Luo, Haipeng and Krishnamurthy, Akshay and Schapire, Robert E.}, eprint = {1606.00313}, issn = {10495258}, title = {{Improved Regret Bounds for Oracle-Based Adversarial Contextual Bandits}}, year = {2016} } @inproceedings{seldin2014prediction, author = {Seldin, Yevgeny and Bartlett, Peter and Crammer, Koby and Abbasi-Yadkori, Yasin}, booktitle = {International Conference on Machine Learning}, title = {{Prediction with limited advice and multiarmed bandits with paid observations}}, year = {2014} } @inproceedings{lois2015online, abstract = {This work studies two interrelated problems - online robust PCA (RPCA) and online low-rank matrix completion (MC). In recent work by Cand$\backslash$`{\{}e{\}}s et al., RPCA has been defined as a problem of separating a low-rank matrix (true data), {\$}L:=[\backslashell{\_}1, \backslashell{\_}2, \backslashdots \backslashell{\_}{\{}t{\}}, \backslashdots , \backslashell{\_}{\{}t{\_}{\{}\backslashmax{\}}{\}}]{\$} and a sparse matrix (outliers), {\$}S:=[x{\_}1, x{\_}2, \backslashdots x{\_}{\{}t{\}}, \backslashdots, x{\_}{\{}t{\_}{\{}\backslashmax{\}}{\}}]{\$} from their sum, {\$}M:=L+S{\$}. Our work uses this definition of RPCA. 
An important application where both these problems occur is in video analytics in trying to separate sparse foregrounds (e.g., moving objects) and slowly changing backgrounds. While there has been a large amount of recent work on both developing and analyzing batch RPCA and batch MC algorithms, the online problem is largely open. In this work, we develop a practical modification of our recently proposed algorithm to solve both the online RPCA and online MC problems. The main contribution of this work is that we obtain correctness results for the proposed algorithms under mild assumptions. The assumptions that we need are: (a) a good estimate of the initial subspace is available (easy to obtain using a short sequence of background-only frames in video surveillance); (b) the {\$}\backslashell{\_}t{\$}'s obey a `slow subspace change' assumption; (c) the basis vectors for the subspace from which {\$}\backslashell{\_}t{\$} is generated are dense (non-sparse); (d) the support of {\$}x{\_}t{\$} changes by at least a certain amount at least every so often; and (e) algorithm parameters are appropriately set}, archivePrefix = {arXiv}, arxivId = {1503.03525}, author = {Lois, Brian and Vaswani, Namrata}, booktitle = {IEEE International Symposium on Information Theory}, eprint = {1503.03525}, title = {{Online Matrix Completion and Online Robust PCA}}, year = {2015} } @article{R76, author = {Rockafellar, R Tyrrell}, journal = {SIAM Journal on Control and Optimization}, keywords = {algorithms,point,proximal}, number = {5}, pages = {877--898}, title = {{Monotone Operators and the Proximal Point Algorithm}}, volume = {14}, year = {1976} } @book{BaHoSchSmTaVi07, author = {Bakir, G{\"{u}}khan H and Hofmann, Thomas and Sch{\"{o}}lkopf, Bernhard and Smola, Alexander J and Taskar, Ben and Vishwanathan, S V N}, isbn = {0262026171}, publisher = {The MIT Press}, title = {{Predicting Structured Data (Neural Information Processing)}}, year = {2007} } @inproceedings{K01, author = {Kakade, 
Sham}, booktitle = {Advances in Neural Information Processing Systems 14 (NIPS)}, pages = {1531--1538}, title = {{A Natural Policy Gradient}}, year = {2001} } @article{wolfe1976finding, author = {Wolfe, P}, journal = {Mathematical Programming}, number = {1}, pages = {128--149}, publisher = {Springer}, title = {{Finding the nearest point in a polytope}}, volume = {11}, year = {1976} } @incollection{Edmonds1970, address = {New York}, author = {Edmonds, J}, booktitle = {Combinatorial Structures and Their Applications}, pages = {69--87}, publisher = {Gordon and Breach}, title = {{Submodular functions, matroids, and certain polyhedra}}, year = {1970} } @misc{urlhttp://mplab.ucsd.edumplab, title = {{MPLab GENKI Database}}, url = {http://mplab.ucsd.edu} } @inproceedings{klein2012structured, abstract = {TBD}, address = {Edinburgh (UK)}, author = {Klein, Edouard and Piot, Bilal and Geist, Matthieu and Pietquin, Olivier}, booktitle = {European Workshop on Reinforcement Learning (EWRL 2012)}, month = jun, title = {{Structured Classification for Inverse Reinforcement Learning}}, url = {http://ewrl.files.wordpress.com/2011/12/ewrl2012{\_}submission{\_}30.pdf}, year = {2012} } @inproceedings{campbell2000linear, annote = {comps{\_}ano}, author = {Campbell, Colin and Bennett, Kristin P}, booktitle = {Advances in Neural Information Processing Systems 13, Papers from Neural Information Processing Systems (NIPS) 2000}, pages = {395--401}, title = {{A Linear Programming Approach to Novelty Detection}}, url = {http://books.nips.cc/papers/files/nips13/CampbellBennett.pdf}, year = {2000} } @article{carpentier2014asimple, archivePrefix = {arXiv}, arxivId = {1505.04627}, author = {Carpentier, Alexandra and Valko, Michal}, eprint = {1505.04627}, journal = {arXiv preprint arXiv:1505.04627}, title = {{Simple regret for infinitely many armed bandits}}, year = {2015} } @article{HaSe07, author = {Hazan, E and 
Seshadhri, C}, journal = {Electronic Colloquium on Computational Complexity (ECCC)}, title = {{Adaptive algorithms for online decision problems}}, year = {2007} } @article{ghashami2016frequent, abstract = {We describe a new algorithm called Frequent Directions for deterministic matrix sketching in the row-updates model. The algorithm is presented an arbitrary input matrix {\$}A \backslashin R{\^{}}{\{}n \backslashtimes d{\}}{\$} one row at a time. It performed {\$}O(d \backslashtimes \backslashell){\$} operations per row and maintains a sketch matrix {\$}B \backslashin R{\^{}}{\{}\backslashell \backslashtimes d{\}}{\$} such that for any {\$}k {\textless} \backslashell{\$} {\$}\backslash|A{\^{}}TA - B{\^{}}TB \backslash|{\_}2 \backslashleq \backslash|A - A{\_}k\backslash|{\_}F{\^{}}2 / (\backslashell-k){\$} and {\$}\backslash|A - \backslashpi{\_}{\{}B{\_}k{\}}(A)\backslash|{\_}F{\^{}}2 \backslashleq \backslashbig(1 + \backslashfrac{\{}k{\}}{\{}\backslashell-k{\}}\backslashbig) \backslash|A-A{\_}k\backslash|{\_}F{\^{}}2 {\$} . Here, {\$}A{\_}k{\$} stands for the minimizer of {\$}\backslash|A - A{\_}k\backslash|{\_}F{\$} over all rank {\$}k{\$} matrices (similarly {\$}B{\_}k{\$}) and {\$}\backslashpi{\_}{\{}B{\_}k{\}}(A){\$} is the rank {\$}k{\$} matrix resulting from projecting {\$}A{\$} on the row span of {\$}B{\_}k{\$}. We show both of these bounds are the best possible for the space allowed. The summary is mergeable, and hence trivially parallelizable. 
Moreover, Frequent Directions outperforms exemplar implementations of existing streaming algorithms in the space-error tradeoff.}, author = {Ghashami, Mina and Liberty, Edo and Phillips, Jeff M and Woodruff, David P.}, journal = {SIAM Journal on Computing}, pages = {1--28}, title = {{Frequent directions: Simple and deterministic matrix sketching}}, url = {https://arxiv.org/pdf/1501.01711.pdf}, year = {2016} } @inproceedings{allenberg2006hannan, author = {Allenberg, Chamy and Auer, Peter and Gy{\"{o}}rfi, L{\'{a}}szl{\'{o}} and Ottucs{\'{a}}k, Gy{\"{o}}rgy}, booktitle = {Algorithmic Learning Theory}, title = {{Hannan consistency in on-line learning in case of unbounded losses under partial monitoring}}, year = {2006} } @article{yu09ArbitraryRewards, address = {Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA}, author = {Yu, Jia Yuan and Mannor, Shie and Shimkin, Nahum}, doi = {10.1287/moor.1090.0397}, issn = {0364-765X}, journal = {Mathematics of Operations Research}, number = {3}, pages = {737--757}, publisher = {INFORMS}, title = {{Markov Decision Processes with Arbitrary Reward Processes}}, volume = {34}, year = {2009} } @article{barto1983neuronlike, author = {Barto, Andrew and Sutton, Richard and Anderson, Charles}, journal = {IEEE Transactions on Systems, Man, and Cybernetics}, number = {5}, pages = {835--846}, title = {{Neuronlike Elements that Can Solve Difficult Learning Control Problems}}, volume = {13}, year = {1983} } @article{collins03head-driven, address = {Cambridge, MA, USA}, author = {Collins, Michael}, doi = {10.1162/089120103322753356}, issn = {0891-2017}, journal = {Computational Linguistics}, number = {4}, pages = {589--637}, publisher = {MIT Press}, title = {{Head-Driven Statistical Models for Natural Language Parsing}}, volume = {29}, year = {2003} } @article{Fokkink2016, abstract = {Suppose some objects are hidden in a finite set {\$}S{\$} of 
hiding places which must be examined one-by-one. The cost of searching subsets of {\$}S{\$} is given by a submodular function and that the probability that all objects are contained in a subset is given by a supermodular function. We seek an ordering of S that finds all the objects in minimal expected cost. This problem is NP-hard and we give an efficient combinatorial {\$}2{\$}-approximation algorithm, generalizing analogous results in scheduling theory. We also give a new scheduling application {\$}1|prec|\backslashbackslashsum w{\_}A g(C{\_}A){\$}, where a set of jobs must be ordered subject to precedence constraints to minimize the weighted sum of some concave function {\$}g{\$} of the completion times of subsets of jobs. We go on to give better approximations for submodular functions with low total curvature and we give a full solution for cost functions we call fully reducible. Next, we consider a zero-sum game between a cost-maximizing Hider and a cost-minimizing Searcher. We prove that the equilibrium mixed strategies for the Hider are in the base polyhedron of the cost function, suitably scaled, and we solve the game for fully reducible cost functions, giving approximately optimal strategies in other cases.}, archivePrefix = {arXiv}, arxivId = {1607.07598}, author = {Fokkink, Robbert and Lidbetter, Thomas and V{\'{e}}gh, L{\'{a}}szl{\'{o}} A}, eprint = {1607.07598}, journal = {arXiv preprint}, title = {{On submodular search and machine scheduling}}, url = {http://arxiv.org/abs/1607.07598}, year = {2016} } @article{bousquet2002stability, author = {Bousquet, Olivier and Elisseeff, Andre}, journal = {Journal of Machine Learning Research}, pages = {499--526}, title = {{Stability and Generalization}}, volume = {2}, year = {2002} } @inproceedings{silva:long, author = {da Silva, Valdinei Freire and Costa, Anna Helena Reali and Lima, Pedro}, pages = {4246--4251}, title = {{Inverse Reinforcement Learning with Evaluation}} } @inproceedings{rowland2019multiagent, 
author = {Rowland, Mark and Omidshafiei, Shayegan and Tuyls, Karl and P{\'{e}}rolat, Julien and Valko, Michal and Piliouras, Georgios and Munos, R{\'{e}}mi}, booktitle = {Neural Information Processing Systems}, title = {{Multiagent evaluation under incomplete information}}, year = {2019} } @article{kocak2020spectral, abstract = {Smooth functions on graphs have wide applications in manifold and semi-supervised learning. In this work, we study a bandit problem where the payoffs of arms are smooth on a graph. This framework is suitable for solving online learning problems that involve graphs, such as content-based recommendation. In this problem, each item we can recommend is a node of an undirected graph and its expected rating is similar to the one of its neighbors. The goal is to recommend items that have high expected ratings. We aim for the algorithms where the cumulative regret with respect to the optimal policy would not scale poorly with the number of nodes. In particular, we introduce the notion of an effective dimension, which is small in real-world graphs, and propose three algorithms for solving our problem that scale linearly and sublinearly in this dimension. Our experiments on content recommendation problem show that a good estimator of user preferences for thousands of items can be learned from just tens of node evaluations.}, author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Munos, R{\'{e}}mi and Kveton, Branislav and Agrawal, Shipra and Valko, Michal}, journal = {Journal of Machine Learning Research}, pages = {1--44}, title = {{Spectral bandits}}, volume = {21}, year = {2020} } @techreport{calandriello2016analysis, abstract = {We derive a new proof to show that the incremental resparsification algorithm proposed by Kelner and Levin (2013) produces a spectral sparsifier in high probability. 
We rigorously take into account the dependencies across subsequent resparsifications using martingale inequalities, fixing a flaw in the original analysis.}, archivePrefix = {arXiv}, arxivId = {1609.03769}, author = {Calandriello, Daniele and Lazaric, Alessandro and Valko, Michal}, eprint = {1609.03769}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Calandriello, Lazaric, Valko - 2016 - Analysis of Kelner and Levin graph sparsification algorithm for a streaming setting.pdf:pdf}, title = {{Analysis of Kelner and Levin graph sparsification algorithm for a streaming setting}}, url = {http://arxiv.org/abs/1609.03769}, year = {2016} } @inproceedings{low2010graphlab:, address = {Catalina Island, California}, author = {Low, Yucheng and Gonzalez, Joseph and Kyrola, Aapo and Bickson, Danny and Guestrin, Carlos and Hellerstein, Joseph M}, booktitle = {Conference on Uncertainty in Artificial Intelligence (UAI)}, month = jul, title = {{GraphLab: A New Parallel Framework for Machine Learning}}, year = {2010} } @article{benini1999policy, author = {Benini, Luca and Bogliolo, Alessandro and Paleologo, Giuseppe and De Micheli, Giovanni}, journal = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems}, number = {6}, pages = {813--833}, title = {{Policy Optimization for Dynamic Power Management}}, volume = {18}, year = {1999} } @article{edmonds71matroids, author = {Edmonds, Jack}, journal = {Mathematical Programming}, number = {1}, pages = {127--136}, title = {{Matroids and the Greedy Algorithm}}, volume = {1}, year = {1971} } @misc{guestrin2008beyond, address = {Helsinki, Finland}, author = {Guestrin, Carlos and Krause, Andreas}, howpublished = {Tutorial at the 25th International Conference on Machine Learning (ICML)}, title = {{Beyond convexity - submodularity in machine learning}}, year = {2008} } @inproceedings{sanner2006practical, author = {Sanner, Scott and Boutilier, Craig}, booktitle = {Proceedings of the 22nd 
Conference on Uncertainty in Artificial Intelligence}, title = {{Practical Linear Value-Approximation Techniques for First-Order {MDPs}}}, year = {2006} } @article{lauritzen1988local, author = {Lauritzen, Steffen and Spiegelhalter, David}, journal = {Journal of the Royal Statistical Society. Series B (Methodological)}, pages = {157--224}, title = {{Local computations with probabilities on graphical structures and their application to expert systems}}, volume = {50}, year = {1988} } @inproceedings{park2002map, author = {Park, James}, booktitle = {Proceedings of the 18th Conference on Uncertainty in Artificial Intelligence}, pages = {388--396}, title = {{{MAP} Complexity Results and Approximation Methods}}, year = {2002} } @inproceedings{contal2016stochastic, abstract = {The paper considers the problem of global optimization in the setup of stochastic process bandits. We introduce an UCB algorithm which builds a cascade of discretization trees based on generic chaining in order to render possible his operability over a continuous domain. The theoretical framework applies to functions under weak probabilistic smoothness assumptions and also extends significantly the spectrum of application of UCB strategies. Moreover generic regret bounds are derived which are then specialized to Gaussian processes indexed on infinite-dimensional spaces as well as to quadratic forms of Gaussian processes. 
Lower bounds are also proved in the case of Gaussian processes to assess the optimality of the proposed algorithm.}, archivePrefix = {arXiv}, arxivId = {1602.04976}, author = {Contal, Emile and Vayatis, Nicolas}, eprint = {1602.04976}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Contal, Vayatis - 2016 - Stochastic process bandits Upper confidence bounds algorithms via generic chaining.pdf:pdf}, month = {feb}, title = {{Stochastic process bandits: Upper confidence bounds algorithms via generic chaining}}, year = {2016} } @inproceedings{syed2008apprenticeship, author = {Syed, Umar and Schapire, Robert and Bowling, Michael}, booktitle = {Proceedings of the Twenty-Fifth International Conference on Machine Learning (ICML)}, pages = {1032--1039}, title = {{Apprenticeship Learning Using Linear Programming}}, url = {http://www.cs.princeton.edu/{~}usyed/SyedBowlingSchapireICML2008.pdf}, year = {2008} } @inproceedings{ng2001spectral, abstract = {Despite many empirical successes of spectral clustering methods| algorithms that cluster points using eigenvectors of matrices derived from the data|there are several unresolved issues. First, there are a wide variety of algorithms that use the eigenvectors in slightly dierent ways. Second, many of these algorithms have no proof that they will actually compute a reasonable clustering. In this paper, we present a simple spectral clustering algorithm that can be implemented using a few lines of Matlab. Using tools from matrix perturbation theory, we analyze the algorithm, and give conditions under which it can be expected to do well. We also show surprisingly good experimental results on a number of challenging clustering problems. 
1}, author = {Ng, Andrew Y and Jordan, Michael I and Weiss, Yair}, booktitle = {Neural Information Processing Systems}, keywords = {clustering community detection graph spectral theo}, title = {{On spectral clustering: Analysis and an algorithm}}, year = {2001} } @inproceedings{yang2006efficient, annote = {comps{\_}distancX}, author = {Yang, Liu and Jin, Rong and Sukthankar, Rahul and Liu, Yi}, booktitle = {Proceedings, The Twenty-First National Conference on Artificial Intelligence and the Eighteenth Innovative Applications of Artificial Intelligence Conference, July 16-20, 2006, Boston, Massachusetts, USA}, title = {{An Efficient Algorithm for Local Distance Metric Learning}}, url = {http://www.cse.msu.edu/{~}yangliu1/aaai2006-distance-v7.pdf}, year = {2006} } @inproceedings{kveton2013learning, abstract = {Face recognition from a single image per person is a challenging problem because the training sample is extremely small. We consider a variation of this problem. In our problem, we recognize only one person, and there are no labeled data for any other person. This setting naturally arises in authentication on personal computers and mobile devices, and poses additional challenges because it lacks negative examples. We formalize our problem as one-class classification, and propose and analyze an algorithm that learns a non-parametric model of the face from a single labeled image and a stream of unlabeled data. In many domains, for instance when a person interacts with a computer with a camera, unlabeled data are abundant and easy to utilize. This is the first paper that investigates how these data can help in learning better models in the single-image-per-person setting. Our method is evaluated on a dataset of 43 people and we show that these people can be recognized 90{\%} of time at nearly zero false positives. This recall is 25+{\%} higher than the recall of our best performing baseline. 
Finally, we conduct a comprehensive sensitivity analysis of our algorithm and provide a guideline for setting its parameters in practice.}, address = {Shanghai, China}, author = {Kveton, Branislav and Valko, Michal}, booktitle = {10th IEEE International Conference on Automatic Face and Gesture Recognition}, title = {{Learning from a Single Labeled Face and a Stream of Unlabeled Data}}, year = {2013} } @article{fine1997prediction, author = {Fine, M J and Auble, T E and Yealy, D M and Hanusa, B H and Weissfeld, L A and Singer, D E and Coley, C M and Marrie, T J and Kapoor, W N}, journal = {New England Journal of Medicine}, number = {4}, pages = {243--250}, title = {{A Prediction Rule to Identify Low-Risk Patients with Community-Acquired Pneumonia}}, volume = {336}, year = {1997} } @inproceedings{boutilier1996approximating, author = {Boutilier, Craig and Dearden, Richard}, booktitle = {Proceedings of the 13th International Conference on Machine Learning}, pages = {54--62}, title = {{Approximating Value Trees in Structured Dynamic Programming}}, year = {1996} } @article{kempe2003maximizing, abstract = {Models for the processes by which ideas and influence propagate through a social network have been studied in a number of domains, including the diffusion of medical and technological innovations, the sudden and widespread adoption of various strategies in game-theoretic settings, and the effects of “word of mouth” in the promotion of new products. Recently, motivated by the design of viral marketing strategies, Domingos and Richardson posed a fundamental algorithmic problem for such social network processes: if we can try to convince a subset of individuals to adopt a new product or innovation, and the goal is to trigger a large cascade of further adoptions, which set of individuals should we target? We consider this problem in several of the most widely studied models in social network analysis. 
The optimization problem of selecting the most influential nodes is NP-hard here, and we provide the first provable approximation guarantees for efficient algorithms. Using an analysis framework based on submodular functions, we show that a natural greedy strategy obtains a solution that is provably within 63{\%} of optimal for several classes of models; our framework suggests a general approach for reasoning about the performance guarantees of algorithms for these types of influence problems in social networks. We also provide computational experiments on large collaboration networks, showing that in addition to their provable guarantees, our approximation algorithms significantly out-perform nodeselection heuristics based on the well-studied notions of degree centrality and distance centrality from the field of social networks.}, author = {Kempe, David and Kleinberg, Jon and Tardos, {\'{E}}va}, journal = {Knowledge Discovery and Data Mining}, pages = {137}, title = {{Maximizing the spread of influence through a social network}}, year = {2003} } @inproceedings{valko2010feature, abstract = {The objective of this paper is to understand what characteris-tics and features of clinical data influence physician.s deci-sion about ordering laboratory tests or prescribing medica-tions the most. We conduct our analysis on data and decisions extracted from electronic health records of 4486 post-surgical cardiac patients. The summary statistics for 335 different lab order decisions and 407 medication decisions are reported. 
We show that in many cases, physician.s lab-order and medication decisions are predicted well by simple patterns such as last value of a single test result, time since a certain lab test was ordered or time since certain procedure was executed.}, author = {Valko, Michal and Hauskrecht, Milos}, booktitle = {13th International Congress on Medical Informatics MEDINFO 2010}, keywords = {misovalko}, mendeley-tags = {misovalko}, title = {{Feature importance analysis for patient management decisions}}, year = {2010} } @inproceedings{locatelli2018adaptivity, author = {Locatelli, Andrea and Carpentier, Alexandra}, booktitle = {Conference on Learning Theory}, title = {{Adaptivity to Smoothness in X-armed bandits}}, url = {http://proceedings.mlr.press/v75/locatelli18a/locatelli18a.pdf}, year = {2018} } @inproceedings{grill2019planning, author = {Grill, Jean-Bastien and Domingues, Omar Darwiche and M{\'{e}}nard, Pierre and Munos, R{\'{e}}mi and Valko, Michal}, booktitle = {Neural Information Processing Systems}, title = {{Planning in entropy-regularized Markov decision processes and games}}, year = {2019} } @incollection{JN11b, author = {Juditsky, A and Nemirovski, A}, booktitle = {Optimization for Machine Learning}, editor = {Sra, S and Nowozin, S and Wright, S}, pages = {149--183}, publisher = {MIT press}, title = {{First-Order Methods for Nonsmooth Convex Large-Scale Optimization, II: Utilizing Problem's Structure}}, year = {2011} } @article{AgHeTe88:Switching, author = {Agrawal, R and Hedge, M V and Teneketzis, D}, journal = {IEEE Transactions on Automatic Control}, number = {10}, pages = {899--906}, title = {{Asymptotically Efficient Adaptive Allocation Rules for the Multiarmed Bandit Problem with Switching Cost}}, volume = {33}, year = {1988} } @inproceedings{lazarevic2005feature, address = {New York, NY, USA}, annote = {comps{\_}ano}, author = {Lazarevic, Aleksandar and Kumar, Vipin}, booktitle = {KDD '05: Proceedings of the eleventh ACM SIGKDD international conference 
on Knowledge discovery in data mining}, doi = {http://doi.acm.org/10.1145/1081870.1081891}, isbn = {1-59593-135-X}, pages = {157--166}, publisher = {ACM}, title = {{Feature bagging for outlier detection}}, year = {2005} } @inproceedings{neu2015explore, author = {Neu, Gergely}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Neu - 2015 - Explore no more Improved high-probability regret bounds for non-stochastic bandits.pdf:pdf}, title = {{Explore no more: Improved high-probability regret bounds for non-stochastic bandits}}, year = {2015} } @inproceedings{aggarwal2003framework, author = {Aggarwal, Charu C and Han, Jiawei and Wang, Jianyong and Yu, Philip S}, booktitle = {Proceedings of the 29th international conference on Very large data bases - Volume 29}, isbn = {0-12-722442-4}, pages = {81--92}, publisher = {VLDB Endowment}, series = {VLDB '2003}, title = {{A framework for clustering evolving data streams}}, url = {http://portal.acm.org/citation.cfm?id=1315451.1315460}, year = {2003} } @inproceedings{tang2020taylor, abstract = {In this work, we investigate the application of Taylor expansions in reinforcement learning. In particular, we propose Taylor expansion policy optimization , a policy optimization formalism that generalizes prior work (e.g., TRPO) as a first-order special case. We also show that Taylor expansions intimately relate to off-policy evaluation. Finally, we show that this new formulation entails modifications which improve the performance of several state-of-the-art distributed algorithms.}, author = {Tang, Yunghao and Valko, Michal and Munos, R{\'{e}}mi}, booktitle = {International Conference on Machine Learning}, title = {{Taylor expansion policy optimization}}, year = {2020} } @inproceedings{viswanath2009evolution, abstract = {Online social networks have become extremely popular; numerous sites allow users to interact and share content using social links. 
Users of these networks often establish hundreds to even thousands of social links with other users. Recently, researchers have suggested examining the activity network— a network that is based on the actual interaction between users, rather than mere friendship—to distinguish between strong and weak links. While initial studies have led to in- sights on how an activity network is structurally different from the social network itself, a natural and important aspect of the activity network has been disregarded: the fact that over time social links can grow stronger or weaker. In this paper, we study the evolution of activity between users in the Facebook social network to capture this notion. We find that links in the activity network tend to come and go rapidly over time, and the strength of ties exhibits a general decreasing trend of activity as the social network link ages. For example, only 30{\%} of Facebook user pairs interact consistently from one month to the next. Interestingly, we also find that even though the links of the activity network change rapidly over time, many graph-theoretic properties of the activity network remain unchanged.}, author = {Viswanath, Bimal and Mislove, Alan and Cha, Meeyoung and Gummadi, Krishna P.}, booktitle = {ACM Workshop on Online Social Networks}, title = {{On the evolution of user interaction in facebook}}, year = {2009} } @article{Chen1998, author = {Chen, S S and Donoho, D L and Saunders, M A}, journal = {SIAM Journal on Scientific Computing}, number = {1}, pages = {33--61}, title = {{Atomic Decomposition by Basis Pursuit}}, volume = {20}, year = {1998} } @book{schrijver2004combinatorial, author = {Schrijver, A}, publisher = {Springer}, title = {{Combinatorial optimization: Polyhedra and efficiency}}, year = {2004} } @inproceedings{Mairal2010a, author = {Mairal, J and Jenatton, R and Obozinski, G and Bach, F}, booktitle = {Adv. 
NIPS}, number = {00512556}, organization = {HAL INRIA}, title = {{Network Flow Algorithms for Structured Sparsity}}, year = {2010} } @inproceedings{delalleau2005efficient, author = {Delalleau, Olivier and Bengio, Yoshua and Roux, Nicolas Le}, booktitle = {AISTAT}, pages = {96--103}, title = {{Efficient Non-Parametric Function Induction in Semi-Supervised Learning}}, year = {2005} } @inproceedings{kujala07perturbed, author = {Kujala, Jussi and Elomaa, Tapio}, doi = {http://dx.doi.org/10.1007/978-3-540-75225-7_16}, pages = {166--180}, title = {{Following the Perturbed Leader to Gamble at Multi-armed Bandits}} } @inproceedings{goldberg2011oasis:, author = {Goldberg, Andrew and Zhu, Xiaojin and Furger, Alex and Xu, Jun-Ming}, booktitle = {Proceedings of the Twenty-Fifth AAAI Conference on Artificial Intelligence}, title = {{OASIS: Online active semisupervised learning}}, year = {2011} } @article{ando1979concavity, author = {Ando, T}, journal = {Linear Algebra and its Applications}, pages = {203--241}, publisher = {Elsevier}, title = {{Concavity of certain maps on positive definite matrices and applications to {\{}H{\}}adamard products}}, volume = {26}, year = {1979} } @article{minoux1978accelerated, author = {Minoux, M}, journal = {Optimization Techniques}, pages = {234--243}, publisher = {Springer}, title = {{Accelerated greedy algorithms for maximizing submodular set functions}}, year = {1978} } @inproceedings{NeuTDK2006, author = {Neu, Gergely}, booktitle = {BME-VIK TDK'06}, title = {{Inverse Reinforcement Learning via the Method of Natural Gradients}}, year = {2006} } @incollection{taskar2004max-margin, address = {Cambridge, MA}, author = {Taskar, Ben and Guestrin, Carlos and Koller, Daphne}, booktitle = {Advances in Neural Information Processing Systems 16}, editor = {Thrun, Sebastian and Saul, Lawrence and Sch{\"{o}}lkopf, Bernhard}, keywords = {Markov models,PAC bounds,graphical models,kernel methods,large margin methods,machine learning,quadratic 
programming,statistical learning theory,structured data}, publisher = {MIT Press}, title = {{Max-Margin Markov Networks}}, year = {2004} } @article{dayan1994td, author = {Dayan, Peter and Sejnowski, Terry}, journal = {Machine Learning}, pages = {295--301}, title = {{{\{}TD{\}}($\lambda$) Converges with Probability 1}}, volume = {14}, year = {1994} } @techreport{carpentier2015uncertainty, abstract = {We construct minimax optimal non-asymptotic confidence sets for low rank matrix recovery algorithms such as the Matrix Lasso or Dantzig selector. These are employed to devise adaptive sequential sampling procedures that guarantee recovery of the true matrix in Frobenius norm after a data-driven stopping time {\$}\backslashhat n{\$} for the number of measurements that have to be taken. With high probability, this stopping time is minimax optimal. We detail applications to quantum tomography problems where measurements arise from Pauli observables. We also give a theoretical construction of a confidence set for the density matrix of a quantum state that has optimal diameter in nuclear norm. The non-asymptotic properties of our confidence sets are further investigated in a simulation study.}, archivePrefix = {arXiv}, arxivId = {1504.03234}, author = {Carpentier, Alexandra and Eisert, Jens and Gross, David and Nickl, Richard}, eprint = {1504.03234}, title = {{Uncertainty quantification for matrix compressed sensing and quantum tomography problems}}, year = {2015} } @inproceedings{koolen10comphedge, author = {Koolen, Wouter M. and Warmuth, Manfred K. and Kivinen, Jyrki}, booktitle = {Conference on Learning Theory}, title = {{Hedging structured concepts}}, year = {2010} } @article{Tremblay2017a, abstract = {We present a new random sampling strategy for k-bandlimited signals defined on graphs, based on determinantal point processes (DPP). 
For small graphs, ie, in cases where the spectrum of the graph is accessible, we exhibit a DPP sampling scheme that enables perfect recovery of bandlimited signals. For large graphs, ie, in cases where the graph's spectrum is not accessible, we investigate, both theoretically and empirically, a sub-optimal but much faster DPP based on loop-erased random walks on the graph. Preliminary experiments show promising results especially in cases where the number of measurements should stay as small as possible and for graphs that have a strong community structure. Our sampling scheme is efficient and can be applied to graphs with up to {\$}10{\^{}}6{\$} nodes.}, archivePrefix = {arXiv}, arxivId = {1703.01594}, author = {Tremblay, Nicolas and Amblard, Pierre-Olivier and Barthelm{\'{e}}, Simon}, doi = {10.23919/EUSIPCO.2017.8081494}, eprint = {1703.01594}, isbn = {9780992862671}, title = {{Graph sampling with determinantal processes}}, year = {2017} } @incollection{choi2012nonparametric, author = {Choi, Jaedeug and Kim, Kee-Eung}, booktitle = {Advances in Neural Information Processing Systems 25}, editor = {Bartlett, P and Pereira, F C N and Burges, C J C and Bottou, L and Weinberger, K Q}, pages = {314--322}, title = {{Nonparametric Bayesian Inverse Reinforcement Learning for Multiple Reward Functions}}, url = {http://books.nips.cc/papers/files/nips25/NIPS2012{\_}0159.pdf}, year = {2012} } @inproceedings{ng2000, author = {Ng, A Y and Russell, S}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Ng, Russell - 2000 - Algorithms for inverse reinforcement learning.pdf:pdf}, pages = {663--670}, title = {{Algorithms for Inverse Reinforcement Learning}}, url = {citeseer.ist.psu.edu/ng00algorithms.html} } @inproceedings{bresina2002planning, author = {Bresina, John and Dearden, Richard and Meuleau, Nicolas and Ramakrishnan, Sailesh and Smith, David and Washington, Rich}, booktitle = {Proceedings of the 18th Conference on Uncertainty in Artificial 
Intelligence}, pages = {77--84}, title = {{Planning Under Continuous Time and Resource Uncertainty: A Challenge for {\{}AI{\}}}}, year = {2002} } @article{kirkpatrick1983optimization, author = {Kirkpatrick, S and Gelatt, C D and Vecchi, M P}, journal = {Science}, number = {4598}, pages = {671--680}, title = {{Optimization by Simulated Annealing}}, volume = {220}, year = {1983} } @inproceedings{neu12o-ssp, author = {Neu, G and Gy{\"{o}}rgy, A and Szepesv{\'{a}}ri, $\backslash$relax Cs. $\backslash$textCs}, booktitle = {Proceedings of the 23rd Annual Conference on Learning Theory (COLT)}, pages = {231--243}, title = {{The Online Loop-free Stochastic Shortest-Path Problem}}, year = {2013} } @incollection{weinberger2006distance, address = {Cambridge, MA}, annote = {comps{\_}distance}, author = {Weinberger, Kilian and Blitzer, John and Saul, Lawrence}, booktitle = {Advances in Neural Information Processing Systems 18}, editor = {Weiss, Y and Sch{\"{o}}lkopf, B and Platt, J}, pages = {1473--1480}, publisher = {MIT Press}, title = {{Distance Metric Learning for Large Margin Nearest Neighbor Classification}}, url = {http://books.nips.cc/papers/files/nips18/NIPS2005{\_}0265.pdf}, year = {2006} } @article{EKMW08, author = {Even-Dar, Eyal and Kearns, Michael and Mansour, Yishay and Wortman, Jennifer}, journal = {Machine Learning}, number = {1-2}, pages = {21--37}, title = {{Regret to the best vs. 
regret to the average}}, volume = {72}, year = {2008} } @phdthesis{guestrin2003planning, author = {Guestrin, Carlos}, school = {Stanford University}, title = {{Planning Under Uncertainty in Complex Structured Environments}}, year = {2003} } @techreport{toshev2010submodular, annote = {Written Preliminary Examination}, author = {Toshev, A}, institution = {University of Pennsylvania}, title = {{Submodular Function Minimization}}, year = {2010} } @inproceedings{sutton2000policy, author = {Sutton, Richard and McAllester, David and Singh, Satinder and Mansour, Yishay}, booktitle = {Advances in Neural Information Processing Systems 12}, pages = {1057--1063}, title = {{Policy Gradient Methods for Reinforcement Learning with Function Approximation}}, year = {2000} } @inproceedings{boutilier1995exploiting, author = {Boutilier, Craig and Dearden, Richard and Goldszmidt, Mois{\'{e}}s}, booktitle = {Proceedings of the 14th International Joint Conference on Artificial Intelligence}, pages = {1104--1111}, title = {{Exploiting Structure in Policy Construction}}, year = {1995} } @article{lee2010spectral, author = {Lee, Ann B and Wasserman, Larry}, doi = {10.1198/jasa.2010.tm09754}, journal = {Journal of the American Statistical Association}, number = {0}, pages = {1--15}, title = {{Spectral Connectivity Analysis}}, url = {http://pubs.amstat.org/doi/abs/10.1198/jasa.2010.tm09754}, volume = {0}, year = {2010} } @article{narasimhan2006q, author = {Narasimhan, M and Jojic, N and Bilmes, J}, journal = {Adv. NIPS}, title = {{Q-clustering}}, volume = {18}, year = {2006} } @article{zhang2015divide, abstract = {We establish optimal convergence rates for a decomposition-based scalable approach to kernel ridge regression. The method is simple to describe: it randomly partitions a dataset of size N into m subsets of equal size, computes an independent kernel ridge regression estimator for each subset, then averages the local solutions into a global predictor. 
This partitioning leads to a substantial reduction in computation time versus the standard approach of performing kernel ridge regression on all N samples. Our two main theorems establish that despite the computational speed-up, statistical optimality is retained: as long as m is not too large, the partition-based estimator achieves the statistical minimax rate over all estimators using the set of N samples. As concrete examples, our theory guarantees that the number of processors m may grow nearly linearly for finite-rank kernels and Gaussian kernels and polynomially in N for Sobolev spaces, which in turn allows for substantial reductions in computational cost. We conclude with experiments on both simulated data and a music-prediction task that complement our theoretical results, exhibiting the computational and statistical benefits of our approach.}, author = {Zhang, Yuchen and Duchi, John C. and Wainwright, Martin J.}, journal = {Journal Machine Learning Research}, pages = {3299--3340}, title = {{Divide and conquer kernel ridge regression: A distributed algorithm with minimax optimal rates}}, volume = {16}, year = {2015} } @article{littlestone1994weighted, author = {Littlestone, Nick and Warmuth, Manfred}, journal = {Information and Computation}, number = {2}, pages = {212--261}, title = {{The weighted majority algorithm}}, volume = {108}, year = {1994} } @article{higdon1998auxiliary, author = {Higdon, David}, journal = {Journal of the American Statistical Association}, number = {442}, pages = {585--595}, title = {{Auxiliary Variable Methods for {\{}Markov{\}} Chain {\{}Monte Carlo{\}} with Applications}}, volume = {93}, year = {1998} } @article{GLS01, author = {Grove, A and Littlestone, N and Schuurmans, D}, journal = {Machine Learning}, pages = {173--210}, title = {{General convergence results for linear discriminant updates}}, volume = {43}, year = {2001} } @article{gans2005medical, abstract = {We surveyed a nationally representative sample of medical group 
practices to assess their current use of information technology (IT). Our results suggest that adoption of electronic health records (EHRs) is progressing slowly, at least in smaller practices, although a number of group practices plan to implement an EHR within the next two years. Moreover, the process of choosing and implementing an EHR appears to be more complex and varied than we expected. This suggests a need for greater support for practices, particularly smaller ones, in this quest if the benefits expected from EHRs are to be realized.}, author = {Gans, David and Kralewski, John and Hammons, Terry and Dowd, Bryan}, doi = {10.1377/hlthaff.24.5.1323}, institution = {Medical Group Management Association, Englewood, Colorado, USA. dng@mgma.com}, journal = {Health Aff (Millwood)}, keywords = {Computerized,Data Collection; Diffusion of Innovation; Informat,Medical; United States,utilization; Medical Records Systems,utilization; Practice Management}, number = {5}, pages = {1323--1333}, pmid = {16162580}, title = {{Medical groups' adoption of electronic health records and information systems.}}, url = {http://dx.doi.org/10.1377/hlthaff.24.5.1323}, volume = {24}, year = {2005} } @inproceedings{Verbaeten-2003-MisLabeled, author = {Verbaeten, S and Assche., A V}, booktitle = {Proceeding of 4th International Workshop on Multiple Classifier Systems}, title = {{Ensemble Methods for Noise Elimination in Classification Problems.}}, year = {2003} } @article{shah2019reinforcement, author = {Shah, Devavrat and Xie, Qiaomin and Xu, Zhi}, journal = {arXiv preprint arXiv:1902.05213}, title = {{On reinforcement learning using Monte-Carlo tree search with supervised learning: Non-asymptotic analysis}}, url = {https://arxiv.org/pdf/1902.05213.pdf}, year = {2019} } @inproceedings{karnin2013almost, author = {Karnin, Zohar and Koren, Tomer and Somekh, Oren}, booktitle = {International Conference on Machine Learning}, title = {{Almost optimal exploration in multi-armed bandits}}, year = 
{2013} } @inproceedings{agosta2013mixture, abstract = {We model a little studied type of traffic, namely the network traffic generated from endhosts. We introduce a parsimonious model of the marginal distribution for connection arrivals consisting of mixture models with both heavy and light-tailed component distributions. Our methodology assumes that the underlying user data can be fitted to one of several models, and we apply Bayesian model selection criterion to choose the preferred combination of components. Our experiments show that a simple Pareto-exponential mixture model is preferred over more complex alternatives, for a wide range of users. This model has the desirable property of modeling the entire distribution, effectively clustering the traffic into the heavy-tailed as well as the non-heavy-tailed components. Also this method quantifies the wide diversity in the observed endhost traffic.}, author = {Agosta, John Mark and Chandrashekar, Jaideep and Crovella, Mark and Taft, Nina and Ting, Daniel}, booktitle = {IEEE Proceedings of INFOCOM,}, doi = {10.1109/INFCOM.2013.6566768}, issn = {0743-166X}, keywords = {Approximation methods,Bayes methods,Bayesian model selection criterion,Computational modeling,Data models,Educational institutions,Mathematical model,Maximum likelihood estimation,Pareto distribution,connection arrivals,endhost network traffic,heavy-tailed component distributions,light-tailed component distributions,marginal distribution,parsimonious model,simple Pareto-exponential mixture,telecommunication networks,telecommunication traffic,traffic clustering,wide diversity}, pages = {225--229}, title = {{Mixture models of endhost network traffic}}, year = {2013} } @article{Lenstra1978, abstract = {Precedence constraints between jobs that have to be respected in every feasible schedule generally increase the computational com- plexity of a scheduling problem. 
Occasionally, their introduction may turn a problem that is solvable within polynomial time into an NP-complete one, for which a good algorithm is highly unlikely to exist. We illustrate the use of these concepts by extending some typical NP-completeness results and simplifying their correctness proofs for scheduling problems involving precedence constraints.}, author = {Lenstra, J K and {Rinnooy Kan}, A H G}, doi = {10.1287/opre.26.1.22}, issn = {0030-364X}, journal = {Operations Research}, number = {1}, pages = {22--35}, title = {{Complexity of scheduling under precedence constraints}}, url = {http://pubsonline.informs.org/doi/abs/10.1287/opre.26.1.22}, volume = {26}, year = {1978} } @inproceedings{balcan2005person, author = {Balcan, Maria-Florina and Blum, Avrim and Choi, Patrick Pakyan and Lafferty, John and Pantano, Brian and Rwebangira, Mugizi Robert and Zhu, Xiaojin}, booktitle = {ICML 2005 Workshop on Learning with Partially Classified Training Data}, title = {{Person Identification in Webcam Images: An Application of Semi-Supervised Learning}}, year = {2005} } @article{orabona2018scale, abstract = {We design and analyze algorithms for online linear optimization that have optimal regret and at the same time do not need to know any upper or lower bounds on the norm of the loss vectors. Our algorithms are instances of the Follow the Regularized Leader (FTRL) and Mirror Descent (MD) meta-algorithms. We achieve adaptiveness to the norms of the loss vectors by scale invariance, i.e., our algorithms make exactly the same decisions if the sequence of loss vectors is multiplied by any positive constant. The algorithm based on FTRL works for any decision set, bounded or unbounded. For unbounded decisions sets, this is the first adaptive algorithm for online linear optimization with a non-vacuous regret bound. 
In contrast, we show lower bounds on scale-free algorithms based on MD on unbounded domains.}, archivePrefix = {arXiv}, arxivId = {1601.01974}, author = {Orabona, Francesco and P{\'{a}}l, D{\'{a}}vid}, doi = {10.1016/j.tcs.2017.11.021}, eprint = {1601.01974}, issn = {03043975}, journal = {Theoretical Computer Science}, keywords = {Online algorithms,Online learning,Optimization,Regret bounds}, title = {{Scale-free online learning}}, year = {2018} } @article{Zhang2017, abstract = {We study a mini-batch diversification scheme for stochastic gradient descent (SGD). While classical SGD relies on uniformly sampling data points to form a mini-batch, we propose a non-uniform sampling scheme based on the Determinantal Point Process (DPP). The DPP relies on a similarity measure between data points and gives low probabilities to mini-batches which contain redundant data, and higher probabilities to mini-batches with more diverse data. This simultaneously balances the data and leads to stochastic gradients with lower variance. We term this approach Diversified Mini-Batch SGD (DM-SGD). We show that regular SGD and a biased version of stratified sampling emerge as special cases. Furthermore, DM-SGD generalizes stratified sampling to cases where no discrete features exist to bin the data into groups. 
We show experimentally that our method results more interpretable and diverse features in unsupervised setups, and in better classification accuracies in supervised setups.}, archivePrefix = {arXiv}, arxivId = {1705.00607}, author = {Zhang, Cheng and Kjellstrom, Hedvig and Mandt, Stephan}, eprint = {1705.00607}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Zhang, Kjellstrom, Mandt - 2017 - Determinantal Point Processes for Mini-Batch Diversification.pdf:pdf}, month = {may}, title = {{Determinantal Point Processes for Mini-Batch Diversification}}, url = {http://arxiv.org/abs/1705.00607}, year = {2017} } @inproceedings{bartletthigh, author = {Bartlett, Peter L and Dani, Varsha and Hayes, Thomas P and Kakade, Sham M and Rakhlin, Alexander and Tewari, Ambuj}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {335--342}, title = {{High-probability Regret Bounds for Bandit Online Linear Optimization}} } @article{gal2001optimality, author = {Gal, Shmuel}, journal = {International Journal of Game Theory}, number = {4}, pages = {533--542}, title = {{On the optimality of a simple strategy for searching graphs}}, url = {https://pdfs.semanticscholar.org/232a/ebaee2320105a12d2b47aed1e2ef21aec8e2.pdf}, volume = {29}, year = {2001} } @article{tropp2015an-introduction, author = {Tropp, Joel Aaron}, journal = {Foundations and Trends in Machine Learning}, number = {1-2}, pages = {1--230}, title = {{An introduction to matrix concentration inequalities}}, url = {https://arxiv.org/pdf/1501.01571}, volume = {8}, year = {2015} } @article{Heckerman1995, abstract = {This article discusses an optimal troubleshooting plan. An optimal troubling plan is a sequence of observations and repairs that minimizes expected costs. The classic way to compute the expected cost of a plan is to use a decision tree with Bayesian networks. A decision tree represents the possible unfolding of events in temporal order. 
The representation contains two types of nodes called decision nodes and chance nodes. A decision node represents a decision which is an irrevocable allocation of resource. Branches of a decision node correspond to the mutually exclusive and collectively exhaustive set of alternatives available to the decision-maker. A chance node represents an uncertain variable. Branches of a chance node correspond to the mutually exclusive and collectively exhaustive possible states of the variable. Associated with each chance-node branch, is the decision maker's probability that the variable will be in the corresponding state. Each path through the tree reflects a possible outcome for the decision-maker. Associated with each path is the decision-maker's preference for that outcome.}, author = {Heckerman, David and Breese, John S and Rommelse, Koos}, doi = {10.1145/203330.203341}, issn = {00010782}, journal = {Communications of the ACM}, number = {3}, pages = {49--57}, title = {{Decision-theoretic troubleshooting}}, url = {http://portal.acm.org/citation.cfm?doid=203330.203341}, volume = {38}, year = {1995} } @inproceedings{McMaStre09, author = {McMahan, H Brendan and Streeter, Matthew}, title = {{Tighter Bounds for Multi-Armed Bandits with Expert Advice}} } @article{tesauro1995temporal, author = {Tesauro, Gerald}, journal = {Communications of the ACM}, number = {3}, pages = {58--68}, title = {{Temporal Difference Learning and {\{}TD-Gammon{\}}}}, volume = {38}, year = {1995} } @incollection{ZiNe13, author = {Zimin, A and Neu, G}, booktitle = {Advances in Neural Information Processing Systems 26}, editor = {Burges, C J C and Bottou, L and Welling, M and Ghahramani, Z and Weinberger, K Q}, pages = {1583--1591}, title = {{Online learning in episodic {\{}M{\}}arkovian decision processes by relative entropy policy search}}, year = {2013} } @article{kapralov_single_2014, abstract = {We present the first single pass algorithm for computing spectral sparsifiers of graphs in the dynamic 
semi-streaming model. Given a single pass over a stream containing insertions and deletions of edges to a graph G, our algorithm maintains a randomized linear sketch of the incidence matrix of G into dimension O((1/epsilon{\{}$\backslash$textasciicircum{\}}2) n polylog(n)). Using this sketch, at any point, the algorithm can output a (1 +/- epsilon) spectral sparsifier for G with high probability. While O((1/epsilon{\{}$\backslash$textasciicircum{\}}2) n polylog(n)) space algorithms are known for computing "cut sparsifiers" in dynamic streams [AGM12b, GKP12] and spectral sparsifiers in "insertion-only" streams [KL11], prior to our work, the best known single pass algorithm for maintaining spectral sparsifiers in dynamic streams required sketches of dimension Omega((1/epsilon{\{}$\backslash$textasciicircum{\}}2) n{\{}$\backslash$textasciicircum{\}}(5/3)) [AGM14]. To achieve our result, we show that, using a coarse sparsifier of G and a linear sketch of G's incidence matrix, it is possible to sample edges by effective resistance, obtaining a spectral sparsifier of arbitrary precision. Sampling from the sketch requires a novel application of ell{\_}2/ell{\_}2 sparse recovery, a natural extension of the ell{\_}0 methods used for cut sparsifiers in [AGM12b]. Recent work of [MP12] on row sampling for matrix approximation gives a recursive approach for obtaining the required coarse sparsifiers. 
Under certain restrictions, our approach also extends to the problem of maintaining a spectral approximation for a general matrix {$A^T A$} given a stream of updates to rows in A.}, annote = {arXiv: 1407.1289}, author = {Kapralov, Michael and Lee, Yin Tat and Musco, Cameron and Musco, Christopher and Sidford, Aaron}, journal = {arXiv:1407.1289 [cs]}, keywords = {Computer Science - Data Structures and Algorithms}, month = {jul}, title = {{Single Pass Spectral Sparsification in Dynamic Streams}}, url = {http://arxiv.org/abs/1407.1289}, year = {2014} } @inproceedings{shalev-shwartz2004online, address = {New York, NY, USA}, annote = {comps{\_}distance}, author = {Shalev-Shwartz, Shai and Singer, Yoram and Ng, Andrew Y}, booktitle = {ICML '04: Proceedings of the twenty-first international conference on Machine learning}, doi = {10.1145/1015330.1015376}, isbn = {1-58113-828-5}, pages = {94}, publisher = {ACM}, title = {{Online and batch learning of pseudo-metrics}}, year = {2004} } @article{cortez2009, author = {Cortez, P and Cerdeira, A and Almeida, F and Matos, T and Reis, J}, journal = {Decision Support Systems}, pages = {547--553}, publisher = {Elsevier}, title = {{Modeling wine preferences by data mining from physicochemical properties}}, volume = {47}, year = {2009} } @phdthesis{shalev-shwartz2007online, author = {Shalev-Shwartz, Shai}, keywords = {bandits}, mendeley-tags = {bandits}, month = {jul}, school = {The Hebrew University of Jerusalem}, title = {{Online Learning: Theory, Algorithms, and Applications}}, year = {2007} } @inproceedings{eaton2007bayesian, annote = {comps{\_}models}, author = {Eaton, D and Murphy, K}, booktitle = {Proceedings of the 23rd Annual Conference on Uncertainty in Artificial Intelligence (UAI-07)}, title = {{Bayesian structure learning using dynamic programming and MCMC}}, url = 
{http://www.cs.ubc.ca/{~}murphyk/Papers/eaton-uai07.pdf}, year = {2007} } @inproceedings{bennett1999semi-supervised, author = {Bennett, Kristin and Demiriz, Ayhan}, booktitle = {Advances in Neural Information Processing Systems 11}, pages = {368--374}, title = {{Semi-Supervised Support Vector Machines}}, year = {1999} } @article{yu, author = {Yu, B}, journal = {The Annals of Probability}, number = {1}, pages = {94--116}, title = {{Rates of convergence for empirical processes of stationary mixing sequences}}, volume = {22}, year = {1994} } @inproceedings{silva, author = {da Silva, V F and Costa, A H R and Lima, P}, pages = {4246--4251}, title = {{Inverse Reinforcement Learning with Evaluation}} } @book{mockus1989global, author = {Mockus, Jonas}, doi = {10.1007/978-94-009-0909-0_1}, title = {{Global optimization and the Bayesian approach}}, url = {http://www.springerlink.com/index/10.1007/978-94-009-0909-0{\_}1}, year = {1989} } @inproceedings{BM11, author = {Bach, F and Moulines, E}, booktitle = {Advances in Neural Information Processing Systems (NIPS)}, title = {{Non-Asymptotic Analysis of Stochastic Approximation Algorithms for Machine Learning}}, year = {2011} } @article{varshamov1957estimate, author = {Varshamov, Rom Rubenovich}, journal = {Doklady Akademii Nauk SSSR}, pages = {739--741}, title = {{Estimate of the number of signals in error correcting codes}}, volume = {117}, year = {1957} } @inproceedings{stobbe, author = {Stobbe, P and Krause, A}, booktitle = {Adv. 
NIPS}, title = {{Efficient Minimization of Decomposable Submodular Functions}}, year = {2010} } @inproceedings{pinto2009how, author = {Pinto, Nicolas and DiCarlo, James and Cox, David}, booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition}, title = {{How Far Can You Get with a Modern Face Recognition Test Set Using Only Simple Features?}}, year = {2009} } @article{sethuraman1994constructive, annote = {c{\_}omps{\_}models}, author = {Sethuraman, J}, journal = {Statistica Sinica}, pages = {639--650}, title = {{A constructive definition of Dirichlet priors}}, url = {http://www3.stat.sinica.edu.tw/statistica/oldpdf/A4n216.pdf}, volume = {4}, year = {1994} } @article{Martinet1970, author = {Martinet, B}, journal = {ESAIM: Mathematical Modelling and Numerical Analysis - Mod{\'{e}}lisation Math{\'{e}}matique et Analyse Num{\'{e}}rique}, number = {R3}, pages = {154--158}, publisher = {EDP Sciences}, title = {{R{\'{e}}gularisation d'in{\'{e}}quations variationnelles par approximations successives}}, volume = {4}, year = {1970} } @inproceedings{titov07incremental, address = {Prague, Czech Republic}, author = {Titov, Ivan and Henderson, James}, booktitle = {ACL '07: Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics}, pages = {632--639}, publisher = {Association for Computational Linguistics}, title = {{Constituent Parsing with Incremental Sigmoid Belief Networks}}, url = {http://www.aclweb.org/anthology/P/P07/P07-0080}, year = {2007} } @inproceedings{el-yaniv_stable_2006, author = {El-Yaniv, Ran and Pechyony, Dmitry}, booktitle = {Proceedings of COLT}, title = {{Stable transductive learning}}, year = {2006} } @article{zhao2003face, author = {Zhao, Wen-Yi and Chellappa, Rama and Phillips, P and Rosenfeld, Azriel}, journal = {ACM Computing Surveys}, number = {4}, pages = {399--458}, title = {{Face Recognition: A Literature Survey}}, volume = {35}, year = {2003} } 
@article{jewell1963markov-renewal, author = {Jewell, William}, journal = {Operations Research}, number = {6}, pages = {938--948}, title = {{Markov-Renewal Programming. I: Formulation, Finite Return Models}}, volume = {11}, year = {1963} } @article{pwc2009, author = {{PricewaterhouseCoopers (PWC)}}, journal = {Tech. Report}, title = {{Global city GDP rankings 2008--2025}}, year = {2009} } @inproceedings{grill2020bootstrap, abstract = {Copyright {\textcopyright} 2020, arXiv, All rights reserved. We introduce Bootstrap Your Own Latent (BYOL), a new approach to self-supervised image representation learning. BYOL relies on two neural networks, referred to as online and target networks, that interact and learn from each other. From an augmented view of an image, we train the online network to predict the target network representation of the same image under a different augmented view. At the same time, we update the target network with a slow-moving average of the online network. While state-of-the-art methods intrinsically rely on negative pairs, BYOL achieves a new state of the art without them. BYOL reaches 74.3{\%} top-1 classification accuracy on ImageNet using the standard linear evaluation protocol with a ResNet-50 architecture and 79.6{\%} with a larger ResNet. We show that BYOL performs on par or better than the current state of the art on both transfer and semi-supervised benchmarks.}, author = {Grill, J.-B. and Strub, F. and Altch{\'{e}}, F. and Tallec, C. and Richemond, P.H. and Buchatskaya, E. and Doersch, C. and Pires, B.A. and Guo, Z.D. and Azar, M.G. and Piot, B. and Kavukcuoglu, K. and Munos, R. 
and Valko, M.}, booktitle = {Neural Information Processing Systems}, title = {{Bootstrap Your Own Latent A New Approach to Self-Supervised Learning}}, year = {2020} } @article{ross2010reduction, abstract = {Sequential prediction problems such as imitation learning, where future observations depend on previous predictions (actions), violate the common i.i.d. assumptions made in statistical learning. This leads to poor performance in theory and often in practice. Some recent approaches provide stronger guarantees in this setting, but remain somewhat unsatisfactory as they train either non-stationary or stochastic policies and require a large number of iterations. In this paper, we propose a new iterative algorithm, which trains a stationary deterministic policy, that can be seen as a no regret algorithm in an online learning setting. We show that any such no regret algorithm, combined with additional reduction assumptions, must find a policy with good performance under the distribution of observations it induces in such sequential settings. We demonstrate that this new approach outperforms previous approaches on two challenging imitation learning problems and a benchmark sequence labeling problem.}, author = {Ross, Stephane and Gordon, Geoffrey J and Bagnell, J Andrew}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Ross, Gordon, Bagnell - 2010 - A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning.pdf:pdf}, journal = {AISTATS}, pages = {627--635}, title = {{A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning}}, url = {http://arxiv.org/abs/1011.0686}, volume = {15}, year = {2010} } @inproceedings{ng2000algorithms, abstract = {This paper addresses the problem of inverse reinforcement learning (IRL) in Markov decision processes, that is, the problem of extracting a reward function given observed, optimal behaviour. 
IRL may be useful for apprenticeship learning to acquire skilled behaviour, and for ascertaining the reward function being optimized by a natural system. We first characterize the set of all reward functions for which a given policy is optimal. We then derive three algorithms for IRL. The first two deal with the case where the entire policy is known; we handle tabulated reward functions on a finite state space and linear functional approximation of the reward function over a potentially infinite state space. The third algorithm deals with the more realistic case in which the policy is known only through a finite set of observed trajectories. In all cases, a key issue is degeneracy---the existence of a large set of reward functions for which the observed policy is optimal. To remove...}, author = {Ng, Andrew and Russell, Stuart}, booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning}, doi = {10.2460/ajvr.67.2.323}, editor = {{De Sousa}, Jorge Pinho}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Ng, Russell - 2000 - Algorithms for inverse reinforcement learning.pdf:pdf}, issn = {00029645}, pages = {663--670}, pmid = {16454640}, publisher = {Morgan Kaufmann Publishers Inc.}, title = {{Algorithms for inverse reinforcement learning}}, url = {http://www-cs.stanford.edu/people/ang/papers/icml00-irl.pdf}, year = {2000} } @inproceedings{abbeel2004apprenticeship, author = {Abbeel, Pieter and Ng, Andrew}, booktitle = {Proceedings of the 21st international conference on machine learning}, doi = {10.1145/1015330.1015430}, isbn = {1-58113-838-5}, keywords = {irl}, mendeley-tags = {irl}, title = {{Apprenticeship learning via inverse reinforcement learning}}, url = {http://www.eecs.harvard.edu/{~}parkes/cs286r/spring06/papers/abeelng.pdf}, year = {2004} } @inproceedings{valko2012semi-supervised, abstract = {In apprenticeship learning we aim to learn a good policy by observing the behavior of an expert or a 
set of experts. In particular, we consider the case where the expert acts so as to maximize an unknown reward function defined as a linear combination of a set of state features. In this paper, we consider the setting where we observe many sample trajectories (i.e., sequences of states) but only one or a few of them are labeled as experts' trajectories. We investigate the conditions under which the remaining unlabeled trajectories can help in learning a policy with a good performance. In particular, we define an extension to the max-margin inverse reinforcement learning proposed by Abbeel and Ng (2004) where, at each iteration, the max-margin optimization step is replaced by a semi-supervised optimization problem which favors classifiers separating clusters of trajectories. Finally, we report empirical results on two grid-world domains showing that the semi-supervised algorithm is able to output a better policy in fewer iterations than the related algorithm that does not take the unlabeled trajectories into account.}, author = {Valko, Michal and Ghavamzadeh, Mohammad and Lazaric, Alessandro}, booktitle = {The 24th Journal of Machine Learning Research Proceedings of the 10th European Workshop on Reinforcement Learning}, month = {jun}, pages = {131--241}, publisher = {Sparc}, title = {{Semi-Supervised Apprenticeship Learning}}, url = {http://researchers.lille.inria.fr/{~}valko/hp/serve.php?what=publications/valko2012semi-supervised.pdf}, volume = {24}, year = {2012} } @article{ziebart2008maximum, abstract = {Recent research has shown the benefit of framing problems of imitation learning as solutions to Markov Decision Problems. This approach reduces the problem of learning to recovering a utility function that makes the behavior induced by a near-optimal policy closely mimic demonstrated behavior. In this work, we develop a probabilistic approach based on the principle of maximum entropy. 
Our approach provides a well-defined, globally normalized distribution over decisions, while providing the same performance guarantees as existing methods.We develop our technique in the context of modeling real-world navigation and driving behaviors where collected data is inherently noisy and imperfect. Our probabilistic approach enables modeling of route preferences as well as a powerful new approach to inferring destinations and routes based on partial trajectories.}, author = {Ziebart, Brian and Maas, Andrew and Bagnell, J Andrew and Dey, Anind K}, editor = {Archer, M}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Ziebart et al. - 2008 - Maximum Entropy Inverse Reinforcement Learning.pdf:pdf}, isbn = {9781577353683}, journal = {Proc AAAI}, keywords = {irl}, mendeley-tags = {irl}, publisher = {AAAI Press}, title = {{Maximum Entropy Inverse Reinforcement Learning}}, url = {http://www.aaai.org/Papers/AAAI/2008/AAAI08-227.pdf}, year = {2008} } @inproceedings{judah2012active, author = {Judah, Kshitij and Fern, Alan Paul and Dietterich, Thomas Glenn}, booktitle = {AAAI Fall Symposium: Robots Learning Interactively from Human Teachers}, keywords = {dblp}, publisher = {AAAI}, series = {AAAI Technical Report}, title = {{Active Imitation Learning via Reduction to I.I.D. Active Learning.}}, url = {http://dblp.uni-trier.de/db/conf/aaaifs/aaaifs2012-07.html{\#}JudahFD12}, volume = {FS-12-07}, year = {2012} } @inproceedings{bagnell2010efficient, abstract = {Imitation Learning, while applied successfully on many large real-world$\backslash$nproblems, is typically addressed as a standard supervised learning$\backslash$nproblem, where it is assumed the training and testing data are i.i.d..$\backslash$nThis is not true in imitation learning as the learned policy influences$\backslash$nthe future test inputs (states) upon which it will be tested. 
We$\backslash$nshow that this leads to compounding errors and a regret bound that$\backslash$ngrows quadratically in the time horizon of the task. We propose two$\backslash$nalternative algorithms for imitation learning where training occurs$\backslash$nover several episodes of interaction. These two approaches share$\backslash$nin common that the learner's policy is slowly modified from executing$\backslash$nthe expert's policy to the learned policy. We show that this leads$\backslash$nto stronger performance guarantees and demonstrate the improved performance$\backslash$non two challenging problems: training a learner to play 1) a 3D racing$\backslash$ngame (Super Tux Kart) and 2) Mario Bros.; given input images from$\backslash$nthe games and corresponding actions taken by a human expert and near-optimal$\backslash$nplanner respectively.}, author = {Bagnell, J Andrew and Ross, St{\'{e}}phane}, booktitle = {Proceedings of the 13th International Conference on Artificial Intelligence and Statistics (AISTATS) 2010}, pages = {661--668}, title = {{Efficient Reductions for Imitation Learning}}, volume = {9}, year = {2010} } @inproceedings{CSS10, author = {Cesa-Bianchi, Nicol{\`{o}} and Shalev-Shwartz, Shai and Shamir, Ohad}, booktitle = {COLT}, pages = {218--231}, title = {{Online learning of noisy data with kernels}}, year = {2010} } @inproceedings{yue2009k, author = {Yue, Yisong and Broder, J and Kleinberg, R and Joachims, T}, booktitle = {22th annual conference on learning theory}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{The K-armed Dueling Bandits Problem}}, year = {2009} } @inproceedings{levine2017rotting, abstract = {The Multi-Armed Bandits (MAB) framework highlights the trade-off between acquiring new knowledge (Exploration) and leveraging available knowledge (Ex-ploitation). In the classical MAB problem, a decision maker must choose an arm at each time step, upon which she receives a reward. 
The decision maker's objective is to maximize her cumulative expected reward over the time horizon. The MAB problem has been studied extensively, specifically under the assumption of the arms' rewards distributions being stationary, or quasi-stationary, over time. We consider a variant of the MAB framework, which we termed Rotting Bandits, where each arm's expected reward decays as a function of the number of times it has been pulled. We are motivated by many real-world scenarios such as online advertis-ing, content recommendation, crowdsourcing, and more. We present algorithms, accompanied by simulations, and derive theoretical guarantees.}, author = {Levine, Nir and Crammer, Koby and Mannor, Shie}, booktitle = {Neural Information Processing Systems}, title = {{Rotting bandits}}, url = {http://papers.nips.cc/paper/6900-rotting-bandits.pdf}, year = {2017} } @inproceedings{kleinbergregret, author = {Kleinberg, Robert D and Niculescu-Mizil, Alexandru and Sharma, Yogeshwer}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {425--436}, title = {{Regret Bounds for Sleeping Experts and Bandits}} } @inproceedings{slivkins2009contextual, abstract = {In a multi-armed bandit (MAB) problem, an online algorithm makes a sequence of choices. In each round it chooses from a time-invariant set of alternatives and receives the payoff associated with this alternative. While the case of small strategy sets is by now well-understood, a lot of recent work has focused on MAB problems with exponentially or infinitely large strategy sets, where one needs to assume extra structure in order to make the problem tractable. In particular, recent literature considered information on similarity between arms. We consider similarity information in the setting of "contextual bandits", a natural extension of the basic MAB problem where before each round an algorithm is given the "context" - a hint about the payoffs in this round. 
Contextual bandits are directly motivated by placing advertisements on webpages, one of the crucial problems in sponsored search. A particularly simple way to represent similarity information in the contextual bandit setting is via a "similarity distance" between the context-arm pairs which gives an upper bound on the difference between the respective expected payoffs. Prior work on contextual bandits with similarity uses "uniform" partitions of the similarity space, which is potentially wasteful. We design more efficient algorithms that are based on adaptive partitions adjusted to "popular" context and "high-payoff" arms.}, annote = {From Duplicate 2 ( Contextual Bandits with Similarity Information - Slivkins, Aleksandrs ) And Duplicate 4 ( Contextual Bandits with Similarity Information - Slivkins, Aleksandrs ) And Duplicate 5 ( Contextual Bandits with Similarity Information - Slivkins, Aleksandrs ) From Duplicate 1 ( Contextual Bandits with Similarity Information - Slivkins, Aleksandrs ) }, author = {Slivkins, Aleksandrs}, booktitle = {Conference on Learning Theory}, keywords = {6,7 have been obtained,a postdoc brown,a preliminary version,a write up has,bandits,been circulated 2007,been posted arxiv,colt 2011,contextual bandits,full,has,metric spaces,multi armed bandits,online learning,org july 2009,regret minimization,results section,university,version a paper,which does not include,while author}, mendeley-tags = {bandits}, title = {{Contextual bandits with similarity information}}, url = {http://proceedings.mlr.press/v19/slivkins11a/slivkins11a.pdf}, year = {2009} } @article{agrawal1995continuum, author = {Agrawal, Rajeev}, journal = {SIAM Journal on Control and Optimization}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {1926--1951}, title = {{The continuum-armed bandit problem}}, url = {https://epubs.siam.org/doi/pdf/10.1137/S0363012992237273}, volume = {33}, year = {1995} } @inproceedings{hondaasymptotically, author = {Honda, Junya and Takemura, 
Akimichi}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {67--79}, title = {{An Asymptotically Optimal Bandit Algorithm for Bounded Support Models}} } @article{azuma1967weighted, author = {Azuma, Kazuoki}, journal = {Tohoku Mathematical Journal}, keywords = {bound,math}, number = {3}, pages = {357--367}, title = {{Weighted sums of certain dependent random variables}}, url = {https://www.jstage.jst.go.jp/article/tmj1949/19/3/19{\_}3{\_}357/{\_}pdf}, volume = {19}, year = {1967} } @inproceedings{lugosi2006, address = {Pittsburgh, PA, USA}, booktitle = {Proceedings of the 19th annual Conference On Learning Theory}, editor = {Lugosi, G{\'{a}}bor and Simon, Hans-Ulrich}, isbn = {3-540-35294-5}, keywords = {bandits}, mendeley-tags = {bandits}, month = {jun}, publisher = {Springer}, series = {COLT '06, Lecture Notes in Computer Science}, title = {{No Title}}, volume = {4005}, year = {2006} } @inproceedings{agrawal2011analysis, author = {Agrawal, Shipra and Goyal, Navin}, booktitle = {Conference on Learning Theory}, title = {{Analysis of Thompson sampling for the multi-armed bandit problem}}, url = {http://proceedings.mlr.press/v23/agrawal12/agrawal12.pdf}, year = {2012} } @inproceedings{korda2013thompson, abstract = {Thompson Sampling has been demonstrated in many complex bandit models, however the theoretical guarantees available for the parametric multi-armed bandit are still limited to the Bernoulli case. Here we extend them by proving asymptotic optimality of the algorithm using the Jeffreys prior for 1-dimensional exponential family bandits. Our proof builds on previous work, but also makes extensive use of closed forms for Kullback-Leibler divergence and Fisher information (and thus Jeffreys prior) available in an exponential family. This allow us to give a finite time exponential concentration inequality for posterior distributions on exponential families that may be of interest in its own right. 
Moreover our analysis covers some distributions for which no optimistic algorithm has yet been proposed, including heavy-tailed exponential families.}, archivePrefix = {arXiv}, arxivId = {1307.3400}, author = {Korda, Nathaniel and Kaufmann, Emilie and Munos, R{\'{e}}mi}, booktitle = {Neural Information Processing Systems}, eprint = {1307.3400}, title = {{Thompson Sampling for 1-Dimensional Exponential Family Bandits}}, year = {2013} } @inproceedings{hutter2007, address = {Sendai, Japan}, booktitle = {Proceedings of the 18th international conference on Algorithmic Learning Theory}, editor = {Hutter, Marcus and Servedio, Rocco A and Takimoto, Eiji}, isbn = {978-3-540-75224-0}, keywords = {bandits}, mendeley-tags = {bandits}, month = {oct}, publisher = {Springer}, series = {ALT '07, Lecture Notes in Computer Science}, title = {{No Title}}, volume = {4754}, year = {2007} } @article{Gopalan2013b, abstract = {We consider stochastic multi-armed bandit problems with complex actions over a set of basic arms, where the decision maker plays a complex action rather than a basic arm in each round. The reward of the complex action is some function of the basic arms' rewards, and the feedback observed may not necessarily be the reward per-arm. For instance, when the complex actions are subsets of the arms, we may only observe the maximum reward over the chosen subset. Thus, feedback across complex actions may be coupled due to the nature of the reward function. We prove a frequentist regret bound for Thompson sampling in a very general setting involving parameter, action and observation spaces and a likelihood function over them. The bound holds for discretely-supported priors over the parameter space and without additional structural properties such as closed-form posteriors, conjugate prior structure or independence across arms. 
The regret bound scales logarithmically with time but, more importantly, with an improved constant that non-trivially captures the coupling across complex actions due to the structure of the rewards. As applications, we derive improved regret bounds for classes of complex bandit problems involving selecting subsets of arms, including the first nontrivial regret bounds for nonlinear MAX reward feedback from subsets.}, author = {Gopalan, Aditya and Mannor, Shie and Mansour, Yishay}, month = {nov}, title = {{Thompson Sampling for Complex Bandit Problems}}, url = {http://proceedings.mlr.press/v32/gopalan14.pdf}, year = {2013} } @book{cesa-bianchi2006prediction, annote = {From Duplicate 1 ( Prediction, Learning, and Games - Cesa-Bianchi, Nicolo; Lugosi, Gabor ) }, author = {Cesa-Bianchi, Nicol{\`{o}} and Lugosi, G{\'{a}}bor}, keywords = {bandits}, mendeley-tags = {bandits}, publisher = {Cambridge University Press}, title = {{Prediction, learning, and games}}, url = {http://www.ii.uni.wroc.pl/{~}lukstafi/pmwiki/uploads/AGT/Prediction{\_}Learning{\_}and{\_}Games.pdf}, year = {2006} } @article{Kolla2016, abstract = {We consider a collaborative online learning paradigm, wherein a group of agents connected through a social network are engaged in playing a stochastic multi-armed bandit game. Each time an agent takes an action, the corresponding reward is instantaneously observed by the agent, as well as its neighbours in the social network. We perform a regret analysis of various policies in this collaborative learning setting. A key finding of this paper is that natural extensions of widely-studied single agent learning policies to the network setting need not perform well in terms of regret. In particular, we identify a class of non-altruistic and individually consistent policies, and argue by deriving regret lower bounds that they are liable to suffer a large regret in the networked setting. 
We also show that the learning performance can be substantially improved if the agents exploit the structure of the network, and develop a simple learning algorithm based on dominating sets of the network. Specifically, we first consider a star network, which is a common motif in hierarchical social networks, and show analytically that the hub agent can be used as an information sink to expedite learning and improve the overall regret. We also derive networkwide regret bounds for the algorithm applied to general networks. We conduct numerical experiments on a variety of networks to corroborate our analytical results.}, author = {Kolla, Ravi Kumar and Jagannathan, Krishna and Gopalan, Aditya}, month = {feb}, pages = {14}, title = {{Collaborative Learning of Stochastic Bandits over a Social Network}}, url = {http://arxiv.org/abs/1602.08886}, year = {2016} } @techreport{besson2019generalized, abstract = {We propose a new algorithm for the piece-wise $\backslash$iid{\{}{\}} non-stationary bandit problem with bounded rewards. Our proposal, GLR-klUCB, combines an efficient bandit algorithm, klUCB, with an efficient, parameter-free, change-point detector, the Bernoulli Generalized Likelihood Ratio Test, for which we provide new theoretical guarantees of independent interest. We analyze two variants of our strategy, based on local restarts and global restarts, and show that their regret is upper-bounded by {\$}\backslashmathcal{\{}O{\}}(\backslashUpsilon{\_}T \backslashsqrt{\{}T \backslashlog(T){\}}){\$} if the number of change-points {\$}\backslashUpsilon{\_}T{\$} is unknown, and by {\$}\backslashmathcal{\{}O{\}}(\backslashsqrt{\{}\backslashUpsilon{\_}T T \backslashlog(T){\}}){\$} if {\$}\backslashUpsilon{\_}T{\$} is known. This improves the state-of-the-art bounds, as our algorithm needs no tuning based on knowledge of the problem complexity other than {\$}\backslashUpsilon{\_}T{\$}. 
We present numerical experiments showing that GLR-klUCB outperforms passively and actively adaptive algorithms from the literature, and highlight the benefit of using local restarts.}, archivePrefix = {arXiv}, arxivId = {1902.01575}, author = {Besson, Lilian and Kaufmann, Emilie}, eprint = {1902.01575}, title = {{The generalized likelihood-ratio test meets klUCB: An improved algorithm for piece-wise non-stationary bandits}}, url = {http://arxiv.org/abs/1902.01575}, year = {2019} } @inproceedings{raedt2005, address = {Bonn, Germany}, booktitle = {Proceedings of the 22nd International Conference on Machine Learning}, editor = {Raedt, Luc De and Wrobel, Stefan}, isbn = {1-59593-180-5}, keywords = {bandits}, mendeley-tags = {bandits}, month = {aug}, publisher = {ACM}, series = {ICML '05, ACM International Conference Proceeding Series}, title = {{No Title}}, volume = {119}, year = {2005} } @article{Asadi2016a, abstract = {A softmax operator applied to a set of values acts somewhat like the maximization function and somewhat like an average. In sequential decision making, softmax is often used in settings where it is necessary to maximize utility but also to hedge against problems that arise from putting all of one's weight behind a single maximum utility decision. The Boltzmann softmax operator is the most commonly used softmax operator in this setting, but we show that this operator is prone to misbehavior. In this work, we study an alternative softmax operator that, among other properties, is both a non-expansion (ensuring convergent behavior in learning and planning) and differentiable (making it possible to improve decisions via gradient descent methods). 
We provide proofs of these properties and present empirical comparisons between various softmax operators.}, author = {Asadi, Kavosh and Littman, Michael L.}, month = {dec}, title = {{A New Softmax Operator for Reinforcement Learning}}, url = {http://arxiv.org/abs/1612.05628}, year = {2016} } @article{Contal2016a, abstract = {The paper considers the problem of global optimization in the setup of stochastic process bandits. We introduce an UCB algorithm which builds a cascade of discretization trees based on generic chaining in order to render possible his operability over a continuous domain. The theoretical framework applies to functions under weak probabilistic smoothness assumptions and also extends significantly the spectrum of application of UCB strategies. Moreover generic regret bounds are derived which are then specialized to Gaussian processes indexed on infinite-dimensional spaces as well as to quadratic forms of Gaussian processes. Lower bounds are also proved in the case of Gaussian processes to assess the optimality of the proposed algorithm.}, author = {Contal, Emile and Vayatis, Nicolas}, month = {feb}, title = {{Stochastic Process Bandits: Upper Confidence Bounds Algorithms via Generic Chaining}}, url = {http://arxiv.org/abs/1602.04976}, year = {2016} } @inproceedings{billsus2000learning, author = {Billsus, Daniel and Pazzani, Michael J. 
and Chen, James}, booktitle = {International Conference on Intelligent User Interfaces}, title = {{A learning agent for wireless news access}}, url = {https://www.ics.uci.edu/{~}pazzani/Publications/billsuspazzanichen.pdf}, year = {2000} } @inproceedings{cohen2016online, author = {Cohen, Alon and Hazan, Tamir and Koren, Tomer}, booktitle = {International Conference on Machine Learning}, title = {{Online learning with feedback graphs without the graphs}}, year = {2016} } @inproceedings{bubeck2012towards, author = {Bubeck, S{\'{e}}bastien and Cesa-Bianchi, Nicol{\`{o}} and Kakade, Sham M.}, booktitle = {Conference on Learning Theory}, title = {{Towards minimax policies for online linear optimization with bandit feedback}}, url = {http://proceedings.mlr.press/v23/bubeck12a/bubeck12a.pdf}, year = {2012} } @phdthesis{stoltz2011contributions, author = {Stoltz, Gilles}, keywords = {bandits}, mendeley-tags = {bandits}, school = {Universit{\'{e}} Paris-Sud}, title = {{Contributions to the sequential prediction of arbitrary sequences: applications to the theory of repeated games and empirical studies of the performance of the aggregation of experts}}, type = {Habilitation {\`{a}} Diriger des Recherches}, year = {2011} } @phdthesis{maillard2011apprentissage, author = {Maillard, Odalric-Ambrym}, keywords = {bandits}, mendeley-tags = {bandits}, school = {Universit{\'{e}} des Sciences et des Technologies de Lille 1}, title = {{Apprentissage s{\'{e}}quentiel: Bandits, Statistique et Renforcement}}, year = {2011} } @inproceedings{wu2015online, author = {Wu, Yifan and Gy{\"{o}}rgy, Andr{\'{a}}s and Szepesv{\'{a}}ri, Csaba}, booktitle = {Neural Information Processing Systems}, title = {{Online learning with Gaussian payoffs and side observations}}, year = {2015} } @inproceedings{bengio2009, address = {Vancouver, British Columbia, Canada}, booktitle = {Proceedings of the 23rd conference on advances in Neural Information Processing Systems}, editor = {Bengio, 
Yoshua and Schuurmans, Dale and Lafferty, John D and Williams, Chris K I and Culotta, Aron}, month = {dec}, series = {NIPS '09}, title = {{No Title}}, year = {2009} } @inproceedings{agrawal2013thomson, author = {Agrawal, Shipra and Goyal, Navin}, booktitle = {International Conference on Machine Learning}, title = {{Thompson sampling for contextual bandits with linear payoffs}}, url = {http://proceedings.mlr.press/v28/agrawal13.pdf}, year = {2013} } @inproceedings{ghahramani2007, address = {Corvalis, Oregon, USA}, booktitle = {Proceedings of the 24th International Conference on Machine Learning}, editor = {Ghahramani, Zoubin}, isbn = {978-1-59593-793-3}, keywords = {bandits}, mendeley-tags = {bandits}, month = {jun}, publisher = {ACM}, series = {ICML '07, ACM International Conference Proceeding Series}, title = {{No Title}}, volume = {227}, year = {2007} } @article{Lund2006, abstract = {Now available in paperback. This is a text comprising the major theorems of probability theory and the measure theoretical foundations of the subject. The main topics treated are independence, interchangeability,and martingales; particular emphasis is placed upon stopping times, both as tools in proving theorems and as objects of interest themselves. No prior knowledge of measure theory is assumed and a unique feature of the book is the combined presentation of measure and probability. It is easily adapted for graduate students familar with measure theory as indicated by the guidelines in the preface. 
Special features include: A comprehensive treatment of the law of the iterated logarithm; the Marcinkiewicz-Zygmund inequality, its extension to martingales and applications thereof; development and applications of the second moment analogue of Wald's equation; limit theorems for martingale arrays, the central limit theorem for the interchangeable and martingale cases, moment convergence in the central limit theorem; complete discussion, including central limit theorem, of the random casting of r balls into n cells; recent martingale inequalities; Cram{\'{e}}r-L{\'{e}}vy theorem and factor-closed families of distributions. This edition includes a section dealing with U-statistic, adds additional theorems and examples, and includes simpler versions of some proofs.}, author = {Lund, Robert B. and Chow, Yuan Shih and Teicher, Henry}, doi = {10.2307/2670078}, issn = {01621459}, journal = {Journal of the American Statistical Association}, title = {{Probability Theory: Independence, Interchangeability, Martingales}}, year = {2006} } @inproceedings{combes2014unimodal, author = {Combes, Richard and Prouti{\`{e}}re, Alexandre}, booktitle = {International Conference on Machine Learning}, title = {{Unimodal bandits: Regret lower bounds and optimal algorithms}}, url = {http://proceedings.mlr.press/v32/combes14.pdf}, year = {2014} } @inproceedings{jamieson2016non-stochastic, author = {Jamieson, Kevin and Talwalkar, Ameet}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Non-stochastic best arm identification and hyperparameter optimization}}, url = {http://proceedings.mlr.press/v51/jamieson16.pdf}, year = {2016} } @inproceedings{cesa-bianchi2010online, author = {Cesa-Bianchi, Nicol{\`{o}} and Shalev-Shwartz, Shai and Shamir, Ohad}, booktitle = {Conference on Learning Theory}, title = {{Online learning of noisy data with kernels}}, year = {2010} } @article{Gopalan2013a, abstract = {We consider stochastic multi-armed bandit problems with complex
actions over a set of basic arms, where the decision maker plays a complex action rather than a basic arm in each round. The reward of the complex action is some function of the basic arms' rewards, and the feedback observed may not necessarily be the reward per-arm. For instance, when the complex actions are subsets of the arms, we may only observe the maximum reward over the chosen subset. Thus, feedback across complex actions may be coupled due to the nature of the reward function. We prove a frequentist regret bound for Thompson sampling in a very general setting involving parameter, action and observation spaces and a likelihood function over them. The bound holds for discretely-supported priors over the parameter space and without additional structural properties such as closed-form posteriors, conjugate prior structure or independence across arms. The regret bound scales logarithmically with time but, more importantly, with an improved constant that non-trivially captures the coupling across complex actions due to the structure of the rewards. As applications, we derive improved regret bounds for classes of complex bandit problems involving selecting subsets of arms, including the first nontrivial regret bounds for nonlinear MAX reward feedback from subsets.}, author = {Gopalan, Aditya and Mannor, Shie and Mansour, Yishay}, month = {nov}, title = {{Thompson Sampling for Complex Bandit Problems}}, url = {http://arxiv.org/abs/1311.0466}, year = {2013} } @inproceedings{gentile2014online, author = {Gentile, Claudio and Li, Shuai and Zappella, Giovanni}, booktitle = {International Conference on Machine Learning}, title = {{Online clustering of bandits}}, url = {http://proceedings.mlr.press/v32/gentile14.pdf}, year = {2014} } @article{Cesa-Bianchi2016a, abstract = {We study networks of communicating learning agents that cooperate to solve a common nonstochastic bandit problem. 
Agents use an underlying communication network to get messages about actions selected by other agents, and drop messages that took more than {\$}d{\$} hops to arrive, where {\$}d{\$} is a delay parameter. We introduce $\backslash$textsc{\{}Exp3-Coop{\}}, a cooperative version of the {\{}$\backslash$sc Exp3{\}} algorithm and prove that with {\$}K{\$} actions and {\$}N{\$} agents the average per-agent regret after {\$}T{\$} rounds is at most of order {\$}\backslashsqrt{\{}\backslashbigl(d+1 + \backslashtfrac{\{}K{\}}{\{}N{\}}\backslashalpha{\_}{\{}\backslashle d{\}}\backslashbigr)(T\backslashln K){\}}{\$}, where {\$}\backslashalpha{\_}{\{}\backslashle d{\}}{\$} is the independence number of the {\$}d{\$}-th power of the connected communication graph {\$}G{\$}. We then show that for any connected graph, for {\$}d=\backslashsqrt{\{}K{\}}{\$} the regret bound is {\$}K{\^{}}{\{}1/4{\}}\backslashsqrt{\{}T{\}}{\$}, strictly better than the minimax regret {\$}\backslashsqrt{\{}KT{\}}{\$} for noncooperating agents. More informed choices of {\$}d{\$} lead to bounds which are arbitrarily close to the full information minimax regret {\$}\backslashsqrt{\{}T\backslashln K{\}}{\$} when {\$}G{\$} is dense. When {\$}G{\$} has sparse components, we show that a variant of $\backslash$textsc{\{}Exp3-Coop{\}}, allowing agents to choose their parameters according to their centrality in {\$}G{\$}, strictly improves the regret. 
Finally, as a by-product of our analysis, we provide the first characterization of the minimax regret for bandit learning with delay.}, author = {Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio and Mansour, Yishay and Minora, Alberto}, month = {feb}, pages = {27}, title = {{Delay and Cooperation in Nonstochastic Bandits}}, url = {http://arxiv.org/abs/1602.04741}, year = {2016} } @inproceedings{abeille2017linear, author = {Abeille, Marc and Lazaric, Alessandro}, booktitle = {International Conference on Artificial Intelligence and Statistics}, file = {::}, title = {{Linear Thompson sampling revisited}}, url = {http://proceedings.mlr.press/v54/abeille17a/abeille17a.pdf}, year = {2017} } @inproceedings{talebi2016optimal, abstract = {The selection of leaders in leader-follower multi-agent systems can be naturally formulated as a matroid optimization problem. In this paper, we investigate the online and stochastic version of such a problem, where in each iteration or round, we select a set of leaders and then observe a random realization of the corresponding reward, i.e., of the system performance. This problem is referred to as a stochastic matroid bandit, a variant of combinatorial multi-armed bandit problems where the underlying combinatorial structure is a matroid. We consider semi-bandit feedback and Bernoulli rewards, and derive a tight and problem-dependent lower bound on the regret of any consistent algorithm. We propose KL-OSM, a computationally efficient algorithm that exploits the matroid structure. We derive a finite-time upper bound of the regret of KL-OSM that improves the performance guarantees of existing algorithms. This upper bound actually matches our lower bound, i.e., KL-OSM is asymptotically optimal. 
Numerical experiments attest that KL-OSM outperforms state-of-the-art algorithms in practice, and the difference in some cases is significant.}, author = {Talebi, Mohammad Sadegh and Prouti{\`{e}}re, Alexandre}, booktitle = {International Conference on Autonomous Agents {\&} Multiagent Systems}, title = {{An optimal algorithm for stochastic matroid bandit optimization}}, year = {2016} } @inproceedings{littlestone1989weighted, address = {Washington, DC, USA}, author = {Littlestone, Nick and Warmuth, Manfred K}, booktitle = {Proceedings of the 30th annual Symposium on Foundations of Computer Science}, isbn = {0-8186-1982-1}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {256--261}, publisher = {IEEE Computer Society}, title = {{The weighted majority algorithm}}, year = {1989} } @inproceedings{yue2012hierarchical, address = {New York, NY, USA}, author = {Yue, Yisong and Hong, Sue A and Guestrin, Carlos}, booktitle = {Proceedings of the 29th International Conference on Machine Learning (ICML-12)}, editor = {Langford, John and Pineau, Joelle}, pages = {1895--1902}, publisher = {ACM}, title = {{Hierarchical Exploration for Accelerating Contextual Bandits}}, url = {http://icml.cc/2012/papers/933.pdf}, year = {2012} } @inproceedings{wei2018abruptly, abstract = {We study the non-stationary stochastic multiarmed bandit (MAB) problem and propose two generic algorithms, namely, the limited memory deterministic sequencing of exploration and exploitation (LM-DSEE) and the Sliding-Window Upper Confidence Bound{\#} (SW-UCB{\#}). We rigorously analyze these algorithms in abruptly-changing and slowly-varying environments and characterize their performance. We show that the expected cumulative regret for these algorithms under either of the environments is upper bounded by sublinear functions of time, i.e., the time average of the regret asymptotically converges to zero. 
We complement our analytic results with numerical illustrations.}, author = {Wei, Lai and Srivastava, Vaibhav}, booktitle = {Proceedings of the American Control Conference}, doi = {10.23919/ACC.2018.8431265}, isbn = {9781538654286}, issn = {07431619}, title = {{On Abruptly-Changing and Slowly-Varying Multiarmed Bandit Problems}}, year = {2018} } @inproceedings{kaufmann2017monte, author = {Kaufmann, Emilie and Koolen, Wouter M}, booktitle = {Neural Information Processing Systems}, title = {{Monte-carlo tree search by best-arm identification}}, url = {https://arxiv.org/pdf/1706.02986.pdf}, year = {2017} } @article{audibert2009exploration, author = {Audibert, Jean-Yves and Munos, R{\'{e}}mi and Szepesv{\'{a}}ri, Csaba}, journal = {Theoretical Computer Science}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {1876--1902}, title = {{Exploration-exploitation trade-off using variance estimates in multi-armed bandits}}, volume = {410}, year = {2009} } @inproceedings{krause11contextual, author = {Krause, Andreas and Ong, Cheng Soon}, booktitle = {Proceedings of Neural Information Processing Systems (NIPS)}, title = {{Contextual Gaussian Process Bandit Optimization}}, year = {2011} } @inproceedings{narayananrandom, author = {Narayanan, Hariharan and Rakhlin, Alexander}, booktitle = {Neural Information Processing Systems}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {1777--1785}, title = {{Random Walk Approach to Regret Minimization}}, year = {2010} } @inproceedings{danyluk2009, address = {Montreal, Quebec, Canada}, booktitle = {Proceedings of the 26th International Conference on Machine Learning}, editor = {Danyluk, Andrea Pohoreckyj and Bottou, L{\'{e}}on and Littman, Michael L}, isbn = {978-1-60558-516-1}, month = {jun}, publisher = {ACM}, series = {ICML '09, ACM International Conference Proceeding Series}, title = {{No Title}}, volume = {382}, year = {2009} } @inproceedings{shawe-taylor2004, address = {Banff, Canada}, booktitle = {Proceedings of the 17th annual Conference On Learning Theory}, editor = {Shawe-Taylor,
John and Singer, Yoram}, isbn = {3-540-22282-0}, keywords = {bandits}, mendeley-tags = {bandits}, month = {jul}, publisher = {Springer}, series = {COLT '04, Lecture Notes in Computer Science}, title = {{No Title}}, volume = {3120}, year = {2004} } @inproceedings{wang2017improving, abstract = {We study combinatorial multi-armed bandit with probabilistically triggered arms (CMAB-T) and semi-bandit feedback. We resolve a serious issue in the prior CMAB-T studies where the regret bounds contain a possibly exponentially large factor of {\$}1/p{\^{}}*{\$}, where {\$}p{\^{}}*{\$} is the minimum positive probability that an arm is triggered by any action. We address this issue by introducing triggering probability moderated (TPM) bounded smoothness conditions into the general CMAB-T framework, and show that many applications such as influence maximization bandit and combinatorial cascading bandit satisfy such TPM conditions. As a result, we completely remove the factor of {\$}1/p{\^{}}*{\$} from the regret bounds, achieving significantly better regret bounds for influence maximization and cascading bandits than before. 
Finally, we provide lower bound results showing that the factor {\$}1/p{\^{}}*{\$} is unavoidable for general CMAB-T problems, suggesting that TPM conditions are crucial in removing this factor.}, archivePrefix = {arXiv}, arxivId = {1703.01610}, author = {Wang, Qinshi and Chen, Wei}, booktitle = {Neural Information Processing Systems}, eprint = {1703.01610}, file = {::}, month = {mar}, title = {{Improving regret bounds for combinatorial semi-bandits with probabilistically triggered arms and its applications}}, url = {http://arxiv.org/abs/1703.01610}, year = {2017} } @inproceedings{fang2014networked, author = {Fang, Meng and Tao, Dacheng}, booktitle = {International Conference on Knowledge Discovery and Data Mining}, keywords = {exploration/exploitation dilemma,networked bandits,social network}, title = {{Networked bandits with disjoint linear payoffs}}, url = {http://delivery.acm.org/10.1145/2630000/2623672/p1106-fang.pdf?ip=193.49.212.233{\&}id=2623672{\&}acc=ACTIVE SERVICE{\&}key=7EBF6E77E86B478F.5C2A4B72BE2A7DDF.4D4702B0C3E38B35.4D4702B0C3E38B35{\&}{\_}{\_}acm{\_}{\_}=1528847270{\_}23b4902004322713593e1389d42ae48c}, year = {2014} } @inproceedings{bubeck2008online, author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi and Stoltz, Gilles and Szepesv{\'{a}}ri, Csaba}, booktitle = {Advances in Neural Information Processing Systems}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {201--208}, title = {{Online Optimization of X-armed Bandits}}, year = {2008} } @article{Prisadnikov2014a, author = {Prisadnikov, Nedyalko}, publisher = {ETH-Z{\"{u}}rich, Department of Computer Science}, title = {{Exploration-exploitation trade-offs via probabilistic matrix factorization}}, url = {http://e-collection.library.ethz.ch/view/eth:14399}, year = {2014} } @article{kleinberg2013, author = {Kleinberg, Robert and Slivkins, Aleksandrs and Upfal, Eli}, journal = {Journal of ACM}, title = {{Bandits and experts in metric spaces}}, url = {https://arxiv.org/pdf/1312.1277.pdf}, 
year = {2015} } @inproceedings{audibert2011minimax, author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien and Lugosi, Gabor}, booktitle = {Proceedings of the 24th annual Conference On Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, series = {COLT '11}, title = {{Minimax Policies for Combinatorial Prediction Games}}, year = {2011} } @article{blum2007from, author = {Blum, Avrim and Mansour, Yishay}, journal = {Journal of Machine Learning Research}, keywords = {bandits}, mendeley-tags = {bandits}, month = {dec}, pages = {1307--1324}, publisher = {JMLR.org}, title = {{From External to Internal Regret}}, volume = {8}, year = {2007} } @inproceedings{kaufmann2012bayesian, abstract = {Stochastic bandit problems have been ana-lyzed from two different perspectives: a fre-quentist view, where the parameter is a deter-ministic unknown quantity, and a Bayesian approach, where the parameter is drawn from a prior distribution. We show in this paper that methods derived from this second per-spective prove optimal when evaluated using the frequentist cumulated regret as a mea-sure of performance. We give a general for-mulation for a class of Bayesian index policies that rely on quantiles of the posterior distri-bution. For binary bandits, we prove that the corresponding algorithm, termed Bayes-UCB, satisfies finite-time regret bounds that imply its asymptotic optimality. More gen-erally, Bayes-UCB appears as an unifying framework for several variants of the UCB algorithm addressing different bandit prob-lems (parametric multi-armed bandits, Gaus-sian bandits with unknown mean and vari-ance, linear bandits). But the generality of the Bayesian approach makes it possible to address more challenging models. 
In par-ticular, we show how to handle linear ban-dits with sparsity constraints by resorting to Gibbs sampling.}, author = {Kaufmann, Emilie and Capp{\'{e}}, Olivier and Garivier, Aur{\'{e}}lien}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{On Bayesian upper confidence bounds for bandit problems}}, url = {http://proceedings.mlr.press/v22/kaufmann12/kaufmann12.pdf}, year = {2012} } @inproceedings{desautels12parallelizing, author = {Desautels, Thomas and Krause, Andreas and Burdick, Joel}, booktitle = {International Conference on Machine Learning}, title = {{Parallelizing exploration-exploitation tradeoffs in Gaussian process bandit optimization}}, url = {https://icml.cc/2012//papers/602.pdf}, year = {2012} } @phdthesis{stoltz2005incomplete, address = {Orsay, France}, author = {Stoltz, Gilles}, keywords = {bandits}, mendeley-tags = {bandits}, month = {may}, school = {Universit{\{}{\'{e}}{\}} Paris-Sud}, title = {{Incomplete Information and Internal Regret in Prediction of Individual Sequences}}, type = {PhD thesis}, year = {2005} } @inproceedings{auer2018adaptively, author = {Auer, Peter and Gajane, Pratik and Ortner, Ronald}, booktitle = {European Workshop on Reinforcement Learning}, title = {{Adaptively Tracking the Best Arm with an Unknown Number of Distribution Changes}}, year = {2018} } @inproceedings{cesa-bianchi2013online, author = {Cesa-Bianchi, Nicol{\`{o}} and Dekel, Ofer and Shamir, Ohad}, booktitle = {Advances in Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Cesa-Bianchi, Dekel, Shamir - 2013 - Online Learning with Switching Costs and Other Adaptive Adversaries.pdf:pdf}, pages = {1160--1168}, title = {{Online Learning with Switching Costs and Other Adaptive Adversaries}}, url = {http://papers.nips.cc/paper/5151-online-learning-with-switching-costs-and-other-adaptive-adversaries}, year = {2013} } @inproceedings{ding2013multi-armed, 
abstract = {We study the multi-armed bandit problems with budget constraint and variable costs (MAB-BV). In this setting, pulling an arm will receive a random reward together with a random cost, and the objective of an algorithm is to pull a sequence of arms in order to maximize the expected total reward with the costs of pulling those arms complying with a budget constraint. This new setting models many Internet applications (e.g., ad exchange, sponsored search, and cloud computing) in a more accurate manner than previous settings where the pulling of arms is either costless or with a fixed cost. We propose two UCB based algorithms for the new setting. The first algorithm needs prior knowledge about the lower bound of the expected costs when computing the exploration term. The second algorithm eliminates this need by estimating the minimal expected costs from empirical observations, and therefore can be applied to more real-world applications where prior knowledge is not available. We prove that both algorithms have nice learning abilities, with regret bounds of O(lnB). Furthermore, we show that when applying our proposed algorithms to a previous setting with fixed costs (which can be regarded as our special case), one can improve the previously obtained regret bound.
Our simulation results on real-time bidding in ad exchange verify the effectiveness of the algorithms and are consistent with our theoretical analysis}, author = {Ding, Wenkui and Qin, Tao and Zhang, Xu-dong and Liu, Tie-yan}, booktitle = {Proceedings of the Twenty-Seventh AAAI Conference on Artificial Intelligence}, isbn = {9781577356158}, title = {{Multi-Armed Bandit with Budget Constraint and Variable Costs}}, url = {http://dblp.uni-trier.de/db/conf/aaai/aaai2013.html{\#}DingQZL13}, year = {2013} } @article{russo2014learning, author = {Russo, Daniel and {Van Roy}, Benjamin}, journal = {Mathematics of Operations Research}, title = {{Learning to Optimize Via Posterior Sampling}}, year = {2014} } @article{burnetas1997optimal, address = {Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA}, author = {Burnetas, Apostolos N and Katehakis, Micha{\"{e}}l N}, issn = {0364-765X}, journal = {Mathematics of Operations Research}, keywords = {bandits}, mendeley-tags = {bandits}, month = {feb}, number = {1}, pages = {222--255}, publisher = {INFORMS}, title = {{Optimal adaptive policies for Markov decision processes}}, volume = {22}, year = {1997} } @article{auer2002finite, address = {Hingham, MA, USA}, author = {Auer, Peter and Cesa-Bianchi, Nicol{\`{o}} and Fischer, Paul}, journal = {Machine Learning}, keywords = {adaptive allocation rules,bandit problems,bandits,finite horizon regret}, mendeley-tags = {bandits}, number = {2-3}, pages = {235--256}, publisher = {Kluwer Academic Publishers}, title = {{Finite-time analysis of the multiarmed bandit problem}}, url = {https://homes.di.unimi.it/{~}cesabian/Pubblicazioni/ml-02.pdf}, volume = {47}, year = {2002} } @inproceedings{thrun2003, address = {Vancouver, British Columbia, Canada}, booktitle = {Proceedings of the 17th conference on advances in Neural Information Processing Systems}, editor = {Thrun, Sebastian and Saul, Lawrence K and Sch{\"{o}}lkopf, Bernhard}, isbn = 
{0-262-20152-6}, month = {dec}, publisher = {MIT Press}, series = {NIPS '03}, title = {{No Title}}, year = {2003} } @inproceedings{hazan2006logarithmic, author = {Hazan, Elad and Kalai, Adam Tauman and Agarwal, Amit and Kale, Satyen}, booktitle = {Conference on Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Logarithmic regret algorithms for online convex optimization}}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.88.3483{\&}rep=rep1{\&}type=pdf}, year = {2006} } @article{may2012optimistic, author = {May, Benedict C. and Korda, Nathaniel and Lee, Anthony and Leslie, David S.}, journal = {Journal of Machine Learning Research}, number = {1}, pages = {2069--2106}, title = {{Optimistic Bayesian sampling in contextual-bandit problems}}, url = {http://www.jmlr.org/papers/volume13/may12a/may12a.pdf}, volume = {13}, year = {2012} } @article{chan2018cbt, abstract = {The infinite arms bandit problem was initiated by Berry et al. (1997). They derived a regret lower bound of all solutions for Bernoulli rewards, and proposed various bandit strategies based on success runs, but which do not achieve this bound. We propose here a confidence bound target (CBT) algorithm that achieves extensions of their regret lower bound for general reward distributions and distribution priors. The algorithm does not require information on the reward distributions, for each arm we require only the mean and standard deviation of its rewards to compute a confidence bound. We play the arm with the smallest confidence bound provided it is smaller than a target mean. If the confidence bounds are all larger, then we play a new arm. We show how the target mean can be computed from the prior so that the smallest asymptotic regret, among all infinite arms bandit algorithms, is achieved. 
We also show that in the absence of information on the prior, the target mean can be determined empirically, and that the regret achieved is comparable to the smallest regret. Numerical studies show that CBT is versatile and outperforms its competitors.}, archivePrefix = {arXiv}, arxivId = {1805.11793}, author = {Chan, Hock Peng and Hu, Shouri}, eprint = {1805.11793}, file = {::}, title = {{Infinite arms bandit: Optimality via confidence bounds}}, url = {http://arxiv.org/pdf/1805.11793.pdf}, year = {2018} } @article{hager1989updating, author = {Hager, W W}, journal = {SIAM review}, pages = {221--239}, publisher = {JSTOR}, title = {{Updating the inverse of a matrix}}, year = {1989} } @inproceedings{yu2011unimodal, author = {Yu, Jia Yuan and Mannor, Shie}, booktitle = {International Conference on Machine Learning}, title = {{Unimodal bandits}}, url = {http://www.icml-2011.org/papers/50{\_}icmlpaper.pdf}, year = {2011} } @misc{tran-thang2012knapsack, author = {Tran-Thanh, Long and Chapman, Archie C. and Rogers, Alex and Jennings, Nicholas R.}, booktitle = {AAAI}, title = {{Knapsack Based Optimal Policies for Budget-Limited Multi-Armed Bandits.}}, urldate = {2014-10-16}, year = {2012} } @inproceedings{dani2008stochastic, author = {Dani, Varsha and Hayes, Thomas P and Kakade, Sham M}, booktitle = {Conference on Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Stochastic linear optimization under bandit feedback}}, url = {https://repository.upenn.edu/cgi/viewcontent.cgi?article=1501{\&}context=statistics{\_}papers}, year = {2008} } @article{ortner2014regret, abstract = {We consider the restless Markov bandit problem, in which the state of each arm evolves according to a Markov process independently of the learner's actions. We suggest an algorithm, that first represents the setting as an MDP which exhibits some special structural properties. 
In order to grasp this information we introduce the notion of $\epsilon$-structured MDPs, which are a generalization of concepts like (approximate) state aggregation and MDP homomorphisms. We propose a general algorithm for learning $\epsilon$-structured MDPs and show regret bounds that demonstrate that additional structural information enhances learning. Applied to the restless bandit setting, this algorithm achieves after any T steps regret of order $\tilde{O}(\sqrt{T})$ with respect to the best policy that knows the distributions of all arms. We make no assumptions on the Markov chains underlying each arm except that they are irreducible. In addition, we show that index-based policies are necessarily suboptimal for the considered problem.}, author = {Ortner, Ronald and Ryabko, Daniil and Auer, Peter and Munos, R{\'{e}}mi}, doi = {10.1016/j.tcs.2014.09.026}, issn = {03043975}, journal = {Theoretical Computer Science}, keywords = {Markov decision processes,Regret,Restless bandits}, title = {{Regret bounds for restless Markov bandits}}, year = {2014} } @inproceedings{agrawal2013further, abstract = {Thompson Sampling is one of the oldest heuristics for multi-armed bandit problems. It is a randomized algorithm based on Bayesian ideas, and has recently generated significant interest after several studies demonstrated it to have better empirical performance compared to the state of the art methods. In this paper, we provide a novel regret analysis for Thompson Sampling that simultaneously proves both the optimal problem-dependent bound of {\$}(1+\backslashepsilon)\backslashsum{\_}i \backslashfrac{\{}\backslashln T{\}}{\{}\backslashDelta{\_}i{\}}+O(\backslashfrac{\{}N{\}}{\{}\backslashepsilon{\^{}}2{\}}){\$} and the first near-optimal problem-independent bound of {\$}O(\backslashsqrt{\{}NT\backslashln T{\}}){\$} on the expected regret of this algorithm. Our near-optimal problem-independent bound solves a COLT 2012 open problem of Chapelle and Li.
The optimal problem-dependent regret bound for this problem was first proven recently by Kaufmann et al. [ALT 2012]. Our novel martingale-based analysis techniques are conceptually simple, easily extend to distributions other than the Beta distribution, and also extend to the more general contextual bandits setting [Manuscript, Agrawal and Goyal, 2012].}, author = {Agrawal, Shipra and Goyal, Navin}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Further optimal regret bounds for Thompson sampling}}, url = {http://proceedings.mlr.press/v31/agrawal13a.pdf}, year = {2013} } @inproceedings{gabillon2014largescale, author = {Gabillon, Victor and Kveton, Branislav and Wen, Zheng and Eriksson, Brian and Muthukrishnan, S.}, booktitle = {AAAI Conference on Artificial Intelligence}, title = {{Large-scale optimistic adaptive submodularity}}, year = {2014} } @inproceedings{singla2015information, abstract = {How should we gather information in a network, where each node's visibility is limited to its local neighborhood? This problem arises in numerous real-world applications, such as surveying and task routing in social networks, team formation in collaborative networks and experimental design with dependency constraints. Often the informativeness of a set of nodes can be quantified via a submodular utility function. Existing approaches for submodular optimization, however, require that the set of all nodes that can be selected is known ahead of time, which is often unrealistic. In contrast, we propose a novel model where we start our exploration from an initial node, and new nodes become visible and available for selection only once one of their neighbors has been chosen. We then present a general algorithm NetExp for this problem, and provide theoretical bounds on its performance dependent on structural properties of the underlying network. 
We evaluate our methodology on various simulated problem instances as well as on data collected from social question answering system deployed within a large enterprise.}, author = {Singla, Adish and Horvitz, Eric and Kohli, Pushmeet and White, Ryen and Krause, Andreas}, booktitle = {International Joint Conferences on Artificial Intelligence}, title = {{Information gathering in networks via active exploration}}, year = {2015} } @article{awerbuch2008online, address = {Orlando, FL, USA}, author = {Awerbuch, Baruch and Kleinberg, Robert D}, issn = {0022-0000}, journal = {Journal of Computer Systems and Science}, keywords = {bandits}, mendeley-tags = {bandits}, month = {feb}, number = {1}, pages = {97--114}, publisher = {Academic Press, Inc.}, title = {{Online linear optimization and adaptive routing}}, volume = {74}, year = {2008} } @inproceedings{ma2015active, author = {Ma, Yifei and Huang, Tzu-Kuo and Schneider, Jeff}, booktitle = {Uncertainty in Artificial Intelligence}, title = {{Active search and bandits on graphs using sigma-optimality}}, url = {https://pdfs.semanticscholar.org/f72b/71c747d2f487e8c0ade09f4d31e4ad2c0185.pdf}, year = {2015} } @phdthesis{bubeck2010bandits, author = {Bubeck, S{\'{e}}bastien}, keywords = {bandits}, mendeley-tags = {bandits}, school = {Universit{\{}{\'{e}}{\}} de Lille 1}, title = {{Bandits Games and Clustering Foundations}}, year = {2010} } @inproceedings{audibert2010best, abstract = {We consider the problem of finding the best arm in a stochastic multi-armed bandit game. The regret of a forecaster is here defined by the gap between the mean reward of the optimal arm and the mean reward of the ultimately chosen arm. We propose a highly exploring UCB policy and a new algorithm based on successive rejects. We show that these algorithms are essentially optimal since their regret decreases exponentially at a rate which is, up to a logarithmic factor, the best possible. 
However, while the UCB policy needs the tuning of a parameter depending on the unobservable hardness of the task, the successive rejects policy benefits from being parameter-free, and also independent of the scaling of the rewards.}, annote = {From Duplicate 3 ( Best arm identification in multi-armed bandits - Audibert, Jean-Yves; Bubeck, S{\'{e}}bastien; Munos, R{\'{e}}mi ) }, author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi}, booktitle = {Neural Information Processing Systems}, keywords = {learning,statistics {\&} optimisation,theory {\&} algorithms}, title = {{Best arm identification in multi-armed bandits}}, year = {2010} } @article{bubeck2011x, abstract = {We consider a generalization of stochastic bandits where the set of arms, cX, is allowed to be a generic measurable space and the mean-payoff function is "locally Lipschitz" with respect to a dissimilarity function that is known to the decision maker. Under this condition we construct an arm selection policy, called HOO (hierarchical optimistic optimization), with improved regret bounds compared to previous results for a large class of problems. In particular, our results imply that if cX is the unit hypercube in a Euclidean space and the mean-payoff function has a finite number of global maxima around which the behavior of the function is locally continuous with a known smoothness degree, then the expected regret of HOO is bounded up to a logarithmic factor by sqrtn, i.e., the rate of growth of the regret is independent of the dimension of the space. We also prove the minimax optimality of our algorithm when the dissimilarity is a metric. Our basic strategy has quadratic computational complexity as a function of the number of time steps and does not rely on the doubling trick. We also introduce a modified strategy, which relies on the doubling trick but runs in linearithmic time. 
Both results are improvements with respect to previous approaches.}, author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi and Stoltz, Gilles and Szepesv{\'{a}}ri, Csaba}, journal = {Journal of Machine Learning Research}, pages = {1587--1627}, title = {{X-armed bandits}}, url = {http://www.jmlr.org/papers/volume12/bubeck11a/bubeck11a.pdf}, volume = {12}, year = {2011} } @proceedings{platt2007, address = {Vancouver, British Columbia, Canada}, booktitle = {Proceedings of the 21st conference on advances in Neural Information Processing Systems}, editor = {Platt, John C and Koller, Daphne and Singer, Yoram and Roweis, Sam T}, month = dec, publisher = {MIT Press}, series = {NIPS '07}, title = {{Advances in Neural Information Processing Systems 20}}, year = {2007} } @inproceedings{abernethyoptimal, author = {Abernethy, Jacob D and Bartlett, Peter L and Rakhlin, Alexander and Tewari, Ambuj}, booktitle = {Conference on Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Optimal strategies and minimax lower bounds for online convex games}}, year = {2008} } @techreport{traca2015regulating, abstract = {In retail, there are predictable yet dramatic time-dependent patterns in customer behavior, such as periodic changes in the number of visitors, or increases in visitors just before major holidays (e.g., Christmas). The current paradigm of multi-armed bandit analysis does not take these known patterns into account, which means that despite the firm theoretical foundation of these methods, they are fundamentally flawed when it comes to real applications. This work provides a remedy that takes the time-dependent patterns into account, and we show how this remedy is implemented in the UCB and $\epsilon$-greedy methods. In the corrected methods, exploitation (greed) is regulated over time, so that more exploitation occurs during higher reward periods, and more exploration occurs in periods of low reward. 
In order to understand why regret is reduced with the corrected methods, we present a set of bounds that provide insight into why we would want to exploit during periods of high reward, and discuss the impact on regret. Our proposed methods have excellent performance in experiments, and were inspired by a high-scoring entry in the Exploration and Exploitation 3 contest using data from Yahoo! Front Page. That entry heavily used time-series methods to regulate greed over time, which was substantially more effective than other contextual bandit methods.}, archivePrefix = {arXiv}, arxivId = {1505.05629}, author = {Trac{\`{a}}, Stefano and Rudin, Cynthia}, eprint = {1505.05629}, title = {{Regulating Greed Over Time}}, url = {https://arxiv.org/abs/1505.05629}, year = {2015} } @inproceedings{garivier2016maximin, abstract = {We study an original problem of pure exploration in a strategic bandit model motivated by Monte Carlo Tree Search. It consists in identifying the best action in a game, when the player may sample random outcomes of sequentially chosen pairs of actions. We propose two strategies for the fixed-confidence setting: Maximin-LUCB, based on lower-and upper-confidence bounds; and Maximin-Racing, which operates by successively eliminating the sub-optimal actions. We discuss the sample complexity of both methods and compare their performance empirically. 
We sketch a lower bound analysis, and possible connections to an optimal algorithm.}, archivePrefix = {arXiv}, arxivId = {arXiv:1602.04676v1}, author = {Garivier, Aur{\'{e}}lien and Kaufmann, Emilie and Koolen, Wouter M}, booktitle = {Conference on Learning Theory}, eprint = {arXiv:1602.04676v1}, title = {{Maximin action identification: A new bandit framework for games}}, year = {2016} } @inproceedings{pandey2007multi, address = {New York, NY, USA}, author = {Pandey, S and Chakrabarti, D and Agarwal, D}, booktitle = {ICML '07: Proceedings of the 24th international conference on Machine learning}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {721--728}, publisher = {ACM}, title = {{Multi-Armed Bandit Problems with Dependent Arms}}, year = {2007} } @inproceedings{wang2008algorithms, author = {Wang, Yizao and Audibert, Jean-Yves and Munos, R{\'{e}}mi}, booktitle = {Neural Information Processing Systems}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Algorithms for infinitely many-armed bandits}}, year = {2008} } @inproceedings{kuzborskij2019efficient, abstract = {We prove that two popular linear contextual bandit algorithms, OFUL and Thompson Sampling, can be made efficient using Frequent Directions, a deterministic online sketching technique. More precisely, we show that a sketch of size m allows a O(md) update time for both algorithms, as opposed to Ω(d 2) required by their non-sketched versions (where d is the dimension of context vectors). When the selected contexts span a subspace of dimension at most m, we show that this computational speedup is accompanied by an improved regret of order m √ T for sketched OFUL and of order m √ dT for sketched Thompson Sampling (ignoring log factors in both cases). 
Vice versa, when the dimension of the span exceeds m, the regret bounds become of order (1 + $\epsilon$ m) 3/2 d √ T for OFUL and of order (1 + $\epsilon$ m)d 3/2 √ T for Thompson Sampling, where $\epsilon$ m is bounded by the sum of the tail eigenvalues not covered by the sketch. Experiments on real-world datasets corroborate our theoretical results.}, archivePrefix = {arXiv}, arxivId = {arXiv:1809.11033v1}, author = {Kuzborskij, Ilja and Cella, Leonardo and Cesa-Bianchi, Nicol{\`{o}}}, booktitle = {International Conference on Artificial Intelligence and Statistics}, eprint = {arXiv:1809.11033v1}, title = {{Efficient linear bandits through matrix sketching}}, url = {https://arxiv.org/pdf/1809.11033.pdf}, year = {2019} } @unpublished{wang2019pucb, abstract = {Stochastic bandit algorithms can be used for challenging non-convex optimization problems. Hyperparameter tuning of neural networks is particularly challenging, necessitating new approaches. To this end, we present a method that adaptively partitions the combined space of hyperparameters, context, and training resources (e.g., total number of training iterations). By adaptively partitioning the space, the algorithm is able to focus on the portions of the hyperparameter search space that are most relevant in a practical way. By including the resources in the combined space, the method tends to use fewer training resources overall. Our experiments show that this method can surpass state-of-the-art methods in tuning neural networks on benchmark datasets. In some cases, our implementations can achieve the same levels of accuracy on benchmark datasets as existing state-of-the-art approaches while saving over 50{\%} of our computational resources (e.g. 
time, training iterations).}, archivePrefix = {arXiv}, arxivId = {1901.09277}, author = {Wang, Tianyu and Geng, Dawei and Rudin, Cynthia}, eprint = {1901.09277}, title = {{A practical bandit method with advantages in neural network tuning}}, url = {http://arxiv.org/pdf/1901.09277.pdf}, year = {2019} } @inproceedings{gabillon2013adaptive, author = {Gabillon, Victor and Kveton, Branislav and Wen, Zheng and Eriksson, Brian and Muthukrishnan, S.}, booktitle = {Neural Information Processing Systems}, title = {{Adaptive submodular maximization in bandit setting}}, year = {2013} } @inproceedings{servedio2008, address = {Helsinki, Finland}, booktitle = {Proceedings of the 21st annual Conference On Learning Theory}, editor = {Servedio, Rocco A and Zhang, Tong}, month = {jul}, publisher = {Omnipress}, series = {COLT '08}, title = {{No Title}}, volume = {80}, year = {2008} } @article{nino-nora2010computing, author = {Nino-Mora, J}, doi = {10.1287/ijoc.1100.0398}, issn = {10919856}, journal = {INFORMS Journal on Computing}, keywords = {accepted may 2010,accepted winfried grassmann,advance,analysis algorithms,area editor computational,bandits,computational complexity,dynamic programming,finite horizon,history,index policies,march 2009,markov,may 2010,probability analysis,published online articles,received,revised january 2010}, number = {2}, pages = {254--267}, title = {{Computing a Classic Index for Finite-Horizon Bandits}}, url = {http://joc.journal.informs.org/cgi/doi/10.1287/ijoc.1100.0398}, volume = {23}, year = {2010} } @inproceedings{kleinberg2008multi, author = {Kleinberg, Robert and Slivkins, Aleksandrs and Upfal, Eli}, booktitle = {Symposium on Theory Of Computing}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Multi-armed bandit problems in metric spaces}}, url = {https://arxiv.org/pdf/0809.4882.pdf}, year = {2008} } @inproceedings{auerlogarithmic, author = {Auer, Peter and Ortner, Ronald}, keywords = {bandits}, mendeley-tags = {bandits}, pages = 
{49--56}, title = {{Logarithmic online regret bounds for undiscounted reinforcement learning}} } @article{berry1997bandit, abstract = {We consider a bandit problem consisting of a sequence of {\$}n{\$} choices from an infinite number of Bernoulli arms, with {\$}n \rightarrow \infty{\$}. The objective is to minimize the long-run failure rate. The Bernoulli parameters are independent observations from a distribution {\$}F{\$}. We first assume {\$}F{\$} to be the uniform distribution on (0, 1) and consider various extensions. In the uniform case we show that the best lower bound for the expected failure proportion is between {\$}\sqrt{2}/\sqrt{n}{\$} and {\$}2/\sqrt{n}{\$} and we exhibit classes of strategies that achieve the latter.}, author = {Berry, Donald A. and Chen, Robert W. and Zame, Alan and Heath, David C. and Shepp, Larry A.}, journal = {Annals of Statistics}, keywords = {Bandit problems,Dynamic allocation of bernoulli processes,Sequential experimentation,Staying with a winner,Switching with a loser}, pages = {2103--2116}, title = {{Bandit problems with infinitely many arms}}, volume = {25}, year = {1997} } @inproceedings{scarlett2017lower, author = {Scarlett, Jonathan and Bogunovic, Ilija and Cevher, Volkan}, booktitle = {Conference on Learning Theory}, title = {{Lower bounds on regret for noisy Gaussian process bandit optimization}}, url = {http://proceedings.mlr.press/v65/scarlett17a/scarlett17a.pdf}, year = {2017} } @inproceedings{bubeck2010open, author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi}, booktitle = {Conference on Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Open-loop optimistic planning}}, url = {http://sbubeck.com/COLT10{\_}BM.pdf}, year = {2010} } @phdthesis{shalev-shwartz2007online, author = {Shalev-Shwartz, Shai}, keywords = {bandits}, mendeley-tags = {bandits}, month = jul, school = {The Hebrew University of Jerusalem}, title = {{Online Learning: Theory, Algorithms, and Applications}}, year = {2007} } 
@inproceedings{abernethy2008competing, author = {Abernethy, Jacob D and Hazan, Elad and Rakhlin, Alexander}, booktitle = {Conference on Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Competing in the dark: An efficient algorithm for bandit linear optimization.}}, url = {http://web.eecs.umich.edu/{~}jabernet/123-Abernethy.pdf}, year = {2008} } @unpublished{honda2010asymptotically, annote = {arXiv:0905.2776}, author = {Honda, Junya and Takemura, Akimichi}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{An Asymptotically Optimal Policy for Finite Support Models in the Multiarmed Bandit Problem}}, year = {2010} } @inproceedings{yu2017contextual, abstract = {Contextual bandits with linear payoffs, which are also known as linear bandits, provide a powerful alternative for solving practical problems of sequential decisions, e.g., online advertisements. In the era of big data, con- textual data usually tend to be high-dimensional, which leads to new challenges for traditional linear bandits mostly designed for the setting of low-dimensional con- textual data. Due to the curse of dimensionality, there are two challenges in most of the current bandit algo- rithms: the first is high time-complexity; and the sec- ond is extreme large upper regret bounds with high- dimensional data. In this paper, in order to attack the above two challenges effectively, we develop an algo- rithm of Contextual Bandits via RAndom Projection (CBRAP) in the setting of linear payoffs, which works especially for high-dimensional contextual data. The proposed CBRAP algorithm is time-efficient and flex- ible, because it enables players to choose an arm in a low-dimensional space, and relaxes the sparsity as- sumption of constant number of non-zero components in previous work. Besides, we prove an upper regret bound for the proposed algorithm, which is associ- ated with reduced dimensions. 
By comparing with three benchmark algorithms, we demonstrate improved per- formance on cumulative payoffs of CBRAP during its sequential decisions on both synthetic and real-world datasets, as well as its superior time-efficiency. Introduction}, author = {Yu, Xiaotian and Lyu, Michael R. and King, Irwin}, booktitle = {AAAI Conference on Artificial Intelligence}, title = {{CBRAP: Contextual bandits with random projection}}, url = {https://aaai.org/ocs/index.php/AAAI/AAAI17/paper/view/14742/14490}, year = {2017} } @article{Auer1995, author = {Auer, Peter and Cesa-Bianchi, Nicol{\`{o}} and Freund, Yoav and Schapire, Robert E}, keywords = {EXP3,adversarial bandits,bandit problem,game theory,matrix game,multi-armed bandit problem,rate of convergence,slot machines,stochastic games,well-behaved stochastic process}, mendeley-tags = {EXP3,adversarial bandits}, month = {oct}, pages = {322}, title = {{Gambling in a rigged casino: The adversarial multi-armed bandit problem}}, url = {http://dl.acm.org/citation.cfm?id=795662.796294}, year = {1995} } @inproceedings{wen2013sequential, author = {Wen, Zheng and Kveton, Branislav and Eriksson, Brian and Bhamidipati, Sandilya}, booktitle = {International Conference on Machine Learning}, title = {{Sequential Bayesian search}}, year = {2013} } @inproceedings{cesa-bianchi2012combinatorial, abstract = {We study sequential prediction problems in which, at each time instance, the forecaster chooses a vector from a given finite set S⊂ Rd. At the same time, the opponent chooses a loss vector in Rd and the forecaster suffers a loss that is the inner product of the two vectors. The goal of the forecaster is to achieve that, in the long run, the accumulated loss is not much larger than that of the best possible element in S. We consider the bandit setting in which the forecaster only has access to the losses of the chosen vectors (i.e., the entire loss vectors are not observed). 
We introduce a variant of a strategy by Dani, Hayes and Kakade achieving a regret bound that, for a variety of concrete choices of S, is of order √ndln|S| where n is the time horizon. This is not improvable in general and is better than previously known bounds. The examples we consider are all such that S⊂{\{} 0,1{\}}d, and we show how the combinatorial structure of these classes can be exploited to improve the regret bounds. We also point out computationally efficient implementations for various interesting choices of S. {\textcopyright} 2012 Elsevier Inc.}, author = {Cesa-Bianchi, Nicol{\`{o}} and Lugosi, G{\'{a}}bor}, booktitle = {Journal of Computer and System Sciences}, keywords = {Adversarial bandit problems,Online linear optimization,Online prediction}, number = {5}, pages = {1404--1422}, title = {{Combinatorial bandits}}, url = {http://cesa-bianchi.di.unimi.it/Pubblicazioni/comband.pdf}, volume = {78}, year = {2012} } @inproceedings{cesa-bianchi2013gang, author = {Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio and Zappella, Giovanni}, booktitle = {Neural Information Processing Systems}, title = {{A gang of bandits}}, url = {https://papers.nips.cc/paper/5006-a-gang-of-bandits.pdf}, year = {2013} } @inproceedings{bartlett2009regal, address = {Arlington, Virginia, United States}, author = {Bartlett, Peter L and Tewari, Ambuj}, booktitle = {Proceedings of the 25th conference on Uncertainty in Artificial Intelligence}, isbn = {978-0-9749039-5-8}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {35--42}, publisher = {AUAI Press}, series = {UAI '09}, title = {{REGAL: a regularization based algorithm for reinforcement learning in weakly communicating MDPs}}, year = {2009} } @inproceedings{neu2013efficient, author = {Neu, Gergely and Bart{\'{o}}k, G{\'{a}}bor}, booktitle = {Algorithmic Learning Theory}, title = {{An efficient algorithm for learning with semi-bandit feedback}}, year = {2013} } @inproceedings{katariya2016stochastic, abstract = {We propose 
stochastic rank-{\$}1{\$} bandits, a class of online learning problems where at each step a learning agent chooses a pair of row and column arms, and receives the product of their values as a reward. The main challenge of the problem is that the individual values of the row and column are unobserved. We assume that these values are stochastic and drawn independently. We propose a computationally-efficient algorithm for solving our problem, which we call Rank1Elim. We derive a {\$}O((K + L) (1 / \backslashDelta) \backslashlog n){\$} upper bound on its {\$}n{\$}-step regret, where {\$}K{\$} is the number of rows, {\$}L{\$} is the number of columns, and {\$}\backslashDelta{\$} is the minimum of the row and column gaps; under the assumption that the mean row and column rewards are bounded away from zero. To the best of our knowledge, we present the first bandit algorithm that finds the maximum entry of a rank-{\$}1{\$} matrix whose regret is linear in {\$}K + L{\$}, {\$}1 / \backslashDelta{\$}, and {\$}\backslashlog n{\$}. We also derive a nearly matching lower bound. Finally, we evaluate Rank1Elim empirically on multiple problems. We observe that it leverages the structure of our problems and can learn near-optimal solutions even if our modeling assumptions are mildly violated.}, author = {Katariya, Sumeet and Kveton, Branislav and Szepesv{\'{a}}ri, Csaba and Vernade, Claire and Wen, Zheng}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Stochastic rank-1 bandits}}, year = {2017} } @inproceedings{audibert2009minimax, author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien}, booktitle = {Conference on Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Minimax policies for adversarial and stochastic bandits}}, url = {https://hal-enpc.archives-ouvertes.fr/hal-00834882/document}, year = {2009} } @article{harisson1978, author = {Harrison, D and Rubinfeld, D L}, journal = {J. Environ. 
Economics {\&} Management}, pages = {81--102}, title = {{Hedonic prices and the demand for clean air}}, volume = {5}, year = {1978} } @incollection{filippi2010parametric, author = {Filippi, Sarah and Capp{\'{e}}, Olivier and Garivier, Aur{\'{e}}lien and Szepesv{\'{a}}ri, Csaba}, booktitle = {Advances in Neural Information Processing Systems 23}, editor = {Lafferty, J and Williams, C K I and Shawe-Taylor, J and Zemel, R S and Culotta, A}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {586--594}, title = {{Parametric Bandits: The Generalized Linear Case}}, year = {2010} } @inproceedings{defreitas2012exponential, abstract = {This paper analyzes the problem of Gaussian process (GP) bandits with deterministic observations. The analysis uses a branch and bound algorithm that is related to the UCB algorithm of (Srinivas et al, 2010). For GPs with Gaussian observation noise, with variance strictly greater than zero, Srinivas et al proved that the regret vanishes at the approximate rate of {\$}O(1/\backslashsqrt{\{}t{\}}){\$}, where t is the number of observations. To complement their result, we attack the deterministic case and attain a much faster exponential convergence rate. Under some regularity assumptions, we show that the regret decreases asymptotically according to {\$}O(e{\^{}}{\{}-\backslashfrac{\{}\backslashtau t{\}}{\{}(\backslashln t){\^{}}{\{}d/4{\}}{\}}{\}}){\$} with high probability. 
Here, d is the dimension of the search space and tau is a constant that depends on the behaviour of the objective function near its global maximum.}, archivePrefix = {arXiv}, arxivId = {1206.6457}, author = {de Freitas, Nando and Smola, Alex and Zoghi, Masrour}, booktitle = {International Conference on Machine Learning}, eprint = {1206.6457}, isbn = {978-1-4503-1285-1}, title = {{Exponential regret bounds for Gaussian process bandits with deterministic observations}}, url = {http://arxiv.org/abs/1206.6457}, year = {2012} } @inproceedings{kawale2015efficient, author = {Kawale, Jaya and Bui, Hung Hai and Kveton, Branislav and Tran-Thanh, Long and Chawla, Sanjay}, booktitle = {Neural Information Processing Systems}, title = {{Efficient Thompson sampling for online matrix-factorization recommendation}}, year = {2015} } @inproceedings{ortneronline, author = {Ortner, Ronald}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {123--137}, title = {{Online Regret Bounds for Markov Decision Processes with Deterministic Transitions}} } @inproceedings{hren2008optimistic, abstract = {If one possesses a model of a controlled deterministic system, then from any state, one may consider the set of all possible reachable states starting from that state and using any sequence of actions. This forms a tree whose size is exponential in the planning time horizon. Here we ask the question: given finite computational resources (e.g. CPU time), which may not be known ahead of time, what is the best way to explore this tree, such that once all resources have been used, the algorithm would be able to propose an action (or a sequence of actions) whose performance is as close as possible to optimality? The performance with respect to optimality is assessed in terms of the regret (with respect to the sum of discounted future rewards) resulting from choosing the action returned by the algorithm instead of an optimal action. 
In this paper we investigate an optimistic exploration of the tree, where the most promising states are explored first, and compare this approach to a naive uniform exploration. Bounds on the regret are derived both for uniform and optimistic exploration strategies. Numerical simulations illustrate the benefit of optimistic planning.}, author = {Hren, Jean-Francois and Munos, R{\'{e}}mi}, booktitle = {European Workshop on Reinforcement Learning}, title = {{Optimistic planning of deterministic systems}}, url = {https://hal.archives-ouvertes.fr/hal-00830182/document}, year = {2008} } @inproceedings{kocsis2006bandit, abstract = {For large state-space Markovian Decision Problems Monte-Carlo planning is one of the few viable approaches to find near-optimal solutions. In this paper we introduce a new algorithm, UCT, that applies bandit ideas to guide Monte-Carlo planning. In finite-horizon or discounted MDPs the algorithm is shown to be consistent and finite sample bounds are derived on the estimation error due to sampling. 
Experimental results show that in several domains, UCT is significantly more efficient than its alternatives.}, author = {Kocsis, Levente and Szepesv{\'{a}}ri, Csaba}, booktitle = {European Conference on Machine Learning}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Bandit-based Monte-Carlo planning}}, url = {http://ggp.stanford.edu/readings/uct.pdf}, year = {2006} } @inproceedings{hazan2011beyond, author = {Hazan, Elad and Kale, Satyen}, booktitle = {Conference on Learning Theory}, title = {{Beyond the regret minimization barrier: an optimal algorithm for stochastic strongly-convex optimization.}}, year = {2011} } @article{carpentier2014asimple, author = {Carpentier, Alexandra and Valko, Michal}, journal = {ArXiv e-prints}, title = {{Simple regret for infinitely many armed bandits}}, year = {2015} } @inproceedings{gu2014online, author = {Gu, Quanquan and Han, Jiawei}, booktitle = {International Conference on Data Mining}, title = {{Online spectral learning on a graph with bandit feedback}}, url = {https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=7023409}, year = {2014} } @book{neumaier2008interval, author = {Neumaier, Arnold}, isbn = {9780521102148}, publisher = {Cambridge University Press}, series = {Encyclopedia of Mathematics and its Applications}, title = {{Interval Methods for Systems of Equations}}, url = {http://books.google.fr/books?id=ObInPwAACAAJ}, year = {2008} } @incollection{chapelle2011empirical, abstract = {Thompson sampling is one of oldest heuristic to address the exploration ex- ploitation trade-off, but it is surprisingly unpopular in the literature. We present here some empirical results using Thompson sampling on simulated and real data, and show that it is highly competitive. 
And since this heuristic is very easy to implement, we argue that it should be part of the standard baselines to compare against.}, author = {Chapelle, Olivier and Li, Lihong}, booktitle = {Neural Information Processing Systems}, title = {{An empirical evaluation of Thompson sampling}}, url = {https://arxiv.org/pdf/1605.08722.pdf}, year = {2011} } @inproceedings{garivier2016optimal, author = {Garivier, Aur{\'{e}}lien and Kaufmann, Emilie}, booktitle = {Conference on Learning Theory}, title = {{Optimal Best Arm Identification with Fixed Confidence}}, year = {2016} } @article{Lei2015a, abstract = {Social networks are commonly used for marketing purposes. For example, free samples of a product can be given to a few influential social network users (or "seed nodes"), with the hope that they will convince their friends to buy it. One way to formalize marketers' objective is through influence maximization (or IM), whose goal is to find the best seed nodes to activate under a fixed budget, so that the number of people who get influenced in the end is maximized. Recent solutions to IM rely on the influence probability that a user influences another one. However, this probability information may be unavailable or incomplete. In this paper, we study IM in the absence of complete information on influence probability. We call this problem Online Influence Maximization (OIM) since we learn influence probabilities at the same time we run influence campaigns. To solve OIM, we propose a multiple-trial approach, where (1) some seed nodes are selected based on existing influence information; (2) an influence campaign is started with these seed nodes; and (3) users' feedback is used to update influence information. We adopt the Explore-Exploit strategy, which can select seed nodes using either the current influence probability estimation (exploit), or the confidence bound on the estimation (explore). Any existing IM algorithm can be used in this framework. 
We also develop an incremental algorithm that can significantly reduce the overhead of handling users' feedback information. Our experiments show that our solution is more effective than traditional IM methods on the partial information.}, author = {Lei, Siyu and Maniu, Silviu and Mo, Luyi and Cheng, Reynold and Senellart, Pierre}, month = {jun}, pages = {13}, title = {{Online Influence Maximization (Extended Version)}}, url = {http://arxiv.org/abs/1506.01188}, year = {2015} } @inproceedings{lazarichybrid, author = {Lazaric, Alessandro and Munos, R{\'{e}}mi}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Hybrid Stochastic-Adversarial Online Learning}} } @inproceedings{bartlettadaptive, author = {Bartlett, Peter L and Hazan, Elad and Rakhlin, Alexander}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {65--72}, title = {{Adaptive Online Gradient Descent.}} } @inproceedings{kannan2018greedy, abstract = {Bandit learning is characterized by the tension between long-term exploration and short-term exploitation. However, as has recently been noted, in settings in which the choices of the learning algorithm correspond to important decisions about individual people (such as criminal recidivism prediction, lending, and sequential drug trials), exploration corresponds to explicitly sacrificing the well-being of one individual for the potential future benefit of others. This raises a fairness concern. In such settings, one might like to run a "greedy" algorithm, which always makes the (myopically) optimal decision for the individuals at hand - but doing this can result in a catastrophic failure to learn. In this paper, we consider the linear contextual bandit problem and revisit the performance of the greedy algorithm. 
We give a smoothed analysis, showing that even when contexts may be chosen by an adversary, small perturbations of the adversary's choices suffice for the algorithm to achieve "no regret", perhaps (depending on the specifics of the setting) with a constant amount of initial training data. This suggests that "generically" (i.e. in slightly perturbed environments), exploration and exploitation need not be in conflict in the linear setting.}, archivePrefix = {arXiv}, arxivId = {1801.03423}, author = {Kannan, Sampath and Morgenstern, Jamie and Roth, Aaron and Waggoner, Bo and Wu, Zhiwei Steven}, booktitle = {Advances in Neural Information Processing Systems 31 (NeurIPS)}, eprint = {1801.03423}, pages = {2231--2241}, title = {{A smoothed analysis of the greedy algorithm for the linear contextual bandit problem}}, url = {http://arxiv.org/pdf/1801.03423.pdf}, year = {2018} } @article{cicirello2005max, abstract = {The multiarmed bandit is often used as an analogy for the tradeoff between exploration and exploitation in search problems. The classic problem involves allocating trials to the arms of a multiarmed slot machine to maximize the expected sum of rewards. We pose a new variation of the multiarmed bandit—the Max K-Armed Bandit—in which trials must be allocated among the arms to maximize the expected best single sample reward of the series of trials. Motivation for the Max K-Armed Bandit is the allocation of restarts among a set of multistart stochastic search algorithms. We present an analysis of this Max K-Armed Bandit showing under certain assumptions that the optimal strategy allocates trials to the observed best arm at a rate increasing double exponentially relative to the other arms. This motivates an exploration strategy that follows a Boltzmann distribution with an exponentially decaying temperature parameter. We compare this exploration policy to policies that allocate trials to the observed best arm at rates faster (and slower) than double exponentially. 
The results confirm, for two scheduling domains, that the double exponential increase in the rate of allocations to the observed best heuristic outperforms the other approaches.}, author = {Cicirello, Vincent A. and Smith, Stephen F.}, journal = {AAAI Conference on Artificial Intelligence}, title = {{The max k-armed bandit: A new model of exploration applied to search heuristic selection}}, url = {http://www.aaai.org/Papers/AAAI/2005/AAAI05-215.pdf}, year = {2005} } @techreport{lehrer2003wide, author = {Lehrer, Ehud and Rosenberg, Dinah}, institution = {EconWPA}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{A Wide Range No-Regret Theorem}}, type = {Game Theory and Information}, year = {2003} } @inproceedings{balcazar2006, address = {Barcelona, Spain}, booktitle = {Proceedings of the 17th international conference on Algorithmic Learning Theory}, editor = {Balc{\'{a}}zar, Jos{\'{e}} L and Long, Philip M and Stephan, Frank}, isbn = {3-540-46649-5}, keywords = {bandits}, mendeley-tags = {bandits}, month = {oct}, publisher = {Springer}, series = {ALT '06, Lecture Notes in Computer Science}, title = {{No Title}}, volume = {4264}, year = {2006} } @incollection{langford2008epoch, address = {Cambridge, MA}, author = {Langford, John and Zhang, Tong}, booktitle = {Advances in Neural Information Processing Systems 20}, editor = {Platt, J C and Koller, D and Singer, Y and Roweis, S}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {817--824}, publisher = {MIT Press}, title = {{The Epoch-Greedy Algorithm for Multi-armed Bandits with Side Information}}, year = {2008} } @inproceedings{munos1999variable, author = {Munos, R{\'{e}}mi and Moore, Andrew}, booktitle = {Proceedings of the 16th International Joint Conference on Artificial Intelligence}, pages = {1348--1355}, title = {{Variable Resolution Discretization for High-Accuracy Solutions of Optimal Control Problems}}, year = {1999} } @inproceedings{garivier2011upper-confidence-bound, abstract = {Many 
problems, such as cognitive radio, parameter control of a scanning tunnelling microscope or internet advertisement, can be modelled as non-stationary bandit problems where the distributions of rewards changes abruptly at unknown time instants. In this paper, we analyze two algorithms designed for solving this issue: discounted UCB (D-UCB) and sliding-window UCB (SW-UCB). We establish an upper-bound for the expected regret by upper-bounding the expectation of the number of times suboptimal arms are played. The proof relies on an interesting Hoeffding type inequality for self normalized deviations with a random number of summands. We establish a lower-bound for the regret in presence of abrupt changes in the arms reward distributions. We show that the discounted UCB and the sliding-window UCB both match the lower-bound up to a logarithmic factor. Numerical simulations show that D-UCB and SW-UCB perform significantly better than existing soft-max methods like EXP3.S.}, author = {Garivier, Aur{\'{e}}lien and Moulines, Eric}, booktitle = {Algorithmic Learning Theory}, isbn = {978-3-642-24412-4}, title = {{On upper-confidence-bound policies for switching bandit problems}}, url = {https://arxiv.org/pdf/0805.3415.pdf}, year = {2011} } @article{Chen2014a, abstract = {We define a general framework for a large class of combinatorial multi-armed bandit (CMAB) problems, where subsets of base arms with unknown distributions form super arms. In each round, a super arm is played and the base arms contained in the super arm are played and their outcomes are observed. We further consider the extension in which more based arms could be probabilistically triggered based on the outcomes of already triggered arms. The reward of the super arm depends on the outcomes of all played arms, and it only needs to satisfy two mild assumptions, which allow a large class of nonlinear reward instances. 
We assume the availability of an offline ($\alpha$, $\beta$)-approximation oracle that takes the means of the outcome distributions of arms and outputs a super arm that with probability $\beta$ generates an $\alpha$ fraction of the optimal expected reward. The objective of an online learning algorithm for CMAB is to minimize ($\alpha$, $\beta$)-approximation regret, which is the difference between the $\alpha\beta$ fraction of the expected reward when always playing the optimal super arm, and the expected reward of playing super arms according to the algorithm. We provide CUCB algorithm that achieves O(log n) distribution-dependent regret, where n is the number of rounds played, and we further provide distribution-independent bounds for a large class of reward functions. Our regret analysis is tight in that it matches the bound of UCB1 algorithm (up to a constant factor) for the classical MAB problem, and it significantly improves the regret bound in an earlier paper on combinatorial bandits with linear rewards. We apply our CMAB framework to two new applications, probabilistic maximum coverage and social influence maximization, both having nonlinear reward structures. 
In particular, application to social influence maximization requires our extension on probabilistically triggered arms.}, author = {Chen, Wei and Wang, Yajun and Yuan, Yang}, month = {jul}, title = {{Combinatorial Multi-Armed Bandit and Its Extension to Probabilistically Triggered Arms}}, url = {http://arxiv.org/abs/1407.8339}, year = {2014} } @inproceedings{cao2019nearly, author = {Cao, Yang and Wen, Zheng and Kveton, Branislav and Xie, Yao}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Nearly optimal adaptive procedure with change detection for piecewise-stationary bandit}}, year = {2019} } @inproceedings{bartletthigh, author = {Bartlett, Peter L and Dani, Varsha and Hayes, Thomas P and Kakade, Sham M and Rakhlin, Alexander and Tewari, Ambuj}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {335--342}, title = {{High-probability Regret Bounds for Bandit Online Linear Optimization}} } @article{bouneffouf2016multi-armed, author = {Bouneffouf, Djallel and F{\'{e}}raud, Raphael}, doi = {10.1016/j.neucom.2016.02.052}, issn = {0925-2312}, journal = {Neurocomputing}, number = {C}, pages = {16--21}, title = {{Multi-armed bandit problem with known trend}}, volume = {205}, year = {2016} } @article{li2016efficient, abstract = {Performance of machine learning algorithms depends critically on identifying a good set of hyperparameters. While current methods offer efficiencies by adaptively choosing new configurations to train, an alternative strategy is to adaptively allocate resources across the selected configurations. We formulate hyperparameter optimization as a pure-exploration non-stochastic infinitely many armed bandit problem where allocation of additional resources to an arm corresponds to training a configuration on larger subsets of the data. We introduce Hyperband for this framework and analyze its theoretical properties, providing several desirable guarantees. 
We compare Hyperband with state-of-the-art Bayesian optimization methods and a random search baseline on a comprehensive benchmark including 117 datasets. Our results on this benchmark demonstrate that while Bayesian optimization methods do not outperform random search trained for twice as long, Hyperband in favorable settings offers valuable speedups.}, archivePrefix = {arXiv}, arxivId = {1603.06560}, author = {Li, Lisha and Jamieson, Kevin and DeSalvo, Giulia and Rostamizadeh, Afshin and Talwalkar, Ameet}, eprint = {1603.06560}, journal = {arXiv:1603.06560}, title = {{Efficient hyperparameter optimization and infinitely many armed bandits}}, url = {https://arxiv.org/pdf/1603.06560.pdf}, year = {2016} } @inproceedings{neu2013efficient, author = {Neu, Gergely and Bart{\'{o}}k, G{\'{a}}bor}, booktitle = {Algorithmic Learning Theory}, title = {{An efficient algorithm for learning with semi-bandit feedback}}, year = {2013} } @inproceedings{caron2012leveraging, author = {Caron, St{\'{e}}phane and Kveton, Branislav and Lelarge, Marc and Bhagat, Smriti}, booktitle = {Uncertainty in Artificial Intelligence}, title = {{Leveraging side observations in stochastic bandits.}}, url = {https://arxiv.org/pdf/1210.4839.pdf}, year = {2012} } @inproceedings{audiffren2015cornering, author = {Audiffren, Julien and Ralaivola, Liva}, booktitle = {Neural Information Processing Systems}, file = {::}, title = {{Cornering stationary and restless mixing bandits with Remix-UCB}}, url = {https://papers.nips.cc/paper/6029-cornering-stationary-and-restless-mixing-bandits-with-remix-ucb}, year = {2015} } @inproceedings{heidari2016tight, abstract = {We consider a variant of the well-studied multi-armed bandit problem in which the reward from each action evolves monotonically in the number of times the decision maker chooses to take that action. 
We are motivated by settings in which we must give a series of homogeneous tasks to a finite set of arms (workers) whose performance may improve (due to learning) or decay (due to loss of interest) with repeated trials. We assume that the arm-dependent rates at which the rewards change are unknown to the decision maker, and propose algorithms with provably optimal policy regret bounds, a much stronger notion than the often-studied external regret. For the case where the rewards are increasing and concave, we give an algorithm whose policy regret is sublinear and has a (provably necessary) dependence on the time re-quired to distinguish the optimal arm from the rest. We illustrate the behavior and performance of this algorithm via simulations. For the decreasing case, we present a simple greedy approach and show that the policy regret of this algorithm is constant and upper bounded by the number of arms.}, author = {Heidari, Hoda and Kearns, Michael and Roth, Aaron}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Tight policy regret bounds for improving and decaying bandits}}, url = {https://www.cis.upenn.edu/{\%}7B{~}{\%}7Daaroth/Papers/decayingbandits.pdf}, year = {2016} } @article{bubeck2012regret, abstract = {Multi-armed bandit problems are the most basic examples of sequential decision problems with an exploration-exploitation trade-off. This is the balance between staying with the option that gave highest payoffs in the past and exploring new options that might give higher payoffs in the future. Although the study of bandit problems dates back to the Thirties, exploration-exploitation trade-offs arise in several modern applications, such as ad placement, website optimization, and packet routing. Mathematically, a multi-armed bandit is defined by the payoff process associated with each option. In this survey, we focus on two extreme cases in which the analysis of regret is particularly simple and elegant: i.i.d. 
payoffs and adversarial payoffs. Besides the basic setting of finitely many actions, we also analyze some of the most important variants and extensions, such as the contextual bandit model.}, archivePrefix = {arXiv}, arxivId = {1204.5721}, author = {Bubeck, S{\'{e}}bastien and Cesa-Bianchi, Nicol{\`{o}}}, eprint = {1204.5721}, journal = {Foundations and Trends in Machine Learning}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {1--122}, title = {{Regret analysis of stochastic and nonstochastic multi-armed bandit problems}}, url = {http://arxiv.org/abs/1204.5721}, volume = {5}, year = {2012} } @article{kawaguchi2016global, abstract = {This paper considers global optimization with a black-box unknown objective function that can be non-convex and non-differentiable. Such a difficult optimization problem arises in many real-world applications, such as parameter tuning in machine learning, engineering design problem, and planning with a complex physics simulator. This paper proposes a new global optimization algorithm, called Locally Oriented Global Optimization (LOGO), to aim for both fast convergence in practice and finite-time error bound in theory. The advantage and usage of the new algorithm are illustrated via theoretical analysis and an experiment conducted with 11 benchmark test functions. Further, we modify the LOGO algorithm to specifically solve a planning problem via policy search with continuous state/action space and long time horizon while maintaining its finite-time error bound. We apply the proposed planning method to accident management of a nuclear power plant. 
The result of the application study demonstrates the practical utility of our method.}, archivePrefix = {arXiv}, arxivId = {1607.04817}, author = {Kawaguchi, Kenji and Maruyama, Yu and Zheng, Xiaoyu}, doi = {10.1613/jair.4742}, eprint = {1607.04817}, issn = {10769757}, journal = {Journal of Artificial Intelligence Research}, pages = {153--195}, title = {{Global continuous optimization with error bound and fast convergence}}, volume = {56}, year = {2016} } @inproceedings{koller2008, address = {Vancouver, British Columbia, Canada}, booktitle = {Proceedings of the 22nd conference on advances in Neural Information Processing Systems}, editor = {Koller, Daphne and Schuurmans, Dale and Bengio, Yoshua and Bottou, L{\'{e}}on}, month = {dec}, publisher = {MIT Press}, series = {NIPS '08}, title = {{No Title}}, year = {2008} } @inproceedings{mellor2013nonstationary, abstract = {Thompson Sampling has recently been shown to be optimal in the Bernoulli Multi-Armed Bandit setting[Kaufmann et al., 2012]. This bandit problem assumes stationary distributions for the rewards. It is often unrealistic to model the real world as a stationary distribution. In this paper we derive and evaluate algorithms using Thompson Sampling for a Switching Multi-Armed Bandit Problem. We propose a Thompson Sampling strategy equipped with a Bayesian change point mechanism to tackle this problem. We develop algorithms for a variety of cases with constant switching rate: when switching occurs all arms change (Global Switching), switching occurs independently for each arm (Per-Arm Switching), when the switching rate is known and when it must be inferred from data. This leads to a family of algorithms we collectively term Change-Point Thompson Sampling (CTS). We show empirical results of the algorithm in 4 artificial environments, and 2 derived from real world data; news click-through[Yahoo!, 2011] and foreign exchange data[Dukascopy, 2012], comparing them to some other bandit algorithms. 
In real world data CTS is the most effective.}, archivePrefix = {arXiv}, arxivId = {1302.3721}, author = {Mellor, Joseph and Shapiro, Jonathan}, booktitle = {Proceedings of the 16th International Conference on Artificial Intelligence and Statistics (AIStats)}, eprint = {1302.3721}, issn = {15337928}, pages = {442--450}, title = {{Thompson sampling in switching environments with Bayesian online change point detection}}, url = {http://arxiv.org/pdf/1302.3721.pdf}, year = {2013} } @inproceedings{pandey2007bandits, author = {Pandey, S and Agarwal, D and Chakrabarti, D and Josifovski, V}, booktitle = {Proceedings of the Seventh SIAM International Conference on Data Mining}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Bandits for Taxonomies: A Model-based Approach}}, year = {2007} } @misc{snapnets, author = {Leskovec, Jure and Krevl, Andrej}, howpublished = {\url{http://snap.stanford.edu/data}}, month = {jun}, title = {{SNAP Datasets: Stanford Large Network Dataset Collection}}, year = {2014} } @inproceedings{gentile2017context, author = {Gentile, Claudio and Li, Shuai and Kar, Purushottam and Karatzoglou, Alexandros and Zappella, Giovanni and Etrue, Evans}, booktitle = {International Conference on Machine Learning}, month = {jul}, title = {{On context-dependent clustering of bandits}}, url = {http://proceedings.mlr.press/v70/gentile17a/gentile17a.pdf}, year = {2017} } @article{jaksch2010near, address = {Cambridge, MA, USA}, author = {Jaksch, Thomas and Ortner, Ronald and Auer, Peter}, issn = {1532-4435}, journal = {Journal of Machine Learning Research}, keywords = {bandits}, mendeley-tags = {bandits}, month = {aug}, pages = {1563--1600}, publisher = {MIT Press}, title = {{Near-optimal Regret Bounds for Reinforcement Learning}}, volume = {99}, year = {2010} } @article{li2010contextual, abstract = {Personalized web services strive to adapt their services (advertisements, news articles, etc) to individual users by making use 
of both content and user information. Despite a few recent advances, this problem remains challenging for at least two reasons. First, web service is featured with dynamically changing pools of content, rendering traditional collaborative filtering methods inapplicable. Second, the scale of most web services of practical interest calls for solutions that are both fast in learning and computation. In this work, we model personalized recommendation of news articles as a contextual bandit problem, a principled approach in which a learning algorithm sequentially selects articles to serve users based on contextual information about the users and articles, while simultaneously adapting its article-selection strategy based on user-click feedback to maximize total user clicks. The contributions of this work are three-fold. First, we propose a new, general contextual bandit algorithm that is computationally efficient and well motivated from learning theory. Second, we argue that any bandit algorithm can be reliably evaluated offline using previously recorded random traffic. Finally, using this offline evaluation method, we successfully applied our new algorithm to a Yahoo! Front Page Today Module dataset containing over 33 million events. 
Results showed a 12.5{\%} click lift compared to a standard context-free bandit algorithm, and the advantage becomes even greater when data gets more scarce.}, author = {Li, Lihong and Chu, Wei and Langford, John and Schapire, Robert E.}, institution = {ACM}, journal = {International World Wide Web Conference}, keywords = {contextual bandit,exploitation dilemma,exploration,personalization,recommender systems,web service}, publisher = {ACM Press}, title = {{A contextual-bandit approach to personalized news article recommendation}}, url = {http://rob.schapire.net/papers/www10.pdf}, year = {2010} } @article{whittle1998restless, abstract = {We consider a population of n projects which in general continue to evolve whether in operation or not (although by different rules). It is desired to choose the projects in operation at each instant of time so as to maximise the expected rate of reward, under a constraint upon the expected number of projects in operation. The Lagrange multiplier associated with this constraint defines an index which reduces to the Gittins index when projects not being operated are static. If one is constrained to operate m projects exactly then arguments are advanced to support the conjecture that, for m and n large in constant ratio, the policy of operating the m projects of largest current index is nearly optimal. 
The index is evaluated for some particular projects.}, author = {Whittle, Peter}, journal = {Journal of Applied Probability}, title = {{Restless bandits: activity allocation in a changing world}}, year = {1988} } @inproceedings{maillard2011finite, author = {Maillard, Odalric-Ambrym and Munos, R{\'{e}}mi and Stoltz, Gilles}, booktitle = {To appear in Proceedings of the 24th annual Conference On Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, series = {COLT '11}, title = {{Finite-Time Analysis of Multi-armed Bandits Problems with Kullback-Leibler Divergences}}, year = {2011} } @inproceedings{slivkins2011multi-armed, author = {Slivkins, Aleksandrs}, booktitle = {Neural Information Processing Systems}, title = {{Multi-armed bandits on implicit metric spaces}}, url = {https://papers.nips.cc/paper/4332-multi-armed-bandits-on-implicit-metric-spaces.pdf}, year = {2011} } @inproceedings{garivier2011kl, author = {Garivier, Aur{\'{e}}lien and Capp{\'{e}}, Olivier}, booktitle = {Conference on Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{The KL-UCB algorithm for bounded stochastic bandits and beyond}}, url = {https://arxiv.org/pdf/1102.2490.pdf}, year = {2011} } @inproceedings{Mary2015a, abstract = {This paper addresses the on-line recommendation problem facing new users and new items; we assume that no information is available neither about users, nor about the items. The only source of information is a set of ratings given by users to some items. By on-line, we mean that the set of users, and the set of items, and the set of ratings is evolving along time and that at any moment, the recommendation system has to select items to recommend based on the currently available information, that is basically the sequence of past events. We also mean that each user comes with her preferences which may evolve along short and longer scales of time; so we have to continuously update their preferences. 
When the set of ratings is the only available source of information, the traditional approach is matrix factorization. In a decision making under uncertainty setting, actions should be selected to balance exploration with exploitation; this is best modeled as a bandit problem. Matrix factors provide a latent representation of users and items. These representations may then be used as contextual information by the bandit algorithm to select items. This last point is exactly the originality of this paper: the combination of matrix factorization and bandit algorithms to solve the on-line recommendation problem. Our work is driven by considering the recommendation problem as a feedback controlled loop. This leads to interactions between the representation learning, and the recommendation policy.}, address = {Cham}, author = {Mary, J{\'{e}}r{\'{e}}mie and Gaudel, Romaric and Preux, Philippe}, booktitle = {First International Workshop on Machine Learning, Optimization, and Big Data (MOD'15)}, editor = {Pardalos, Panos and Pavone, Mario and Farinella, Giovanni Maria and Cutello, Vincenzo}, keywords = {Collaborative Filtering,Matrix Factorization,Multi-Armed Bandits,Recommender Systems,contextual Bandits,sequential Recommender Systems}, language = {en}, month = {jul}, pages = {325--336}, publisher = {Springer International Publishing}, series = {Lecture Notes in Computer Science}, title = {{Machine Learning, Optimization, and Big Data}}, url = {https://hal.inria.fr/hal-01256033}, volume = {9432}, year = {2015} } @inproceedings{chapelle2014modeling, abstract = {Download data set at http://labs.criteo.com/downloads/2014-conversion-logs-dataset/}, author = {Chapelle, Olivier}, booktitle = {Knowledge Discovery and Data Mining}, doi = {10.1145/2623330.2623634}, title = {{Modeling delayed feedback in display advertising}}, year = {2014} } @article{bartok2011minimax, abstract = {In a partial monitoring game, the learner repeatedly chooses an action, the environment responds with 
an outcome, and then the learner suffers a loss and receives a feedback signal, both of which are fixed functions of the action and the outcome. The goal of the learner is to minimize his regret, which is the difference between his total cumulative loss and the total loss of the best fixed action in hindsight. Assuming that the outcomes are generated in an i.i.d. fashion from an arbitrary and unknown probability distribution, we characterize the minimax regret of any partial monitoring game with finitely many actions and outcomes. It turns out that the minimax regret of any such game is either zero, $\tilde{\Theta}(\sqrt{T})$, $\Theta(T^{2/3})$, or $\Theta(T)$. We provide a computationally efficient learning algorithm that achieves the minimax regret within logarithmic factor for any game. {\textcopyright} 2011 G. Bart{\'{o}}k, D. P{\'{a}}l {\&} C. Szepesv{\'{a}}ri.}, author = {Bart{\'{o}}k, G{\'{a}}bor and P{\'{a}}l, D{\'{a}}vid and Szepesv{\'{a}}ri, Csaba}, journal = {Conference on Learning Theory}, keywords = {Imperfect feedback,Online learning,Regret analysis}, title = {{Minimax regret of finite partial-monitoring games in stochastic environments}}, year = {2011} } @inproceedings{ghosh2015ising, author = {Ghosh, Shaona and Pr{\"{u}}gel-Bennett, Adam}, booktitle = {European Conference on Machine Learning}, title = {{Ising bandits with side information}}, year = {2015} } @inproceedings{kveton2015tight, author = {Kveton, Branislav and Wen, Zheng and Ashkan, Azin and Szepesv{\'{a}}ri, Csaba}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Tight regret bounds for stochastic combinatorial semi-bandits}}, url = {http://proceedings.mlr.press/v38/kveton15.pdf}, year = {2015} } @book{lattimore2019bandit, author = {Lattimore, Tor and Szepesv{\'{a}}ri, Csaba}, title = {{Bandit algorithms}}, url = {http://downloads.tor-lattimore.com/book.pdf}, year = {2019} } @inproceedings{audibert2007tuning, author = {Audibert, Jean-Yves and Munos, 
R{\'{e}}mi and Szepesv{\'{a}}ri, Csaba}, booktitle = {Algorithmic Learning Theory}, title = {{Tuning Bandit Algorithms in Stochastic Environments}}, year = {2007} } @inproceedings{kanade2009sleeping, author = {Kanade, Varun and McMahan, H Brendan and Bryan, Brent}, booktitle = {Proceedings of the 12th international conference on Artificial Intelligence and Statistics}, keywords = {bandits}, mendeley-tags = {bandits}, number = {5}, pages = {272--279}, series = {AI{\&}Stats '09}, title = {{Sleeping Experts and Bandits with Stochastic Action Availability and Adversarial Rewards}}, year = {2009} } @inproceedings{maillard2011adaptive, author = {Maillard, Odalric-Ambrym and Munos, R{\'{e}}mi}, booktitle = {To appear in Proceedings of the 14th international conference on Artificial Intelligence and Statistics}, keywords = {bandits}, mendeley-tags = {bandits}, series = {JMLR W{\&}CP}, title = {{Adaptive bandits: Towards the best history-dependent strategy}}, volume = {15}, year = {2011} } @inproceedings{bubeck2013bounded, abstract = {We study the stochastic multi-armed bandit problem when one knows the value $\mu^{(\star)}$ of an optimal arm, as well as a positive lower bound on the smallest positive gap $\Delta$. We propose a new randomized policy that attains a regret \emph{uniformly bounded over time} in this setting. 
We also prove several lower bounds, which show in particular that bounded regret is not possible if one only knows $\Delta$, and bounded regret of order $1/\Delta$ is not possible if one only knows $\mu^{(\star)}$}, author = {Bubeck, S{\'{e}}bastien and Perchet, Vianney and Rigollet, Philippe}, booktitle = {Conference on Learning Theory}, file = {::}, title = {{Bounded regret in stochastic multi-armed bandits}}, year = {2013} } @article{munos2014from, abstract = {This work covers several aspects of the optimism in the face of uncertainty principle applied to large scale optimization problems under finite numerical budget. The initial motivation for the research reported here originated from the empirical success of the so-called Monte-Carlo Tree Search method popularized in computer-go and further extended to many other games as well as optimization and planning problems. Our objective is to contribute to the development of theoretical foundations of the field by characterizing the complexity of the underlying optimization problems and designing efficient algorithms with performance guarantees. The main idea presented here is that it is possible to decompose a complex decision making problem (such as an optimization problem in a large search space) into a sequence of elementary decisions, where each decision of the sequence is solved using a (stochastic) multi-armed bandit (simple mathematical model for decision making in stochastic environments). 
This so-called hierarchical bandit approach (where the reward observed by a bandit in the hierarchy is itself the return of another bandit at a deeper level) possesses the nice feature of starting the exploration by a quasi-uniform sampling of the space and then focusing progressively on the most promising area, at different scales, according to the evaluations observed so far, and eventually performing a local search around the global optima of the function. The performance of the method is assessed in terms of the optimality of the returned solution as a function of the number of function evaluations. Our main contribution to the field of function optimization is a class of hierarchical optimistic algorithms designed for general search spaces (such as metric spaces, trees, graphs, Euclidean spaces, ...) with different algorithmic instantiations depending on whether the evaluations are noisy or noiseless and whether some measure of the ''smoothness'' of the function is known or unknown. The performance of the algorithms depend on the local behavior of the function around its global optima expressed in terms of the quantity of near-optimal states measured with some metric. 
If this local smoothness of the function is known then one can design very efficient optimization algorithms (with convergence rate independent of the space dimension), and when it is not known, we can build adaptive techniques that can, in some cases, perform almost as well as when it is known.}, author = {Munos, R{\'{e}}mi}, journal = {Foundations and Trends in Machine Learning}, pages = {1--130}, title = {{From bandits to Monte-Carlo tree search: The optimistic principle applied to optimization and planning}}, url = {https://hal.archives-ouvertes.fr/hal-00747575v5/document}, volume = {7(1)}, year = {2014} } @article{rakhlin2010online, author = {Rakhlin, Alexander and Sridharan, Karthik and Tewari, Ambuj}, journal = {ArXiv e-prints}, keywords = {bandits}, mendeley-tags = {bandits}, month = {nov}, title = {{Online Learning: Beyond Regret}}, year = {2010} } @article{cesa-bianchi2005minimizing, abstract = {We investigate label efficient prediction, a variant, proposed by Helmbold and Panizza, of the problem of prediction with expert advice. In this variant, the forecaster, after guessing the next element of the sequence to be predicted, does not observe its true value unless he asks for it, which he cannot do too often. We determine matching upper and lower bounds for the best possible excess prediction error, with respect to the best possible constant predictor, when the number of allowed queries is fixed. 
We also prove that Hannan consistency, a fundamental property in game-theoretic prediction models, can be achieved by a forecaster issuing a number of queries growing to infinity at a rate just slightly faster than logarithmic in the number of prediction rounds.}, author = {Cesa-Bianchi, Nicol{\`{o}} and Lugosi, G{\'{a}}bor and Stoltz, Gilles}, journal = {IEEE Transactions on Information Theory}, keywords = {Individual sequences,Label efficient prediction,On-line learning,Prediction with expert advice}, number = {6}, pages = {2152--2162}, title = {{Minimizing regret with label efficient prediction}}, volume = {51}, year = {2005} } @article{kandasamy2019multi-fidelity, abstract = {In many scientific and engineering applications, we are tasked with the maximisation of an expensive to evaluate black box function {\$}f{\$}. Traditional settings for this problem assume just the availability of this single function. However, in many cases, cheap approximations to {\$}f{\$} may be obtainable. For example, the expensive real world behaviour of a robot can be approximated by a cheap computer simulation. We can use these approximations to eliminate low function value regions cheaply and use the expensive evaluations of {\$}f{\$} in a small but promising region and speedily identify the optimum. We formalise this task as a $\backslash$emph{\{}multi-fidelity{\}} bandit problem where the target function and its approximations are sampled from a Gaussian process. We develop MF-GP-UCB, a novel method based on upper confidence bound techniques. In our theoretical analysis we demonstrate that it exhibits precisely the above behaviour, and achieves better regret than strategies which ignore multi-fidelity information. Empirically, MF-GP-UCB outperforms such naive strategies and other multi-fidelity methods on several synthetic and real experiments.}, archivePrefix = {arXiv}, arxivId = {1603.06288}, author = {Kandasamy, Kirthevasan and Dasarathy, Gautam and Oliva, Junier B. 
and Schneider, Jeff and Poczos, Barnabas}, eprint = {1603.06288}, journal = {Journal of Artificial Intelligence Research}, month = {mar}, title = {{Multi-fidelity Gaussian Process Bandit Optimisation}}, url = {http://arxiv.org/abs/1603.06288}, year = {2019} } @article{auer2002using, author = {Auer, Peter}, journal = {Journal of Machine Learning Research}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {397--422}, title = {{Using confidence bounds for exploitation-exploration trade-offs}}, url = {http://www.jmlr.org/papers/volume3/auer02a/auer02a.pdf}, volume = {3}, year = {2002} } @article{audibert2010regret, author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien}, journal = {Journal of Machine Learning Research}, keywords = {bandits}, mendeley-tags = {bandits}, month = {dec}, pages = {2785--2836}, publisher = {JMLR.org}, title = {{Regret Bounds and Minimax Policies under Partial Monitoring}}, volume = {11}, year = {2010} } @inproceedings{auer1995gambling, author = {Auer, Peter and Cesa-Bianchi, Nicol{\`{o}} and Freund, Yoav and Schapire, Robert E}, booktitle = {Proceedings of the 36th Annual Symposium on Foundations of Computer Science}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {322--331}, title = {{Gambling in a Rigged Casino: The Adversarial Multi-Armed Bandit problem}}, year = {1995} } @article{dudik2011efficient, abstract = {We address the problem of learning in an online setting where the learner repeatedly observes features, selects among a set of actions, and receives reward for the action taken. We provide the first efficient algorithm with an optimal regret. Our algorithm uses a cost sensitive classification learner as an oracle and has a running time mathrmpolylog(N), where N is the number of classification rules among which the oracle might choose. This is exponentially faster than all previous algorithms that achieve optimal regret in this setting. 
Our formulation also enables us to create an algorithm with regret that is additive rather than multiplicative in feedback delay as in all previous work.}, author = {Dudik, Miroslav and Hsu, Daniel and Kale, Satyen and Karampatziakis, Nikos and Langford, John and Reyzin, Lev and Zhang, Tong}, journal = {Proceedings of the 27th Conference on Uncertainty in Artificial Intelligence}, title = {{Efficient Optimal Learning for Contextual Bandits}}, url = {http://arxiv.org/abs/1106.2369}, year = {2011} } @inproceedings{munos2011optimistic, abstract = {We consider a global optimization problem of a deterministic function f in a semi-metric space, given a finite budget of n evaluations. The function f is assumed to be locally smooth (around one of its global maxima) with respect to a semi-metric. We describe two algorithms based on optimistic exploration that use a hierarchical partitioning of the space at all scales. A first contribution is an algorithm, DOO, that requires the knowledge of . We report a finite-sample performance bound in terms of a measure of the quantity of near-optimal states. 
We then define a second algorithm, SOO, which does not require the knowledge of the semi-metric under which f is smooth, and whose performance is almost as good as DOO optimally-fitted.}, author = {Munos, R{\'{e}}mi}, booktitle = {Neural Information Processing Systems}, title = {{Optimistic optimization of deterministic functions without the knowledge of its smoothness}}, url = {https://papers.nips.cc/paper/4304-optimistic-optimization-of-a-deterministic-function-without-the-knowledge-of-its-smoothness.pdf}, year = {2011} } @inproceedings{guillou2016scalable, author = {Guillou, Fr{\'{e}}d{\'{e}}ric and Gaudel, Romaric and Preux, Philippe}, booktitle = {Pacific Asia Conference on Information Systems}, title = {{Scalable explore-exploit collaborative filtering}}, year = {2016} } @inproceedings{carpentier2014extreme, abstract = {In many areas of medicine, security, and life sciences, we want to allocate limited resources to different sources in order to detect extreme values. In this paper, we study an efficient way to allocate these resources sequentially under limited feedback. While sequential design of experiments is well studied in bandit theory, the most commonly optimized property is the regret with respect to the maximum mean reward. However, in other problems such as network intrusion detection, we are interested in detecting the most extreme value output by the sources. Therefore, in our work we study extreme regret which measures the efficiency of an algorithm compared to the oracle policy selecting the source with the heaviest tail. 
We propose the ExtremeHunter algorithm, provide its analysis, and evaluate it empirically on synthetic and real-world experiments.}, author = {Carpentier, Alexandra and Valko, Michal}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Carpentier, Valko - 2014 - Extreme bandits(2).pdf:pdf}, title = {{Extreme bandits}}, year = {2014} } @article{Agrawal2012, abstract = {Thompson Sampling is one of the oldest heuristics for multi-armed bandit problems. It is a randomized algorithm based on Bayesian ideas, and has recently generated significant interest after several studies demonstrated it to have better empirical performance compared to the state-of-the-art methods. However, many questions regarding its theoretical performance remained open. In this paper, we design and analyze a generalization of Thompson Sampling algorithm for the stochastic contextual multi-armed bandit problem with linear payoff functions, when the contexts are provided by an adaptive adversary. This is among the most important and widely studied versions of the contextual bandits problem. We provide the first theoretical guarantees for the contextual version of Thompson Sampling. 
We prove a high probability regret bound of {\$}\backslashtilde{\{}O{\}}(d{\^{}}{\{}3/2{\}}\backslashsqrt{\{}T{\}}){\$} (or {\$}\backslashtilde{\{}O{\}}(d\backslashsqrt{\{}T \backslashlog(N){\}}){\$}), which is the best regret bound achieved by any computationally efficient algorithm available for this problem in the current literature, and is within a factor of {\$}\backslashsqrt{\{}d{\}}{\$} (or {\$}\backslashsqrt{\{}\backslashlog(N){\}}{\$}) of the information-theoretic lower bound for this problem.}, author = {Agrawal, Shipra and Goyal, Navin}, month = {sep}, title = {{Thompson Sampling for Contextual Bandits with Linear Payoffs}}, url = {http://arxiv.org/abs/1209.3352}, year = {2012} } @inproceedings{yue2011linear, author = {Yue, Yisong and Guestrin, Carlos}, booktitle = {Neural Information Processing Systems}, title = {{Linear submodular bandits and their application to diversified retrieval}}, year = {2011} } @article{russo2017tutorial, abstract = {Thompson sampling is an algorithm for online decision problems where actions are taken sequentially in a manner that must balance between exploiting what is known to maximize immediate performance and investing to accumulate new information that may improve future performance. The algorithm addresses a broad range of problems in a computationally efficient manner and is therefore enjoying wide use. This tutorial covers the algorithm and its application, illustrating concepts through a range of examples, including Bernoulli bandit problems, shortest path problems, dynamic pricing, recommendation, active learning with neural networks, and reinforcement learning in Markov decision processes. Most of these problems involve complex information structures, where information revealed by taking an action informs beliefs about other actions. 
We will also discuss when and why Thompson sampling is or is not effective and relations to alternative algorithms.}, archivePrefix = {arXiv}, arxivId = {1707.02038}, author = {Russo, Daniel and {Van Roy}, Benjamin and Kazerouni, Abbas and Osband, Ian and Wen, Zheng}, eprint = {1707.02038}, file = {::}, journal = {Foundations and Trends in Machine Learning}, month = {jul}, title = {{A tutorial on Thompson sampling}}, url = {http://arxiv.org/abs/1707.02038}, year = {2018} } @inproceedings{ben-david2009agnostic, author = {Ben-David, Shai and P{\'{a}}l, D{\'{a}}vid and Shalev-Shwartz, Shai}, booktitle = {22nd Annual Conference on Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Agnostic Online Learning}}, year = {2009} } @inproceedings{antos2008active, abstract = {In this paper we consider the problem of actively learning the mean values of distributions associated with a finite number of options (arms). The algorithms can select which option to generate the next sample from in order to produce estimates with equally good precision for all the distributions. When an algorithm uses sample means to estimate the unknown values then the optimal solution, assuming full knowledge of the distributions, is to sample each option proportional to its variance. In this paper we propose an incremental algorithm that asymptotically achieves the same loss as an optimal rule. We prove that the excess loss suffered by this algorithm, apart from logarithmic factors, scales as n − 3/2 , which we conjecture to be the optimal rate. 
The performance of the algorithm is illustrated in a simple problem.}, author = {Antos, Andr{\'{a}}s and Grover, Varun and Szepesv{\'{a}}ri, Csaba}, editor = {Freund, Yoav and Gy{\"{o}}rfi, L{\'{a}}szl{\'{o}} and Tur{\'{a}}n, Gy{\"{o}}rgy and Zeugmann, Thomas}, booktitle = {Algorithmic Learning Theory}, title = {{Active Learning in Multi-armed Bandits}}, year = {2008} } @article{cortez2009, author = {Cortez, P and Cerdeira, A and Almeida, F and Matos, T and Reis, J}, journal = {Decision Support Systems}, pages = {547--553}, publisher = {Elsevier}, title = {{Modeling wine preferences by data mining from physicochemical properties}}, volume = {47}, year = {2009} } @inproceedings{kocak2014efficient, abstract = {We consider online learning problems under a partial observability model capturing situations where the information conveyed to the learner is between full information and bandit feedback. In the simplest variant, we assume that in addition to its own loss, the learner also gets to observe losses of some other actions. The revealed losses depend on the learner's action and a directed observation system chosen by the environment. For this setting, we propose the first algorithm that enjoys near-optimal regret guarantees without having to know the observation system before selecting its actions. Along similar lines, we also define a new partial information setting that models online combinatorial optimization problems where the feedback received by the learner is between semi-bandit and full feedback. As the predictions of our first algorithm cannot be always computed efficiently in this setting, we propose another algorithm with similar properties and with the benefit of always being computationally efficient, at the price of a slightly more complicated tuning mechanism. 
Both algorithms rely on a novel exploration strategy called implicit exploration, which is shown to be more efficient both computationally and information-theoretically than previously studied exploration strategies for the problem.}, author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Neu, Gergely and Valko, Michal and Munos, R{\'{e}}mi}, booktitle = {Neural Information Processing Systems}, title = {{Efficient learning by implicit exploration in bandit problems with side observations}}, url = {https://papers.nips.cc/paper/5462-efficient-learning-by-implicit-exploration-in-bandit-problems-with-side-observations.pdf}, year = {2014} } @inproceedings{lafferty2010, address = {Vancouver, British Columbia, Canada}, booktitle = {Proceedings of the 24th conference on advances in Neural Information Processing Systems}, editor = {Lafferty, John D and Williams, Chris K I and Shawe-Taylor, John and Zemel, Richard S and Culotta, Aron}, month = {dec}, series = {NIPS '10}, title = {{No Title}}, year = {2010} } @phdthesis{bubeck2010jeux, author = {Bubeck, S{\'{e}}bastien}, keywords = {bandits}, mendeley-tags = {bandits}, school = {Universit{\'{e}} des Sciences et des Technologies de Lille 1}, title = {{Jeux de bandits et fondations du clustering}}, year = {2010} } @article{Bao2016a, abstract = {Social networks have been popular platforms for information propagation. An important use case is viral marketing: given a promotion budget, an advertiser can choose some influential users as the seed set and provide them free or discounted sample products; in this way, the advertiser hopes to increase the popularity of the product in the users' friend circles by the word-of-mouth effect, and thus maximizes the number of users that information of the production can reach. There has been a body of literature studying the influence maximization problem. 
Nevertheless, the existing studies mostly investigate the problem on a one-off basis, assuming fixed known influence probabilities among users, or the knowledge of the exact social network topology. In practice, the social network topology and the influence probabilities are typically unknown to the advertiser, which can be varying over time, i.e., in cases of newly established, strengthened or weakened social ties. In this paper, we focus on a dynamic non-stationary social network and design a randomized algorithm, RSB, based on multi-armed bandit optimization, to maximize influence propagation over time. The algorithm produces a sequence of online decisions and calibrates its explore-exploit strategy utilizing outcomes of previous decisions. It is rigorously proven to achieve an upper-bounded regret in reward and applicable to large-scale social networks. Practical effectiveness of the algorithm is evaluated using both synthetic and real-world datasets, which demonstrates that our algorithm outperforms previous stationary methods under non-stationary conditions.}, author = {Bao, Yixin and Wang, Xiaoke and Wang, Zhi and Wu, Chuan and Lau, Francis C. M.}, month = {apr}, pages = {10}, title = {{Online Influence Maximization in Non-Stationary Social Networks}}, url = {http://arxiv.org/abs/1604.07638}, year = {2016} } @article{burnetas1996optimal, author = {Burnetas, Apostolos N. 
and Katehakis, Micha{\"{e}}l N.}, journal = {Advances in Applied Mathematics}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {122--142}, title = {{Optimal adaptive policies for sequential allocation problems}}, volume = {17}, number = {2}, year = {1996} } @inproceedings{guillory2011online, author = {Guillory, Andrew and Bilmes, Jeff}, booktitle = {Neural Information Processing Systems}, title = {{Online submodular set cover, ranking, and repeated active learning}}, year = {2011} } @inproceedings{freund2008, address = {Budapest, Hungary}, booktitle = {Proceedings of the 19th international conference on Algorithmic Learning Theory}, editor = {Freund, Yoav and Gy{\"{o}}rfi, L{\'{a}}szl{\'{o}} and Tur{\'{a}}n, Gy{\"{o}}rgy and Zeugmann, Thomas}, isbn = {978-3-540-87986-2}, keywords = {bandits}, mendeley-tags = {bandits}, month = {oct}, publisher = {Springer}, series = {ALT '08, Lecture Notes in Computer Science}, title = {{No Title}}, volume = {5254}, year = {2008} } @inproceedings{seeger2003fast, author = {Seeger, Matthias and Williams, Christopher and Lawrence, Neil}, booktitle = {International Conference on Artificial Intelligence and Statistics}, file = {::}, keywords = {Gaussian process,Greedy forward selection,Sparse approximation}, title = {{Fast forward selection to speed up sparse Gaussian process regression}}, url = {https://infoscience.epfl.ch/record/161318?ln=en}, year = {2003} } @inproceedings{buccapatnam2014stochastic, author = {Buccapatnam, Swapna and Eryilmaz, Atilla and Shroff, Ness B.}, booktitle = {International Conference on Measurement and Modeling of Computer Systems}, title = {{Stochastic bandits with side observations on networks}}, url = {https://www.orie.cornell.edu/orie/research/groups/multheavytail/upload/mabSigfinal.pdf}, year = {2014} } @inproceedings{dasgupta2009, address = {Montreal, Quebec, Canada}, booktitle = {Proceedings of the 22nd annual Conference On Learning Theory}, editor = {Dasgupta, Sanjoy and Klivans, Adam}, keywords = 
{bandits}, mendeley-tags = {bandits}, month = {jun}, series = {COLT '09}, title = {{No Title}}, year = {2009} } @article{auer2010ucb, abstract = {In the stochastic multi-armed bandit problem we consider a modification of the UCB algorithm of Auer et al. 4. For this modified algorithm we give an improved bound on the regret with respect to the optimal reward. While for the original UCB algorithm the regret in K-armed bandits after T trials is bounded by const K log(T)/Delta, where Delta measures the distance between a suboptimal arm and the optimal arm, for the modified UCB algorithm we show an upper bound on the regret of const K log (T/Delta 2) Delta.}, author = {Auer, Peter and Ortner, Ronald}, journal = {Periodica Mathematica Hungarica}, keywords = {computational,information theoretic learning with statistics,learning,statistics {\&} optimisation,theory {\&} algorithms}, title = {{UCB revisited: Improved regret bounds for the stochastic multi-armed bandit problem}}, url = {http://personal.unileoben.ac.at/rortner/Pubs/UCBRev.pdf}, year = {2010} } @article{kaufmann2012thompson, abstract = {The question of the optimality of Thompson Sampling for solving the stochastic multi-armed bandit problem had been open since 1933. In this paper we answer it positively for the case of Bernoulli rewards by providing the first finite-time analysis that matches the asymptotic rate given in the Lai and Robbins lower bound for the cumulative regret. 
The proof is accompanied by a numerical comparison with other optimal policies, experiments that have been lacking in the literature until now for the Bernoulli case.}, archivePrefix = {arXiv}, arxivId = {1205.4217}, author = {Kaufmann, Emilie and Korda, Nathaniel and Munos, R{\'{e}}mi}, eprint = {1205.4217}, journal = {Algorithmic Learning Theory}, title = {{Thompson sampling: An asymptotically optimal finite-time analysis}}, url = {https://arxiv.org/pdf/1205.4217.pdf}, year = {2012} } @inproceedings{vernade2017stochastic, abstract = {Online advertising and product recommendation are important domains of applications for multi-armed bandit methods. In these fields, the reward that is immediately available is most often only a proxy for the actual outcome of interest, which we refer to as a conversion. For instance, in web advertising, clicks can be observed within a few seconds after an ad display but the corresponding sale --if any-- will take hours, if not days to happen. This paper proposes and investigates a new stochas-tic multi-armed bandit model in the framework proposed by Chapelle (2014) --based on empirical studies in the field of web advertising-- in which each action may trigger a future reward that will then happen with a stochas-tic delay. We assume that the probability of conversion associated with each action is unknown while the distribution of the conversion delay is known, distinguishing between the (idealized) case where the conversion events may be observed whatever their delay and the more realistic setting in which late conversions are censored. We provide performance lower bounds as well as two simple but efficient algorithms based on the UCB and KLUCB frameworks. 
The latter algorithm, which is preferable when conversion rates are low, is based on a Poissonization argument, of independent interest in other settings where aggregation of Bernoulli observations with different success probabilities is required.}, archivePrefix = {arXiv}, arxivId = {1706.09186}, author = {Vernade, Claire and Capp{\'{e}}, Olivier and Perchet, Vianney}, booktitle = {Uncertainty in Artificial Intelligence}, eprint = {1706.09186}, file = {::}, title = {{Stochastic bandit models for delayed conversions}}, url = {https://arxiv.org/pdf/1706.09186.pdf}, year = {2017} } @inproceedings{auer2007improved, author = {Auer, Peter and Ortner, Ronald and Szepesv{\'{a}}ri, Csaba}, booktitle = {Conference on Learning Theory}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Improved rates for the stochastic continuum-armed bandit problem}}, url = {https://pdfs.semanticscholar.org/0bd8/cbe79bd1b6e408d916bcdf4cbed5c9ed58c6.pdf}, year = {2007} } @inproceedings{dani2008price, author = {Dani, Varsha and Hayes, Thomas P and Kakade, Sham M}, booktitle = {Neural Information Processing Systems}, issn = {00368075}, keywords = {bandits}, mendeley-tags = {bandits}, publisher = {MIT Press}, title = {{The Price of Bandit Information for Online Optimization}}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.71.4607{\&}rep=rep1{\&}type=pdf}, year = {2008} } @inproceedings{qin2017ttei, abstract = {The expected improvement (EI) algorithm is a popular strategy for information collection in optimization under uncertainty. The algorithm is widely known to be too greedy, but nevertheless enjoys wide use due to its simplicity and ability to handle uncertainty and noise in a coherent decision theoretic framework. To provide rigorous insight into EI, we study its properties in a simple setting of Bayesian optimization where the domain consists of a finite grid of points. 
This is the so-called best-arm identification problem, where the goal is to allocate measurement effort wisely to confidently identify the best arm using a small number of measurements. In this framework, one can show formally that EI is far from optimal. To overcome this shortcoming, we introduce a simple modification of the expected improvement algorithm. Surprisingly, this simple change results in an algorithm that is asymptotically optimal for Gaussian best-arm identification problems, and provably outperforms standard EI by an order of magnitude.}, archivePrefix = {arXiv}, arxivId = {1705.10033}, author = {Qin, Chao and Klabjan, Diego and Russo, Daniel}, booktitle = {Neural Information Processing Systems}, eprint = {1705.10033}, pages = {5381--5391}, title = {{Improving the expected improvement algorithm}}, url = {http://arxiv.org/abs/1705.10033}, year = {2017} } @inproceedings{bonald2013two-target, author = {Bonald, Thomas and Prouti{\`{e}}re, Alexandre}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Bonald, Proutiere - 2013 - Two-Target Algorithms for Infinite-Armed Bandits with Bernoulli Rewards.pdf:pdf}, title = {{Two-target algorithms for infinite-armed bandits with Bernoulli rewards}}, year = {2013} } @article{bartok2014partial, abstract = {In a partial monitoring game, the learner repeatedly chooses an action, the environment responds with an outcome, and then the learner suffers a loss and receives a feedback signal, both of which are fixed functions of the action and the outcome. The goal of the learner is to minimize his regret, which is the difference between his total cumulative loss and the total loss of the best fixed action in hindsight. In this paper we characterize the minimax regret of any partial monitoring game with finitely many actions and outcomes. 
It turns out that the minimax regret of any such game is either zero, $\Theta$(√ T), $\Theta$(T 2/3), or $\Theta$(T). We provide computationally efficient learning algorithms that achieve the minimax regret within logarithmic factor for any game. In addition to the bounds on the minimax regret, if we assume that the outcomes are generated in an i.i.d. fashion, we prove individual upper bounds on the expected regret.}, author = {Bart{\'{o}}k, G{\'{a}}bor and Foster, Dean P. and P{\'{a}}l, D{\'{a}}vid and Rakhlin, Alexander and Szepesv{\'{a}}ri, Csaba}, journal = {Mathematics of Operations Research}, number = {4}, pages = {967--997}, title = {{Partial monitoring-classification, regret bounds, and algorithms}}, volume = {39}, year = {2014} } @inproceedings{katariya2017bernoulli, abstract = {The probability that a user will click a search result depends both on its relevance and its position on the results page. The position based model explains this behavior by ascribing to every item an attraction probability, and to every position an examination probability. To be clicked, a result must be both attractive and examined. The probabilities of an item-position pair being clicked thus form the entries of a rank-{\$}1{\$} matrix. We propose the learning problem of a Bernoulli rank-{\$}1{\$} bandit where at each step, the learning agent chooses a pair of row and column arms, and receives the product of their Bernoulli-distributed values as a reward. This is a special case of the stochastic rank-{\$}1{\$} bandit problem considered in recent work that proposed an elimination based algorithm Rank1Elim, and showed that Rank1Elim's regret scales linearly with the number of rows and columns on "benign" instances. These are the instances where the minimum of the average row and column rewards {\$}\backslashmu{\$} is bounded away from zero. 
The issue with Rank1Elim is that it fails to be competitive with straightforward bandit strategies as {\$}\backslashmu \backslashrightarrow 0{\$}. In this paper we propose Rank1ElimKL which simply replaces the (crude) confidence intervals of Rank1Elim with confidence intervals based on Kullback-Leibler (KL) divergences, and with the help of a novel result concerning the scaling of KL divergences we prove that with this change, our algorithm will be competitive no matter the value of {\$}\backslashmu{\$}. Experiments with synthetic data confirm that on benign instances the performance of Rank1ElimKL is significantly better than that of even Rank1Elim, while experiments with models derived from real data confirm that the improvements are significant across the board, regardless of whether the data is benign or not.}, author = {Katariya, Sumeet and Kveton, Branislav and Szepesv{\'{a}}ri, Csaba and Vernade, Claire and Wen, Zheng}, booktitle = {International Joint Conference on Artificial Intelligence}, file = {::}, title = {{Bernoulli rank-1 bandits for click feedback}}, year = {2017} } @article{whittle1980multi, author = {Whittle, Peter}, issn = {00359246}, journal = {Journal of the Royal Statistical Society. 
Series B (Methodological)}, keywords = {bandits}, mendeley-tags = {bandits}, number = {2}, pages = {143--149}, publisher = {Blackwell Publishing for the Royal Statistical Society}, title = {{Multi-Armed Bandits and the Gittins Index}}, volume = {42}, year = {1980} } @inproceedings{maillard2010online, address = {Berlin, Heidelberg}, author = {Maillard, Odalric-Ambrym and Munos, R{\'{e}}mi}, booktitle = {Proceedings of the 2010 European Conference on Machine Learning and Knowledge Discovery in Databases: Part II}, isbn = {3-642-15882-X, 978-3-642-15882-7}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {305--320}, publisher = {Springer-Verlag}, series = {ECML PKDD'10}, title = {{Online learning in adversarial Lipschitz environments}}, year = {2010} } @techreport{garivier2018klucb, abstract = {In the context of K-armed stochastic bandits with distribution only assumed to be supported by [0, 1], we introduce a new algorithm, KL-UCB-switch, and prove that it enjoys simultaneously a distribution-free regret bound of optimal order $\backslash$sqrt{\{}KT{\}} and a distribution-dependent regret bound of optimal order as well, that is, matching the $\backslash$kappa $\backslash$ln T lower bound by Lai and Robbins (1985) and Burnetas and Katehakis (1996).}, archivePrefix = {arXiv}, arxivId = {1805.05071}, author = {Garivier, Aur{\'{e}}lien and Hadiji, H{\'{e}}di and Menard, Pierre and Stoltz, Gilles}, eprint = {1805.05071}, month = {may}, title = {{KL-UCB-switch: optimal regret bounds for stochastic bandits from both a distribution-dependent and a distribution-free viewpoints}}, url = {http://arxiv.org/abs/1805.05071}, year = {2018} } @inproceedings{neu2014online, abstract = {Most work on sequential learning assumes a fixed set of actions that are available all the time. However, in practice, actions can consist of picking subsets of readings from sensors that may break from time to time, road segments that can be blocked or goods that are out of stock. 
In this paper we study learning algorithms that are able to deal with stochastic availability of such unreliable composite actions. We propose and analyze algorithms based on the Follow-The-Perturbed-Leader prediction method for several learning settings differing in the feedback provided to the learner. Our algorithms rely on a novel loss estimation technique that we call Counting Asleep Times. We deliver regret bounds for our algorithms for the previously studied full information and (semi-)bandit settings, as well as a natural middle point between the two that we call the restricted information setting. A special consequence of our results is a significant improvement of the best known performance guarantees achieved by an efficient algorithm for the sleeping bandit problem with stochastic availability. Finally, we evaluate our algorithms empirically and show their improvement over the known approaches.}, author = {Neu, Gergely and Valko, Michal}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Neu, Valko - 2014 - Online combinatorial optimization with stochastic decision sets and adversarial losses(2).pdf:pdf}, title = {{Online combinatorial optimization with stochastic decision sets and adversarial losses}}, year = {2014} } @inproceedings{kakadeefficient, author = {Kakade, Sham M and Shalev-Shwartz, Shai and Tewari, Ambuj}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {440--447}, title = {{Efficient bandit algorithms for online multiclass prediction}} } @inproceedings{chowdhury2017kernelized, author = {Chowdhury, Sayak Ray and Gopalan, Aditya}, booktitle = {International Conference on Machine Learning}, title = {{On kernelized multi-armed bandits}}, url = {http://proceedings.mlr.press/v70/chowdhury17a/chowdhury17a.pdf}, year = {2017} } @article{guha2007approximation, author = {Guha, Sudipto and Munagala, Kamesh and Shi, Peng}, journal = {CoRR}, keywords = {bandits}, 
mendeley-tags = {bandits}, title = {{Approximation Algorithms for Restless Bandit Problems}}, volume = {abs/0711.3}, year = {2007} } @article{poland2008nonstochastic, author = {Poland, Jan}, journal = {Theoretical Computer Science}, keywords = {bandits}, mendeley-tags = {bandits}, month = {jul}, number = {1--3}, pages = {77--93}, title = {{Nonstochastic bandits: Countable decision set, unbounded costs and reactive environments.}}, volume = {397}, year = {2008} } @inproceedings{alon2015online, abstract = {We study a general class of online learning problems where the feedback is specified by a graph. This class includes online prediction with expert advice and the multi-armed bandit problem, but also several learning problems where the online player does not necessarily observe his own loss. We analyze how the structure of the feedback graph controls the inherent difficulty of the induced {\$}T{\$}-round learning problem. Specifically, we show that any feedback graph belongs to one of three classes: strongly observable graphs, weakly observable graphs, and unobservable graphs. We prove that the first class induces learning problems with {\$}\backslashwidetilde\backslashTheta(\backslashalpha{\^{}}{\{}1/2{\}} T{\^{}}{\{}1/2{\}}){\$} minimax regret, where {\$}\backslashalpha{\$} is the independence number of the underlying graph; the second class induces problems with {\$}\backslashwidetilde\backslashTheta(\backslashdelta{\^{}}{\{}1/3{\}}T{\^{}}{\{}2/3{\}}){\$} minimax regret, where {\$}\backslashdelta{\$} is the domination number of a certain portion of the graph; and the third class induces problems with linear minimax regret. Our results subsume much of the previous work on learning with feedback graphs and reveal new connections to partial monitoring games. 
We also show how the regret is affected if the graphs are allowed to vary with time.}, author = {Alon, Noga and Cesa-Bianchi, Nicol{\`{o}} and Dekel, Ofer and Koren, Tomer}, booktitle = {Conference on Learning Theory}, title = {{Online learning with feedback graphs: Beyond bandits}}, url = {http://proceedings.mlr.press/v40/Alon15.pdf}, year = {2015} } @inproceedings{kleinberg2005nearly, author = {Kleinberg, Robert D}, booktitle = {Neural Information Processing Systems}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Nearly tight bounds for the continuum-armed bandit problem}}, url = {https://papers.nips.cc/paper/2634-nearly-tight-bounds-for-the-continuum-armed-bandit-problem.pdf}, year = {2005} } @inproceedings{ghosh2017misspecified, archivePrefix = {arXiv}, arxivId = {1704.06880}, author = {Ghosh, Avishek and Chowdhury, Sayak Ray and Gopalan, Aditya}, booktitle = {AAAI Conference on Artificial Intelligence}, eprint = {1704.06880}, file = {::}, month = {apr}, title = {{Misspecified linear bandits}}, url = {https://arxiv.org/abs/1704.06880}, year = {2017} } @inproceedings{Neu2015a, author = {Neu, Gergely}, booktitle = {Advances in Neural Information Processing Systems}, pages = {3150--3158}, title = {{Explore no more: Improved high-probability regret bounds for non-stochastic bandits}}, url = {http://machinelearning.wustl.edu/mlpapers/papers/NIPS2015{\_}5732}, year = {2015} } @inproceedings{cohen2008, address = {Helsinki, Finland}, booktitle = {Proceedings of the 25th International Conference on Machine Learning}, editor = {Cohen, William W and McCallum, Andrew and Roweis, Sam T}, isbn = {978-1-60558-205-4}, keywords = {bandits}, mendeley-tags = {bandits}, month = {jun}, publisher = {ACM}, series = {ICML '08, ACM International Conference Proceeding Series}, title = {{No Title}}, volume = {307}, year = {2008} } @inproceedings{mannor2011from, author = {Mannor, Shie and Shamir, Ohad}, booktitle = {Neural Information Processing Systems}, title = {{From 
bandits to experts: On the value of side-observations}}, url = {https://papers.nips.cc/paper/4366-from-bandits-to-experts-on-the-value-of-side-observations.pdf}, year = {2011} } @article{tu2015bandit, author = {Tu, Shi-tao and Zhu, Lan-juan}, doi = {10.1007/s12204-015-1618-7}, issn = {19958188}, journal = {Journal of Shanghai Jiaotong University (Science)}, keywords = {bandit,matrix factorization,recommend}, number = {5}, pages = {535--539}, title = {{A bandit method using probabilistic matrix factorization in recommendation}}, volume = {20}, year = {2015} } @article{Calandriello2019, abstract = {Gaussian processes (GP) are a popular Bayesian approach for the optimization of black-box functions. Despite their effectiveness in simple problems, GP-based algorithms hardly scale to complex high-dimensional functions, as their per-iteration time and space cost is at least quadratic in the number of dimensions d and iterations t. Given a set of A alternative to choose from, the overall runtime O(t3A) quickly becomes prohibitive. In this paper, we introduce BKB (budgeted kernelized bandit), a novel approximate GP algorithm for optimization under bandit feedback that achieves near-optimal regret (and hence near-optimal convergence rate) with near-constant per-iteration complexity and no assumption on the input space or covariance of the GP. Combining a kernelized linear bandit algorithm (GP-UCB) with randomized matrix sketching technique (i.e., leverage score sampling), we prove that selecting inducing points based on their posterior variance gives an accurate low-rank approximation of the GP, preserving variance estimates and confidence intervals. As a consequence, BKB does not suffer from variance starvation, an important problem faced by many previous sparse GP approximations. Moreover, we show that our procedure selects at most Õ (deff) points, where deff is the effective dimension of the explored space, which is typically much smaller than both d and t. 
This greatly reduces the dimensionality of the problem, thus leading to a O(TAd2eff) runtime and O(Adeff) space complexity.}, archivePrefix = {arXiv}, arxivId = {1903.05594}, author = {Calandriello, Daniele and Carratino, Luigi and Lazaric, Alessandro and Valko, Michal and Rosasco, Lorenzo}, eprint = {1903.05594}, month = {mar}, title = {{Gaussian process optimization with adaptive sketching: Scalable and no regret}}, url = {https://arxiv.org/abs/1903.05594}, year = {2019} } @article{bnaya2013social, abstract = {In many cases the best way to find a profile or a set of profiles matching some criteria in a socialnetwork is via targeted crawling. An important challenge in targeted crawling is choosing the next profileto explore. Existing heuristics for targeted crawling are usually tailored for specific search criterionand could lead to short-sighted crawling decisions. In this paper we propose and evaluate a generic ap- proach for guiding targeted crawling which is based on recent developments in Artificial Intelligence. Ourapproach, based on the recently introduced variant of the Multi-Armed Bandit problem with volatile arms(VMAB), aims to provide a proper balance between exploration and exploitation during the crawling process. Unlike other heuristics which are hand tailored for specific type of search queries, our approach isgeneral-purpose. In addition, it provides provable performance guarantees. Experimental results indicate that our approach compares favorably with the best existing heuristics on two different domains.}, author = {Bnaya, Zahy and Puzis, Rami and Stern, Roni and Felner, Ariel}, journal = {Human Journal}, number = {2}, pages = {84--98}, title = {{Social network search as a volatile multi-armed bandit problem}}, volume = {2}, year = {2013} } @article{lai1985asymptotically, author = {Lai, Tze L. 
and Robbins, Herbert}, journal = {Advances in Applied Mathematics}, keywords = {bandit,bandits}, mendeley-tags = {bandits}, number = {1}, pages = {4--22}, publisher = {Elsevier}, title = {{Asymptotically efficient adaptive allocation rules}}, url = {https://ac.els-cdn.com/0196885885900028/1-s2.0-0196885885900028-main.pdf?{\_}tid=6ded14a5-1fe6-4c09-a1e3-9738a40b46d4{\&}acdnat=1539373065{\_}3220aa4053ab6e1f5db385fd4ef37e61}, volume = {6}, year = {1985} } @inproceedings{zolghadr2013online, author = {Zolghadr, Navid and Bartok, Gabor and Greiner, Russell and Gy{\"{o}}rgy, Andr{\'{a}}s and Szepesvari, Csaba}, booktitle = {Advances in Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Zolghadr et al. - 2013 - Online Learning with Costly Features and Labels.pdf:pdf}, pages = {1241--1249}, title = {{Online Learning with Costly Features and Labels}}, url = {http://papers.nips.cc/paper/5149-online-learning-with-costly-features-and-labels}, year = {2013} } @inproceedings{slivkins2008adapting, author = {Slivkins, Aleksandrs and Upfal, Eli}, booktitle = {COLT}, editor = {Servedio, Rocco A and Zhang, Tong}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {343--354}, publisher = {Omnipress}, title = {{Adapting to a Changing Environment: the Brownian Restless Bandits}}, year = {2008} } @inproceedings{bshouty2007, address = {San Diego, CA, USA}, booktitle = {Proceedings of the 20th annual Conference On Learning Theory}, editor = {Bshouty, Nader H and Gentile, Claudio}, isbn = {978-3-540-72925-9}, keywords = {bandits}, mendeley-tags = {bandits}, month = {jun}, publisher = {Springer}, series = {COLT '07, Lecture Notes in Computer Science}, title = {{No Title}}, volume = {4539}, year = {2007} } @inproceedings{hanawal2015cheap, abstract = {We consider stochastic sequential learning problems where the learner can observe the average reward of several actions. 
Such a setting is interesting in many applications involving monitoring and surveillance, where the set of the actions to observe represent some (geographical) area. The importance of this setting is that in these applications, it is actually cheaper to observe average reward of a group of actions rather than the reward of a single action. We show that when the reward is smooth over a given graph representing the neighboring actions, we can maximize the cumulative reward of learning while minimizing the sensing cost. In this paper we propose CheapUCB, an algorithm that matches the regret guarantees of the known algorithms for this setting and at the same time guarantees a linear cost again over them. As a by-product of our analysis, we establish a Omega($\backslash$sqrt(dT)) lower bound on the cumulative regret of spectral bandits for a class of graphs with effective dimension d.}, author = {Hanawal, Manjesh and Saligrama, Venkatesh and Valko, Michal and Munos, R{\'{e}}mi}, booktitle = {International Conference on Machine Learning}, title = {{Cheap bandits}}, url = {http://proceedings.mlr.press/v37/hanawal15.pdf}, year = {2015} } @inproceedings{auer2005, address = {Bertinoro, Italy}, booktitle = {Proceedings of the 18th annual Conference On Learning Theory}, editor = {Auer, Peter and Meir, Ron}, isbn = {3-540-26556-2}, keywords = {bandits}, mendeley-tags = {bandits}, month = {jun}, publisher = {Springer}, series = {COLT '05, Lecture Notes in Computer Science}, title = {{No Title}}, volume = {3559}, year = {2005} } @article{thompson1933likelihood, author = {Thompson, William R.}, journal = {Biometrika}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {285--294}, title = {{On the likelihood that one unknown probability exceeds another in view of the evidence of two samples}}, url = {https://www.jstor.org/stable/2332286}, volume = {25}, year = {1933} } @inproceedings{coquelin2007bandit, author = {Coquelin, Pierre-Arnaud and Munos, R{\'{e}}mi}, booktitle = 
{Uncertainty in Artificial Intelligence}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Bandit algorithms for tree search}}, url = {https://arxiv.org/pdf/1408.2028.pdf}, year = {2007} } @article{srinivas2010gaussian, abstract = {Many applications require optimizing an unknown, noisy function that is expensive to evaluate. We formalize this task as a multi-armed bandit problem, where the payoff function is either sampled from a Gaussian process (GP) or has low RKHS norm. We resolve the important open problem of deriving regret bounds for this setting, which imply novel convergence rates for GP optimization. We analyze GP-UCB, an intuitive upper-confidence based algorithm, and bound its cumulative regret in terms of maximal information gain, establishing a novel connection between GP optimization and experimental design. Moreover, by bounding the latter in terms of operator spectra, we obtain explicit sublinear regret bounds for many commonly used covariance functions. In some important cases, our bounds have surprisingly weak dependence on the dimensionality. In our experiments on real sensor data, GP-UCB compares favorably with other heuristical GP optimization approaches.}, author = {Srinivas, Niranjan and Krause, Andreas and Kakade, Sham M. 
and Seeger, Matthias}, journal = {International Conference on Machine Learning}, title = {{Gaussian process optimization in the bandit setting: No regret and experimental design}}, url = {https://arxiv.org/pdf/0912.3995.pdf}, year = {2010} } @book{Shawe-Taylor2004, author = {Shawe-Taylor, John and Cristianini, Nello}, publisher = {Cambridge University Press}, title = {{Kernel Methods for Pattern Analysis}}, year = {2004} } @inproceedings{kalai2010, booktitle = {Proceedings of the 23rd annual Conference On Learning Theory}, editor = {Kalai, Adam Tauman and Mohri, Mehryar}, isbn = {978-0-9822529-2-5}, keywords = {bandits}, mendeley-tags = {bandits}, month = {jun}, publisher = {Omnipress}, title = {{No Title}}, year = {2010} } @article{grunewalder2017approximations, abstract = {The multi-armed restless bandit problem is studied in the case where the pay-offs are not necessarily independent over time nor across the arms. Even though this version of the problem provides a more realistic model for most real-world applications, it cannot be optimally solved in practice since it is known to be PSPACE-hard. The objective of this paper is to characterize special sub-classes of the problem where good approximate solutions can be found using tractable approaches. Specifically, it is shown that in the case where the joint distribution over the arms is {\$}\backslashvarphi{\$}-mixing, and under some conditions on the {\$}\backslashvarphi{\$}-mixing coefficients, a modified version of UCB can prove optimal. On the other hand, it is shown that when the pay-off distributions are strongly dependent, simple switching strategies may be devised which leverage the strong inter-dependencies. To this end, an example is provided using Gaussian Processes. 
The techniques developed in this paper apply, more generally, to the problem of online sampling under dependence.}, archivePrefix = {arXiv}, arxivId = {1702.06972}, author = {Grunewalder, Steffen and Khaleghi, Azadeh}, eprint = {1702.06972}, issn = {1533-7928}, journal = {Journal of Machine Learning Research}, number = {14}, pages = {1--37}, publisher = {MIT Press}, title = {{Approximations of the Restless Bandit Problem}}, url = {http://www.jmlr.org/papers/v20/17-547.html http://arxiv.org/abs/1702.06972}, volume = {20}, year = {2017} } @techreport{combes2015unimodal, abstract = {We consider stochastic bandit problems with a continuous set of arms and where the expected reward is a continuous and unimodal function of the arm. No further assumption is made regarding the smoothness and the structure of the expected reward function. For these problems, we propose the Stochastic Pentachotomy (SP) algorithm, and derive finite-time upper bounds on its regret and optimization error. In particular, we show that, for any expected reward function {\$}\backslashmu{\$} that behaves as {\$}\backslashmu(x)=\backslashmu(x{\^{}}\backslashstar)-C|x-x{\^{}}\backslashstar|{\^{}}\backslashxi{\$} locally around its maximizer {\$}x{\^{}}\backslashstar{\$} for some {\$}\backslashxi, C{\textgreater}0{\$}, the SP algorithm is order-optimal. Namely its regret and optimization error scale as {\$}O(\backslashsqrt{\{}T\backslashlog(T){\}}){\$} and {\$}O(\backslashsqrt{\{}\backslashlog(T)/T{\}}){\$}, respectively, when the time horizon {\$}T{\$} grows large. These scalings are achieved without the knowledge of {\$}\backslashxi{\$} and {\$}C{\$}. Our algorithm is based on asymptotically optimal sequential statistical tests used to successively trim an interval that contains the best arm with high probability. 
To our knowledge, the SP algorithm constitutes the first sequential arm selection rule that achieves a regret and optimization error scaling as {\$}O(\backslashsqrt{\{}T{\}}){\$} and {\$}O(1/\backslashsqrt{\{}T{\}}){\$}, respectively, up to a logarithmic factor for non-smooth expected reward functions, as well as for smooth functions with unknown smoothness.}, archivePrefix = {arXiv}, arxivId = {1406.7447}, author = {Combes, Richard and Prouti{\`{e}}re, Alexandre}, eprint = {1406.7447}, title = {{Unimodal Bandits without Smoothness}}, year = {2015} } @inproceedings{alon2013from, abstract = {We consider the partial observability model for multi-armed bandits, introduced by Mannor and Shamir. Our main result is a characterization of regret in the directed observability model in terms of the dominating and independence numbers of the observability graph. We also show that in the undirected case, the learner can achieve optimal regret without even accessing the observability graph before selecting an action. 
Both results are shown using variants of the Exp3 algorithm operating on the observability graph in a time-efficient manner.}, author = {Alon, Noga and Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio and Mansour, Yishay}, booktitle = {Neural Information Processing Systems}, title = {{From bandits to experts: A tale of domination and independence}}, url = {https://papers.nips.cc/paper/4908-from-bandits-to-experts-a-tale-of-domination-and-independence.pdf}, year = {2013} } @inproceedings{brodley2004, address = {Banff, Alberta, Canada}, booktitle = {Proceedings of the 21st International Conference on Machine Learning}, editor = {Brodley, Carla E}, keywords = {bandits}, mendeley-tags = {bandits}, month = {jul}, publisher = {ACM}, series = {ICML '04, ACM International Conference Proceeding Series}, title = {{No Title}}, volume = {69}, year = {2004} } @misc{Guillou2015a, abstract = {Recommender Systems (RS) aim at suggesting to users one or several items in which they might have interest. Following the feedback they receive from the user, these systems have to adapt their model in order to improve future recommendations. The repetition of these steps defines the RS as a sequential process. This sequential aspect raises an exploration-exploitation dilemma, which is surprisingly rarely taken into account for RS without contextual information. In this paper we present an explore-exploit collaborative filtering RS, based on Matrix Factor-ization and Bandits algorithms. Using experiments on artificial and real datasets, we show the importance and practicability of using sequential approaches to perform recommendation. 
We also study the impact of the model update on both the quality and the computation time of the recommendation procedure.}, author = {Guillou, Fr{\'{e}}d{\'{e}}ric and Gaudel, Romaric and Preux, Philippe}, booktitle = {NIPS'15 Workshop: Machine Learning for eCommerce}, keywords = {Collaborative Filtering,Matrix Factorization,Multi-Armed Bandtis,Recommender Systems,sequential Recommender Systems}, language = {en}, month = {dec}, title = {{Collaborative Filtering as a Multi-Armed Bandit}}, url = {https://hal.inria.fr/hal-01256254}, year = {2015} } @article{alon2014nonstochastic, abstract = {We present and study a partial-information model of online learning, where a decision maker repeatedly chooses from a finite set of actions, and observes some subset of the associated losses. This naturally models several situations where the losses of different actions are related, and knowing the loss of one action provides information on the loss of other actions. Moreover, it generalizes and interpolates between the well studied full-information setting (where all losses are revealed) and the bandit setting (where only the loss of the action chosen by the player is revealed). 
We provide several algorithms addressing different variants of our setting, and provide tight regret bounds depending on combinatorial properties of the information feedback structure.}, archivePrefix = {arXiv}, arxivId = {1409.8428}, author = {Alon, Noga and Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio and Mannor, Shie and Mansour, Yishay and Shamir, Ohad}, eprint = {1409.8428}, journal = {SIAM Journal on Computing}, number = {6}, pages = {1785--1826}, title = {{Nonstochastic multi-armed bandits with graph-structured feedback}}, url = {https://arxiv.org/abs/1409.8428}, volume = {46}, year = {2017} } @inproceedings{scholkopf2006, address = {Vancouver, British Columbia, Canada}, booktitle = {Proceedings of the 20th conference on advances in Neural Information Processing Systems}, editor = {Sch{\"{o}}lkopf, Bernhard and Platt, John C and Hoffman, Thomas}, isbn = {0-262-19568-2}, keywords = {bandits}, mendeley-tags = {bandits}, month = {dec}, publisher = {MIT Press}, series = {NIPS '06}, title = {{No Title}}, year = {2006} } @inproceedings{vaswani2015influence, abstract = {Most work on influence maximization assumes network influence probabilities are given. The few papers that propose algorithms for learning these probabilities assume the availability of a batch of diffusion cascades and learn the probabilities offline. We tackle the real but difficult problems of (i)learning in influence probabilities and (ii) maximizing influence spread, when no cascades are available as input, by adopting a combinatorial multi-armed bandit (CMAB) paradigm. We formulate the above problems respectively as network exploration, i.e., minimizing the error in learned influence probabilities, and minimization of loss in spread from choosing suboptimal seed sets over the rounds of a CMAB game. We propose algorithms for both problems and establish bounds on their performance. 
Finally, we demonstrate the effectiveness and usefulness of the proposed algorithms via a comprehensive set of experiments over three real datasets.}, author = {Vaswani, Sharan and Lakshmanan, Laks. V. S. and {Mark Schmidt}}, booktitle = {NIPS workshop on Networks in the Social and Information Sciences 2015}, title = {{Influence maximization with bandits}}, year = {2015} } @inproceedings{sen2017contextual, abstract = {Motivated by online recommendation and advertising systems, we consider a causal model for stochastic contextual bandits with a latent low-dimensional confounder. In our model, there are {\$}L{\$} observed contexts and {\$}K{\$} arms of the bandit. The observed context influences the reward obtained through a latent confounder variable with cardinality {\$}m{\$} ({\$}m \backslashll L,K{\$}). The arm choice and the latent confounder causally determines the reward while the observed context is correlated with the confounder. Under this model, the {\$}L \backslashtimes K{\$} mean reward matrix {\$}\backslashmathbf{\{}U{\}}{\$} (for each context in {\$}[L]{\$} and each arm in {\$}[K]{\$}) factorizes into non-negative factors {\$}\backslashmathbf{\{}A{\}}{\$} ({\$}L \backslashtimes m{\$}) and {\$}\backslashmathbf{\{}W{\}}{\$} ({\$}m \backslashtimes K{\$}). This insight enables us to propose an {\$}\backslashepsilon{\$}-greedy NMF-Bandit algorithm that designs a sequence of interventions (selecting specific arms), that achieves a balance between learning this low-dimensional structure and selecting the best arm to minimize regret. Our algorithm achieves a regret of {\$}\backslashmathcal{\{}O{\}}\backslashleft(L\backslashmathrm{\{}poly{\}}(m, \backslashlog K) \backslashlog T \backslashright){\$} at time {\$}T{\$}, as compared to {\$}\backslashmathcal{\{}O{\}}(LK\backslashlog T){\$} for conventional contextual bandits, assuming a constant gap between the best arm and the rest for each context. 
These guarantees are obtained under mild sufficiency conditions on the factors that are weaker versions of the well-known Statistical RIP condition. We further propose a class of generative models that satisfy our sufficient conditions, and derive a lower bound of {\$}\backslashmathcal{\{}O{\}}\backslashleft(Km\backslashlog T\backslashright){\$}. These are the first regret guarantees for online matrix completion with bandit feedback, when the rank is greater than one. We further compare the performance of our algorithm with the state of the art, on synthetic and real world data-sets.}, archivePrefix = {arXiv}, arxivId = {1606.00119}, author = {Sen, Rajat and Shanmugam, Karthikeyan and Kocaoglu, Murat and Dimakis, Alexandros G. and Shakkottai, Sanjay}, booktitle = {International Conference on Artificial Intelligence and Statistics}, eprint = {1606.00119}, file = {::}, title = {{Contextual bandits with Latent confounders: An NMF approach}}, year = {2017} } @article{silver2016mastering, abstract = {The game of Go has long been viewed as the most challenging of classic games for artificial intelligence owing to its enormous search space and the difficulty of evaluating board positions and moves. Here we introduce a new approach to computer Go that uses ‘value networks' to evaluate board positions and ‘policy networks' to select moves. These deep neural networks are trained by a novel combination of supervised learning from human expert games, and reinforcement learning from games of self-play. Without any lookahead search, the neural networks play Go at the level of state-of-the-art Monte Carlo tree search programs that simulate thousands of random games of self-play. We also introduce a new search algorithm that combines Monte Carlo simulation with value and policy networks. Using this search algorithm, our program AlphaGo achieved a 99.8{\%} winning rate against other Go programs, and defeated the human European Go champion by 5 games to 0. 
This is the first time that a computer program has defeated a human professional player in the full-sized game of Go, a feat previously thought to be at least a decade away.}, author = {Silver, David and Huang, Aja and Maddison, Chris J. and Guez, Arthur and Sifre, Laurent and van den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and Dieleman, Sander and Grewe, Dominik and Nham, John and Kalchbrenner, Nal and Sutskever, Ilya and Lillicrap, Timothy and Leach, Madeleine and Kavukcuoglu, Koray and Graepel, Thore and Hassabis, Demis}, journal = {Nature}, number = {7587}, pages = {484--489}, shorttitle = {Nature}, title = {{Mastering the game of Go with deep neural networks and tree search}}, url = {http://www0.cs.ucl.ac.uk/staff/d.silver/web/Publications{\_}files/unformatted{\_}final{\_}mastering{\_}go.pdf}, volume = {529}, year = {2016} } @inproceedings{menard2017minimax, abstract = {We propose the kl-UCB ++ algorithm for regret minimization in stochastic bandit models with exponential families of distributions. We prove that it is simultaneously asymptotically optimal (in the sense of Lai and Robbins' lower bound) and minimax optimal. This is the first algorithm proved to enjoy these two properties at the same time. 
This work thus merges two different lines of research with simple and clear proofs.}, archivePrefix = {arXiv}, arxivId = {arXiv:1702.07211v2}, author = {M{\'{e}}nard, Pierre and Garivier, Aur{\'{e}}lien and Hanneke, Steve and Reyzin, Lev}, booktitle = {Algorithmic Learning Theory}, eprint = {arXiv:1702.07211v2}, keywords = {Stochastic multi-armed bandits,asymptotic optimality,mini-max optimality,regret analysis,upper confidence bound (UCB)}, title = {{A minimax and asymptotically optimal algorithm for stochastic bandits}}, year = {2017} } @inproceedings{joulani2013online, author = {Joulani, Pooria and Gyorgy, Andras and Szepesvari, Csaba}, booktitle = {International Conference on Machine Learning}, title = {{Online learning under delayed feedback}}, url = {http://proceedings.mlr.press/v28/joulani13.pdf}, year = {2013} } @inproceedings{zinkevich2003online, author = {Zinkevich, Martin}, booktitle = {Proceedings of the 20th International Conference on Machine Learning}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {928--936}, title = {{Online Convex Programming and Generalized Infinitesimal Gradient Ascent}}, year = {2003} } @article{gai2012combinatorial, abstract = {In the classic multi-armed bandits problem, the goal is to have a policy for dynamically operating arms that each yield stochastic rewards with unknown means. The key metric of interest is regret, defined as the gap between the expected total reward accumulated by an omniscient player that knows the reward means for each arm, and the expected total reward accumulated by the given policy. The policies presented in prior work have storage, computation and regret all growing linearly with the number of arms, which is not scalable when the number of arms is large. We consider in this work a broad class of multi-armed bandits with dependent arms that yield rewards as a linear combination of a set of unknown parameters. 
For this general framework, we present efficient policies that are shown to achieve regret that grows logarithmically with time, and polynomially in the number of unknown parameters (even though the number of dependent arms may grow exponentially). Furthermore, these policies only require storage that grows linearly in the number of unknown parameters. We show that this generalization is broadly applicable and useful for many interesting tasks in networks that can be formulated as tractable combinatorial optimization problems with linear objective functions, such as maximum weight matching, shortest path, and minimum spanning tree computations.}, author = {Gai, Yi and Krishnamachari, Bhaskar and Jain, Rahul}, journal = {Transactions on Networking}, keywords = {Combinatorial network optimization,multi-armed bandits (MABs),online learning}, number = {5}, pages = {1466--1478}, title = {{Combinatorial network optimization with unknown variables: Multi-armed bandits with linear rewards and individual observations}}, url = {https://ieeexplore.ieee.org/document/6166915/}, volume = {20}, year = {2012} } @incollection{seldin2011pac, author = {Seldin, Yevgeny and Auer, Peter and Laviolette, Francois and Shawe-Taylor, John S and Ortner, Ronald}, booktitle = {Neural Information Processing Systems (NIPS)}, pages = {1683--1691}, title = {{PAC-Bayesian Analysis of Contextual Bandits}}, year = {2011} } @article{bastani2017contextual, abstract = {The contextual bandit literature has traditionally focused on algorithms that address the exploration-exploitation tradeoff. In particular, greedy algorithms that exploit current estimates without any exploration may be sub-optimal in general. However, exploration-free greedy algorithms are desirable in practical settings where exploration may be costly or unethical (e.g., clinical trials). 
Surprisingly, we find that a simple greedy algorithm can be rate-optimal (achieves asymptotically optimal regret) if there is sufficient randomness in the observed contexts (covariates). We prove that this is always the case for a two-armed bandit under a general class of context distributions that satisfy a condition we term {\$}\backslashtextit{\{}covariate diversity{\}}{\$}. Furthermore, even absent this condition, we show that a greedy algorithm can be rate optimal with positive probability. Thus, standard bandit algorithms may unnecessarily explore. Motivated by these results, we introduce Greedy-First, a new algorithm that uses only observed contexts and rewards to determine whether to follow a greedy algorithm or to explore. We prove that this algorithm is rate-optimal without any additional assumptions on the context distribution or the number of arms. Extensive simulations demonstrate that Greedy-First successfully reduces exploration and outperforms existing (exploration-based) contextual bandit algorithms such as Thompson sampling or upper confidence bound (UCB).}, archivePrefix = {arXiv}, arxivId = {1704.09011}, author = {Bastani, Hamsa and Bayati, Mohsen and Khosravi, Khashayar}, eprint = {1704.09011}, journal = {arXiv preprint arXiv:1704.09011}, keywords = {contextual bandit,exploration-exploitation,greedy algorithm,sequential decision-making}, title = {{Mostly exploration-free algorithms for contextual bandits}}, url = {http://arxiv.org/pdf/1704.09011.pdf}, year = {2017} } @inproceedings{abbasi2011improved, author = {Abbasi-Yadkori, Yasin and P{\'{a}}l, D{\'{a}}vid and Szepesv{\'{a}}ri, Csaba}, booktitle = {Neural Information Processing Systems}, title = {{Improved algorithms for linear stochastic bandits}}, url = {https://yasinov.github.io/linear-bandits-nips2011.pdf}, year = {2011} } @inproceedings{xia2015budgeted, abstract = {Thompson sampling is one of the earliest randomized algorithms for multi-armed bandits (MAB). 
In this paper, we extend the Thompson sampling to Budgeted MAB, where there is random cost for pulling an arm and the total cost is constrained by a budget. We start with the case of Bernoulli bandits, in which the random rewards (costs) of an arm are independently sampled from a Bernoulli distribution. To implement the Thompson sampling algorithm in this case, at each round, we sample two numbers from the posterior distributions of the reward and cost for each arm, obtain their ratio, select the arm with the maximum ratio, and then update the posterior distributions. We prove that the distribution-dependent regret bound of this algorithm is {\$}O(\backslashln B){\$}, where {\$}B{\$} denotes the budget. By introducing a Bernoulli trial, we further extend this algorithm to the setting that the rewards (costs) are drawn from general distributions, and prove that its regret bound remains almost the same. Our simulation results demonstrate the effectiveness of the proposed algorithm.}, archivePrefix = {arXiv}, arxivId = {1505.00146}, author = {Xia, Yingce and Li, Haifang and Qin, Tao and Yu, Nenghai and Liu, Tie-Yan}, booktitle = {International Joint Conference on Artificial Intelligence}, eprint = {1505.00146}, isbn = {9781577357384}, title = {{Thompson sampling for budgeted multi-armed bandits}}, url = {http://arxiv.org/pdf/1505.00146.pdf}, year = {2015} } @article{robbins1952some, author = {Robbins, Herbert}, journal = {Bulletin of the American Mathematics Society}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {527--535}, title = {{Some aspects of the sequential design of experiments}}, url = {https://projecteuclid.org/download/pdf{\_}1/euclid.bams/1183517370}, volume = {58}, year = {1952} } @inproceedings{chakrabartimortal, author = {Chakrabarti, Deepayan and Kumar, Ravi and Radlinski, Filip and Upfal, Eli}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {273--280}, title = {{Mortal Multi-Armed Bandits.}} } 
@article{rusmevichientong2010linearly, address = {Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA}, author = {Rusmevichientong, Paat and Tsitsiklis, John N}, journal = {Math. Oper. Res.}, keywords = {bandits}, mendeley-tags = {bandits}, month = {may}, number = {2}, pages = {395--411}, publisher = {Informs}, title = {{Linearly Parameterized Bandits}}, volume = {35}, year = {2010} } @inproceedings{bnaya2013bandit, author = {Bnaya, Zahy and Puzis, Rami and Stern, Roni and Felner, Ariel}, booktitle = {International Conference on Social Computing}, keywords = {Data mining,Educational institutions,Equations,Heuristic algorithms,Heuristics Search,Mathematical model,Multi-armed bandit,Social Network Intelligence,Social network services,Tin,VMAB,bandit algorithms,multiarmed bandit problem with volatile arms,profiles matching,query processing,search criterion,search engines,social network crawler,social network queries,social networking (online),targeted crawling}, title = {{Bandit algorithms for social network queries}}, year = {2013} } @inproceedings{flaxman2005online, author = {Flaxman, Abraham D and Kalai, Adam Tauman and {Brendan McMahan}, Hugh}, booktitle = {Proceedings of the 16th annual ACM-SIAM Symposium On Discrete Algorithms}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {385--394}, publisher = {SIAM}, series = {SODA '05}, title = {{Online convex optimization in the bandit setting: gradient descent without a gradient}}, year = {2005} } @book{gittins1989multi, author = {Gittins, John C and Weber, Richard and Glazebrook, Kevin}, keywords = {bandits}, mendeley-tags = {bandits}, publisher = {Wiley}, title = {{Multi-armed Bandit Allocation Indices}}, year = {1989} } @incollection{russo2013eluder, author = {Russo, Daniel and {Van Roy}, Benjamin}, booktitle = {Neural Information Processing Systems}, title = {{Eluder Dimension and the Sample Complexity of Optimistic Exploration}}, url = 
{http://papers.nips.cc/paper/4909-eluder-dimension-and-the-sample-complexity-of-optimistic-exploration.pdf}, year = {2013} } @article{beygelzimer2010contextual, abstract = {We address the problem of learning in an online, bandit setting where the learner must repeatedly select among K actions, but only receives partial feedback based on its choices. We establish two new facts: First, using a new algorithm called Exp4.P, we show that it is possible to compete with the best in a set of N experts with probability {\$}1-delta while incurring regret at most O(sqrtKTln(N/delta)) over T time steps. The new algorithm is tested empirically in a large-scale, real-world dataset. Second, we give a new algorithm called VE that competes with a possibly infinite set of policies of VC-dimension d while incurring regret at most O(sqrtT(dln(T) + ln (1/delta))) with probability {\$}1-delta. These guarantees improve on those of all previous algorithms, whether in a stochastic or adversarial environment, and bring us closer to providing supervised learning type guarantees for the contextual bandit setting.}, author = {Beygelzimer, Alina and Langford, John and Li, Lihong and Reyzin, Lev and Schapire, Robert E}, journal = {Machine Learning}, pages = {14}, title = {{Contextual Bandit Algorithms with Supervised Learning Guarantees}}, url = {http://arxiv.org/abs/1002.4058}, volume = {15}, year = {2010} } @inproceedings{li2015online, abstract = {Classical collaborative filtering, and content-based filtering methods try to learn a static recommendation model given training data. These approaches are far from ideal in highly dynamic recommendation domains such as news recommendation and computational advertisement, where the set of items and users is very fluid. In this work, we investigate an adaptive clustering technique for content recommendation based on exploration-exploitation strategies in contextual multi-armed bandit settings. 
Our algorithm takes into account the collaborative effects that arise due to the interaction of the users with the items, by dynamically grouping users based on the items under consideration and, at the same time, grouping items based on the similarity of the clusterings induced over the users. The resulting algorithm thus takes advantage of preference patterns in the data in a way akin to collaborative filtering methods. We provide an empirical analysis on medium-size real-world datasets, showing scalability and increased prediction performance (as measured by click-through rate) over state-of-the-art methods for clustering bandits. We also provide a regret analysis within a standard linear stochastic noise setting.}, author = {Li, Shuai and Karatzoglou, Alexandros and Gentile, Claudio}, booktitle = {Conference on Research and Development in Information Retrieval}, title = {{Collaborative filtering bandits}}, url = {https://arxiv.org/pdf/1502.03473.pdf}, year = {2016} } @inproceedings{gavald`a2009, address = {Porto, Portugal}, booktitle = {Proceedings of the 20th international conference on Algorithmic Learning Theory}, editor = {Gavald{\`{a}}, Ricard and Lugosi, G{\'{a}}bor and Zeugmann, Thomas and Zilles, Sandra}, isbn = {978-3-642-04413-7}, keywords = {bandits}, mendeley-tags = {bandits}, month = {oct}, publisher = {Springer}, series = {ALT '09, Lecture Notes in Computer Science}, title = {{No Title}}, volume = {5809}, year = {2009} } @inproceedings{hazanextracting, author = {Hazan, Elad and Kale, Satyen}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {57--68}, title = {{Extracting Certainty from Uncertainty: Regret Bounded by Variation in Costs.}} } @inproceedings{lu2010contextual, annote = {From Duplicate 1 ( Contextual Multi-Armed Bandits - Lu, Tyler; P{\'{a}}l, David; P{\'{a}}l, Martin ) And Duplicate 4 ( Contextual Multi-Armed Bandits - Lu, Tyler; P{\'{a}}l, David; P{\'{a}}l, Martin ) From Duplicate 1 ( Contextual Multi-Armed Bandits - Lu, 
Tyler; P{\'{a}}l, David; P{\'{a}}l, Martin ) }, author = {Lu, Tyler and P{\'{a}}l, D{\'{a}}vid and P{\'{a}}l, Martin}, booktitle = {Proceedings of the 13th international conference on Artificial Intelligence and Statistics}, editor = {Teh, Yee Whye and Titterington, Mike}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {485--492}, title = {{Contextual Multi-Armed Bandits}}, volume = {9}, year = {2010} } @inproceedings{kalenon, author = {Kale, Satyen and Reyzin, Lev and Schapire, Robert E}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {1054--1062}, title = {{Non-Stochastic Bandit Slate Problems}} } @article{cope2009, author = {Cope, Eric W}, journal = {IEEE Transactions on Automatic Control}, number = {6}, pages = {1243--1253}, title = {{Regret and convergence bounds for immediate-reward reinforcement learning with continuous action spaces}}, url = {https://pdfs.semanticscholar.org/f7ec/f6102c14e939ce115a36a1efd02fe5034173.pdf}, volume = {54}, year = {2009} } @inproceedings{bubeck2009pure, abstract = {We consider the framework of stochastic multi-armed bandit problems and study the possibilities and limitations of strategies that perform an online exploration of the arms. The strategies are assessed in terms of their simple regret, a regret notion that captures the fact that exploration is only constrained by the number of available rounds (not necessarily known in advance), in contrast to the case when the cumulative regret is considered and when exploitation needs to be performed at the same time. We believe that this performance criterion is suited to situations when the cost of pulling an arm is expressed in terms of resources rather than rewards. We discuss the links between the simple and the cumulative regret. 
The main result is that the required explorationexploitation trade-offs are qualitatively different, in view of a general lower bound on the simple regret in terms of the cumulative regret.}, annote = {From Duplicate 1 ( Pure exploration in multi-armed bandits problems - Bubeck, S{\'{e}}bastien; Munos, R{\'{e}}mi; Stoltz, Gilles ) From Duplicate 1 ( Pure exploration in multi-armed bandits problems - Bubeck, S{\'{e}}bastien; Munos, R{\'{e}}mi; Stoltz, Gilles ) From Duplicate 4 ( Pure Exploration in Multi-armed Bandits Problems - Bubeck, S{\'{e}}bastien; Munos, R{\'{e}}mi; Stoltz, Gilles ) }, author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi and Stoltz, Gilles}, booktitle = {Algorithmic Learning Theory}, keywords = {bandits,computational,information theoretic learning with statistics,theory {\&} algorithms}, mendeley-tags = {bandits}, title = {{Pure exploration in multi-armed bandits problems}}, url = {https://arxiv.org/abs/0802.2655}, year = {2009} } @inproceedings{streeter2006asymptotically, abstract = {We present an asymptotically optimal algorithm for the max variant of the k-armed bandit problem. Given a set of k slot machines, each yielding payoff from a fixed (but unknown) distribution, we wish to allocate trials to the machines so as to maximize the expected maximum payoff � received over a series of n trials. Subject to certain distributional assumptions, we show that O ln ( 1 ln(n)2) $\delta$ ɛ2 � trials are sufficient to identify, with probability at least 1 − $\delta$, a machine whose expected maximum payoff is within ɛ of optimal. This result leads to a strategy for solving the problem that is asymptotically optimal in the following sense: the gap between the expected maximum payoff obtained by using our strategy for n trials and that obtained by pulling the single best arm for all n trials approaches zero as n → ∞.}, author = {Streeter, Matthew J. 
and Smith, Stephen F.}, booktitle = {AAAI Conference on Artificial Intelligence}, citeseer = {10.1.1.91.4735}, keywords = {constraint satisfaction,satisfiability}, pages = {135--142}, title = {{An Asymptotically Optimal Algorithm for the Max k-Armed Bandit Problem}}, year = {2006} } @article{Perrault2019, abstract = {We improve the efficiency of algorithms for stochastic $\backslash$emph{\{}combinatorial semi-bandits{\}}. In most interesting problems, state-of-the-art algorithms take advantage of structural properties of rewards, such as $\backslash$emph{\{}independence{\}}. However, while being minimax optimal in terms of regret, these algorithms are intractable. In our paper, we first reduce their implementation to a specific $\backslash$emph{\{}submodular maximization{\}}. Then, in case of $\backslash$emph{\{}matroid{\}} constraints, we design adapted approximation routines, thereby providing the first efficient algorithms that exploit the reward structure. In particular, we improve the state-of-the-art efficient gap-free regret bound by a factor {\$}\backslashsqrt{\{}k{\}}{\$}, where {\$}k{\$} is the maximum action size. Finally, we show how our improvement translates to more general $\backslash$emph{\{}budgeted combinatorial semi-bandits{\}}.}, archivePrefix = {arXiv}, arxivId = {1902.03794}, author = {Perrault, Pierre and Perchet, Vianney and Valko, Michal}, eprint = {1902.03794}, month = {feb}, title = {{Exploiting Structure of Uncertainty for Efficient Combinatorial Semi-Bandits}}, url = {http://arxiv.org/abs/1902.03794}, year = {2019} } @article{badanidiyuru2014resourceful, abstract = {We study contextual bandits with ancillary constraints on resources, which are common in real-world applications such as choosing ads or dynamic pricing of items. 
We design the first algorithm for solving these problems, and prove a regret guarantee with near-optimal statistical properties.}, archivePrefix = {arXiv}, arxivId = {arXiv:1402.6779v2}, author = {Badanidiyuru, A and Langford, John and Slivkins, Aleksandrs}, eprint = {arXiv:1402.6779v2}, journal = {arXiv preprint arXiv:1402.6779}, pages = {1--22}, title = {{Resourceful Contextual Bandits}}, url = {http://arxiv.org/abs/1402.6779}, year = {2014} } @article{wang2005bandit, author = {Wang, Chih-chun and Kulkarni, Sanjeev R and Poor, H Vincent}, journal = {IEEE Transactions on Automatic Control}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {338--355}, title = {{Bandit problems with side observations}}, volume = {50}, year = {2005} } @inproceedings{grunewalder2010regret, author = {Gr{\"{u}}new{\"{a}}lder, Steffen and Audibert, Jean-Yves and Opper, Manfred and Shawe-Taylor, John}, booktitle = {Proceedings of the 13th International Conference on Artificial Intelligence and Statistics}, keywords = {bandits}, mendeley-tags = {bandits}, title = {{Regret Bounds for Gaussian Process Bandit Problems}}, year = {2010} } @inproceedings{auer2016algorithm, abstract = {We present an algorithm that achieves almost optimal pseudo-regret bounds against adversarial and stochastic bandits. Against adversarial bandits the pseudo-regret is {\$}O(K\backslashsqrt{\{}n \backslashlog n{\}}){\$} and against stochastic bandits the pseudo-regret is {\$}O(\backslashsum{\_}i (\backslashlog n)/\backslashDelta{\_}i){\$}. We also show that no algorithm with {\$}O(\backslashlog n){\$} pseudo-regret against stochastic bandits can achieve {\$}\backslashtilde{\{}O{\}}(\backslashsqrt{\{}n{\}}){\$} expected regret against adaptive adversarial bandits. 
This complements previous results of Bubeck and Slivkins (2012) that show {\$}\backslashtilde{\{}O{\}}(\backslashsqrt{\{}n{\}}){\$} expected adversarial regret with {\$}O((\backslashlog n){\^{}}2){\$} stochastic pseudo-regret.}, archivePrefix = {arXiv}, arxivId = {1605.08722}, author = {Auer, Peter and Chiang, Chao-Kai}, booktitle = {Conference on Learning Theory}, eprint = {1605.08722}, file = {::}, month = {may}, title = {{An algorithm with nearly optimal pseudo-regret for both stochastic and adversarial bandits}}, url = {http://arxiv.org/abs/1605.08722}, year = {2016} } @inproceedings{becker2002, address = {Vancouver, British Columbia, Canada}, booktitle = {Proceedings of the 16th conference on advances in Neural Information Processing Systems}, editor = {Becker, Suzanna and Thrun, Sebastian and Obermayer, Klaus}, isbn = {0-262-02550-7}, month = {dec}, publisher = {MIT Press}, series = {NIPS '02}, title = {{No Title}}, year = {2002} } @inproceedings{lagree2017algorithms, abstract = {Influence maximization is the problem of finding influential users, or nodes, in a graph so as to maximize the spread of information. It has many applications in advertising and marketing on social networks. In this paper, we study a highly generic version of influence maximization, one of optimizing influence campaigns by sequentially selecting "spread seeds" from a set of influencers, a small subset of the node population, under the hypothesis that, in a given campaign, previously activated nodes remain "persistently" active throughout and thus do not yield further rewards. This problem is in particular relevant for an important form of online marketing, known as influencer marketing, in which the marketers target a sub-population of influential people, instead of the entire base of potential buyers. Importantly, we make no assumptions on the underlying diffusion model and we work in a setting where neither a diffusion network nor historical activation data are available. 
We call this problem online influencer marketing with persistence (in short, OIMP). We first discuss motivating scenarios and present our general approach. We introduce an estimator on the influencers' remaining potential -- the expected number of nodes that can still be reached from a given influencer -- and justify its strength to rapidly estimate the desired value, relying on real data gathered from Twitter. We then describe a novel algorithm, GT-UCB, relying on upper confidence bounds on the remaining potential. We show that our approach leads to high-quality spreads on both simulated and real datasets, even though it makes almost no assumptions on the diffusion medium. Importantly, it is orders of magnitude faster than state-of-the-art influence maximization methods, making it possible to deal with large-scale online scenarios.}, author = {Lagr{\'{e}}e, Paul and Capp{\'{e}}, Olivier and Cautis, Bogdan and Maniu, Silviu}, booktitle = {International Conference on Data Mining}, file = {::}, title = {{Algorithms for Online Influencer Marketing}}, year = {2017} } @inproceedings{chu2011contextual, author = {Chu, Wei and Li, Lihong and Reyzin, Lev and Schapire, Robert E}, booktitle = {International Conference on Artificial Intelligence and Statistics}, title = {{Contextual bandits with linear payoff functions}}, url = {http://proceedings.mlr.press/v15/chu11a/chu11a.pdf}, year = {2011} } @inproceedings{liu2018change-detection, author = {Liu, Fang and Lee, Joohyun and Shroff, Ness}, booktitle = {AAAI Conference on Artificial Intelligence}, title = {{A change-detection based framework for piecewise-stationary multi-armed bandit problem}}, url = {https://arxiv.org/pdf/1711.03539.pdf}, year = {2018} } @article{abernethy2008efficient, abstract = {We introduce an efficient algorithm for the problem of online linear optimization in the bandit setting which achieves the optimal {\$}O{\^{}}*(\backslashsqrt{\{}T{\}}){\$} regret. 
The setting is a natural generalization of the non-stochastic multi-armed bandit problem, and the existence of an efficient optimal algorithm has been posed as an open problem in a number of recent papers. We show how the difficulties encountered by previous approaches are overcome by the use of a self-concordant potential function. Our approach presents a novel connection between online learning and interior point methods.}, author = {Abernethy, Jacob Duncan and Hazan, Elad and Rakhlin, Alexander}, doi = {10.1080/09544820500115717}, institution = {EECS Department, University of California, Berkeley}, issn = {09544828}, journal = {Online}, number = {3}, pages = {540--543}, publisher = {Citeseer}, title = {{An Efficient Algorithm for Bandit Linear Optimization}}, url = {http://www.informaworld.com/openurl?genre=article{\&}doi=10.1080/09544820500115717{\&}magic=crossref}, volume = {1}, year = {2008} } @article{bowling2015heads, abstract = {Poker is a family of games that exhibit imperfect information, where players do not have full knowledge of past events. Whereas many perfect-information games have been solved (e.g., Connect Four and checkers), no nontrivial imperfect-information game played competitively by humans has previously been solved. Here, we announce that heads-up limit Texas hold'em is now essentially weakly solved. Furthermore, this computation formally proves the common wisdom that the dealer in the game holds a substantial advantage. This result was enabled by a new algorithm, CFR+, which is capable of solving extensive-form games orders of magnitude larger than previously possible. I'll see your program and raise you mine One of the fundamental differences between playing chess and two-handed poker is that the chessboard and the pieces on it are visible throughout the entire game, but an opponent's cards in poker are private. 
This informational deficit increases the complexity and the uncertainty in calculating the best course of action—to raise, to fold, or to call. Bowling et al. now report that they have developed a computer program that can do just that for the heads-up variant of poker known as Limit Texas Hold 'em (see the Perspective by Sandholm). Science, this issue p. 145; see also p. 122}, author = {Bowling, Michael and Burch, Neil and Johanson, Michael and Tammelin, Oskari}, journal = {Science}, number = {6218}, pages = {145--149}, title = {{Heads-up limit hold'em poker is solved}}, volume = {347}, year = {2015} } @article{mannor2004sample, author = {Mannor, Shie and Tsitsiklis, John N}, journal = {Journal of Machine Learning Research}, keywords = {bandits}, mendeley-tags = {bandits}, pages = {623--648}, title = {{The Sample Complexity of Exploration in the Multi-Armed Bandit Problem}}, volume = {5}, year = {2004} } @inproceedings{furnkranz2010, address = {Haifa, Israel}, booktitle = {Proceedings of the 27th International Conference on Machine Learning}, editor = {F{\"{u}}rnkranz, Johannes and Joachims, Thorsten}, keywords = {bandits}, mendeley-tags = {bandits}, month = {jun}, publisher = {Omnipress}, series = {ICML '10}, title = {{No Title}}, year = {2010} } @article{bubeck2013bandits, author = {Bubeck, S{\'{e}}bastien and Cesa-Bianchi, Nicol{\`{o}} and Lugosi, G{\'{a}}bor}, doi = {10.1109/TIT.2013.2277869}, issn = {0018-9448}, journal = {Information Theory, IEEE Transactions on}, keywords = {Electronic mail,Equations,Heavy-tailed distributions,Indexes,Probability distribution,Random variables,Robustness,Standards,regret bounds,robust estimators,stochastic multi-armed bandit}, number = {11}, pages = {7711--7717}, title = {{Bandits With Heavy Tail}}, volume = {59}, year = {2013} } @phdthesis{filippi2010strategies, author = {Filippi, Sarah}, keywords = {bandits}, mendeley-tags = {bandits}, school = {T{\{}{\'{e}}{\}}l{\{}{\'{e}}{\}}com ParisTech}, title = 
{{Strat{\{}{\'{e}}{\}}gies optimistes en apprentissage par renforcement}}, year = {2010} } @article{bull2015adaptive, author = {Bull, Adam D.}, journal = {Bernoulli}, keywords = {bandits on taxonomies,continuum-armed bandits,noisy global optimisation,tree-armed bandits,zooming dimension}, number = {4}, pages = {2289--2307}, title = {{Adaptive-treed bandits}}, url = {https://arxiv.org/pdf/1302.2489.pdf}, volume = {21}, year = {2015} } @inproceedings{cohen2006, address = {Pittsburgh, Pennsylvania, USA}, booktitle = {Proceedings of the 23rd International Conference on Machine Learning}, editor = {Cohen, William W and Moore, Andrew}, isbn = {1-59593-383-2}, month = {jun}, publisher = {ACM}, series = {ICML '06, ACM International Conference Proceeding Series}, title = {{No Title}}, volume = {148}, year = {2006} } @article{munos2002variable, author = {Munos, R{\'{e}}mi and Moore, Andrew}, journal = {Machine Learning}, pages = {291--323}, title = {{Variable Resolution Discretization in Optimal Control}}, volume = {49}, year = {2002} } @misc{pandora2013, author = {{Pandora}}, howpublished = {http://www.pandora.com}, title = {{Internet Radio}}, url = {http://www.pandora.com}, year = {2013} } @article{auer2002nonstochastic, author = {Auer, Peter and Cesa-Bianchi, Nicol{\`{o}} and Freund, Yoav and Schapire, Robert E.}, journal = {SIAM Journal on Computing}, keywords = {bandits}, mendeley-tags = {bandits}, number = {1}, pages = {48--77}, title = {{The nonstochastic multi-armed bandit problem}}, url = {https://epubs.siam.org/doi/pdf/10.1137/S0097539701398375}, volume = {32}, year = {2002} } @inproceedings{wang2014bayesian, abstract = {Bayesian optimization is a powerful global optimization technique for expensive black-box functions. One of its shortcomings is that it requires auxiliary optimization of an acquisition function at each iteration. This auxiliary optimization can be costly and very hard to carry out in practice. 
Moreover, it creates serious theoretical concerns, as most of the convergence results assume that the exact optimum of the acquisition function can be found. In this paper, we introduce a new technique for efficient global optimization that combines Gaussian process confidence bounds and treed simultaneous optimistic optimization to eliminate the need for auxiliary optimization of acquisition functions. The experiments with global optimization benchmarks and a novel application to automatic information extraction demonstrate that the resulting technique is more efficient than the two approaches from which it draws inspiration. Unlike most theoretical analyses of Bayesian optimization with Gaussian processes, our finite-time convergence rate proofs do not require exact optimization of an acquisition function. That is, our approach eliminates the unsatisfactory assumption that a difficult, potentially NP-hard, problem has to be solved in order to obtain vanishing regret rates.}, archivePrefix = {arXiv}, arxivId = {1402.7005}, author = {Wang, Ziyu and Shakibi, Babak and Jin, Lin and de Freitas, Nando}, booktitle = {International Conference on Artificial Intelligence and Statistics}, eprint = {1402.7005}, file = {::}, issn = {15337928}, month = {feb}, title = {{Bayesian Multi-Scale Optimistic Optimization}}, url = {http://arxiv.org/abs/1402.7005}, year = {2014} } @article{garivier2018explore, abstract = {We revisit lower bounds on the regret in the case of multi-armed bandit problems. We obtain non-asymptotic, distribution-dependent bounds and provide straightforward proofs based only on well-known properties of Kullback-Leibler divergences. These bounds show in particular that in an initial phase the regret grows almost linearly, and that the well-known logarithmic growth of the regret only holds in a final phase. 
The proof techniques come to the essence of the information-theoretic arguments used and they are deprived of all unnecessary complications.}, archivePrefix = {arXiv}, arxivId = {1602.07182}, author = {Garivier, Aur{\'{e}}lien and M{\'{e}}nard, Pierre and Stoltz, Gilles}, eprint = {1602.07182}, file = {::}, journal = {Mathematics of Operations Research}, title = {{Explore First, Exploit Next: The True Shape of Regret in Bandit Problems}}, year = {2018} } @inproceedings{Audibert2007a, address = {Berlin, Heidelberg}, author = {Audibert, Jean-Yves and Munos, R{\'{e}}mi and Szepesv{\'{a}}ri, Csaba}, booktitle = {Algorithmic Learning Theory}, editor = {Hutter, Marcus and Servedio, Rocco A. and Takimoto, Eiji}, issn = {0302-9743}, month = {oct}, pages = {150--165}, publisher = {Springer Berlin Heidelberg}, series = {Lecture Notes in Computer Science}, title = {{Tuning Bandit Algorithms in Stochastic Environments}}, url = {http://dl.acm.org/citation.cfm?id=1422422.1422442}, volume = {4754}, year = {2007} } @inproceedings{preux2014bandits, abstract = {We consider function optimization as a sequential decision making problem under the budget constraint. Such constraint limits the number of objective function evaluations allowed during the optimization. We consider an algorithm inspired by a continuous version of a multi-armed bandit problem which attacks this optimization problem by solving the tradeoff between exploration (initial quasi-uniform search of the domain) and exploitation (local optimization around the potentially global maxima). We introduce the so-called Simultaneous Optimistic Optimization (SOO), a deterministic algorithm that works by domain partitioning. The benefit of such an approach are the guarantees on the returned solution and the numerical efficiency of the algorithm. 
We present this machine learning rooted approach to optimization, and provide the empirical assessment of SOO on the CEC'2014 competition on single objective real-parameter numerical optimization testsuite.}, author = {Preux, Philippe and Munos, R{\'{e}}mi and Valko, Michal}, booktitle = {Congress on Evolutionary Computation}, title = {{Bandits attack function optimization}}, url = {https://hal.inria.fr/hal-00978637/document}, year = {2014} } @inproceedings{lagree2016multiple-play, author = {Lagr{\'{e}}e, Paul and Vernade, Claire and Capp{\'{e}}, Olivier}, booktitle = {Neural Information Processing Systems}, file = {:Users/valkom/Library/Application Support/Mendeley Desktop/Downloaded/Lagr{\'{e}}e, Vernade, Cappe - 2016 - Multiple-play bandits in the position-based model.pdf:pdf}, title = {{Multiple-play bandits in the position-based model}}, url = {https://papers.nips.cc/paper/6546-multiple-play-bandits-in-the-position-based-model.pdf}, year = {2016} } @article{bubeck2011pure, abstract = {We consider the framework of stochastic multi-armed bandit problems and study the possibilities and limitations of forecasters that perform an on-line exploration of the arms. These forecasters are assessed in terms of their simple regret, a regret notion that captures the fact that exploration is only constrained by the number of available rounds (not necessarily known in advance), in contrast to the case when the cumulative regret is considered and when exploitation needs to be performed at the same time. We believe that this performance criterion is suited to situations when the cost of pulling an arm is expressed in terms of resources rather than rewards. We discuss the links between the simple and the cumulative regret. One of the main results in the case of a finite number of arms is a general lower bound on the simple regret of a forecaster in terms of its cumulative regret: the smaller the latter, the larger the former. 
Keeping this result in mind, we then exhibit upper bounds on the simple regret of some forecasters. The paper ends with a study devoted to continuous-armed bandit problems; we show that the simple regret can be minimized with respect to a family of probability distributions if and only if the cumulative regret can be minimized for it. Based on this equivalence, we are able to prove that the separable metric spaces are exactly the metric spaces on which these regrets can be minimized with respect to the family of all probability distributions with continuous mean-payoff functions. ?? 2010 Elsevier B.V. All rights reserved.}, author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi and Stoltz, Gilles}, journal = {Theoretical Computer Science}, keywords = {Continuous-armed bandits,Efficient exploration,Multi-armed bandits,Simple regret}, number = {19}, pages = {1832--1852}, title = {{Pure exploration in finitely-armed and continuous-armed bandits}}, url = {https://hal.archives-ouvertes.fr/hal-00257454v6/document}, volume = {412}, year = {2011} } @inproceedings{badanidiyuru2013bandits, abstract = {Multi-armed bandit problems are the predominant theoretical model of exploration-exploitation tradeoffs in learning, and they have countless applications ranging from medical trials, to communication networks, to Web search and advertising. In many of these application domains the learner may be constrained by one or more supply (or budget) limits, in addition to the customary limitation on the time horizon. The literature lacks a general model encompassing these sorts of problems. We introduce such a model, called "bandits with knapsacks", that combines aspects of stochastic integer programming with online learning. A distinctive feature of our problem, in comparison to the existing regret-minimization literature, is that the optimal policy for a given latent distribution may significantly outperform the policy that plays the optimal fixed arm. 
Consequently, achieving sub linear regret in the bandits-with-knapsacks problem is significantly more challenging than in conventional bandit problems. We present two algorithms whose reward is close to the information-theoretic optimum: one is based on a novel "balanced exploration" paradigm, while the other is a primal-dual algorithm that uses multiplicative updates. Further, we prove that the regret achieved by both algorithms is optimal up to polylogarithmic factors. We illustrate the generality of the problem by presenting applications in a number of different domains including electronic commerce, routing, and scheduling. As one example of a concrete application, we consider the problem of dynamic posted pricing with limited supply and obtain the first algorithm whose regret, with respect to the optimal dynamic policy, is sub linear in the supply.}, archivePrefix = {arXiv}, arxivId = {1305.2545}, author = {Badanidiyuru, Ashwinkumar and Kleinberg, Robert and Slivkins, Aleksandrs}, booktitle = {Proceedings - Annual IEEE Symposium on Foundations of Computer Science, FOCS}, doi = {10.1109/FOCS.2013.30}, eprint = {1305.2545}, isbn = {9780769551357}, issn = {02725428}, keywords = {Dynamic ad allocation,Dynamic pricing,Dynamic procurement,Exploratio}, pages = {207--216}, title = {{Bandits with knapsacks}}, year = {2013} }